From 1cbecc38ac7b03dfdc3a75e784473f08bda5e994 Mon Sep 17 00:00:00 2001
From: Marijn Schouten <mhkbst@gmail.com>
Date: Fri, 7 Mar 2025 11:17:39 +0000
Subject: [PATCH 1/2] Separate the unescape functions for string, byte string
 and C string, but avoid duplicating code via macro_rules.

Also uses NonZero (adding `NonZero<char>` support) in the C string unescaping API: C strings cannot contain nul bytes, and this invariant can be captured in the type system.
---
 compiler/rustc_ast/src/util/literal.rs        |  17 +-
 compiler/rustc_lexer/src/unescape.rs          | 515 +++++++++---------
 compiler/rustc_lexer/src/unescape/tests.rs    |   2 +-
 compiler/rustc_parse/src/lexer/mod.rs         |   6 +-
 library/core/src/num/niche_types.rs           |   2 +
 library/core/src/num/nonzero.rs               |   1 +
 .../crates/parser/src/lexed_str.rs            | 139 +++--
 .../crates/syntax/src/ast/token_ext.rs        |  26 +-
 .../crates/syntax/src/validation.rs           |  26 +-
 9 files changed, 354 insertions(+), 380 deletions(-)

diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 6896ac723fa58..dc66a42dc1c6a 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -3,7 +3,7 @@
 use std::{ascii, fmt, str};
 
 use rustc_lexer::unescape::{
-    MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
+    MixedUnit, unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str,
 };
 use rustc_span::{Span, Symbol, kw, sym};
 use tracing::debug;
@@ -87,11 +87,10 @@ impl LitKind {
                     // Force-inlining here is aggressive but the closure is
                     // called on every char in the string, so it can be hot in
                     // programs with many long strings containing escapes.
-                    unescape_unicode(
+                    unescape_str(
                         s,
-                        Mode::Str,
                         &mut #[inline(always)]
-                        |_, c| match c {
+                        |_, res| match res {
                             Ok(c) => buf.push(c),
                             Err(err) => {
                                 assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -111,8 +110,8 @@ impl LitKind {
             token::ByteStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
-                    Ok(c) => buf.push(byte_from_char(c)),
+                unescape_byte_str(s, &mut |_, res| match res {
+                    Ok(b) => buf.push(b),
                     Err(err) => {
                         assert!(!err.is_fatal(), "failed to unescape string literal")
                     }
@@ -128,11 +127,11 @@ impl LitKind {
             token::CStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
+                unescape_cstr(s, &mut |_span, c| match c {
                     Ok(MixedUnit::Char(c)) => {
-                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                        buf.extend_from_slice(c.get().encode_utf8(&mut [0; 4]).as_bytes())
                     }
-                    Ok(MixedUnit::HighByte(b)) => buf.push(b),
+                    Ok(MixedUnit::HighByte(b)) => buf.push(b.get()),
                     Err(err) => {
                         assert!(!err.is_fatal(), "failed to unescape C string literal")
                     }
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index d6ea4249247f3..5c7d1106f568f 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -1,6 +1,7 @@
 //! Utilities for validating string and char literals and turning them into
 //! values they represent.
 
+use std::num::NonZero;
 use std::ops::Range;
 use std::str::Chars;
 
@@ -80,203 +81,246 @@ impl EscapeError {
     }
 }
 
-/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without
-/// quotes) and produces a sequence of escaped characters or errors.
-///
-/// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
-/// the callback will be called exactly once.
-pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<char, EscapeError>),
-{
-    match mode {
-        Char | Byte => {
-            let mut chars = src.chars();
-            let res = unescape_char_or_byte(&mut chars, mode);
-            callback(0..(src.len() - chars.as_str().len()), res);
-        }
-        Str | ByteStr => unescape_non_raw_common(src, mode, callback),
-        RawStr | RawByteStr => check_raw_common(src, mode, callback),
-        RawCStr => check_raw_common(src, mode, &mut |r, mut result| {
-            if let Ok('\0') = result {
-                result = Err(EscapeError::NulInCStr);
-            }
-            callback(r, result)
-        }),
-        CStr => unreachable!(),
-    }
-}
-
 /// Used for mixed utf8 string literals, i.e. those that allow both unicode
 /// chars and high bytes.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum MixedUnit {
-    /// Used for ASCII chars (written directly or via `\x00`..`\x7f` escapes)
+    /// Used for ASCII chars (written directly or via `\x01`..`\x7f` escapes)
     /// and Unicode chars (written directly or via `\u` escapes).
     ///
     /// For example, if '¥' appears in a string it is represented here as
     /// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte
     /// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]`
-    Char(char),
+    Char(NonZero<char>),
 
     /// Used for high bytes (`\x80`..`\xff`).
     ///
     /// For example, if `\xa5` appears in a string it is represented here as
     /// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant
     /// byte string as the single byte `0xa5`.
-    HighByte(u8),
+    HighByte(NonZero<u8>),
 }
 
-impl From<char> for MixedUnit {
-    fn from(c: char) -> Self {
+impl From<NonZero<char>> for MixedUnit {
+    fn from(c: NonZero<char>) -> Self {
         MixedUnit::Char(c)
     }
 }
 
-impl From<u8> for MixedUnit {
-    fn from(n: u8) -> Self {
-        if n.is_ascii() { MixedUnit::Char(n as char) } else { MixedUnit::HighByte(n) }
+impl From<NonZero<u8>> for MixedUnit {
+    fn from(byte: NonZero<u8>) -> Self {
+        if byte.get().is_ascii() {
+            MixedUnit::Char(NonZero::new(byte.get() as char).unwrap())
+        } else {
+            MixedUnit::HighByte(byte)
+        }
     }
 }
+impl TryFrom<char> for MixedUnit {
+    type Error = EscapeError;
 
-/// Takes the contents of a mixed-utf8 literal (without quotes) and produces
-/// a sequence of escaped characters or errors.
-///
-/// Values are returned by invoking `callback`.
-pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
-{
-    match mode {
-        CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| {
-            if let Ok(MixedUnit::Char('\0')) = result {
-                result = Err(EscapeError::NulInCStr);
-            }
-            callback(r, result)
-        }),
-        Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(),
+    fn try_from(c: char) -> Result<Self, EscapeError> {
+        NonZero::new(c).map(MixedUnit::Char).ok_or(EscapeError::NulInCStr)
     }
 }
 
-/// Takes a contents of a char literal (without quotes), and returns an
-/// unescaped char or an error.
-pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
-    unescape_char_or_byte(&mut src.chars(), Char)
+impl TryFrom<u8> for MixedUnit {
+    type Error = EscapeError;
+
+    fn try_from(byte: u8) -> Result<Self, EscapeError> {
+        NonZero::<u8>::new(byte).map(From::from).ok_or(EscapeError::NulInCStr)
+    }
 }
 
-/// Takes a contents of a byte literal (without quotes), and returns an
-/// unescaped byte or an error.
-pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
-    unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char)
+macro_rules! check {
+    ($string_ty:literal
+     ($check:ident: $char2unit:expr => $unit:ty)) => {
+        #[doc = concat!("Take the contents of a raw ", $string_ty,
+                        " literal (without quotes) and produce a sequence of results of `",
+                        stringify!($unit), "` or error (returned via `callback`).",
+                        "\nNB: Raw strings don't do any unescaping, but do produce errors on bare CR.")]
+        pub fn $check(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
+        {
+            src.char_indices().for_each(|(pos, c)| {
+                callback(
+                    pos..pos + c.len_utf8(),
+                    if c == '\r' { Err(EscapeError::BareCarriageReturnInRawString) } else { $char2unit(c) },
+                );
+            });
+        }
+    };
 }
 
-/// What kind of literal do we parse.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum Mode {
-    Char,
+check!("string" (check_raw_str: Ok => char));
+check!("byte string" (check_raw_byte_str: ascii_char_to_byte => u8));
+check!("C string" (check_raw_cstr: |c| NonZero::<char>::new(c).ok_or(EscapeError::NulInCStr) => NonZero<char>));
+
+macro_rules! unescape {
+    ($string_ty:literal
+     ($unescape:ident: $char2unit:expr => $unit:ty)
+     $scan_escape:ident) => {
+        #[doc = concat!("Take the contents of a ", $string_ty,
+                        " literal (without quotes) and produce a sequence of results of unescaped `",
+                        stringify!($unit), "` or error (returned via `callback`).")]
+        pub fn $unescape(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
+        {
+            let mut chars = src.chars();
+            while let Some(c) = chars.next() {
+                let start = src.len() - chars.as_str().len() - c.len_utf8();
+                let res = match c {
+                    '\\' => {
+                        if let Some(b'\n') = chars.as_str().as_bytes().first() {
+                            let _ = chars.next();
+                            // skip whitespace for backslash newline, see [Rust language reference]
+                            // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
+                            let mut callback_err = |range, err| callback(range, Err(err));
+                            skip_ascii_whitespace(&mut chars, start, &mut callback_err);
+                            continue;
+                        } else {
+                            $scan_escape(&mut chars)
+                        }
+                    }
+                    '"' => Err(EscapeError::EscapeOnlyChar),
+                    '\r' => Err(EscapeError::BareCarriageReturn),
+                    c => $char2unit(c),
+                };
+                let end = src.len() - chars.as_str().len();
+                callback(start..end, res);
+            }
+        }
+    };
+}
 
-    Byte,
+unescape!("string" (unescape_str: Ok => char) scan_escape_str);
+unescape!("byte string" (unescape_byte_str: ascii_char_to_byte => u8) scan_escape_byte_str);
+unescape!("C string" (unescape_cstr: TryFrom::try_from => MixedUnit) scan_escape_c_str);
 
-    Str,
-    RawStr,
+/// Skip ASCII whitespace, except for the formfeed character
+/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
+/// Warns on unescaped newline and following non-ASCII whitespace.
+fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
+where
+    F: FnMut(Range<usize>, EscapeError),
+{
+    let rest = chars.as_str();
+    let first_non_space = rest
+        .bytes()
+        .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
+        .unwrap_or(rest.len());
+    let (space, rest) = rest.split_at(first_non_space);
+    // backslash newline adds 2 bytes
+    let end = start + 2 + first_non_space;
+    if space.contains('\n') {
+        callback(start..end, EscapeError::MultipleSkippedLinesWarning);
+    }
+    *chars = rest.chars();
+    if let Some(c) = chars.clone().next() {
+        if c.is_whitespace() {
+            // for error reporting, include the character that was not skipped in the span
+            callback(start..end + c.len_utf8(), EscapeError::UnskippedWhitespaceWarning);
+        }
+    }
+}
 
-    ByteStr,
-    RawByteStr,
+/// Takes the contents of a char literal (without quotes),
+/// and returns an unescaped char or an error.
+pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
+    unescape_char_iter(&mut src.chars())
+}
 
-    CStr,
-    RawCStr,
+/// Takes the contents of a byte literal (without quotes),
+/// and returns an unescaped byte or an error.
+pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
+    unescape_byte_iter(&mut src.chars())
 }
 
-impl Mode {
-    pub fn in_double_quotes(self) -> bool {
-        match self {
-            Str | RawStr | ByteStr | RawByteStr | CStr | RawCStr => true,
-            Char | Byte => false,
+macro_rules! unescape_iter {
+    (($unescape:ident: $char2unit:expr => $unit:ty) $scan_escape:ident) => {
+        fn $unescape(chars: &mut Chars<'_>) -> Result<$unit, EscapeError> {
+            let res = match chars.next().ok_or(EscapeError::ZeroChars)? {
+                '\\' => $scan_escape(chars),
+                '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
+                '\r' => Err(EscapeError::BareCarriageReturn),
+                c => $char2unit(c),
+            }?;
+            if chars.next().is_some() {
+                return Err(EscapeError::MoreThanOneChar);
+            }
+            Ok(res)
         }
-    }
+    };
+}
 
-    /// Are `\x80`..`\xff` allowed?
-    fn allow_high_bytes(self) -> bool {
-        match self {
-            Char | Str => false,
-            Byte | ByteStr | CStr => true,
-            RawStr | RawByteStr | RawCStr => unreachable!(),
-        }
-    }
+unescape_iter!((unescape_char_iter: Ok => char) scan_escape_str);
+unescape_iter!((unescape_byte_iter: ascii_char_to_byte => u8) scan_escape_byte_str);
 
-    /// Are unicode (non-ASCII) chars allowed?
-    #[inline]
-    fn allow_unicode_chars(self) -> bool {
-        match self {
-            Byte | ByteStr | RawByteStr => false,
-            Char | Str | RawStr | CStr | RawCStr => true,
+macro_rules! scan_escape {
+    ($scan:ident: $zero_result:expr, $from_hex:expr, $from_unicode:expr => $unit:ty) => {
+        fn $scan(chars: &mut Chars<'_>) -> Result<$unit, EscapeError> {
+            // Previous character was '\\', unescape what follows.
+            let c = chars.next().ok_or(EscapeError::LoneSlash)?;
+            if c == '0' {
+                $zero_result
+            } else {
+                simple_escape(c).map(|b| b.get().try_into().unwrap()).or_else(|c| match c {
+                    'x' => $from_hex(hex_escape(chars)?),
+                    'u' => $from_unicode({
+                        let value = unicode_escape(chars)?;
+                        if value > char::MAX as u32 {
+                            Err(EscapeError::OutOfRangeUnicodeEscape)
+                        } else {
+                            char::from_u32(value).ok_or(EscapeError::LoneSurrogateUnicodeEscape)
+                        }
+                    }),
+                    _ => Err(EscapeError::InvalidEscape),
+                })
+            }
         }
-    }
+    };
+}
 
-    /// Are unicode escapes (`\u`) allowed?
-    fn allow_unicode_escapes(self) -> bool {
-        match self {
-            Byte | ByteStr => false,
-            Char | Str | CStr => true,
-            RawByteStr | RawStr | RawCStr => unreachable!(),
-        }
-    }
+scan_escape!(scan_escape_str: Ok('\0'), char_from_byte, |id| id => char);
+scan_escape!(scan_escape_byte_str: Ok(b'\0'), Ok, |_| Err(EscapeError::UnicodeEscapeInByte) => u8);
+scan_escape!(scan_escape_c_str: Err(EscapeError::NulInCStr), TryInto::try_into, |r: Result<char, _>| r?.try_into() => MixedUnit);
 
-    pub fn prefix_noraw(self) -> &'static str {
-        match self {
-            Char | Str | RawStr => "",
-            Byte | ByteStr | RawByteStr => "b",
-            CStr | RawCStr => "c",
-        }
-    }
+fn char_from_byte(b: u8) -> Result<char, EscapeError> {
+    if b.is_ascii() { Ok(b as char) } else { Err(EscapeError::OutOfRangeHexEscape) }
 }
 
-fn scan_escape<T: From<char> + From<u8>>(
-    chars: &mut Chars<'_>,
-    mode: Mode,
-) -> Result<T, EscapeError> {
+/// Parse the character of an ASCII escape (except nul) without the leading backslash.
+fn simple_escape(c: char) -> Result<NonZero<u8>, char> {
     // Previous character was '\\', unescape what follows.
-    let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? {
-        '"' => '"',
-        'n' => '\n',
-        'r' => '\r',
-        't' => '\t',
-        '\\' => '\\',
-        '\'' => '\'',
-        '0' => '\0',
-        'x' => {
-            // Parse hexadecimal character code.
-
-            let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
-            let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
-
-            let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
-            let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
-
-            let value = (hi * 16 + lo) as u8;
-
-            return if !mode.allow_high_bytes() && !value.is_ascii() {
-                Err(EscapeError::OutOfRangeHexEscape)
-            } else {
-                // This may be a high byte, but that will only happen if `T` is
-                // `MixedUnit`, because of the `allow_high_bytes` check above.
-                Ok(T::from(value))
-            };
-        }
-        'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from),
-        _ => return Err(EscapeError::InvalidEscape),
-    };
-    Ok(T::from(res))
+    Ok(NonZero::new(match c {
+        '"' => b'"',
+        'n' => b'\n',
+        'r' => b'\r',
+        't' => b'\t',
+        '\\' => b'\\',
+        '\'' => b'\'',
+        _ => Err(c)?,
+    })
+    .unwrap())
 }
 
-fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<char, EscapeError> {
-    // We've parsed '\u', now we have to parse '{..}'.
+/// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
+fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError> {
+    let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
+    let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
+
+    let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
+    let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
+
+    Ok((hi * 16 + lo) as u8)
+}
 
+/// Parse the braces with hexadecimal characters (and underscores) part of a unicode escape.
+/// This r"{...}" normally comes after r"\u" and cannot start with an underscore.
+fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeError> {
     if chars.next() != Some('{') {
         return Err(EscapeError::NoBraceInUnicodeEscape);
     }
 
     // First character must be a hexadecimal digit.
-    let mut n_digits = 1;
     let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
         '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
         '}' => return Err(EscapeError::EmptyUnicodeEscape),
@@ -285,28 +329,19 @@ fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<ch
 
     // First character is valid, now parse the rest of the number
     // and closing brace.
+    let mut n_digits = 1;
     loop {
         match chars.next() {
             None => return Err(EscapeError::UnclosedUnicodeEscape),
             Some('_') => continue,
             Some('}') => {
-                if n_digits > 6 {
-                    return Err(EscapeError::OverlongUnicodeEscape);
-                }
-
                 // Incorrect syntax has higher priority for error reporting
                 // than unallowed value for a literal.
-                if !allow_unicode_escapes {
-                    return Err(EscapeError::UnicodeEscapeInByte);
-                }
-
-                break std::char::from_u32(value).ok_or({
-                    if value > 0x10FFFF {
-                        EscapeError::OutOfRangeUnicodeEscape
-                    } else {
-                        EscapeError::LoneSurrogateUnicodeEscape
-                    }
-                });
+                return if n_digits > 6 {
+                    Err(EscapeError::OverlongUnicodeEscape)
+                } else {
+                    Ok(value)
+                };
             }
             Some(c) => {
                 let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
@@ -321,118 +356,74 @@ fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<ch
     }
 }
 
-#[inline]
-fn ascii_check(c: char, allow_unicode_chars: bool) -> Result<char, EscapeError> {
-    if allow_unicode_chars || c.is_ascii() { Ok(c) } else { Err(EscapeError::NonAsciiCharInByte) }
-}
-
-fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
-    let c = chars.next().ok_or(EscapeError::ZeroChars)?;
-    let res = match c {
-        '\\' => scan_escape(chars, mode),
-        '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
-        '\r' => Err(EscapeError::BareCarriageReturn),
-        _ => ascii_check(c, mode.allow_unicode_chars()),
-    }?;
-    if chars.next().is_some() {
-        return Err(EscapeError::MoreThanOneChar);
-    }
-    Ok(res)
-}
-
-/// Takes a contents of a string literal (without quotes) and produces a
-/// sequence of escaped characters or errors.
-fn unescape_non_raw_common<F, T: From<char> + From<u8>>(src: &str, mode: Mode, callback: &mut F)
+/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without quotes)
+/// and produces a sequence of unescaped characters or errors,
+/// which are returned by invoking `callback`.
+///
+/// For `Char` and `Byte` modes, the callback will be called exactly once.
+pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
 where
-    F: FnMut(Range<usize>, Result<T, EscapeError>),
+    F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
-    let mut chars = src.chars();
-    let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop
-
-    // The `start` and `end` computation here is complicated because
-    // `skip_ascii_whitespace` makes us to skip over chars without counting
-    // them in the range computation.
-    while let Some(c) = chars.next() {
-        let start = src.len() - chars.as_str().len() - c.len_utf8();
-        let res = match c {
-            '\\' => {
-                match chars.clone().next() {
-                    Some('\n') => {
-                        // Rust language specification requires us to skip whitespaces
-                        // if unescaped '\' character is followed by '\n'.
-                        // For details see [Rust language reference]
-                        // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
-                        skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
-                            callback(range, Err(err))
-                        });
-                        continue;
-                    }
-                    _ => scan_escape::<T>(&mut chars, mode),
-                }
-            }
-            '"' => Err(EscapeError::EscapeOnlyChar),
-            '\r' => Err(EscapeError::BareCarriageReturn),
-            _ => ascii_check(c, allow_unicode_chars).map(T::from),
-        };
-        let end = src.len() - chars.as_str().len();
-        callback(start..end, res);
+    let mut byte_callback =
+        |range, res: Result<u8, EscapeError>| callback(range, res.map(char::from));
+    match mode {
+        Char => {
+            let mut chars = src.chars();
+            let res = unescape_char_iter(&mut chars);
+            callback(0..(src.len() - chars.as_str().len()), res);
+        }
+        Byte => {
+            let mut chars = src.chars();
+            let res = unescape_byte_iter(&mut chars).map(char::from);
+            callback(0..(src.len() - chars.as_str().len()), res);
+        }
+        Str => unescape_str(src, callback),
+        ByteStr => unescape_byte_str(src, &mut byte_callback),
+        RawStr => check_raw_str(src, callback),
+        RawByteStr => check_raw_byte_str(src, &mut byte_callback),
+        RawCStr => check_raw_cstr(src, &mut |r, res: Result<NonZero<char>, EscapeError>| {
+            callback(r, res.map(|c| c.get()))
+        }),
+        CStr => unreachable!(),
     }
 }
 
-fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
-where
-    F: FnMut(Range<usize>, EscapeError),
-{
-    let tail = chars.as_str();
-    let first_non_space = tail
-        .bytes()
-        .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
-        .unwrap_or(tail.len());
-    if tail[1..first_non_space].contains('\n') {
-        // The +1 accounts for the escaping slash.
-        let end = start + first_non_space + 1;
-        callback(start..end, EscapeError::MultipleSkippedLinesWarning);
-    }
-    let tail = &tail[first_non_space..];
-    if let Some(c) = tail.chars().next() {
-        if c.is_whitespace() {
-            // For error reporting, we would like the span to contain the character that was not
-            // skipped. The +1 is necessary to account for the leading \ that started the escape.
-            let end = start + first_non_space + c.len_utf8() + 1;
-            callback(start..end, EscapeError::UnskippedWhitespaceWarning);
+/// What kind of literal do we parse.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum Mode {
+    Char,
+
+    Byte,
+
+    Str,
+    RawStr,
+
+    ByteStr,
+    RawByteStr,
+
+    CStr,
+    RawCStr,
+}
+
+impl Mode {
+    pub fn in_double_quotes(self) -> bool {
+        match self {
+            Str | RawStr | ByteStr | RawByteStr | CStr | RawCStr => true,
+            Char | Byte => false,
         }
     }
-    *chars = tail.chars();
-}
 
-/// Takes a contents of a string literal (without quotes) and produces a
-/// sequence of characters or errors.
-/// NOTE: Raw strings do not perform any explicit character escaping, here we
-/// only produce errors on bare CR.
-fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<char, EscapeError>),
-{
-    let mut chars = src.chars();
-    let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop
-
-    // The `start` and `end` computation here matches the one in
-    // `unescape_non_raw_common` for consistency, even though this function
-    // doesn't have to worry about skipping any chars.
-    while let Some(c) = chars.next() {
-        let start = src.len() - chars.as_str().len() - c.len_utf8();
-        let res = match c {
-            '\r' => Err(EscapeError::BareCarriageReturnInRawString),
-            _ => ascii_check(c, allow_unicode_chars),
-        };
-        let end = src.len() - chars.as_str().len();
-        callback(start..end, res);
+    pub fn prefix_noraw(self) -> &'static str {
+        match self {
+            Char | Str | RawStr => "",
+            Byte | ByteStr | RawByteStr => "b",
+            CStr | RawCStr => "c",
+        }
     }
 }
 
-#[inline]
-pub fn byte_from_char(c: char) -> u8 {
-    let res = c as u32;
-    debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
-    res as u8
+fn ascii_char_to_byte(c: char) -> Result<u8, EscapeError> {
+    // Do NOT use `u8::try_from(c)` here: it also accepts U+0080..=U+00FF, which are not ASCII.
+    if c.is_ascii() { Ok(c as u8) } else { Err(EscapeError::NonAsciiCharInByte) }
 }
diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs
index 5b99495f47581..831bb83f84192 100644
--- a/compiler/rustc_lexer/src/unescape/tests.rs
+++ b/compiler/rustc_lexer/src/unescape/tests.rs
@@ -244,7 +244,7 @@ fn test_unescape_byte_str_good() {
         unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
             if let Ok(b) = &mut buf {
                 match c {
-                    Ok(c) => b.push(byte_from_char(c)),
+                    Ok(c) => b.push(c as u8),
                     Err(e) => buf = Err((range, e)),
                 }
             }
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 1d17290e1c706..eb4ece2cd8e00 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -985,10 +985,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         prefix_len: u32,
         postfix_len: u32,
     ) -> (token::LitKind, Symbol) {
-        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
-            unescape::unescape_mixed(src, mode, &mut |span, result| {
-                callback(span, result.map(drop))
-            })
+        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, _mode, callback| {
+            unescape::unescape_cstr(src, &mut |span, result| callback(span, result.map(drop)))
         })
     }
 }
diff --git a/library/core/src/num/niche_types.rs b/library/core/src/num/niche_types.rs
index 47ff4254e533b..b92561c9e356d 100644
--- a/library/core/src/num/niche_types.rs
+++ b/library/core/src/num/niche_types.rs
@@ -131,6 +131,8 @@ define_valid_range_type! {
     pub struct NonZeroI32Inner(i32 as u32 in 1..=0xffff_ffff);
     pub struct NonZeroI64Inner(i64 as u64 in 1..=0xffffffff_ffffffff);
     pub struct NonZeroI128Inner(i128 as u128 in 1..=0xffffffffffffffff_ffffffffffffffff);
+
+    pub struct NonZeroCharInner(char as u32 in 1..=0x10ffff);
 }
 
 #[cfg(target_pointer_width = "16")]
diff --git a/library/core/src/num/nonzero.rs b/library/core/src/num/nonzero.rs
index 7585ec140e31e..2145812885d91 100644
--- a/library/core/src/num/nonzero.rs
+++ b/library/core/src/num/nonzero.rs
@@ -79,6 +79,7 @@ impl_zeroable_primitive!(
     NonZeroI64Inner(i64),
     NonZeroI128Inner(i128),
     NonZeroIsizeInner(isize),
+    NonZeroCharInner(char),
 );
 
 /// A value that is known not to equal zero.
diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
index c97596d5097ec..09ac5b00b43bd 100644
--- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
+++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
@@ -10,14 +10,13 @@
 
 use std::ops;
 
-use rustc_lexer::unescape::{EscapeError, Mode};
-
-use crate::{
-    Edition,
-    SyntaxKind::{self, *},
-    T,
+use rustc_lexer::unescape::{
+    unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str, EscapeError, Mode,
 };
 
+use crate::SyntaxKind::{self, *};
+use crate::{Edition, T};
+
 pub struct LexedStr<'a> {
     text: &'a str,
     kind: Vec<SyntaxKind>,
@@ -149,14 +148,14 @@ impl<'a> Converter<'a> {
         self.res
     }
 
-    fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
+    fn push(&mut self, kind: SyntaxKind, len: usize, errors: Vec<String>) {
         self.res.push(kind, self.offset);
         self.offset += len;
 
-        if let Some(err) = err {
-            let token = self.res.len() as u32;
-            let msg = err.to_owned();
-            self.res.error.push(LexError { msg, token });
+        for msg in errors {
+            if !msg.is_empty() {
+                self.res.error.push(LexError { msg, token: self.res.len() as u32 });
+            }
         }
     }
 
@@ -165,14 +164,16 @@ impl<'a> Converter<'a> {
         // We drop some useful information here (see patterns with double dots `..`)
         // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
         // being `u16` that come from `rowan::SyntaxKind`.
-        let mut err = "";
+        let mut errors: Vec<String> = vec![];
 
         let syntax_kind = {
             match kind {
                 rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT,
                 rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated } => {
                     if !terminated {
-                        err = "Missing trailing `*/` symbols to terminate the block comment";
+                        errors.push(
+                            "Missing trailing `*/` symbols to terminate the block comment".into(),
+                        );
                     }
                     COMMENT
                 }
@@ -184,7 +185,7 @@ impl<'a> Converter<'a> {
                     SyntaxKind::from_keyword(token_text, self.edition).unwrap_or(IDENT)
                 }
                 rustc_lexer::TokenKind::InvalidIdent => {
-                    err = "Ident contains invalid characters";
+                    errors.push("Ident contains invalid characters".into());
                     IDENT
                 }
 
@@ -192,7 +193,7 @@ impl<'a> Converter<'a> {
 
                 rustc_lexer::TokenKind::GuardedStrPrefix if self.edition.at_least_2024() => {
                     // FIXME: rustc does something better for recovery.
-                    err = "Invalid string literal (reserved syntax)";
+                    errors.push("Invalid string literal (reserved syntax)".into());
                     ERROR
                 }
                 rustc_lexer::TokenKind::GuardedStrPrefix => {
@@ -208,12 +209,12 @@ impl<'a> Converter<'a> {
 
                 rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
                     if *starts_with_number {
-                        err = "Lifetime name cannot start with a number";
+                        errors.push("Lifetime name cannot start with a number".into());
                     }
                     LIFETIME_IDENT
                 }
                 rustc_lexer::TokenKind::UnknownPrefixLifetime => {
-                    err = "Unknown lifetime prefix";
+                    errors.push("Unknown lifetime prefix".into());
                     LIFETIME_IDENT
                 }
                 rustc_lexer::TokenKind::RawLifetime => LIFETIME_IDENT,
@@ -248,119 +249,128 @@ impl<'a> Converter<'a> {
                 rustc_lexer::TokenKind::Unknown => ERROR,
                 rustc_lexer::TokenKind::UnknownPrefix if token_text == "builtin" => IDENT,
                 rustc_lexer::TokenKind::UnknownPrefix => {
-                    err = "unknown literal prefix";
+                    errors.push("unknown literal prefix".into());
                     IDENT
                 }
                 rustc_lexer::TokenKind::Eof => EOF,
             }
         };
 
-        let err = if err.is_empty() { None } else { Some(err) };
-        self.push(syntax_kind, token_text.len(), err);
+        self.push(syntax_kind, token_text.len(), errors);
     }
 
     fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {
-        let mut err = "";
+        let invalid_raw_msg = String::from("Invalid raw string literal");
+
+        let mut errors = vec![];
+        let mut no_end_quote = |c: char, kind: &str| {
+            errors.push(format!("Missing trailing `{c}` symbol to terminate the {kind} literal"));
+        };
 
         let syntax_kind = match *kind {
             rustc_lexer::LiteralKind::Int { empty_int, base: _ } => {
                 if empty_int {
-                    err = "Missing digits after the integer base prefix";
+                    errors.push("Missing digits after the integer base prefix".into());
                 }
                 INT_NUMBER
             }
             rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => {
                 if empty_exponent {
-                    err = "Missing digits after the exponent symbol";
+                    errors.push("Missing digits after the exponent symbol".into());
                 }
                 FLOAT_NUMBER
             }
             rustc_lexer::LiteralKind::Char { terminated } => {
                 if !terminated {
-                    err = "Missing trailing `'` symbol to terminate the character literal";
+                    no_end_quote('\'', "character");
                 } else {
                     let text = &self.res.text[self.offset + 1..][..len - 1];
-                    let i = text.rfind('\'').unwrap();
-                    let text = &text[..i];
-                    if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
-                        err = error_to_diagnostic_message(e, Mode::Char);
+                    let text = &text[..text.rfind('\'').unwrap()];
+                    if let Err(e) = unescape_char(text) {
+                        errors.push(err_to_msg(e, Mode::Char));
                     }
                 }
                 CHAR
             }
             rustc_lexer::LiteralKind::Byte { terminated } => {
                 if !terminated {
-                    err = "Missing trailing `'` symbol to terminate the byte literal";
+                    no_end_quote('\'', "byte");
                 } else {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
-                    let i = text.rfind('\'').unwrap();
-                    let text = &text[..i];
-                    if let Err(e) = rustc_lexer::unescape::unescape_byte(text) {
-                        err = error_to_diagnostic_message(e, Mode::Byte);
+                    let text = &text[..text.rfind('\'').unwrap()];
+                    if let Err(e) = unescape_byte(text) {
+                        errors.push(err_to_msg(e, Mode::Byte));
                     }
                 }
-
                 BYTE
             }
             rustc_lexer::LiteralKind::Str { terminated } => {
                 if !terminated {
-                    err = "Missing trailing `\"` symbol to terminate the string literal";
+                    no_end_quote('"', "string");
                 } else {
                     let text = &self.res.text[self.offset + 1..][..len - 1];
-                    let i = text.rfind('"').unwrap();
-                    let text = &text[..i];
-                    err = unescape_string_error_message(text, Mode::Str);
+                    let text = &text[..text.rfind('"').unwrap()];
+                    unescape_str(text, &mut |_, res| {
+                        if let Err(e) = res {
+                            errors.push(err_to_msg(e, Mode::Str));
+                        }
+                    });
                 }
                 STRING
             }
             rustc_lexer::LiteralKind::ByteStr { terminated } => {
                 if !terminated {
-                    err = "Missing trailing `\"` symbol to terminate the byte string literal";
+                    no_end_quote('"', "byte string");
                 } else {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
-                    let i = text.rfind('"').unwrap();
-                    let text = &text[..i];
-                    err = unescape_string_error_message(text, Mode::ByteStr);
+                    let text = &text[..text.rfind('"').unwrap()];
+                    unescape_byte_str(text, &mut |_, res| {
+                        if let Err(e) = res {
+                            errors.push(err_to_msg(e, Mode::ByteStr));
+                        }
+                    });
                 }
                 BYTE_STRING
             }
             rustc_lexer::LiteralKind::CStr { terminated } => {
                 if !terminated {
-                    err = "Missing trailing `\"` symbol to terminate the string literal";
+                    no_end_quote('"', "C string")
                 } else {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
-                    let i = text.rfind('"').unwrap();
-                    let text = &text[..i];
-                    err = unescape_string_error_message(text, Mode::CStr);
+                    let text = &text[..text.rfind('"').unwrap()];
+                    unescape_cstr(text, &mut |_, res| {
+                        if let Err(e) = res {
+                            errors.push(err_to_msg(e, Mode::CStr));
+                        }
+                    });
                 }
                 C_STRING
             }
             rustc_lexer::LiteralKind::RawStr { n_hashes } => {
                 if n_hashes.is_none() {
-                    err = "Invalid raw string literal";
+                    errors.push(invalid_raw_msg);
                 }
                 STRING
             }
             rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
                 if n_hashes.is_none() {
-                    err = "Invalid raw string literal";
+                    errors.push(invalid_raw_msg);
                 }
                 BYTE_STRING
             }
             rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
                 if n_hashes.is_none() {
-                    err = "Invalid raw string literal";
+                    errors.push(invalid_raw_msg);
                 }
                 C_STRING
             }
         };
 
-        let err = if err.is_empty() { None } else { Some(err) };
-        self.push(syntax_kind, len, err);
+        self.push(syntax_kind, len, errors);
     }
 }
 
-fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
+fn err_to_msg(error: EscapeError, mode: Mode) -> String {
     match error {
         EscapeError::ZeroChars => "empty character literal",
         EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",
@@ -396,28 +406,5 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
         EscapeError::UnskippedWhitespaceWarning => "",
         EscapeError::MultipleSkippedLinesWarning => "",
     }
-}
-
-fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
-    let mut error_message = "";
-    match mode {
-        Mode::CStr => {
-            rustc_lexer::unescape::unescape_mixed(text, mode, &mut |_, res| {
-                if let Err(e) = res {
-                    error_message = error_to_diagnostic_message(e, mode);
-                }
-            });
-        }
-        Mode::ByteStr | Mode::Str => {
-            rustc_lexer::unescape::unescape_unicode(text, mode, &mut |_, res| {
-                if let Err(e) = res {
-                    error_message = error_to_diagnostic_message(e, mode);
-                }
-            });
-        }
-        _ => {
-            // Other Modes are not supported yet or do not apply
-        }
-    }
-    error_message
+    .into()
 }
diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
index df851ab5b2525..f940438c6176b 100644
--- a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
+++ b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
@@ -3,7 +3,8 @@
 use std::{borrow::Cow, num::ParseIntError};
 
 use rustc_lexer::unescape::{
-    unescape_byte, unescape_char, unescape_mixed, unescape_unicode, EscapeError, MixedUnit, Mode,
+    unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str, unescape_unicode,
+    EscapeError, MixedUnit, Mode,
 };
 use stdx::always;
 
@@ -218,7 +219,7 @@ impl ast::String {
         let mut buf = String::new();
         let mut prev_end = 0;
         let mut has_error = None;
-        unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
+        unescape_str(text, &mut |char_range, unescaped_char| match (
             unescaped_char,
             buf.capacity() == 0,
         ) {
@@ -259,18 +260,18 @@ impl ast::ByteString {
         let mut buf: Vec<u8> = Vec::new();
         let mut prev_end = 0;
         let mut has_error = None;
-        unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
-            unescaped_char,
+        unescape_byte_str(text, &mut |char_range, unescaped_byte| match (
+            unescaped_byte,
             buf.capacity() == 0,
         ) {
-            (Ok(c), false) => buf.push(c as u8),
+            (Ok(b), false) => buf.push(b),
             (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
                 prev_end = char_range.end
             }
-            (Ok(c), true) => {
+            (Ok(b), true) => {
                 buf.reserve_exact(text.len());
                 buf.extend_from_slice(&text.as_bytes()[..prev_end]);
-                buf.push(c as u8);
+                buf.push(b);
             }
             (Err(e), _) => has_error = Some(e),
         });
@@ -297,7 +298,7 @@ impl IsString for ast::CString {
         let text = &self.text()[text_range_no_quotes - start];
         let offset = text_range_no_quotes.start() - start;
 
-        unescape_mixed(text, Self::MODE, &mut |range, unescaped_char| {
+        unescape_cstr(text, &mut |range, unescaped_char| {
             let text_range =
                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
             // XXX: This method should only be used for highlighting ranges. The unescaped
@@ -320,13 +321,10 @@ impl ast::CString {
         let mut prev_end = 0;
         let mut has_error = None;
         let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
-            MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
-            MixedUnit::HighByte(b) => buf.push(b),
+            MixedUnit::Char(c) => buf.extend(c.get().encode_utf8(&mut [0; 4]).as_bytes()),
+            MixedUnit::HighByte(b) => buf.push(b.get()),
         };
-        unescape_mixed(text, Self::MODE, &mut |char_range, unescaped| match (
-            unescaped,
-            buf.capacity() == 0,
-        ) {
+        unescape_cstr(text, &mut |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
             (Ok(u), false) => extend_unit(&mut buf, u),
             (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
                 prev_end = char_range.end
diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs
index 85eefac734b20..8cdfeb9d84557 100644
--- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs
+++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs
@@ -5,7 +5,9 @@
 mod block;
 
 use rowan::Direction;
-use rustc_lexer::unescape::{self, unescape_mixed, unescape_unicode, Mode};
+use rustc_lexer::unescape::{
+    self, unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str,
+};
 
 use crate::{
     algo,
@@ -140,7 +142,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::String(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 1, '"') {
-                    unescape_unicode(without_quotes, Mode::Str, &mut |range, char| {
+                    unescape_str(without_quotes, &mut |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -151,7 +153,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::ByteString(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_unicode(without_quotes, Mode::ByteStr, &mut |range, char| {
+                    unescape_byte_str(without_quotes, &mut |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -162,7 +164,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::CString(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_mixed(without_quotes, Mode::CStr, &mut |range, char| {
+                    unescape_cstr(without_quotes, &mut |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -172,20 +174,16 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         }
         ast::LiteralKind::Char(_) => {
             if let Some(without_quotes) = unquote(text, 1, '\'') {
-                unescape_unicode(without_quotes, Mode::Char, &mut |range, char| {
-                    if let Err(err) = char {
-                        push_err(1, range.start, err);
-                    }
-                });
+                if let Err(err) = unescape_char(without_quotes) {
+                    push_err(1, 0, err);
+                }
             }
         }
         ast::LiteralKind::Byte(_) => {
             if let Some(without_quotes) = unquote(text, 2, '\'') {
-                unescape_unicode(without_quotes, Mode::Byte, &mut |range, char| {
-                    if let Err(err) = char {
-                        push_err(2, range.start, err);
-                    }
-                });
+                if let Err(err) = unescape_byte(without_quotes) {
+                    push_err(2, 0, err);
+                }
             }
         }
         ast::LiteralKind::IntNumber(_)

From 30822ec0ec25723f36f9e73c42d91a83dc121388 Mon Sep 17 00:00:00 2001
From: Marijn Schouten <mhkbst@gmail.com>
Date: Fri, 14 Mar 2025 09:42:49 +0000
Subject: [PATCH 2/2] Replace all uses of unescape_unicode: no more
 unreachable!

---
 compiler/rustc_ast/src/util/literal.rs        |  6 +-
 compiler/rustc_lexer/src/unescape.rs          | 67 +++++++++-----
 compiler/rustc_lexer/src/unescape/tests.rs    | 14 +--
 compiler/rustc_parse/src/lexer/mod.rs         | 90 ++++++------------
 compiler/rustc_parse_format/src/lib.rs        |  8 +-
 .../clippy/clippy_dev/src/update_lints.rs     |  2 +-
 .../crates/hir-expand/src/attrs.rs            |  2 +-
 .../crates/hir-expand/src/builtin/fn_macro.rs | 14 +--
 .../crates/syntax/src/ast/token_ext.rs        | 92 ++++++++-----------
 .../crates/syntax/src/validation.rs           |  6 +-
 10 files changed, 136 insertions(+), 165 deletions(-)

diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index dc66a42dc1c6a..0d44f583f3907 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -89,7 +89,7 @@ impl LitKind {
                     // programs with many long strings containing escapes.
                     unescape_str(
                         s,
-                        &mut #[inline(always)]
+                        #[inline(always)]
                         |_, res| match res {
                             Ok(c) => buf.push(c),
                             Err(err) => {
@@ -110,7 +110,7 @@ impl LitKind {
             token::ByteStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                unescape_byte_str(s, &mut |_, res| match res {
+                unescape_byte_str(s, |_, res| match res {
                     Ok(b) => buf.push(b),
                     Err(err) => {
                         assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -127,7 +127,7 @@ impl LitKind {
             token::CStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                unescape_cstr(s, &mut |_span, c| match c {
+                unescape_cstr(s, |_span, c| match c {
                     Ok(MixedUnit::Char(c)) => {
                         buf.extend_from_slice(c.get().encode_utf8(&mut [0; 4]).as_bytes())
                     }
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index 5c7d1106f568f..d991748b5b0b7 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -139,7 +139,7 @@ macro_rules! check {
                         " literal (without quotes) and produce a sequence of results of ",
                         stringify!($unit_ty), " or error (returned via `callback`).",
                         "\nNB: Raw strings don't do any unescaping, but do produce errors on bare CR.")]
-        pub fn $check(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
+        pub fn $check(src: &str, mut callback: impl FnMut(Range<usize>, Result<$unit, EscapeError>))
         {
             src.char_indices().for_each(|(pos, c)| {
                 callback(
@@ -162,7 +162,7 @@ macro_rules! unescape {
         #[doc = concat!("Take the contents of a ", stringify!($string_ty),
                         " literal (without quotes) and produce a sequence of results of escaped ",
                         stringify!($unit_ty), " or error (returned via `callback`).")]
-        pub fn $unescape(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
+        pub fn $unescape(src: &str, mut callback: impl FnMut(Range<usize>, Result<$unit, EscapeError>))
         {
             let mut chars = src.chars();
             while let Some(c) = chars.next() {
@@ -356,36 +356,57 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
     }
 }
 
-/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without quotes)
-/// and produces a sequence of unescaped characters or errors,
+/// Takes the contents of a literal (without quotes), checks it according to `mode`,
+/// and produces a sequence of errors (successfully unescaped units are discarded),
 /// which are returned by invoking `callback`.
-///
-/// For `Char` and `Byte` modes, the callback will be called exactly once.
-pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<char, EscapeError>),
-{
-    let mut byte_callback =
-        |range, res: Result<u8, EscapeError>| callback(range, res.map(char::from));
+pub fn unescape_for_errors(
+    src: &str,
+    mode: Mode,
+    mut error_callback: impl FnMut(Range<usize>, EscapeError),
+) {
     match mode {
         Char => {
             let mut chars = src.chars();
-            let res = unescape_char_iter(&mut chars);
-            callback(0..(src.len() - chars.as_str().len()), res);
+            if let Err(e) = unescape_char_iter(&mut chars) {
+                error_callback(0..(src.len() - chars.as_str().len()), e);
+            }
         }
         Byte => {
             let mut chars = src.chars();
-            let res = unescape_byte_iter(&mut chars).map(char::from);
-            callback(0..(src.len() - chars.as_str().len()), res);
+            if let Err(e) = unescape_byte_iter(&mut chars) {
+                error_callback(0..(src.len() - chars.as_str().len()), e);
+            }
         }
-        Str => unescape_str(src, callback),
-        ByteStr => unescape_byte_str(src, &mut byte_callback),
-        RawStr => check_raw_str(src, callback),
-        RawByteStr => check_raw_byte_str(src, &mut byte_callback),
-        RawCStr => check_raw_cstr(src, &mut |r, res: Result<NonZero<char>, EscapeError>| {
-            callback(r, res.map(|c| c.get()))
+        Str => unescape_str(src, |range, res| {
+            if let Err(e) = res {
+                error_callback(range, e);
+            }
+        }),
+        ByteStr => unescape_byte_str(src, |range, res| {
+            if let Err(e) = res {
+                error_callback(range, e);
+            }
+        }),
+        CStr => unescape_cstr(src, |range, res| {
+            if let Err(e) = res {
+                error_callback(range, e);
+            }
+        }),
+        RawStr => check_raw_str(src, |range, res| {
+            if let Err(e) = res {
+                error_callback(range, e);
+            }
+        }),
+        RawByteStr => check_raw_byte_str(src, |range, res| {
+            if let Err(e) = res {
+                error_callback(range, e);
+            }
+        }),
+        RawCStr => check_raw_cstr(src, |range, res| {
+            if let Err(e) = res {
+                error_callback(range, e);
+            }
         }),
-        CStr => unreachable!(),
     }
 }
 
diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs
index 831bb83f84192..c094e8d9da354 100644
--- a/compiler/rustc_lexer/src/unescape/tests.rs
+++ b/compiler/rustc_lexer/src/unescape/tests.rs
@@ -100,7 +100,7 @@ fn test_unescape_char_good() {
 fn test_unescape_str_warn() {
     fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
         let mut unescaped = Vec::with_capacity(literal.len());
-        unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
+        unescape_str(literal, |range, res| unescaped.push((range, res)));
         assert_eq!(unescaped, expected);
     }
 
@@ -124,7 +124,7 @@ fn test_unescape_str_warn() {
 fn test_unescape_str_good() {
     fn check(literal_text: &str, expected: &str) {
         let mut buf = Ok(String::with_capacity(literal_text.len()));
-        unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
+        unescape_str(literal_text, |range, c| {
             if let Ok(b) = &mut buf {
                 match c {
                     Ok(c) => b.push(c),
@@ -241,7 +241,7 @@ fn test_unescape_byte_good() {
 fn test_unescape_byte_str_good() {
     fn check(literal_text: &str, expected: &[u8]) {
         let mut buf = Ok(Vec::with_capacity(literal_text.len()));
-        unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
+        unescape_byte_str(literal_text, |range, c| {
             if let Ok(b) = &mut buf {
                 match c {
                     Ok(c) => b.push(c as u8),
@@ -264,7 +264,7 @@ fn test_unescape_byte_str_good() {
 fn test_unescape_raw_str() {
     fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
         let mut unescaped = Vec::with_capacity(literal.len());
-        unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res)));
+        check_raw_str(literal, |range, res| unescaped.push((range, res)));
         assert_eq!(unescaped, expected);
     }
 
@@ -274,13 +274,13 @@ fn test_unescape_raw_str() {
 
 #[test]
 fn test_unescape_raw_byte_str() {
-    fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
+    fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
         let mut unescaped = Vec::with_capacity(literal.len());
-        unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
+        check_raw_byte_str(literal, |range, res| unescaped.push((range, res)));
         assert_eq!(unescaped, expected);
     }
 
     check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
     check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
-    check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]);
+    check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(b'a'))]);
 }
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index eb4ece2cd8e00..1f990b46475b7 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,5 +1,3 @@
-use std::ops::Range;
-
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
@@ -525,7 +523,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     }
                     err.emit()
                 }
-                self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
+                self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
             }
             rustc_lexer::LiteralKind::Byte { terminated } => {
                 if !terminated {
@@ -537,7 +535,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                         .with_code(E0763)
                         .emit()
                 }
-                self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
+                self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
             }
             rustc_lexer::LiteralKind::Str { terminated } => {
                 if !terminated {
@@ -549,7 +547,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                         .with_code(E0765)
                         .emit()
                 }
-                self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
+                self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
             }
             rustc_lexer::LiteralKind::ByteStr { terminated } => {
                 if !terminated {
@@ -561,7 +559,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                         .with_code(E0766)
                         .emit()
                 }
-                self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
+                self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
             }
             rustc_lexer::LiteralKind::CStr { terminated } => {
                 if !terminated {
@@ -573,13 +571,13 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                         .with_code(E0767)
                         .emit()
                 }
-                self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
+                self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
             }
             rustc_lexer::LiteralKind::RawStr { n_hashes } => {
                 if let Some(n_hashes) = n_hashes {
                     let n = u32::from(n_hashes);
                     let kind = token::StrRaw(n_hashes);
-                    self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
+                    self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
                 } else {
                     self.report_raw_str_error(start, 1);
                 }
@@ -588,7 +586,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                 if let Some(n_hashes) = n_hashes {
                     let n = u32::from(n_hashes);
                     let kind = token::ByteStrRaw(n_hashes);
-                    self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
+                    self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
                 } else {
                     self.report_raw_str_error(start, 2);
                 }
@@ -597,7 +595,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                 if let Some(n_hashes) = n_hashes {
                     let n = u32::from(n_hashes);
                     let kind = token::CStrRaw(n_hashes);
-                    self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
+                    self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
                 } else {
                     self.report_raw_str_error(start, 2);
                 }
@@ -913,7 +911,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
     }
 
-    fn cook_common(
+    fn cook_quoted(
         &self,
         mut kind: token::LitKind,
         mode: Mode,
@@ -921,32 +919,28 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         end: BytePos,
         prefix_len: u32,
         postfix_len: u32,
-        unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
     ) -> (token::LitKind, Symbol) {
         let content_start = start + BytePos(prefix_len);
         let content_end = end - BytePos(postfix_len);
         let lit_content = self.str_from_to(content_start, content_end);
-        unescape(lit_content, mode, &mut |range, result| {
-            // Here we only check for errors. The actual unescaping is done later.
-            if let Err(err) = result {
-                let span_with_quotes = self.mk_sp(start, end);
-                let (start, end) = (range.start as u32, range.end as u32);
-                let lo = content_start + BytePos(start);
-                let hi = lo + BytePos(end - start);
-                let span = self.mk_sp(lo, hi);
-                let is_fatal = err.is_fatal();
-                if let Some(guar) = emit_unescape_error(
-                    self.dcx(),
-                    lit_content,
-                    span_with_quotes,
-                    span,
-                    mode,
-                    range,
-                    err,
-                ) {
-                    assert!(is_fatal);
-                    kind = token::Err(guar);
-                }
+        unescape::unescape_for_errors(lit_content, mode, |range, err| {
+            let span_with_quotes = self.mk_sp(start, end);
+            let (start, end) = (range.start as u32, range.end as u32);
+            let lo = content_start + BytePos(start);
+            let hi = lo + BytePos(end - start);
+            let span = self.mk_sp(lo, hi);
+            let is_fatal = err.is_fatal();
+            if let Some(guar) = emit_unescape_error(
+                self.dcx(),
+                lit_content,
+                span_with_quotes,
+                span,
+                mode,
+                range,
+                err,
+            ) {
+                assert!(is_fatal);
+                kind = token::Err(guar);
             }
         });
 
@@ -959,36 +953,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         };
         (kind, sym)
     }
-
-    fn cook_unicode(
-        &self,
-        kind: token::LitKind,
-        mode: Mode,
-        start: BytePos,
-        end: BytePos,
-        prefix_len: u32,
-        postfix_len: u32,
-    ) -> (token::LitKind, Symbol) {
-        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
-            unescape::unescape_unicode(src, mode, &mut |span, result| {
-                callback(span, result.map(drop))
-            })
-        })
-    }
-
-    fn cook_mixed(
-        &self,
-        kind: token::LitKind,
-        mode: Mode,
-        start: BytePos,
-        end: BytePos,
-        prefix_len: u32,
-        postfix_len: u32,
-    ) -> (token::LitKind, Symbol) {
-        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, _mode, callback| {
-            unescape::unescape_cstr(src, &mut |span, result| callback(span, result.map(drop)))
-        })
-    }
 }
 
 pub fn nfc_normalize(string: &str) -> Symbol {
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs
index 5b8a2fe52d3f5..73bb8ab17734e 100644
--- a/compiler/rustc_parse_format/src/lib.rs
+++ b/compiler/rustc_parse_format/src/lib.rs
@@ -1094,11 +1094,9 @@ fn find_width_map_from_snippet(
 fn unescape_string(string: &str) -> Option<String> {
     let mut buf = String::new();
     let mut ok = true;
-    unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
-        match unescaped_char {
-            Ok(c) => buf.push(c),
-            Err(_) => ok = false,
-        }
+    unescape::unescape_str(string, &mut |_, res| match res {
+        Ok(c) => buf.push(c),
+        Err(_) => ok = false,
     });
 
     ok.then_some(buf)
diff --git a/src/tools/clippy/clippy_dev/src/update_lints.rs b/src/tools/clippy/clippy_dev/src/update_lints.rs
index b80ee5aac7e76..3f785135030ec 100644
--- a/src/tools/clippy/clippy_dev/src/update_lints.rs
+++ b/src/tools/clippy/clippy_dev/src/update_lints.rs
@@ -830,7 +830,7 @@ fn remove_line_splices(s: &str) -> String {
         .and_then(|s| s.strip_suffix('"'))
         .unwrap_or_else(|| panic!("expected quoted string, found `{s}`"));
     let mut res = String::with_capacity(s.len());
-    unescape::unescape_unicode(s, unescape::Mode::Str, &mut |range, ch| {
+    unescape::unescape_str(s, |range, ch| {
         if ch.is_ok() {
             res.push_str(&s[range]);
         }
diff --git a/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs b/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs
index c9c793d54f26c..3c7d0495833f6 100644
--- a/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs
+++ b/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs
@@ -415,7 +415,7 @@ fn unescape(s: &str) -> Option<Cow<'_, str>> {
     let mut buf = String::new();
     let mut prev_end = 0;
     let mut has_error = false;
-    unescape::unescape_unicode(s, unescape::Mode::Str, &mut |char_range, unescaped_char| match (
+    unescape::unescape_str(s, |char_range, unescaped_char| match (
         unescaped_char,
         buf.capacity() == 0,
     ) {
diff --git a/src/tools/rust-analyzer/crates/hir-expand/src/builtin/fn_macro.rs b/src/tools/rust-analyzer/crates/hir-expand/src/builtin/fn_macro.rs
index 55242ab3e57d1..02bf0c72f9221 100644
--- a/src/tools/rust-analyzer/crates/hir-expand/src/builtin/fn_macro.rs
+++ b/src/tools/rust-analyzer/crates/hir-expand/src/builtin/fn_macro.rs
@@ -9,7 +9,7 @@ use span::{Edition, EditionedFileId, Span};
 use stdx::format_to;
 use syntax::{
     format_smolstr,
-    unescape::{unescape_byte, unescape_char, unescape_unicode, Mode},
+    unescape::{unescape_byte, unescape_char, unescape_str},
 };
 use syntax_bridge::syntax_node_to_token_tree;
 
@@ -429,7 +429,7 @@ fn compile_error_expand(
             span: _,
             kind: tt::LitKind::Str | tt::LitKind::StrRaw(_),
             suffix: _,
-        }))] => ExpandError::other(span, Box::from(unescape_str(text).as_str())),
+        }))] => ExpandError::other(span, Box::from(unescape_symbol(text).as_str())),
         _ => ExpandError::other(span, "`compile_error!` argument must be a string"),
     };
 
@@ -477,7 +477,7 @@ fn concat_expand(
                         format_to!(text, "{}", it.symbol.as_str())
                     }
                     tt::LitKind::Str => {
-                        text.push_str(unescape_str(&it.symbol).as_str());
+                        text.push_str(unescape_symbol(&it.symbol).as_str());
                         record_span(it.span);
                     }
                     tt::LitKind::StrRaw(_) => {
@@ -681,7 +681,7 @@ fn parse_string(tt: &tt::TopSubtree) -> Result<(Symbol, Span), ExpandError> {
                 span,
                 kind: tt::LitKind::Str,
                 suffix: _,
-            })) => Ok((unescape_str(text), *span)),
+            })) => Ok((unescape_symbol(text), *span)),
             TtElement::Leaf(tt::Leaf::Literal(tt::Literal {
                 symbol: text,
                 span,
@@ -702,7 +702,7 @@ fn parse_string(tt: &tt::TopSubtree) -> Result<(Symbol, Span), ExpandError> {
                             span,
                             kind: tt::LitKind::Str,
                             suffix: _,
-                        })) => Some((unescape_str(text), *span)),
+                        })) => Some((unescape_symbol(text), *span)),
                         TtElement::Leaf(tt::Leaf::Literal(tt::Literal {
                             symbol: text,
                             span,
@@ -887,11 +887,11 @@ fn quote_expand(
     )
 }
 
-fn unescape_str(s: &Symbol) -> Symbol {
+fn unescape_symbol(s: &Symbol) -> Symbol {
     if s.as_str().contains('\\') {
         let s = s.as_str();
         let mut buf = String::with_capacity(s.len());
-        unescape_unicode(s, Mode::Str, &mut |_, c| {
+        unescape_str(s, |_, c| {
             if let Ok(c) = c {
                 buf.push(c)
             }
diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
index f940438c6176b..d4017e487aba1 100644
--- a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
+++ b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
@@ -1,10 +1,11 @@
 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
 
+use std::ops::Range;
 use std::{borrow::Cow, num::ParseIntError};
 
 use rustc_lexer::unescape::{
-    unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str, unescape_unicode,
-    EscapeError, MixedUnit, Mode,
+    unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str, EscapeError,
+    MixedUnit,
 };
 use stdx::always;
 
@@ -151,7 +152,7 @@ impl QuoteOffsets {
 
 pub trait IsString: AstToken {
     const RAW_PREFIX: &'static str;
-    const MODE: Mode;
+    fn unescape(s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>));
     fn is_raw(&self) -> bool {
         self.text().starts_with(Self::RAW_PREFIX)
     }
@@ -186,7 +187,7 @@ pub trait IsString: AstToken {
         let text = &self.text()[text_range_no_quotes - start];
         let offset = text_range_no_quotes.start() - start;
 
-        unescape_unicode(text, Self::MODE, &mut |range, unescaped_char| {
+        Self::unescape(text, &mut |range: Range<usize>, unescaped_char| {
             if let Some((s, e)) = range.start.try_into().ok().zip(range.end.try_into().ok()) {
                 cb(TextRange::new(s, e) + offset, unescaped_char);
             }
@@ -204,7 +205,9 @@ pub trait IsString: AstToken {
 
 impl IsString for ast::String {
     const RAW_PREFIX: &'static str = "r";
-    const MODE: Mode = Mode::Str;
+    fn unescape(s: &str, cb: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
+        unescape_str(s, cb)
+    }
 }
 
 impl ast::String {
@@ -219,20 +222,19 @@ impl ast::String {
         let mut buf = String::new();
         let mut prev_end = 0;
         let mut has_error = None;
-        unescape_str(text, &mut |char_range, unescaped_char| match (
-            unescaped_char,
-            buf.capacity() == 0,
-        ) {
-            (Ok(c), false) => buf.push(c),
-            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
-                prev_end = char_range.end
-            }
-            (Ok(c), true) => {
-                buf.reserve_exact(text.len());
-                buf.push_str(&text[..prev_end]);
-                buf.push(c);
+        unescape_str(text, |char_range, unescaped_char| {
+            match (unescaped_char, buf.capacity() == 0) {
+                (Ok(c), false) => buf.push(c),
+                (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
+                    prev_end = char_range.end
+                }
+                (Ok(c), true) => {
+                    buf.reserve_exact(text.len());
+                    buf.push_str(&text[..prev_end]);
+                    buf.push(c);
+                }
+                (Err(e), _) => has_error = Some(e),
             }
-            (Err(e), _) => has_error = Some(e),
         });
 
         match (has_error, buf.capacity() == 0) {
@@ -245,7 +247,9 @@ impl ast::String {
 
 impl IsString for ast::ByteString {
     const RAW_PREFIX: &'static str = "br";
-    const MODE: Mode = Mode::ByteStr;
+    fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
+        unescape_byte_str(s, |range, res| callback(range, res.map(char::from)))
+    }
 }
 
 impl ast::ByteString {
@@ -260,20 +264,19 @@ impl ast::ByteString {
         let mut buf: Vec<u8> = Vec::new();
         let mut prev_end = 0;
         let mut has_error = None;
-        unescape_byte_str(text, &mut |char_range, unescaped_byte| match (
-            unescaped_byte,
-            buf.capacity() == 0,
-        ) {
-            (Ok(b), false) => buf.push(b),
-            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
-                prev_end = char_range.end
-            }
-            (Ok(b), true) => {
-                buf.reserve_exact(text.len());
-                buf.extend_from_slice(&text.as_bytes()[..prev_end]);
-                buf.push(b);
+        unescape_byte_str(text, |char_range, unescaped_byte| {
+            match (unescaped_byte, buf.capacity() == 0) {
+                (Ok(b), false) => buf.push(b),
+                (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
+                    prev_end = char_range.end
+                }
+                (Ok(b), true) => {
+                    buf.reserve_exact(text.len());
+                    buf.extend_from_slice(&text.as_bytes()[..prev_end]);
+                    buf.push(b);
+                }
+                (Err(e), _) => has_error = Some(e),
             }
-            (Err(e), _) => has_error = Some(e),
         });
 
         match (has_error, buf.capacity() == 0) {
@@ -286,25 +289,10 @@ impl ast::ByteString {
 
 impl IsString for ast::CString {
     const RAW_PREFIX: &'static str = "cr";
-    const MODE: Mode = Mode::CStr;
-
-    fn escaped_char_ranges(&self, cb: &mut dyn FnMut(TextRange, Result<char, EscapeError>)) {
-        let text_range_no_quotes = match self.text_range_between_quotes() {
-            Some(it) => it,
-            None => return,
-        };
-
-        let start = self.syntax().text_range().start();
-        let text = &self.text()[text_range_no_quotes - start];
-        let offset = text_range_no_quotes.start() - start;
-
-        unescape_cstr(text, &mut |range, unescaped_char| {
-            let text_range =
-                TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
-            // XXX: This method should only be used for highlighting ranges. The unescaped
-            // char/byte is not used. For simplicity, we return an arbitrary placeholder char.
-            cb(text_range + offset, unescaped_char.map(|_| ' '));
-        });
+    // NOTE: Callers use this only for highlighting ranges, so the unescaped unit is replaced by
+    // an arbitrary placeholder char. Errors are forwarded so invalid escapes are still reported.
+    fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
+        unescape_cstr(s, |range, res| callback(range, res.map(|_| '_')))
     }
 }
 
@@ -324,7 +312,7 @@ impl ast::CString {
             MixedUnit::Char(c) => buf.extend(c.get().encode_utf8(&mut [0; 4]).as_bytes()),
             MixedUnit::HighByte(b) => buf.push(b.get()),
         };
-        unescape_cstr(text, &mut |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
+        unescape_cstr(text, |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
             (Ok(u), false) => extend_unit(&mut buf, u),
             (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
                 prev_end = char_range.end
diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs
index 8cdfeb9d84557..275cc8a90e215 100644
--- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs
+++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs
@@ -142,7 +142,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::String(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 1, '"') {
-                    unescape_str(without_quotes, &mut |range, char| {
+                    unescape_str(without_quotes, |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -153,7 +153,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::ByteString(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_byte_str(without_quotes, &mut |range, char| {
+                    unescape_byte_str(without_quotes, |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -164,7 +164,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::CString(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_cstr(without_quotes, &mut |range, char| {
+                    unescape_cstr(without_quotes, |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }