diff --git a/Cargo.lock b/Cargo.lock index 96526f7e9e7da..699dd375dd25b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3150,6 +3150,12 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc-literal-escaper" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" + [[package]] name = "rustc-main" version = "0.0.0" @@ -3242,10 +3248,10 @@ version = "0.0.0" dependencies = [ "bitflags", "memchr", + "rustc-literal-escaper", "rustc_ast_ir", "rustc_data_structures", "rustc_index", - "rustc_lexer", "rustc_macros", "rustc_serialize", "rustc_span", @@ -4200,6 +4206,7 @@ name = "rustc_parse" version = "0.0.0" dependencies = [ "bitflags", + "rustc-literal-escaper", "rustc_ast", "rustc_ast_pretty", "rustc_data_structures", @@ -4222,6 +4229,7 @@ dependencies = [ name = "rustc_parse_format" version = "0.0.0" dependencies = [ + "rustc-literal-escaper", "rustc_index", "rustc_lexer", ] diff --git a/compiler/rustc_ast/Cargo.toml b/compiler/rustc_ast/Cargo.toml index 902287d032802..b2d3b90fc4494 100644 --- a/compiler/rustc_ast/Cargo.toml +++ b/compiler/rustc_ast/Cargo.toml @@ -7,10 +7,10 @@ edition = "2024" # tidy-alphabetical-start bitflags = "2.4.1" memchr = "2.7.4" +rustc-literal-escaper = "0.0.2" rustc_ast_ir = { path = "../rustc_ast_ir" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_index = { path = "../rustc_index" } -rustc_lexer = { path = "../rustc_lexer" } rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } rustc_span = { path = "../rustc_span" } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 6896ac723fa58..b8526cf9d9529 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,7 +2,7 @@ use std::{ascii, fmt, str}; -use rustc_lexer::unescape::{ +use rustc_literal_escaper::{ MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, }; use rustc_span::{Span, Symbol, kw, sym}; diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 61638e45253fd..f9c71b2fa651c 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -26,7 +26,6 @@ // tidy-alphabetical-end mod cursor; -pub mod unescape; #[cfg(test)] mod tests; diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs deleted file mode 100644 index d6ea4249247f3..0000000000000 --- a/compiler/rustc_lexer/src/unescape.rs +++ /dev/null @@ -1,438 +0,0 @@ -//! Utilities for validating string and char literals and turning them into -//! values they represent. - -use std::ops::Range; -use std::str::Chars; - -use Mode::*; - -#[cfg(test)] -mod tests; - -/// Errors and warnings that can occur during string unescaping. They mostly -/// relate to malformed escape sequences, but there are a few that are about -/// other problems. -#[derive(Debug, PartialEq, Eq)] -pub enum EscapeError { - /// Expected 1 char, but 0 were found. - ZeroChars, - /// Expected 1 char, but more than 1 were found. - MoreThanOneChar, - - /// Escaped '\' character without continuation. - LoneSlash, - /// Invalid escape character (e.g. '\z'). - InvalidEscape, - /// Raw '\r' encountered. - BareCarriageReturn, - /// Raw '\r' encountered in raw string. - BareCarriageReturnInRawString, - /// Unescaped character that was expected to be escaped (e.g. raw '\t'). - EscapeOnlyChar, - - /// Numeric character escape is too short (e.g. '\x1'). - TooShortHexEscape, - /// Invalid character in numeric escape (e.g. '\xz') - InvalidCharInHexEscape, - /// Character code in numeric escape is non-ascii (e.g. '\xFF'). - OutOfRangeHexEscape, - - /// '\u' not followed by '{'. - NoBraceInUnicodeEscape, - /// Non-hexadecimal value in '\u{..}'. - InvalidCharInUnicodeEscape, - /// '\u{}' - EmptyUnicodeEscape, - /// No closing brace in '\u{..}', e.g. '\u{12'. - UnclosedUnicodeEscape, - /// '\u{_12}' - LeadingUnderscoreUnicodeEscape, - /// More than 6 characters in '\u{..}', e.g. '\u{10FFFF_FF}' - OverlongUnicodeEscape, - /// Invalid in-bound unicode character code, e.g. '\u{DFFF}'. - LoneSurrogateUnicodeEscape, - /// Out of bounds unicode character code, e.g. '\u{FFFFFF}'. - OutOfRangeUnicodeEscape, - - /// Unicode escape code in byte literal. - UnicodeEscapeInByte, - /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. - NonAsciiCharInByte, - - // `\0` in a C string literal. - NulInCStr, - - /// After a line ending with '\', the next line contains whitespace - /// characters that are not skipped. - UnskippedWhitespaceWarning, - - /// After a line ending with '\', multiple lines are skipped. - MultipleSkippedLinesWarning, -} - -impl EscapeError { - /// Returns true for actual errors, as opposed to warnings. - pub fn is_fatal(&self) -> bool { - !matches!( - self, - EscapeError::UnskippedWhitespaceWarning | EscapeError::MultipleSkippedLinesWarning - ) - } -} - -/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without -/// quotes) and produces a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. For `Char` and `Byte` modes, -/// the callback will be called exactly once. -pub fn unescape_unicode(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - match mode { - Char | Byte => { - let mut chars = src.chars(); - let res = unescape_char_or_byte(&mut chars, mode); - callback(0..(src.len() - chars.as_str().len()), res); - } - Str | ByteStr => unescape_non_raw_common(src, mode, callback), - RawStr | RawByteStr => check_raw_common(src, mode, callback), - RawCStr => check_raw_common(src, mode, &mut |r, mut result| { - if let Ok('\0') = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - CStr => unreachable!(), - } -} - -/// Used for mixed utf8 string literals, i.e. those that allow both unicode -/// chars and high bytes. -pub enum MixedUnit { - /// Used for ASCII chars (written directly or via `\x00`..`\x7f` escapes) - /// and Unicode chars (written directly or via `\u` escapes). - /// - /// For example, if '¥' appears in a string it is represented here as - /// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte - /// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]` - Char(char), - - /// Used for high bytes (`\x80`..`\xff`). - /// - /// For example, if `\xa5` appears in a string it is represented here as - /// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant - /// byte string as the single byte `0xa5`. - HighByte(u8), -} - -impl From for MixedUnit { - fn from(c: char) -> Self { - MixedUnit::Char(c) - } -} - -impl From for MixedUnit { - fn from(n: u8) -> Self { - if n.is_ascii() { MixedUnit::Char(n as char) } else { MixedUnit::HighByte(n) } - } -} - -/// Takes the contents of a mixed-utf8 literal (without quotes) and produces -/// a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. -pub fn unescape_mixed(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - match mode { - CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| { - if let Ok(MixedUnit::Char('\0')) = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(), - } -} - -/// Takes a contents of a char literal (without quotes), and returns an -/// unescaped char or an error. -pub fn unescape_char(src: &str) -> Result { - unescape_char_or_byte(&mut src.chars(), Char) -} - -/// Takes a contents of a byte literal (without quotes), and returns an -/// unescaped byte or an error. -pub fn unescape_byte(src: &str) -> Result { - unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char) -} - -/// What kind of literal do we parse. -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Mode { - Char, - - Byte, - - Str, - RawStr, - - ByteStr, - RawByteStr, - - CStr, - RawCStr, -} - -impl Mode { - pub fn in_double_quotes(self) -> bool { - match self { - Str | RawStr | ByteStr | RawByteStr | CStr | RawCStr => true, - Char | Byte => false, - } - } - - /// Are `\x80`..`\xff` allowed? - fn allow_high_bytes(self) -> bool { - match self { - Char | Str => false, - Byte | ByteStr | CStr => true, - RawStr | RawByteStr | RawCStr => unreachable!(), - } - } - - /// Are unicode (non-ASCII) chars allowed? - #[inline] - fn allow_unicode_chars(self) -> bool { - match self { - Byte | ByteStr | RawByteStr => false, - Char | Str | RawStr | CStr | RawCStr => true, - } - } - - /// Are unicode escapes (`\u`) allowed? - fn allow_unicode_escapes(self) -> bool { - match self { - Byte | ByteStr => false, - Char | Str | CStr => true, - RawByteStr | RawStr | RawCStr => unreachable!(), - } - } - - pub fn prefix_noraw(self) -> &'static str { - match self { - Char | Str | RawStr => "", - Byte | ByteStr | RawByteStr => "b", - CStr | RawCStr => "c", - } - } -} - -fn scan_escape + From>( - chars: &mut Chars<'_>, - mode: Mode, -) -> Result { - // Previous character was '\\', unescape what follows. - let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? { - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '0' => '\0', - 'x' => { - // Parse hexadecimal character code. - - let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let value = (hi * 16 + lo) as u8; - - return if !mode.allow_high_bytes() && !value.is_ascii() { - Err(EscapeError::OutOfRangeHexEscape) - } else { - // This may be a high byte, but that will only happen if `T` is - // `MixedUnit`, because of the `allow_high_bytes` check above. - Ok(T::from(value)) - }; - } - 'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from), - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(T::from(res)) -} - -fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result { - // We've parsed '\u', now we have to parse '{..}'. - - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } - - // First character must be a hexadecimal digit. - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - // First character is valid, now parse the rest of the number - // and closing brace. - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - - // Incorrect syntax has higher priority for error reporting - // than unallowed value for a literal. - if !allow_unicode_escapes { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or({ - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - }); - } - Some(c) => { - let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - // Stop updating value since we're sure that it's incorrect already. - continue; - } - value = value * 16 + digit; - } - }; - } -} - -#[inline] -fn ascii_check(c: char, allow_unicode_chars: bool) -> Result { - if allow_unicode_chars || c.is_ascii() { Ok(c) } else { Err(EscapeError::NonAsciiCharInByte) } -} - -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { - let c = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = match c { - '\\' => scan_escape(chars, mode), - '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode.allow_unicode_chars()), - }?; - if chars.next().is_some() { - return Err(EscapeError::MoreThanOneChar); - } - Ok(res) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of escaped characters or errors. -fn unescape_non_raw_common + From>(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here is complicated because - // `skip_ascii_whitespace` makes us to skip over chars without counting - // them in the range computation. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\\' => { - match chars.clone().next() { - Some('\n') => { - // Rust language specification requires us to skip whitespaces - // if unescaped '\' character is followed by '\n'. - // For details see [Rust language reference] - // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, &mut |range, err| { - callback(range, Err(err)) - }); - continue; - } - _ => scan_escape::(&mut chars, mode), - } - } - '"' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, allow_unicode_chars).map(T::from), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); - } -} - -fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) -where - F: FnMut(Range, EscapeError), -{ - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, EscapeError::MultipleSkippedLinesWarning); - } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().next() { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, EscapeError::UnskippedWhitespaceWarning); - } - } - *chars = tail.chars(); -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only produce errors on bare CR. -fn check_raw_common(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here matches the one in - // `unescape_non_raw_common` for consistency, even though this function - // doesn't have to worry about skipping any chars. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\r' => Err(EscapeError::BareCarriageReturnInRawString), - _ => ascii_check(c, allow_unicode_chars), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); - } -} - -#[inline] -pub fn byte_from_char(c: char) -> u8 { - let res = c as u32; - debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr"); - res as u8 -} diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs deleted file mode 100644 index 5b99495f47581..0000000000000 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ /dev/null @@ -1,286 +0,0 @@ -use super::*; - -#[test] -fn test_unescape_char_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - assert_eq!(unescape_char(literal_text), Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - check(r"\u{0}x", EscapeError::MoreThanOneChar); - check(r"\u{1F63b}}", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - check("\\\r", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - check(r"\xff", EscapeError::OutOfRangeHexEscape); - check(r"\xFF", EscapeError::OutOfRangeHexEscape); - check(r"\x80", EscapeError::OutOfRangeHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - - check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape); - - check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape); -} - -#[test] -fn test_unescape_char_good() { - fn check(literal_text: &str, expected_char: char) { - assert_eq!(unescape_char(literal_text), Ok(expected_char)); - } - - check("a", 'a'); - check("ы", 'ы'); - check("🦀", '🦀'); - - check(r#"\""#, '"'); - check(r"\n", '\n'); - check(r"\r", '\r'); - check(r"\t", '\t'); - check(r"\\", '\\'); - check(r"\'", '\''); - check(r"\0", '\0'); - - check(r"\x00", '\0'); - check(r"\x5a", 'Z'); - check(r"\x5A", 'Z'); - check(r"\x7f", 127 as char); - - check(r"\u{0}", '\0'); - check(r"\u{000000}", '\0'); - check(r"\u{41}", 'A'); - check(r"\u{0041}", 'A'); - check(r"\u{00_41}", 'A'); - check(r"\u{4__1__}", 'A'); - check(r"\u{1F63b}", '😻'); -} - -#[test] -fn test_unescape_str_warn() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - // Check we can handle escaped newlines at the end of a file. - check("\\\n", &[]); - check("\\\n ", &[]); - - check( - "\\\n \u{a0} x", - &[ - (0..5, Err(EscapeError::UnskippedWhitespaceWarning)), - (3..5, Ok('\u{a0}')), - (5..6, Ok(' ')), - (6..7, Ok('x')), - ], - ); - check("\\\n \n x", &[(0..7, Err(EscapeError::MultipleSkippedLinesWarning)), (7..8, Ok('x'))]); -} - -#[test] -fn test_unescape_str_good() { - fn check(literal_text: &str, expected: &str) { - let mut buf = Ok(String::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::Str, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(c), - Err(e) => buf = Err((range, e)), - } - } - }); - assert_eq!(buf.as_deref(), Ok(expected)) - } - - check("foo", "foo"); - check("", ""); - check(" \t\n", " \t\n"); - - check("hello \\\n world", "hello world"); - check("thread's", "thread's") -} - -#[test] -fn test_unescape_byte_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - assert_eq!(unescape_byte(literal_text), Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - - check("ы", EscapeError::NonAsciiCharInByte); - check("🦀", EscapeError::NonAsciiCharInByte); - - check(r"\u{0}", EscapeError::UnicodeEscapeInByte); - check(r"\u{000000}", EscapeError::UnicodeEscapeInByte); - check(r"\u{41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0041}", EscapeError::UnicodeEscapeInByte); - check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0}x", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte); - check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{D800}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte); -} - -#[test] -fn test_unescape_byte_good() { - fn check(literal_text: &str, expected_byte: u8) { - assert_eq!(unescape_byte(literal_text), Ok(expected_byte)); - } - - check("a", b'a'); - - check(r#"\""#, b'"'); - check(r"\n", b'\n'); - check(r"\r", b'\r'); - check(r"\t", b'\t'); - check(r"\\", b'\\'); - check(r"\'", b'\''); - check(r"\0", b'\0'); - - check(r"\x00", b'\0'); - check(r"\x5a", b'Z'); - check(r"\x5A", b'Z'); - check(r"\x7f", 127); - check(r"\x80", 128); - check(r"\xff", 255); - check(r"\xFF", 255); -} - -#[test] -fn test_unescape_byte_str_good() { - fn check(literal_text: &str, expected: &[u8]) { - let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(byte_from_char(c)), - Err(e) => buf = Err((range, e)), - } - } - }); - assert_eq!(buf.as_deref(), Ok(expected)) - } - - check("foo", b"foo"); - check("", b""); - check(" \t\n", b" \t\n"); - - check("hello \\\n world", b"hello world"); - check("thread's", b"thread's") -} - -#[test] -fn test_unescape_raw_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]); -} - -#[test] -fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); - check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]); -} diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index c9dcab0c871dd..6504081f0b9ce 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" +rustc-literal-escaper = "0.0.2" rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } rustc_data_structures = { path = "../rustc_data_structures" } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1d17290e1c706..4935fc03256f4 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -6,8 +6,8 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; +use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, @@ -970,9 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_unicode(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) + unescape_unicode(src, mode, &mut |span, result| callback(span, result.map(drop))) }) } @@ -986,9 +984,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_mixed(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) + unescape_mixed(src, mode, &mut |span, result| callback(span, result.map(drop))) }) } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 2e066f0179c3f..ec59a1a01314e 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -4,7 +4,7 @@ use std::iter::once; use std::ops::Range; use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed}; -use rustc_lexer::unescape::{EscapeError, Mode}; +use rustc_literal_escaper::{EscapeError, Mode}; use rustc_span::{BytePos, Span}; use tracing::debug; diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 841d967d934be..9c457f150a327 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -21,7 +21,7 @@ use rustc_ast::{ }; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic}; -use rustc_lexer::unescape::unescape_char; +use rustc_literal_escaper::unescape_char; use rustc_macros::Subdiagnostic; use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error}; use rustc_session::lint::BuiltinLintDiag; diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml index 289e062fb5e2b..52f23c00d4bc0 100644 --- a/compiler/rustc_parse_format/Cargo.toml +++ b/compiler/rustc_parse_format/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start +rustc-literal-escaper = "0.0.2" rustc_lexer = { path = "../rustc_lexer" } # tidy-alphabetical-end diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 97931742985ef..c59e6cb5c33f7 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -18,7 +18,7 @@ pub use Alignment::*; pub use Count::*; pub use Position::*; -use rustc_lexer::unescape; +use rustc_literal_escaper::{Mode, unescape_unicode}; // Note: copied from rustc_span /// Range inside of a `Span` used for diagnostics when we only have access to relative positions. @@ -1094,11 +1094,9 @@ fn find_width_map_from_snippet( fn unescape_string(string: &str) -> Option { let mut buf = String::new(); let mut ok = true; - unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(_) => ok = false, - } + unescape_unicode(string, Mode::Str, &mut |_, unescaped_char| match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => ok = false, }); ok.then_some(buf) diff --git a/library/Cargo.lock b/library/Cargo.lock index 23d9d926eba3b..589d8e5fd55f7 100644 --- a/library/Cargo.lock +++ b/library/Cargo.lock @@ -235,6 +235,7 @@ name = "proc_macro" version = "0.0.0" dependencies = [ "core", + "rustc-literal-escaper", "std", ] @@ -310,6 +311,15 @@ dependencies = [ "rustc-std-workspace-core", ] +[[package]] +name = "rustc-literal-escaper" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" +dependencies = [ + "rustc-std-workspace-std", +] + [[package]] name = "rustc-std-workspace-alloc" version = "1.99.0" diff --git a/library/proc_macro/Cargo.toml b/library/proc_macro/Cargo.toml index 72cb4e4166f8e..b8bc2a3af4cd4 100644 --- a/library/proc_macro/Cargo.toml +++ b/library/proc_macro/Cargo.toml @@ -9,3 +9,4 @@ std = { path = "../std" } # `core` when resolving doc links. Without this line a different `core` will be # loaded from sysroot causing duplicate lang items and other similar errors. core = { path = "../core" } +rustc-literal-escaper = { version = "0.0.2", features = ["rustc-dep-of-std"] } diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index d9141eab5919f..f1cf0c5a2db7c 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -27,6 +27,7 @@ #![feature(panic_can_unwind)] #![feature(restricted_std)] #![feature(rustc_attrs)] +#![feature(stmt_expr_attributes)] #![feature(extend_one)] #![recursion_limit = "256"] #![allow(internal_features)] @@ -51,11 +52,24 @@ use std::{error, fmt}; #[unstable(feature = "proc_macro_diagnostic", issue = "54140")] pub use diagnostic::{Diagnostic, Level, MultiSpan}; +#[unstable(feature = "proc_macro_value", issue = "136652")] +pub use rustc_literal_escaper::EscapeError; +use rustc_literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode}; #[unstable(feature = "proc_macro_totokens", issue = "130977")] pub use to_tokens::ToTokens; use crate::escape::{EscapeOptions, escape_bytes}; +/// Errors returned when trying to retrieve a literal unescaped value. +#[unstable(feature = "proc_macro_value", issue = "136652")] +#[derive(Debug, PartialEq, Eq)] +pub enum ConversionErrorKind { + /// The literal failed to be escaped, take a look at [`EscapeError`] for more information. + FailedToUnescape(EscapeError), + /// Trying to convert a literal with the wrong type. + InvalidLiteralKind, +} + /// Determines whether proc_macro has been made accessible to the currently /// running program. /// @@ -1451,6 +1465,107 @@ impl Literal { } }) } + + /// Returns the unescaped string value if the current literal is a string or a string literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn str_value(&self) -> Result { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::Str => { + if symbol.contains('\\') { + let mut buf = String::with_capacity(symbol.len()); + let mut error = None; + // Force-inlining here is aggressive but the closure is + // called on every char in the string, so it can be hot in + // programs with many long strings containing escapes. + unescape_unicode( + symbol, + Mode::Str, + &mut #[inline(always)] + |_, c| match c { + Ok(c) => buf.push(c), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }, + ); + if let Some(error) = error { Err(error) } else { Ok(buf) } + } else { + Ok(symbol.to_string()) + } + } + bridge::LitKind::StrRaw(_) => Ok(symbol.to_string()), + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } + + /// Returns the unescaped string value if the current literal is a c-string or a c-string + /// literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn cstr_value(&self) -> Result, ConversionErrorKind> { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::CStr => { + let mut error = None; + let mut buf = Vec::with_capacity(symbol.len()); + + unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c { + Ok(MixedUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Ok(MixedUnit::HighByte(b)) => buf.push(b), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }); + if let Some(error) = error { + Err(error) + } else { + buf.push(0); + Ok(buf) + } + } + bridge::LitKind::CStrRaw(_) => { + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc` after appending the terminating NUL + // char. + let mut buf = symbol.to_owned().into_bytes(); + buf.push(0); + Ok(buf) + } + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } + + /// Returns the unescaped string value if the current literal is a byte string or a byte string + /// literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn byte_str_value(&self) -> Result, ConversionErrorKind> { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::ByteStr => { + let mut buf = Vec::with_capacity(symbol.len()); + let mut error = None; + + unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }); + if let Some(error) = error { Err(error) } else { Ok(buf) } + } + bridge::LitKind::ByteStrRaw(_) => { + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc`. + Ok(symbol.to_owned().into_bytes()) + } + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } } /// Parse a single literal from its stringified representation. diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index 843d474f92de8..7c60e0421421c 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -748,7 +748,7 @@ impl Build { features.push("llvm"); } // keep in sync with `bootstrap/compile.rs:rustc_cargo_env` - if self.config.rust_randomize_layout { + if self.config.rust_randomize_layout && check("rustc_randomized_layouts") { features.push("rustc_randomized_layouts"); } diff --git a/src/tools/rust-analyzer/Cargo.lock b/src/tools/rust-analyzer/Cargo.lock index 1e1d68f778247..745a8097c8a50 100644 --- a/src/tools/rust-analyzer/Cargo.lock +++ b/src/tools/rust-analyzer/Cargo.lock @@ -1270,6 +1270,7 @@ dependencies = [ "edition", "expect-test", "ra-ap-rustc_lexer", + "rustc-literal-escaper", "stdx", "tracing", ] @@ -1743,6 +1744,12 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +[[package]] +name = "rustc-literal-escaper" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" + [[package]] name = "rustc-stable-hash" version = "0.1.1" @@ -1978,10 +1985,10 @@ dependencies = [ "indexmap", "itertools", "parser", - "ra-ap-rustc_lexer", "rayon", "rowan", "rustc-hash 2.0.0", + "rustc-literal-escaper", "rustc_apfloat", "smol_str", "stdx", diff --git a/src/tools/rust-analyzer/Cargo.toml b/src/tools/rust-analyzer/Cargo.toml index ce2d66000e396..e22191397655d 100644 --- a/src/tools/rust-analyzer/Cargo.toml +++ b/src/tools/rust-analyzer/Cargo.toml @@ -136,6 +136,7 @@ pulldown-cmark-to-cmark = "10.0.4" pulldown-cmark = { version = "0.9.0", default-features = false } rayon = "1.8.0" rustc-hash = "2.0.0" +rustc-literal-escaper = "0.0.2" semver = "1.0.14" serde = { version = "1.0.192" } serde_derive = { version = "1.0.192" } diff --git a/src/tools/rust-analyzer/crates/parser/Cargo.toml b/src/tools/rust-analyzer/crates/parser/Cargo.toml index a36a39dbee6ce..114a66add63bd 100644 --- a/src/tools/rust-analyzer/crates/parser/Cargo.toml +++ b/src/tools/rust-analyzer/crates/parser/Cargo.toml @@ -14,6 +14,7 @@ rust-version.workspace = true [dependencies] drop_bomb = "0.1.5" ra-ap-rustc_lexer.workspace = true +rustc-literal-escaper.workspace = true tracing = { workspace = true, optional = true } edition.workspace = true diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs index c97596d5097ec..b0bbc2fa5ff1f 100644 --- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -10,7 +10,7 @@ use std::ops; -use rustc_lexer::unescape::{EscapeError, Mode}; +use rustc_literal_escaper::{EscapeError, Mode, unescape_byte, unescape_char, unescape_mixed, unescape_unicode}; use crate::{ Edition, @@ -282,7 +282,7 @@ impl<'a> Converter<'a> { let text = &self.res.text[self.offset + 1..][..len - 1]; let i = text.rfind('\'').unwrap(); let text = &text[..i]; - if let Err(e) = rustc_lexer::unescape::unescape_char(text) { + if let Err(e) = unescape_char(text) { err = error_to_diagnostic_message(e, Mode::Char); } } @@ -295,7 +295,7 @@ impl<'a> Converter<'a> { let text = &self.res.text[self.offset + 2..][..len - 2]; let i = text.rfind('\'').unwrap(); let text = &text[..i]; - if let Err(e) = rustc_lexer::unescape::unescape_byte(text) { + if let Err(e) = unescape_byte(text) { err = error_to_diagnostic_message(e, Mode::Byte); } } @@ -402,14 +402,14 @@ fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str { let mut error_message = ""; match mode { Mode::CStr => { - rustc_lexer::unescape::unescape_mixed(text, mode, &mut |_, res| { + unescape_mixed(text, mode, &mut |_, res| { if let Err(e) = res { error_message = error_to_diagnostic_message(e, mode); } }); } Mode::ByteStr | Mode::Str => { - rustc_lexer::unescape::unescape_unicode(text, mode, &mut |_, res| { + unescape_unicode(text, mode, &mut |_, res| { if let Err(e) = res { error_message = error_to_diagnostic_message(e, mode); } diff --git a/src/tools/rust-analyzer/crates/syntax/Cargo.toml b/src/tools/rust-analyzer/crates/syntax/Cargo.toml index 3fe6e01dc3c92..6b356398204e1 100644 --- a/src/tools/rust-analyzer/crates/syntax/Cargo.toml +++ b/src/tools/rust-analyzer/crates/syntax/Cargo.toml @@ -17,13 +17,12 @@ either.workspace = true itertools.workspace = true rowan = "=0.15.15" rustc-hash.workspace = true +rustc-literal-escaper.workspace = true indexmap.workspace = true smol_str.workspace = true triomphe.workspace = true tracing.workspace = true -ra-ap-rustc_lexer.workspace = true - parser.workspace = true stdx.workspace = true diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs index df851ab5b2525..08bffb9e3aad3 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs @@ -2,7 +2,7 @@ use std::{borrow::Cow, num::ParseIntError}; -use rustc_lexer::unescape::{ +use rustc_literal_escaper::{ unescape_byte, unescape_char, unescape_mixed, unescape_unicode, EscapeError, MixedUnit, Mode, }; use stdx::always; diff --git a/src/tools/rust-analyzer/crates/syntax/src/lib.rs b/src/tools/rust-analyzer/crates/syntax/src/lib.rs index c9e9f468dca74..21f1ea5f913a7 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/lib.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/lib.rs @@ -19,13 +19,6 @@ //! [RFC]: //! [Swift]: -#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))] - -#[cfg(not(feature = "in-rust-tree"))] -extern crate ra_ap_rustc_lexer as rustc_lexer; -#[cfg(feature = "in-rust-tree")] -extern crate rustc_lexer; - mod parsing; mod ptr; mod syntax_error; @@ -64,7 +57,7 @@ pub use rowan::{ api::Preorder, Direction, GreenNode, NodeOrToken, SyntaxText, TextRange, TextSize, TokenAtOffset, WalkEvent, }; -pub use rustc_lexer::unescape; +pub use rustc_literal_escaper as unescape; pub use smol_str::{format_smolstr, SmolStr, SmolStrBuilder, ToSmolStr}; /// `Parse` is the result of the parsing: a syntax tree and a collection of diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index 85eefac734b20..71c5f9a946db0 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -5,7 +5,7 @@ mod block; use rowan::Direction; -use rustc_lexer::unescape::{self, unescape_mixed, unescape_unicode, Mode}; +use rustc_literal_escaper::{unescape_mixed, unescape_unicode, EscapeError, Mode}; use crate::{ algo, @@ -44,8 +44,8 @@ pub(crate) fn validate(root: &SyntaxNode, errors: &mut Vec) { } } -fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, bool) { - use unescape::EscapeError as EE; +fn rustc_unescape_error_to_string(err: EscapeError) -> (&'static str, bool) { + use rustc_literal_escaper::EscapeError as EE; #[rustfmt::skip] let err_message = match err { @@ -127,7 +127,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec) { let text = token.text(); // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366199205) - let mut push_err = |prefix_len, off, err: unescape::EscapeError| { + let mut push_err = |prefix_len, off, err: EscapeError| { let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap(); let (message, is_err) = rustc_unescape_error_to_string(err); // FIXME: Emit lexer warnings diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 8f761d349cca5..682ab4875a126 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -361,6 +361,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "regex-syntax", "rustc-demangle", "rustc-hash", + "rustc-literal-escaper", "rustc-rayon", "rustc-rayon-core", "rustc-stable-hash", @@ -486,6 +487,7 @@ const PERMITTED_STDLIB_DEPENDENCIES: &[&str] = &[ "rand_core", "rand_xorshift", "rustc-demangle", + "rustc-literal-escaper", "shlex", "syn", "unicode-ident",