Skip to content

Commit a689847

Browse files
committed
Reserve guarded string literals (RFC 3593)
1 parent 58420a0 commit a689847

23 files changed

+1515
-9
lines changed

compiler/rustc_lexer/src/lib.rs

+85-8
Original file line numberDiff line numberDiff line change
@@ -191,30 +191,46 @@ pub enum DocStyle {
191191
/// `rustc_ast::ast::LitKind`).
192192
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
193193
pub enum LiteralKind {
194-
/// "12_u8", "0o100", "0b120i99", "1f32".
194+
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
195195
Int { base: Base, empty_int: bool },
196-
/// "12.34f32", "1e3", but not "1f32".
196+
/// `12.34f32`, `1e3`, but not `1f32`.
197197
Float { base: Base, empty_exponent: bool },
198-
/// "'a'", "'\\'", "'''", "';"
198+
/// `'a'`, `'\\'`, `'''`, `';`
199199
Char { terminated: bool },
200-
/// "b'a'", "b'\\'", "b'''", "b';"
200+
/// `b'a'`, `b'\\'`, `b'''`, `b';`
201201
Byte { terminated: bool },
202-
/// ""abc"", ""abc"
202+
/// `"abc"`, `"abc`
203203
Str { terminated: bool },
204-
/// "b"abc"", "b"abc"
204+
/// `b"abc"`, `b"abc`
205205
ByteStr { terminated: bool },
206206
/// `c"abc"`, `c"abc`
207207
CStr { terminated: bool },
208-
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
208+
/// Guarded string literal prefix: `#"` or `##`.
209+
///
210+
/// Used for reserving "guarded strings" (RFC 3593) in edition 2024.
211+
/// Split into the component tokens on older editions.
212+
GuardedStrPrefix,
213+
/// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
209214
/// an invalid literal.
210215
RawStr { n_hashes: Option<u8> },
211-
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
216+
/// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
212217
/// indicates an invalid literal.
213218
RawByteStr { n_hashes: Option<u8> },
214219
/// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
215220
RawCStr { n_hashes: Option<u8> },
216221
}
217222

223+
/// `#"abc"#`, `##"a"` (fewer closing), or even `#"a` (unterminated).
224+
///
225+
/// Can capture fewer closing hashes than starting hashes,
226+
/// for more efficient lexing and better backwards diagnostics.
227+
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
228+
pub struct GuardedStr {
229+
pub n_hashes: u32,
230+
pub terminated: bool,
231+
pub token_len: u32,
232+
}
233+
218234
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
219235
pub enum RawStrError {
220236
/// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
@@ -403,6 +419,13 @@ impl Cursor<'_> {
403419
TokenKind::Literal { kind: literal_kind, suffix_start }
404420
}
405421

422+
// Guarded string literal prefix: `#"` or `##`
423+
'#' if matches!(self.first(), '"' | '#') => {
424+
self.bump();
425+
let suffix_start = self.pos_within_token();
426+
TokenKind::Literal { kind: GuardedStrPrefix, suffix_start }
427+
}
428+
406429
// One-symbol tokens.
407430
';' => Semi,
408431
',' => Comma,
@@ -780,6 +803,60 @@ impl Cursor<'_> {
780803
false
781804
}
782805

806+
/// Attempt to lex for a guarded string literal.
807+
///
808+
/// Used by `rustc_parse::lexer` to lex for guarded strings
809+
/// conditionally based on edition.
810+
///
811+
/// Note: this will not reset the `Cursor` when a
812+
/// guarded string is not found. It is the caller's
813+
/// responsibility to do so.
814+
pub fn guarded_double_quoted_string(&mut self) -> Option<GuardedStr> {
815+
debug_assert!(self.prev() != '#');
816+
817+
let mut n_start_hashes: u32 = 0;
818+
while self.first() == '#' {
819+
n_start_hashes += 1;
820+
self.bump();
821+
}
822+
823+
if self.first() != '"' {
824+
return None;
825+
}
826+
self.bump();
827+
debug_assert!(self.prev() == '"');
828+
829+
// Lex the string itself as a normal string literal
830+
// so we can recover that for older editions later.
831+
let terminated = self.double_quoted_string();
832+
if !terminated {
833+
let token_len = self.pos_within_token();
834+
self.reset_pos_within_token();
835+
836+
return Some(GuardedStr { n_hashes: n_start_hashes, terminated: false, token_len });
837+
}
838+
839+
// Consume closing '#' symbols.
840+
// Note that this will not consume extra trailing `#` characters:
841+
// `###"abcde"####` is lexed as a `GuardedStr { n_hashes: 3, .. }`
842+
// followed by a `#` token.
843+
let mut n_end_hashes = 0;
844+
while self.first() == '#' && n_end_hashes < n_start_hashes {
845+
n_end_hashes += 1;
846+
self.bump();
847+
}
848+
849+
// Reserved syntax, always an error, so it doesn't matter if
850+
// `n_start_hashes != n_end_hashes`.
851+
852+
self.eat_literal_suffix();
853+
854+
let token_len = self.pos_within_token();
855+
self.reset_pos_within_token();
856+
857+
Some(GuardedStr { n_hashes: n_start_hashes, terminated: true, token_len })
858+
}
859+
783860
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
784861
fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
785862
// Wrap the actual function to handle the error with too many hashes.

compiler/rustc_lint/messages.ftl

+3
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,9 @@ lint_reserved_prefix = prefix `{$prefix}` is unknown
746746
.label = unknown prefix
747747
.suggestion = insert whitespace here to avoid this being parsed as a prefix in Rust 2021
748748
749+
lint_reserved_string = will be parsed as a guarded string in Rust 2024
750+
.suggestion = insert whitespace here to avoid this being parsed as a guarded string in Rust 2024
751+
749752
lint_shadowed_into_iter =
750753
this method call resolves to `<&{$target} as IntoIterator>::into_iter` (due to backwards compatibility), but will resolve to `<{$target} as IntoIterator>::into_iter` in Rust {$edition}
751754
.use_iter_suggestion = use `.iter()` instead of `.into_iter()` to avoid ambiguity

compiler/rustc_lint/src/context/diagnostics.rs

+3
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ pub(super) fn decorate_lint(sess: &Session, diagnostic: BuiltinLintDiag, diag: &
176176
lints::RawPrefix { label: label_span, suggestion: label_span.shrink_to_hi() }
177177
.decorate_lint(diag);
178178
}
179+
BuiltinLintDiag::ReservedString(suggestion) => {
180+
lints::ReservedString { suggestion }.decorate_lint(diag);
181+
}
179182
BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => {
180183
lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag);
181184
}

compiler/rustc_lint/src/lints.rs

+7
Original file line numberDiff line numberDiff line change
@@ -3061,3 +3061,10 @@ pub(crate) enum MutRefSugg {
30613061
#[derive(LintDiagnostic)]
30623062
#[diag(lint_unqualified_local_imports)]
30633063
pub(crate) struct UnqualifiedLocalImportsDiag {}
3064+
3065+
#[derive(LintDiagnostic)]
3066+
#[diag(lint_reserved_string)]
3067+
pub(crate) struct ReservedString {
3068+
#[suggestion(code = " ", applicability = "machine-applicable")]
3069+
pub suggestion: Span,
3070+
}

compiler/rustc_lint_defs/src/builtin.rs

+41
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ declare_lint_pass! {
9292
RUST_2021_INCOMPATIBLE_OR_PATTERNS,
9393
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
9494
RUST_2021_PRELUDE_COLLISIONS,
95+
RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
9596
RUST_2024_INCOMPATIBLE_PAT,
9697
RUST_2024_PRELUDE_COLLISIONS,
9798
SELF_CONSTRUCTOR_FROM_OUTER_ITEM,
@@ -4998,3 +4999,43 @@ declare_lint! {
49984999
reference: "issue #124535 <https://github.com/rust-lang/rust/issues/124535>",
49995000
};
50005001
}
5002+
5003+
declare_lint! {
5004+
/// The `rust_2024_guarded_string_incompatible_syntax` lint detects `#` tokens
5005+
/// that will be parsed as part of a guarded string literal in Rust 2024.
5006+
///
5007+
/// ### Example
5008+
///
5009+
/// ```rust,edition2021,compile_fail
5010+
/// #![deny(rust_2024_guarded_string_incompatible_syntax)]
5011+
///
5012+
/// macro_rules! m {
5013+
/// (# $x:expr #) => ();
5014+
/// (# $x:expr) => ();
5015+
/// }
5016+
///
5017+
/// m!(#"hey"#);
5018+
/// m!(#"hello");
5019+
/// ```
5020+
///
5021+
/// {{produces}}
5022+
///
5023+
/// ### Explanation
5024+
///
5025+
/// Prior to Rust 2024, `#"hey"#` is three tokens: the first `#`
5026+
/// followed by the string literal `"hey"` then the final `#`.
5027+
/// In Rust 2024, the whole sequence is considered a single token.
5028+
///
5029+
/// This lint suggests adding whitespace between the leading `#`
5030+
/// and the string to keep them separated in Rust 2024.
5031+
// Allow this lint -- rustdoc doesn't yet support threading edition into this lint's parser.
5032+
#[allow(rustdoc::invalid_rust_codeblocks)]
5033+
pub RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
5034+
Allow,
5035+
"will be parsed as a guarded string in Rust 2024",
5036+
@future_incompatible = FutureIncompatibleInfo {
5037+
reason: FutureIncompatibilityReason::EditionError(Edition::Edition2024),
5038+
reference: "issue #123735 <https://github.com/rust-lang/rust/issues/123735>",
5039+
};
5040+
crate_level_only
5041+
}

compiler/rustc_lint_defs/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,8 @@ pub enum BuiltinLintDiag {
614614
ReservedPrefix(Span, String),
615615
/// `'r#` in edition < 2021.
616616
RawPrefix(Span),
617+
/// `##` or `#"` in edition < 2024.
618+
ReservedString(Span),
617619
TrailingMacro(bool, Ident),
618620
BreakWithLabelAndLoop(Span),
619621
UnicodeTextFlow(Span, String),

compiler/rustc_parse/messages.ftl

+4
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow
699699
.label = the label
700700
.suggestion = add `:` after the label
701701
702+
parse_reserved_string = invalid string literal
703+
.note = unprefixed guarded string literals are reserved for future use since Rust 2024
704+
.suggestion_whitespace = consider inserting whitespace here
705+
702706
parse_return_types_use_thin_arrow = return types are denoted using `->`
703707
.suggestion = use `->` instead
704708

compiler/rustc_parse/src/errors.rs

+18
Original file line numberDiff line numberDiff line change
@@ -2110,6 +2110,24 @@ pub(crate) enum UnknownPrefixSugg {
21102110
},
21112111
}
21122112

2113+
#[derive(Diagnostic)]
2114+
#[diag(parse_reserved_string)]
2115+
#[note]
2116+
pub(crate) struct ReservedString {
2117+
#[primary_span]
2118+
pub span: Span,
2119+
#[subdiagnostic]
2120+
pub sugg: Option<GuardedStringSugg>,
2121+
}
2122+
#[derive(Subdiagnostic)]
2123+
#[suggestion(
2124+
parse_suggestion_whitespace,
2125+
code = " ",
2126+
applicability = "maybe-incorrect",
2127+
style = "verbose"
2128+
)]
2129+
pub(crate) struct GuardedStringSugg(#[primary_span] pub Span);
2130+
21132131
#[derive(Diagnostic)]
21142132
#[diag(parse_too_many_hashes)]
21152133
pub(crate) struct TooManyHashes {

compiler/rustc_parse/src/lexer/mod.rs

+87-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ use rustc_lexer::unescape::{self, EscapeError, Mode};
1010
use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
1111
use rustc_session::lint::BuiltinLintDiag;
1212
use rustc_session::lint::builtin::{
13-
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
13+
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
14+
TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
1415
};
1516
use rustc_session::parse::ParseSess;
1617
use rustc_span::symbol::Symbol;
@@ -251,6 +252,10 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
251252
let prefix_span = self.mk_sp(start, lit_start);
252253
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
253254
}
255+
rustc_lexer::TokenKind::Literal {
256+
kind: rustc_lexer::LiteralKind::GuardedStrPrefix,
257+
..
258+
} => self.maybe_report_guarded_str(start, str_before),
254259
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
255260
let suffix_start = start + BytePos(suffix_start);
256261
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
@@ -602,6 +607,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
602607
}
603608
(kind, self.symbol_from_to(start, end))
604609
}
610+
rustc_lexer::LiteralKind::GuardedStrPrefix => unreachable!(),
605611
}
606612
}
607613

@@ -781,6 +787,86 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
781787
}
782788
}
783789

790+
/// Detect guarded string literal syntax
791+
///
792+
/// RFC 3593 reserved this syntax for future use. As of Rust 2024,
793+
/// using this syntax produces an error. In earlier editions, however, it
794+
/// only results in an (allowed by default) lint, and is treated as
795+
/// separate tokens.
796+
fn maybe_report_guarded_str(&mut self, start: BytePos, str_before: &'src str) -> TokenKind {
797+
let span = self.mk_sp(start, self.pos);
798+
let edition2024 = span.edition().at_least_rust_2024();
799+
800+
let space_pos = start + BytePos(1);
801+
let space_span = self.mk_sp(space_pos, space_pos);
802+
803+
let mut cursor = Cursor::new(str_before);
804+
805+
let (span, unterminated) = match cursor.guarded_double_quoted_string() {
806+
Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
807+
let end = start + BytePos(token_len);
808+
let span = self.mk_sp(start, end);
809+
let str_start = start + BytePos(n_hashes);
810+
811+
if edition2024 {
812+
self.cursor = cursor;
813+
self.pos = end;
814+
}
815+
816+
let unterminated = if terminated { None } else { Some(str_start) };
817+
818+
(span, unterminated)
819+
}
820+
_ => {
821+
// We should only get here in the `##+` case.
822+
debug_assert_eq!(self.str_from_to(start, start + BytePos(2)), "##");
823+
824+
(span, None)
825+
}
826+
};
827+
if edition2024 {
828+
if let Some(str_start) = unterminated {
829+
// Only a fatal error if string is unterminated.
830+
self.dcx()
831+
.struct_span_fatal(
832+
self.mk_sp(str_start, self.pos),
833+
"unterminated double quote string",
834+
)
835+
.with_code(E0765)
836+
.emit()
837+
}
838+
839+
let sugg = if span.from_expansion() {
840+
None
841+
} else {
842+
Some(errors::GuardedStringSugg(space_span))
843+
};
844+
845+
// In Edition 2024 and later, emit a hard error.
846+
let err = self.dcx().emit_err(errors::ReservedString { span, sugg });
847+
848+
token::Literal(token::Lit {
849+
kind: token::Err(err),
850+
symbol: self.symbol_from_to(start, self.pos),
851+
suffix: None,
852+
})
853+
} else {
854+
// Before Rust 2024, only emit a lint for migration.
855+
self.psess.buffer_lint(
856+
RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
857+
span,
858+
ast::CRATE_NODE_ID,
859+
BuiltinLintDiag::ReservedString(space_span),
860+
);
861+
862+
// For backwards compatibility, roll back to after just the first `#`
863+
// and return the `Pound` token.
864+
self.pos = start + BytePos(1);
865+
self.cursor = Cursor::new(&str_before[1..]);
866+
token::Pound
867+
}
868+
}
869+
784870
fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
785871
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
786872
}

src/librustdoc/html/highlight.rs

+1
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,7 @@ impl<'src> Classifier<'src> {
844844
| LiteralKind::RawCStr { .. } => Class::String,
845845
// Number literals.
846846
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
847+
LiteralKind::GuardedStrPrefix => return no_highlight(sink),
847848
},
848849
TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
849850
self.in_macro = true;

src/tools/rust-analyzer/crates/parser/src/lexed_str.rs

+3
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,9 @@ impl<'a> Converter<'a> {
187187
}
188188

189189
rustc_lexer::TokenKind::RawIdent => IDENT,
190+
191+
rustc_lexer::TokenKind::Literal { kind: GuardedStrPrefix, .. } => ERROR,
192+
190193
rustc_lexer::TokenKind::Literal { kind, .. } => {
191194
self.extend_literal(token_text.len(), kind);
192195
return;

0 commit comments

Comments
 (0)