-
Notifications
You must be signed in to change notification settings - Fork 13.3k
Implement RFC 3503: frontmatters #140035
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Implement RFC 3503: frontmatters #140035
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -502,6 +502,8 @@ declare_features! ( | |
(incomplete, fn_delegation, "1.76.0", Some(118212)), | ||
/// Allows impls for the Freeze trait. | ||
(internal, freeze_impls, "1.78.0", Some(121675)), | ||
/// Frontmatter `---` blocks for use by external tools. | ||
(unstable, frontmatter, "CURRENT_RUSTC_VERSION", Some(136889)), | ||
Comment on lines
+505
to
+506
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Discussion (meta, not necessarily for this PR): hm, frontmatter is going to be interesting when it comes to cooking it sufficiently for stabilization. Like |
||
/// Allows defining gen blocks and `gen fn`. | ||
(unstable, gen_blocks, "1.75.0", Some(117078)), | ||
/// Infer generic args for both consts and types. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,17 +57,27 @@ impl Token { | |
#[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||
pub enum TokenKind { | ||
/// A line comment, e.g. `// comment`. | ||
LineComment { doc_style: Option<DocStyle> }, | ||
LineComment { | ||
doc_style: Option<DocStyle>, | ||
}, | ||
|
||
/// A block comment, e.g. `/* block comment */`. | ||
/// | ||
/// Block comments can be recursive, so a sequence like `/* /* */` | ||
/// will not be considered terminated and will result in a parsing error. | ||
BlockComment { doc_style: Option<DocStyle>, terminated: bool }, | ||
BlockComment { | ||
doc_style: Option<DocStyle>, | ||
terminated: bool, | ||
}, | ||
|
||
/// Any whitespace character sequence. | ||
Whitespace, | ||
|
||
Frontmatter { | ||
has_invalid_preceding_whitespace: bool, | ||
invalid_infostring: bool, | ||
}, | ||
|
||
/// An identifier or keyword, e.g. `ident` or `continue`. | ||
Ident, | ||
|
||
|
@@ -109,10 +119,15 @@ pub enum TokenKind { | |
/// this type will need to check for and reject that case. | ||
/// | ||
/// See [LiteralKind] for more details. | ||
Literal { kind: LiteralKind, suffix_start: u32 }, | ||
Literal { | ||
kind: LiteralKind, | ||
suffix_start: u32, | ||
}, | ||
|
||
/// A lifetime, e.g. `'a`. | ||
Lifetime { starts_with_number: bool }, | ||
Lifetime { | ||
starts_with_number: bool, | ||
}, | ||
|
||
/// `;` | ||
Semi, | ||
|
@@ -280,7 +295,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> { | |
#[inline] | ||
pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> { | ||
debug_assert!(!input.is_empty()); | ||
let mut cursor = Cursor::new(input); | ||
let mut cursor = Cursor::new(input, false); | ||
Comment on lines
-283
to
+298
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit (readability): I find the bare boolean argument somewhat hard to understand at call sites. Maybe introduce an enum like |
||
// Move past the leading `r` or `br`. | ||
for _ in 0..prefix_len { | ||
cursor.bump().unwrap(); | ||
|
@@ -290,7 +305,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> | |
|
||
/// Creates an iterator that produces tokens from the input string. | ||
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> { | ||
let mut cursor = Cursor::new(input); | ||
let mut cursor = Cursor::new(input, false); | ||
std::iter::from_fn(move || { | ||
let token = cursor.advance_token(); | ||
if token.kind != TokenKind::Eof { Some(token) } else { None } | ||
|
@@ -361,7 +376,30 @@ impl Cursor<'_> { | |
Some(c) => c, | ||
None => return Token::new(TokenKind::Eof, 0), | ||
}; | ||
|
||
let token_kind = match first_char { | ||
c if self.frontmatter_allowed && is_whitespace(c) => { | ||
let mut last = first_char; | ||
while is_whitespace(self.first()) { | ||
let Some(c) = self.bump() else { | ||
break; | ||
}; | ||
last = c; | ||
} | ||
// invalid frontmatter opening as whitespace preceding it isn't newline. | ||
// combine the whitespace and the frontmatter to a single token as we shall | ||
// error later. | ||
if last != '\n' && self.as_str().starts_with("---") { | ||
self.bump(); | ||
self.frontmatter(true) | ||
} else { | ||
Whitespace | ||
} | ||
} | ||
'-' if self.frontmatter_allowed && self.as_str().starts_with("--") => { | ||
// happy path | ||
self.frontmatter(false) | ||
} | ||
// Slash, comment or block comment. | ||
'/' => match self.first() { | ||
'/' => self.line_comment(), | ||
|
@@ -464,11 +502,82 @@ impl Cursor<'_> { | |
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(), | ||
_ => Unknown, | ||
}; | ||
if self.frontmatter_allowed { | ||
self.frontmatter_allowed = matches!(token_kind, Whitespace); | ||
} | ||
let res = Token::new(token_kind, self.pos_within_token()); | ||
self.reset_pos_within_token(); | ||
res | ||
} | ||
|
||
/// Given that one `-` was eaten, eat the rest of the frontmatter. | ||
fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind { | ||
debug_assert_eq!('-', self.prev()); | ||
|
||
let pos = self.pos_within_token(); | ||
self.eat_while(|c| c == '-'); | ||
|
||
// one `-` is eaten by the caller. | ||
let length_opening = self.pos_within_token() - pos + 1; | ||
|
||
// must be ensured by the caller | ||
debug_assert!(length_opening >= 3); | ||
|
||
self.eat_identifier(); | ||
self.eat_while(|ch| ch != '\n' && is_whitespace(ch)); | ||
let invalid_infostring = self.first() != '\n'; | ||
|
||
let mut s = self.as_str(); | ||
let mut found = false; | ||
while let Some(closing) = s.find(&"-".repeat(length_opening as usize)) { | ||
let preceding_chars_start = s[..closing].rfind("\n").map_or(0, |i| i + 1); | ||
if s[preceding_chars_start..closing].chars().all(is_whitespace) { | ||
// candidate found | ||
self.bump_bytes(closing); | ||
// in case like | ||
// ---cargo | ||
// --- blahblah | ||
// or | ||
// ---cargo | ||
// ---- | ||
// combine those stuff into this frontmatter token such that it gets detected later. | ||
self.eat_until(b'\n'); | ||
found = true; | ||
break; | ||
} else { | ||
s = &s[closing + length_opening as usize..]; | ||
} | ||
} | ||
|
||
if !found { | ||
// recovery strategy: a closing statement might have precending whitespace/newline | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks reasonable! If we don't find a |
||
// but not have enough dashes to properly close. In this case, we eat until there, | ||
// and report a mismatch in the parser. | ||
let mut potential_closing = None; | ||
let mut rest = self.as_str(); | ||
while let Some(closing) = rest.find("---") { | ||
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1); | ||
if rest[preceding_chars_start..closing].chars().all(is_whitespace) { | ||
// candidate found | ||
potential_closing = Some(closing); | ||
break; | ||
} else { | ||
rest = &rest[closing + 3..]; | ||
} | ||
} | ||
if let Some(potential_closing) = potential_closing { | ||
// bump to the potential closing, and eat everything on that line. | ||
self.bump_bytes(potential_closing); | ||
self.eat_until(b'\n'); | ||
} else { | ||
// eat everything. this will get reported as an unclosed frontmatter. | ||
self.eat_while(|_| true); | ||
} | ||
} | ||
|
||
Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } | ||
} | ||
|
||
fn line_comment(&mut self) -> TokenKind { | ||
debug_assert!(self.prev() == '/' && self.first() == '/'); | ||
self.bump(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Discussion: (stealing this to force an inline comment, not about this line of code itself)
Some positive/negative test coverage that you may or may not want to consider (not necessarily blocking for this initial PR, but possibly blockers prior to stabilization):
- `include!()`?
- `file!()` / `line!()`?
- `-Zunpretty`, as mentioned by @fmease in Add unstable frontmatter support #137193 (comment)?
- `//@ run-rustfix`.