Skip to content

Commit e6af4d3

Browse files
committed
Mark grammar roots inline in productions
We check that the list of grammar "roots" -- that is, productions that are not used in any other production -- is what we expect it to be. We had hard-coded this list of roots in `mdbook-spec`. Let's instead add a way to specify this in our syntax for productions by prefixing the production with `@root`.
1 parent c8a3451 commit e6af4d3

File tree

8 files changed

+35
-33
lines changed

8 files changed

+35
-33
lines changed

Diff for: docs/grammar.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ The category is used to group similar productions on the grammar summary page in
1616

1717
The syntax for the grammar itself is pretty close to what is described in the [Notation chapter](../src/notation.md), though there are some rendering differences.
1818

19+
A "root" production, marked with `@root`, is one that is not used in any other production.
20+
1921
The syntax for the grammar itself (written in itself, hopefully that's not too confusing) is:
2022

2123
```
@@ -25,7 +27,7 @@ BACKTICK -> U+0060
2527
2628
LF -> U+000A
2729
28-
Production -> Name ` ->` Expression
30+
Production -> `@root`? Name ` ->` Expression
2931
3032
Name -> <Alphanumeric or `_`>+
3133

Diff for: mdbook-spec/src/grammar.rs

+14-21
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub struct Production {
2828
expression: Expression,
2929
/// The path to the chapter where this is defined.
3030
path: PathBuf,
31+
is_root: bool,
3132
}
3233

3334
#[derive(Debug)]
@@ -139,7 +140,7 @@ impl Expression {
139140
static GRAMMAR_RE: LazyLock<Regex> =
140141
LazyLock::new(|| Regex::new(r"(?ms)^```grammar,([^\n]+)\n(.*?)^```").unwrap());
141142
static NAMES_RE: LazyLock<Regex> =
142-
LazyLock::new(|| Regex::new(r"(?m)^([A-Za-z0-9_]+)(?: \([^)]+\))? ->").unwrap());
143+
LazyLock::new(|| Regex::new(r"(?m)^(?:@root )?([A-Za-z0-9_]+)(?: \([^)]+\))? ->").unwrap());
143144

144145
/// Loads the [`Grammar`] from the book.
145146
pub fn load_grammar(book: &Book, diag: &mut Diagnostics) -> Grammar {
@@ -183,38 +184,30 @@ fn check_unexpected_roots(grammar: &Grammar, diag: &mut Diagnostics) {
183184
grammar.visit_nt(&mut |nt| {
184185
set.remove(nt);
185186
});
186-
// TODO: We may want to rethink how some of these are structured.
187-
let expected: HashSet<_> = [
188-
"CfgAttrAttribute",
189-
"CfgAttribute",
190-
"CHAR",
191-
"Crate",
192-
"INNER_LINE_DOC",
193-
"LINE_COMMENT",
194-
"MetaListIdents",
195-
"MetaListNameValueStr",
196-
"MetaListPaths",
197-
"MetaWord",
198-
"OUTER_LINE_DOC",
199-
]
200-
.into_iter()
201-
.collect();
187+
let expected: HashSet<_> = grammar
188+
.productions
189+
.values()
190+
.filter_map(|p| p.is_root.then(|| p.name.as_str()))
191+
.collect();
202192
if set != expected {
203193
let new: Vec<_> = set.difference(&expected).collect();
204194
let removed: Vec<_> = expected.difference(&set).collect();
205195
if !new.is_empty() {
206196
warn_or_err!(
207197
diag,
208-
"New grammar production detected that is not used in any other production.\n\
209-
If this is expected, add it to the `check_unexpected_roots` function.\n\
210-
If not, make sure it is spelled correctly and used in another production.\n\
198+
"New grammar production detected that is not used in any other\n\
199+
production. If this is expected, mark the production with\n\
200+
`@root`. If not, make sure it is spelled correctly and used in\n\
201+
another production.\n\
202+
\n\
211203
The new names are: {new:?}\n"
212204
);
213205
} else if !removed.is_empty() {
214206
warn_or_err!(
215207
diag,
216208
"Old grammar production root seems to have been removed.\n\
217-
If this is expected, remove it from the `check_unexpected_roots` function.\n\
209+
If this is expected, remove `@root` from the production.\n\
210+
\n\
218211
The removed names are: {removed:?}\n"
219212
);
220213
} else {

Diff for: mdbook-spec/src/grammar/parser.rs

+7
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ impl Parser<'_> {
136136
}
137137

138138
fn parse_production(&mut self, category: &str, path: &Path) -> Result<Production> {
139+
let is_root = self.parse_is_root();
140+
self.space0();
139141
let name = self
140142
.parse_name()
141143
.ok_or_else(|| self.error("expected production name".to_string()))?;
@@ -148,9 +150,14 @@ impl Parser<'_> {
148150
category: category.to_string(),
149151
expression,
150152
path: path.to_owned(),
153+
is_root,
151154
})
152155
}
153156

157+
fn parse_is_root(&mut self) -> bool {
158+
self.take_str("@root")
159+
}
160+
154161
fn parse_name(&mut self) -> Option<String> {
155162
let name = self.take_while(&|c: char| c.is_alphanumeric() || c == '_');
156163
if name.is_empty() {

Diff for: src/attributes.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -164,19 +164,19 @@ forms:
164164

165165
r[attributes.meta.builtin.syntax]
166166
```grammar,attributes
167-
MetaWord ->
167+
@root MetaWord ->
168168
IDENTIFIER
169169
170170
MetaNameValueStr ->
171171
IDENTIFIER `=` (STRING_LITERAL | RAW_STRING_LITERAL)
172172
173-
MetaListPaths ->
173+
@root MetaListPaths ->
174174
IDENTIFIER `(` ( SimplePath (`,` SimplePath)* `,`? )? `)`
175175
176-
MetaListIdents ->
176+
@root MetaListIdents ->
177177
IDENTIFIER `(` ( IDENTIFIER (`,` IDENTIFIER)* `,`? )? `)`
178178
179-
MetaListNameValueStr ->
179+
@root MetaListNameValueStr ->
180180
IDENTIFIER `(` ( MetaNameValueStr (`,` MetaNameValueStr)* `,`? )? `)`
181181
```
182182

Diff for: src/comments.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ r[comments]
33

44
r[comments.syntax]
55
```grammar,lexer
6-
LINE_COMMENT ->
6+
@root LINE_COMMENT ->
77
`//` (~[`/` `!` LF] | `//`) ~LF*
88
| `//`
99
@@ -15,13 +15,13 @@ BLOCK_COMMENT ->
1515
| `/**/`
1616
| `/***/`
1717
18-
INNER_LINE_DOC ->
18+
@root INNER_LINE_DOC ->
1919
`//!` ~[LF CR]*
2020
2121
INNER_BLOCK_DOC ->
2222
`/*!` ( BlockCommentOrDoc | ~[`*/` CR] )* `*/`
2323
24-
OUTER_LINE_DOC ->
24+
@root OUTER_LINE_DOC ->
2525
`///` (~`/` ~[LF CR]*)?
2626
2727
OUTER_BLOCK_DOC ->

Diff for: src/conditional-compilation.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ r[cfg.attr]
315315

316316
r[cfg.attr.syntax]
317317
```grammar,configuration
318-
CfgAttribute -> `cfg` `(` ConfigurationPredicate `)`
318+
@root CfgAttribute -> `cfg` `(` ConfigurationPredicate `)`
319319
```
320320

321321
<!-- should we say they're active attributes here? -->
@@ -382,7 +382,7 @@ r[cfg.cfg_attr]
382382

383383
r[cfg.cfg_attr.syntax]
384384
```grammar,configuration
385-
CfgAttrAttribute -> `cfg_attr` `(` ConfigurationPredicate `,` CfgAttrs? `)`
385+
@root CfgAttrAttribute -> `cfg_attr` `(` ConfigurationPredicate `,` CfgAttrs? `)`
386386
387387
CfgAttrs -> Attr (`,` Attr)* `,`?
388388
```

Diff for: src/crates-and-source-files.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ r[crate]
33

44
r[crate.syntax]
55
```grammar,items
6-
Crate ->
6+
@root Crate ->
77
InnerAttribute*
88
Item*
99
```

Diff for: src/notation.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ The following are common definitions used in the grammar.
5252

5353
r[input.syntax]
5454
```grammar,lexer
55-
CHAR -> <a Unicode scalar value>
55+
@root CHAR -> <a Unicode scalar value>
5656
5757
NUL -> U+0000
5858

0 commit comments

Comments
 (0)