Skip to content

Commit d95d304

Browse files
committed
Auto merge of #78429 - casey:doctest-attribute-splitting, r=jyn514
[librustdoc] Only split lang string on `,`, ` `, and `\t` Split markdown lang strings into tokens on `,`, ` `, and `\t`. The previous behavior was to split lang strings into tokens on any character that wasn't a `_`, `-`, or alphanumeric. This is a potentially breaking change, so please scrutinize! See discussion in #78344. I noticed some test cases that made me wonder if there might have been some reason for the original behavior: ``` t("{.no_run .example}", false, true, Ignore::None, true, false, false, false, v(), None); t("{.sh .should_panic}", true, false, Ignore::None, false, false, false, false, v(), None); t("{.example .rust}", false, false, Ignore::None, true, false, false, false, v(), None); t("{.test_harness .rust}", false, false, Ignore::None, true, true, false, false, v(), None); ``` It seemed pretty peculiar to specifically test lang strings in braces, with all the tokens prefixed by `.`. I did some digging, and it looks like the test cases were added way back in [this commit from 2014](3fef7a74ca9a) by `@skade.` It looks like they were added just to make sure that the splitting was permissive, and aren't testing that those strings in particular are accepted. Closes #78344.
2 parents c0a54cc + 66f4883 commit d95d304

File tree

6 files changed

+84
-20
lines changed

6 files changed

+84
-20
lines changed

compiler/rustc_error_codes/src/error_codes/E0761.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Multiple candidate files were found for an out-of-line module.
22

33
Erroneous code example:
44

5-
```ignore (multiple source files required for compile_fail)
5+
```ignore (Multiple source files are required for compile_fail.)
66
// file: ambiguous_module/mod.rs
77
88
fn foo() {}

compiler/rustc_mir/src/dataflow/framework/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
//! fixpoint solution to your dataflow problem, or implement the `ResultsVisitor` interface and use
1111
//! `visit_results`. The following example uses the `ResultsCursor` approach.
1212
//!
13-
//! ```ignore(cross-crate-imports)
13+
//! ```ignore (cross-crate-imports)
1414
//! use rustc_mir::dataflow::Analysis; // Makes `into_engine` available.
1515
//!
1616
//! fn do_my_analysis(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) {
@@ -211,7 +211,7 @@ pub trait Analysis<'tcx>: AnalysisDomain<'tcx> {
211211
/// default impl and the one for all `A: GenKillAnalysis` will do the right thing.
212212
/// Its purpose is to enable method chaining like so:
213213
///
214-
/// ```ignore(cross-crate-imports)
214+
/// ```ignore (cross-crate-imports)
215215
/// let results = MyAnalysis::new(tcx, body)
216216
/// .into_engine(tcx, body, def_id)
217217
/// .iterate_to_fixpoint()

library/core/src/option.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ impl<T> Option<T> {
336336
/// assert_eq!(x.expect("fruits are healthy"), "value");
337337
/// ```
338338
///
339-
/// ```{.should_panic}
339+
/// ```should_panic
340340
/// let x: Option<&str> = None;
341341
/// x.expect("fruits are healthy"); // panics with `fruits are healthy`
342342
/// ```
@@ -372,7 +372,7 @@ impl<T> Option<T> {
372372
/// assert_eq!(x.unwrap(), "air");
373373
/// ```
374374
///
375-
/// ```{.should_panic}
375+
/// ```should_panic
376376
/// let x: Option<&str> = None;
377377
/// assert_eq!(x.unwrap(), "air"); // fails
378378
/// ```
@@ -1114,7 +1114,7 @@ impl<T: fmt::Debug> Option<T> {
11141114
/// }
11151115
/// ```
11161116
///
1117-
/// ```{.should_panic}
1117+
/// ```should_panic
11181118
/// #![feature(option_expect_none)]
11191119
///
11201120
/// use std::collections::HashMap;
@@ -1156,7 +1156,7 @@ impl<T: fmt::Debug> Option<T> {
11561156
/// }
11571157
/// ```
11581158
///
1159-
/// ```{.should_panic}
1159+
/// ```should_panic
11601160
/// #![feature(option_unwrap_none)]
11611161
///
11621162
/// use std::collections::HashMap;

library/core/src/result.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@
112112
//! assert success with [`expect`]. This will panic if the
113113
//! write fails, providing a marginally useful message indicating why:
114114
//!
115-
//! ```{.no_run}
115+
//! ```no_run
116116
//! use std::fs::File;
117117
//! use std::io::prelude::*;
118118
//!
@@ -122,7 +122,7 @@
122122
//!
123123
//! You might also simply assert success:
124124
//!
125-
//! ```{.no_run}
125+
//! ```no_run
126126
//! # use std::fs::File;
127127
//! # use std::io::prelude::*;
128128
//! # let mut file = File::create("valuable_data.txt").unwrap();
@@ -984,7 +984,7 @@ impl<T, E: fmt::Debug> Result<T, E> {
984984
///
985985
/// Basic usage:
986986
///
987-
/// ```{.should_panic}
987+
/// ```should_panic
988988
/// let x: Result<u32, &str> = Err("emergency failure");
989989
/// x.expect("Testing expect"); // panics with `Testing expect: emergency failure`
990990
/// ```
@@ -1024,7 +1024,7 @@ impl<T, E: fmt::Debug> Result<T, E> {
10241024
/// assert_eq!(x.unwrap(), 2);
10251025
/// ```
10261026
///
1027-
/// ```{.should_panic}
1027+
/// ```should_panic
10281028
/// let x: Result<u32, &str> = Err("emergency failure");
10291029
/// x.unwrap(); // panics with `emergency failure`
10301030
/// ```
@@ -1052,7 +1052,7 @@ impl<T: fmt::Debug, E> Result<T, E> {
10521052
///
10531053
/// Basic usage:
10541054
///
1055-
/// ```{.should_panic}
1055+
/// ```should_panic
10561056
/// let x: Result<u32, &str> = Ok(10);
10571057
/// x.expect_err("Testing expect_err"); // panics with `Testing expect_err: 10`
10581058
/// ```
@@ -1075,7 +1075,7 @@ impl<T: fmt::Debug, E> Result<T, E> {
10751075
///
10761076
/// # Examples
10771077
///
1078-
/// ```{.should_panic}
1078+
/// ```should_panic
10791079
/// let x: Result<u32, &str> = Ok(2);
10801080
/// x.unwrap_err(); // panics with `2`
10811081
/// ```

src/librustdoc/html/markdown.rs

+29-3
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,31 @@ impl LangString {
780780
Self::parse(string, allow_error_code_check, enable_per_target_ignores, None)
781781
}
782782

783+
/// Splits a markdown lang string into its tokens, separating on `,`, ` `, and `\t`.
///
/// Pandoc, which Rust once used for generating documentation,
/// expects lang strings to be surrounded by `{}` and for each token
/// to be preceded by a `.`. Since some of these lang strings are still
/// loose in the wild, we strip a pair of surrounding `{}` from the lang
/// string and a leading `.` from each token.
fn tokens(string: &str) -> impl Iterator<Item = &str> {
    let string = string.trim();

    // Strip one surrounding `{...}` pair, but only when *both* braces are
    // present (`strip_suffix` returns `None` otherwise, so `unwrap_or`
    // falls back to the untouched string — e.g. `"{"` is left as-is).
    let string = string
        .strip_prefix('{')
        .and_then(|stripped| stripped.strip_suffix('}'))
        .unwrap_or(string);

    string
        .split(|c| c == ',' || c == ' ' || c == '\t')
        .map(str::trim)
        // Drop a single leading `.` left over from Pandoc-style lang strings.
        .map(|token| token.strip_prefix('.').unwrap_or(token))
        .filter(|token| !token.is_empty())
}
807+
783808
fn parse(
784809
string: &str,
785810
allow_error_code_check: ErrorCodes,
@@ -793,11 +818,11 @@ impl LangString {
793818
let mut ignores = vec![];
794819

795820
data.original = string.to_owned();
796-
let tokens = string.split(|c: char| !(c == '_' || c == '-' || c.is_alphanumeric()));
821+
822+
let tokens = Self::tokens(string).collect::<Vec<&str>>();
797823

798824
for token in tokens {
799-
match token.trim() {
800-
"" => {}
825+
match token {
801826
"should_panic" => {
802827
data.should_panic = true;
803828
seen_rust_tags = !seen_other_tags;
@@ -894,6 +919,7 @@ impl LangString {
894919
_ => seen_other_tags = true,
895920
}
896921
}
922+
897923
// ignore-foo overrides ignore
898924
if !ignores.is_empty() {
899925
data.ignore = Ignore::Some(ignores);

src/librustdoc/html/markdown/tests.rs

+42-4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ fn test_lang_string_parse() {
5858

5959
t(Default::default());
6060
t(LangString { original: "rust".into(), ..Default::default() });
61+
t(LangString { original: ".rust".into(), ..Default::default() });
62+
t(LangString { original: "{rust}".into(), ..Default::default() });
63+
t(LangString { original: "{.rust}".into(), ..Default::default() });
6164
t(LangString { original: "sh".into(), rust: false, ..Default::default() });
6265
t(LangString { original: "ignore".into(), ignore: Ignore::All, ..Default::default() });
6366
t(LangString {
@@ -75,16 +78,16 @@ fn test_lang_string_parse() {
7578
..Default::default()
7679
});
7780
t(LangString { original: "allow_fail".into(), allow_fail: true, ..Default::default() });
78-
t(LangString { original: "{.no_run .example}".into(), no_run: true, ..Default::default() });
81+
t(LangString { original: "no_run,example".into(), no_run: true, ..Default::default() });
7982
t(LangString {
80-
original: "{.sh .should_panic}".into(),
83+
original: "sh,should_panic".into(),
8184
should_panic: true,
8285
rust: false,
8386
..Default::default()
8487
});
85-
t(LangString { original: "{.example .rust}".into(), ..Default::default() });
88+
t(LangString { original: "example,rust".into(), ..Default::default() });
8689
t(LangString {
87-
original: "{.test_harness .rust}".into(),
90+
original: "test_harness,.rust".into(),
8891
test_harness: true,
8992
..Default::default()
9093
});
@@ -100,6 +103,18 @@ fn test_lang_string_parse() {
100103
rust: false,
101104
..Default::default()
102105
});
106+
t(LangString {
107+
original: "text,no_run, ".into(),
108+
no_run: true,
109+
rust: false,
110+
..Default::default()
111+
});
112+
t(LangString {
113+
original: "text,no_run,".into(),
114+
no_run: true,
115+
rust: false,
116+
..Default::default()
117+
});
103118
t(LangString {
104119
original: "edition2015".into(),
105120
edition: Some(Edition::Edition2015),
@@ -112,6 +127,29 @@ fn test_lang_string_parse() {
112127
});
113128
}
114129

130+
#[test]
fn test_lang_string_tokenizer() {
    // Each entry pairs a raw lang string with the token list it must split into.
    let cases: &[(&str, &[&str])] = &[
        ("", &[]),
        ("foo", &["foo"]),
        ("foo,bar", &["foo", "bar"]),
        (".foo,.bar", &["foo", "bar"]),
        ("{.foo,.bar}", &["foo", "bar"]),
        (" {.foo,.bar} ", &["foo", "bar"]),
        ("foo bar", &["foo", "bar"]),
        ("foo\tbar", &["foo", "bar"]),
        ("foo\t, bar", &["foo", "bar"]),
        (" foo , bar ", &["foo", "bar"]),
        (",,foo,,bar,,", &["foo", "bar"]),
        ("foo=bar", &["foo=bar"]),
        ("a-b-c", &["a-b-c"]),
        ("a_b_c", &["a_b_c"]),
    ];

    for &(lang_string, want) in cases {
        let have = LangString::tokens(lang_string).collect::<Vec<&str>>();
        assert_eq!(have, want, "Unexpected lang string split for `{}`", lang_string);
    }
}
152+
115153
#[test]
116154
fn test_header() {
117155
fn t(input: &str, expect: &str) {

0 commit comments

Comments
 (0)