Skip to content

Commit 901ada6

Browse files
authored
Rollup merge of #83992 - GuillaumeGomez:merge-idents, r=notriddle
Merge idents when generating source content. The idea here is to not have a span for each part of a path. Currently, for `a::b::c`, we generate `<span>a</span>::<span>b</span>::<span>c</span>`; with this change, we will generate `<span>a::b::c</span>`. A nice "side-effect" is that it reduces the size of the output HTML too. :) cc `@notriddle`
2 parents 74b23f9 + e2708b4 commit 901ada6

File tree

3 files changed

+127
-18
lines changed

3 files changed

+127
-18
lines changed

src/librustdoc/html/highlight.rs

+107-18
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,16 @@ impl Iterator for TokenIter<'a> {
136136
}
137137
}
138138

139+
/// Classifies an identifier token purely from its text.
///
/// * `ref` / `mut` map to `Class::RefKeyWord`.
/// * `self` / `Self` map to `Class::Self_`.
/// * `false` / `true` map to `Class::Bool`.
/// * Any other text for which `Symbol::intern(text).is_reserved(|| edition)` holds
///   maps to `Class::KeyWord`.
/// * Everything else is a plain `Class::Ident`.
///
/// `edition` is only consulted by the reserved-word check.
fn get_real_ident_class(text: &str, edition: Edition) -> Class {
140+
match text {
141+
"ref" | "mut" => Class::RefKeyWord,
142+
"self" | "Self" => Class::Self_,
143+
"false" | "true" => Class::Bool,
144+
// Keywords are lexed as identifiers too; the literal arms above are tried first.
_ if Symbol::intern(text).is_reserved(|| edition) => Class::KeyWord,
145+
_ => Class::Ident,
146+
}
147+
}
148+
139149
/// Processes program tokens, classifying strings of text by highlighting
140150
/// category (`Class`).
141151
struct Classifier<'a> {
@@ -144,6 +154,8 @@ struct Classifier<'a> {
144154
in_macro: bool,
145155
in_macro_nonterminal: bool,
146156
edition: Edition,
157+
byte_pos: u32,
158+
src: &'a str,
147159
}
148160

149161
impl<'a> Classifier<'a> {
@@ -155,6 +167,68 @@ impl<'a> Classifier<'a> {
155167
in_macro: false,
156168
in_macro_nonterminal: false,
157169
edition,
170+
byte_pos: 0,
171+
src,
172+
}
173+
}
174+
175+
/// Concatenate colons and idents as one when possible.
///
/// Starting at `self.byte_pos`, greedily consumes a run of `Colon` and `Ident` tokens
/// from `self.tokens` and returns it as `(kind, start, end)` triples, where `start`
/// and `end` are offsets computed from `self.byte_pos` and the consumed token lengths.
///
/// Possible results:
/// * `[]` when nothing could be merged (no colons consumed and the next token is not
///   a plain identifier),
/// * `[(Ident, start, pos)]` for a merged path,
/// * `[(Ident, start, pos), (Colon, pos, pos + nb)]` for a merged path followed by
///   colons that could not be attached to it,
/// * `[(Colon, pos, pos + nb)]` for colons with no path in front of them.
///
/// Only identifiers that `get_real_ident_class` classifies as `Class::Ident` are
/// merged; any other token ends the path.
///
/// Tokens are pulled with `self.tokens.next()` directly, so `self.byte_pos` is NOT
/// advanced by this function.
176+
fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
177+
let start = self.byte_pos as usize;
178+
// End offset of the path accepted so far (exclusive).
let mut pos = start;
179+
let mut has_ident = false;
180+
let edition = self.edition;
181+
182+
loop {
183+
// Number of consecutive `Colon` tokens consumed in this iteration.
let mut nb = 0;
184+
while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
185+
self.tokens.next();
186+
nb += 1;
187+
}
188+
// Ident path can start with "::" but if we already have content in the ident path,
189+
// the "::" is mandatory.
190+
if has_ident && nb == 0 {
191+
return vec![(TokenKind::Ident, start, pos)];
192+
} else if nb != 0 && nb != 2 {
193+
// Any colon count other than exactly two is not a path separator: emit the
// colons on their own, after the path collected so far (if any).
if has_ident {
194+
return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
195+
} else {
196+
return vec![(TokenKind::Colon, pos, pos + nb)];
197+
}
198+
}
199+
200+
// Peek at the next token and keep going only if it is an identifier whose real
// class is `Class::Ident`.
if let Some((Class::Ident, text)) = self.tokens.peek().map(|(token, text)| {
201+
if *token == TokenKind::Ident {
202+
let class = get_real_ident_class(text, edition);
203+
(class, text)
204+
} else {
205+
// Doesn't matter which Class we put in here...
206+
(Class::Comment, text)
207+
}
208+
}) {
209+
// We only "add" the colon if there is an ident behind.
210+
// `nb` is added as a byte count, i.e. one byte per consumed colon token.
pos += text.len() + nb;
211+
has_ident = true;
212+
self.tokens.next();
213+
} else if nb > 0 && has_ident {
214+
return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
215+
} else if nb > 0 {
216+
return vec![(TokenKind::Colon, pos, pos + nb)];
217+
} else if has_ident {
218+
return vec![(TokenKind::Ident, start, pos)];
219+
} else {
220+
return Vec::new();
221+
}
222+
}
223+
}
224+
225+
/// Wraps the tokens iteration to ensure that the byte_pos is always correct.
///
/// Returns the next `(kind, text)` pair from `self.tokens`, or `None` once the
/// iterator is exhausted. On `Some`, `self.byte_pos` is advanced by the byte length
/// of `text`.
226+
fn next(&mut self) -> Option<(TokenKind, &'a str)> {
227+
if let Some((kind, text)) = self.tokens.next() {
228+
// NOTE(review): `as u32` truncates silently if a token's length exceeds `u32::MAX`.
self.byte_pos += text.len() as u32;
229+
Some((kind, text))
230+
} else {
231+
None
158232
}
159233
}
160234

@@ -165,8 +239,25 @@ impl<'a> Classifier<'a> {
165239
/// token is used.
166240
fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
167241
with_default_session_globals(|| {
168-
while let Some((token, text)) = self.tokens.next() {
169-
self.advance(token, text, sink);
242+
// The `-` lines above are the removed one-token-at-a-time loop; the `+` lines
// below replace it.
loop {
243+
// If the next token is a colon or an identifier, try to merge a whole path.
if self
244+
.tokens
245+
.peek()
246+
.map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
247+
.unwrap_or(false)
248+
{
249+
let tokens = self.get_full_ident_path();
250+
for (token, start, end) in tokens {
251+
let text = &self.src[start..end];
252+
self.advance(token, text, sink);
253+
// `get_full_ident_path` consumed these tokens without going through
// `self.next()`, so the byte position is advanced here instead.
self.byte_pos += text.len() as u32;
254+
}
255+
}
256+
// Reached whether or not path pieces were emitted above: the following token is
// always handled on its own here, so it never starts a merged path itself.
if let Some((token, text)) = self.next() {
257+
self.advance(token, text, sink);
258+
} else {
259+
break;
260+
}
170261
}
171262
})
172263
}
@@ -203,12 +294,12 @@ impl<'a> Classifier<'a> {
203294
},
204295
TokenKind::And => match lookahead {
205296
Some(TokenKind::And) => {
206-
let _and = self.tokens.next();
297+
self.next();
207298
sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
208299
return;
209300
}
210301
Some(TokenKind::Eq) => {
211-
let _eq = self.tokens.next();
302+
self.next();
212303
sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
213304
return;
214305
}
@@ -260,7 +351,7 @@ impl<'a> Classifier<'a> {
260351
match lookahead {
261352
// Case 1: #![inner_attribute]
262353
Some(TokenKind::Bang) => {
263-
let _not = self.tokens.next().unwrap();
354+
self.next();
264355
if let Some(TokenKind::OpenBracket) = self.peek() {
265356
self.in_attribute = true;
266357
sink(Highlight::EnterSpan { class: Class::Attribute });
@@ -304,19 +395,17 @@ impl<'a> Classifier<'a> {
304395
sink(Highlight::Token { text, class: None });
305396
return;
306397
}
307-
TokenKind::Ident => match text {
308-
"ref" | "mut" => Class::RefKeyWord,
309-
"self" | "Self" => Class::Self_,
310-
"false" | "true" => Class::Bool,
311-
"Option" | "Result" => Class::PreludeTy,
312-
"Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
313-
// Keywords are also included in the identifier set.
314-
_ if Symbol::intern(text).is_reserved(|| self.edition) => Class::KeyWord,
315-
_ if self.in_macro_nonterminal => {
316-
self.in_macro_nonterminal = false;
317-
Class::MacroNonTerminal
318-
}
319-
_ => Class::Ident,
398+
TokenKind::Ident => match get_real_ident_class(text, self.edition) {
399+
Class::Ident => match text {
400+
"Option" | "Result" => Class::PreludeTy,
401+
"Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
402+
_ if self.in_macro_nonterminal => {
403+
self.in_macro_nonterminal = false;
404+
Class::MacroNonTerminal
405+
}
406+
_ => Class::Ident,
407+
},
408+
c => c,
320409
},
321410
TokenKind::RawIdent => Class::Ident,
322411
TokenKind::Lifetime { .. } => Class::Lifetime,

src/librustdoc/html/highlight/fixtures/sample.html

+10
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
</style>
1111
<pre><code><span class="attribute">#![<span class="ident">crate_type</span> <span class="op">=</span> <span class="string">&quot;lib&quot;</span>]</span>
1212

13+
<span class="kw">use</span> <span class="ident">std::path</span>::{<span class="ident">Path</span>, <span class="ident">PathBuf</span>};
14+
1315
<span class="attribute">#[<span class="ident">cfg</span>(<span class="ident">target_os</span> <span class="op">=</span> <span class="string">&quot;linux&quot;</span>)]</span>
1416
<span class="kw">fn</span> <span class="ident">main</span>() {
1517
<span class="kw">let</span> <span class="ident">foo</span> <span class="op">=</span> <span class="bool-val">true</span> <span class="op">&amp;&amp;</span> <span class="bool-val">false</span> <span class="op">|</span><span class="op">|</span> <span class="bool-val">true</span>;
@@ -19,6 +21,14 @@
1921
<span class="kw">let</span> <span class="kw">_</span> <span class="op">=</span> <span class="kw-2">*</span><span class="ident">foo</span>;
2022
<span class="macro">mac!</span>(<span class="ident">foo</span>, <span class="kw-2">&amp;</span><span class="kw-2">mut</span> <span class="ident">bar</span>);
2123
<span class="macro">assert!</span>(<span class="self">self</span>.<span class="ident">length</span> <span class="op">&lt;</span> <span class="ident">N</span> <span class="op">&amp;&amp;</span> <span class="ident">index</span> <span class="op">&lt;</span><span class="op">=</span> <span class="self">self</span>.<span class="ident">length</span>);
24+
<span class="ident">::std::env::var</span>(<span class="string">&quot;gateau&quot;</span>).<span class="ident">is_ok</span>();
25+
<span class="attribute">#[<span class="ident">rustfmt::skip</span>]</span>
26+
<span class="kw">let</span> <span class="ident">s</span>:<span class="ident">std</span><span class="ident">::path::PathBuf</span> <span class="op">=</span> <span class="ident">std::path::PathBuf::new</span>();
27+
<span class="kw">let</span> <span class="kw-2">mut</span> <span class="ident">s</span> <span class="op">=</span> <span class="ident">String::new</span>();
28+
29+
<span class="kw">match</span> <span class="kw-2">&amp;</span><span class="ident">s</span> {
30+
<span class="kw-2">ref</span> <span class="kw-2">mut</span> <span class="ident">x</span> <span class="op">=</span><span class="op">&gt;</span> {}
31+
}
2232
}
2333

2434
<span class="macro">macro_rules!</span> <span class="ident">bar</span> {

src/librustdoc/html/highlight/fixtures/sample.rs

+10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#![crate_type = "lib"]
22

3+
use std::path::{Path, PathBuf};
4+
35
#[cfg(target_os = "linux")]
46
fn main() {
57
let foo = true && false || true;
@@ -9,6 +11,14 @@ fn main() {
911
let _ = *foo;
1012
mac!(foo, &mut bar);
1113
assert!(self.length < N && index <= self.length);
14+
::std::env::var("gateau").is_ok();
15+
#[rustfmt::skip]
16+
let s:std::path::PathBuf = std::path::PathBuf::new();
17+
let mut s = String::new();
18+
19+
match &s {
20+
ref mut x => {}
21+
}
1222
}
1323

1424
macro_rules! bar {

0 commit comments

Comments
 (0)