Skip to content

Commit 188dd72

Browse files
authored
Rollup merge of rust-lang#107655 - notriddle:notriddle/small-url-encode, r=GuillaumeGomez
rustdoc: use the same URL escape rules for fragments as for examples. Carries over improvements from rust-lang#107284.
2 parents be1789a + fa6c3a2 commit 188dd72

File tree

9 files changed

+71
-81
lines changed

9 files changed

+71
-81
lines changed

library/core/src/result.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@
458458
//! [`Result`] of a collection of each contained value of the original
459459
//! [`Result`] values, or [`Err`] if any of the elements was [`Err`].
460460
//!
461-
//! [impl-FromIterator]: Result#impl-FromIterator%3CResult%3CA%2C%20E%3E%3E-for-Result%3CV%2C%20E%3E
461+
//! [impl-FromIterator]: Result#impl-FromIterator%3CResult%3CA,+E%3E%3E-for-Result%3CV,+E%3E
462462
//!
463463
//! ```
464464
//! let v = [Ok(2), Ok(4), Err("err!"), Ok(8)];
@@ -474,8 +474,8 @@
474474
//! to provide the [`product`][Iterator::product] and
475475
//! [`sum`][Iterator::sum] methods.
476476
//!
477-
//! [impl-Product]: Result#impl-Product%3CResult%3CU%2C%20E%3E%3E-for-Result%3CT%2C%20E%3E
478-
//! [impl-Sum]: Result#impl-Sum%3CResult%3CU%2C%20E%3E%3E-for-Result%3CT%2C%20E%3E
477+
//! [impl-Product]: Result#impl-Product%3CResult%3CU,+E%3E%3E-for-Result%3CT,+E%3E
478+
//! [impl-Sum]: Result#impl-Sum%3CResult%3CU,+E%3E%3E-for-Result%3CT,+E%3E
479479
//!
480480
//! ```
481481
//! let v = [Err("error!"), Ok(1), Ok(2), Ok(3), Err("foo")];

src/librustdoc/html/markdown.rs

+2-41
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use crate::html::escape::Escape;
4646
use crate::html::format::Buffer;
4747
use crate::html::highlight;
4848
use crate::html::length_limit::HtmlWithLimit;
49+
use crate::html::render::small_url_encode;
4950
use crate::html::toc::TocBuilder;
5051

5152
use pulldown_cmark::{
@@ -294,47 +295,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
294295
doctest::make_test(&test, krate, false, &Default::default(), edition, None);
295296
let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
296297

297-
// These characters don't need to be escaped in a URI.
298-
// See https://url.spec.whatwg.org/#query-percent-encode-set
299-
// and https://url.spec.whatwg.org/#urlencoded-parsing
300-
// and https://url.spec.whatwg.org/#url-code-points
301-
fn dont_escape(c: u8) -> bool {
302-
(b'a' <= c && c <= b'z')
303-
|| (b'A' <= c && c <= b'Z')
304-
|| (b'0' <= c && c <= b'9')
305-
|| c == b'-'
306-
|| c == b'_'
307-
|| c == b'.'
308-
|| c == b','
309-
|| c == b'~'
310-
|| c == b'!'
311-
|| c == b'\''
312-
|| c == b'('
313-
|| c == b')'
314-
|| c == b'*'
315-
|| c == b'/'
316-
|| c == b';'
317-
|| c == b':'
318-
|| c == b'?'
319-
// As described in urlencoded-parsing, the
320-
// first `=` is the one that separates key from
321-
// value. Following `=`s are part of the value.
322-
|| c == b'='
323-
}
324-
let mut test_escaped = String::new();
325-
for b in test.bytes() {
326-
if dont_escape(b) {
327-
test_escaped.push(char::from(b));
328-
} else if b == b' ' {
329-
// URL queries are decoded with + replaced with SP
330-
test_escaped.push('+');
331-
} else if b == b'%' {
332-
test_escaped.push('%');
333-
test_escaped.push('%');
334-
} else {
335-
write!(test_escaped, "%{:02X}", b).unwrap();
336-
}
337-
}
298+
let test_escaped = small_url_encode(test);
338299
Some(format!(
339300
r#"<a class="test-arrow" target="_blank" href="{}?code={}{}&amp;edition={}">Run</a>"#,
340301
url, test_escaped, channel, edition,

src/librustdoc/html/render/mod.rs

+52-23
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub(crate) use self::span_map::{collect_spans_and_sources, LinkFromSrc};
3838

3939
use std::collections::VecDeque;
4040
use std::default::Default;
41-
use std::fmt;
41+
use std::fmt::{self, Write};
4242
use std::fs;
4343
use std::iter::Peekable;
4444
use std::path::PathBuf;
@@ -2020,31 +2020,60 @@ fn get_associated_constants(
20202020
.collect::<Vec<_>>()
20212021
}
20222022

2023-
// The point is to url encode any potential character from a type with genericity.
2024-
fn small_url_encode(s: String) -> String {
2023+
pub(crate) fn small_url_encode(s: String) -> String {
2024+
// These characters don't need to be escaped in a URI.
2025+
// See https://url.spec.whatwg.org/#query-percent-encode-set
2026+
// and https://url.spec.whatwg.org/#urlencoded-parsing
2027+
// and https://url.spec.whatwg.org/#url-code-points
2028+
fn dont_escape(c: u8) -> bool {
2029+
(b'a' <= c && c <= b'z')
2030+
|| (b'A' <= c && c <= b'Z')
2031+
|| (b'0' <= c && c <= b'9')
2032+
|| c == b'-'
2033+
|| c == b'_'
2034+
|| c == b'.'
2035+
|| c == b','
2036+
|| c == b'~'
2037+
|| c == b'!'
2038+
|| c == b'\''
2039+
|| c == b'('
2040+
|| c == b')'
2041+
|| c == b'*'
2042+
|| c == b'/'
2043+
|| c == b';'
2044+
|| c == b':'
2045+
|| c == b'?'
2046+
// As described in urlencoded-parsing, the
2047+
// first `=` is the one that separates key from
2048+
// value. Following `=`s are part of the value.
2049+
|| c == b'='
2050+
}
20252051
let mut st = String::new();
20262052
let mut last_match = 0;
2027-
for (idx, c) in s.char_indices() {
2028-
let escaped = match c {
2029-
'<' => "%3C",
2030-
'>' => "%3E",
2031-
' ' => "%20",
2032-
'?' => "%3F",
2033-
'\'' => "%27",
2034-
'&' => "%26",
2035-
',' => "%2C",
2036-
':' => "%3A",
2037-
';' => "%3B",
2038-
'[' => "%5B",
2039-
']' => "%5D",
2040-
'"' => "%22",
2041-
_ => continue,
2042-
};
2053+
for (idx, b) in s.bytes().enumerate() {
2054+
if dont_escape(b) {
2055+
continue;
2056+
}
20432057

2044-
st += &s[last_match..idx];
2045-
st += escaped;
2046-
// NOTE: we only expect single byte characters here - which is fine as long as we
2047-
// only match single byte characters
2058+
if last_match != idx {
2059+
// Invariant: `idx` must be the first byte in a character at this point.
2060+
st += &s[last_match..idx];
2061+
}
2062+
if b == b' ' {
2063+
// URL queries are decoded with + replaced with SP.
2064+
// While the same is not true for hashes, rustdoc only needs to be
2065+
// consistent with itself when encoding them.
2066+
st += "+";
2067+
} else if b == b'%' {
2068+
st += "%%";
2069+
} else {
2070+
write!(st, "%{:02X}", b).unwrap();
2071+
}
2072+
// Invariant: if the current byte is not at the start of a multi-byte character,
2073+
// we need to get down here so that when the next turn of the loop comes around,
2074+
// last_match winds up equalling idx.
2075+
//
2076+
// In other words, dont_escape must always return `false` for every byte of a multi-byte character.
20482077
last_match = idx + 1;
20492078
}
20502079

tests/rustdoc/const-generics/const-generics-docs.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ pub use extern_crate::WTrait;
2121
// 'pub trait Trait<const N: usize>'
2222
// @has - '//*[@id="impl-Trait%3C1%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<1> for u8'
2323
// @has - '//*[@id="impl-Trait%3C2%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<2> for u8'
24-
// @has - '//*[@id="impl-Trait%3C{1%20+%202}%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8'
25-
// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8%3B%20N%5D"]//h3[@class="code-header"]' \
24+
// @has - '//*[@id="impl-Trait%3C%7B1+%2B+2%7D%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8'
25+
// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8;+N%5D"]//h3[@class="code-header"]' \
2626
// 'impl<const N: usize> Trait<N> for [u8; N]'
2727
pub trait Trait<const N: usize> {}
2828
impl Trait<1> for u8 {}
@@ -47,7 +47,7 @@ impl<const M: usize> Foo<M> where u8: Trait<M> {
4747
}
4848
}
4949

50-
// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8%2C%20M%3E"]/h3[@class="code-header"]' 'impl<const M: usize> Bar<u8, M>'
50+
// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8,+M%3E"]/h3[@class="code-header"]' 'impl<const M: usize> Bar<u8, M>'
5151
impl<const M: usize> Bar<u8, M> {
5252
// @has - '//*[@id="method.hey"]' \
5353
// 'pub fn hey<const N: usize>(&self) -> Foo<N>where u8: Trait<N>'

tests/rustdoc/const-generics/const-impl.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,20 @@ pub enum Order {
99
}
1010

1111
// @has foo/struct.VSet.html '//pre[@class="rust item-decl"]' 'pub struct VSet<T, const ORDER: Order>'
12-
// @has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Send for VSet<T, ORDER>'
13-
// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Sync for VSet<T, ORDER>'
12+
// @has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Send for VSet<T, ORDER>'
13+
// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Sync for VSet<T, ORDER>'
1414
pub struct VSet<T, const ORDER: Order> {
1515
inner: Vec<T>,
1616
}
1717

18-
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3ASorted%20}%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Sorted }>'
18+
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Sorted+%7D%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Sorted }>'
1919
impl<T> VSet<T, { Order::Sorted }> {
2020
pub fn new() -> Self {
2121
Self { inner: Vec::new() }
2222
}
2323
}
2424

25-
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3AUnsorted%20}%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Unsorted }>'
25+
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Unsorted+%7D%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Unsorted }>'
2626
impl<T> VSet<T, { Order::Unsorted }> {
2727
pub fn new() -> Self {
2828
Self { inner: Vec::new() }
@@ -31,7 +31,7 @@ impl<T> VSet<T, { Order::Unsorted }> {
3131

3232
pub struct Escape<const S: &'static str>;
3333

34-
// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr#%22%3Cscript%3Ealert(%22Escape%22)%3B%3C/script%3E%22#%3E"]/h3[@class="code-header"]' 'impl Escape<r#"<script>alert("Escape");</script>"#>'
34+
// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr%23%22%3Cscript%3Ealert(%22Escape%22);%3C/script%3E%22%23%3E"]/h3[@class="code-header"]' 'impl Escape<r#"<script>alert("Escape");</script>"#>'
3535
impl Escape<r#"<script>alert("Escape");</script>"#> {
3636
pub fn f() {}
3737
}

tests/rustdoc/double-quote-escape.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ pub trait Foo<T> {
77
pub struct Bar;
88

99
// @has foo/struct.Bar.html
10-
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe%20extern%20%22C%22%20fn()%3E-for-Bar"]' 'Foo<unsafe extern "C" fn()>'
10+
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe+extern+%22C%22+fn()%3E-for-Bar"]' 'Foo<unsafe extern "C" fn()>'
1111
impl Foo<unsafe extern "C" fn()> for Bar {}

tests/rustdoc/primitive-tuple-variadic.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
pub trait Foo {}
77

88
// @has foo/trait.Foo.html
9-
// @has - '//section[@id="impl-Foo-for-(T%2C)"]/h3' 'impl<T> Foo for (T₁, T₂, …, Tₙ)'
9+
// @has - '//section[@id="impl-Foo-for-(T,)"]/h3' 'impl<T> Foo for (T₁, T₂, …, Tₙ)'
1010
#[doc(fake_variadic)]
1111
impl<T> Foo for (T,) {}
1212

1313
pub trait Bar {}
1414

1515
// @has foo/trait.Bar.html
16-
// @has - '//section[@id="impl-Bar-for-(U%2C)"]/h3' 'impl<U: Foo> Bar for (U₁, U₂, …, Uₙ)'
16+
// @has - '//section[@id="impl-Bar-for-(U,)"]/h3' 'impl<U: Foo> Bar for (U₁, U₂, …, Uₙ)'
1717
#[doc(fake_variadic)]
1818
impl<U: Foo> Bar for (U,) {}

tests/rustdoc/sidebar-links-to-foreign-impl.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
// @has - '//h2[@id="foreign-impls"]' 'Implementations on Foreign Types'
88
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-u32"]' 'u32'
99
// @has - '//*[@id="impl-Foo-for-u32"]//h3[@class="code-header"]' 'impl Foo for u32'
10-
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-%26%27a%20str"]' "&'a str"
11-
// @has - '//*[@id="impl-Foo-for-%26%27a%20str"]//h3[@class="code-header"]' "impl<'a> Foo for &'a str"
10+
// @has - "//*[@class=\"sidebar-elems\"]//section//a[@href=\"#impl-Foo-for-%26'a+str\"]" "&'a str"
11+
// @has - "//*[@id=\"impl-Foo-for-%26'a+str\"]//h3[@class=\"code-header\"]" "impl<'a> Foo for &'a str"
1212
pub trait Foo {}
1313

1414
impl Foo for u32 {}

tests/rustdoc/where-clause-order.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ where
77
}
88

99
// @has 'foo/trait.SomeTrait.html'
10-
// @has - "//*[@id='impl-SomeTrait%3C(A%2C%20B%2C%20C%2C%20D%2C%20E)%3E-for-(A%2C%20B%2C%20C%2C%20D%2C%20E)']/h3" "impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd<A> + PartialEq<A>, B: PartialOrd<B> + PartialEq<B>, C: PartialOrd<C> + PartialEq<C>, D: PartialOrd<D> + PartialEq<D>, E: PartialOrd<E> + PartialEq<E> + ?Sized, "
10+
// @has - "//*[@id='impl-SomeTrait%3C(A,+B,+C,+D,+E)%3E-for-(A,+B,+C,+D,+E)']/h3" "impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd<A> + PartialEq<A>, B: PartialOrd<B> + PartialEq<B>, C: PartialOrd<C> + PartialEq<C>, D: PartialOrd<D> + PartialEq<D>, E: PartialOrd<E> + PartialEq<E> + ?Sized, "
1111
impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)
1212
where
1313
A: PartialOrd<A> + PartialEq<A>,

0 commit comments

Comments
 (0)