Skip to content

Commit 069718b

Browse files
committed
Normalize URL paths: convert /.//p, /..//p, and //p to p
1 parent 39a1201 commit 069718b

File tree

3 files changed

+93
-5
lines changed

3 files changed

+93
-5
lines changed

url/src/lib.rs

+53-1
Original file line number | Diff line number | Diff line change
@@ -1756,6 +1756,39 @@ impl Url {
17561756
let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
17571757
let cannot_be_a_base = self.cannot_be_a_base();
17581758
let scheme_type = SchemeType::from(self.scheme());
1759+
let mut path_empty = false;
1760+
1761+
// Check ':' and then see if the next character is '/'
1762+
let mut has_host = if let Some(index) = self.serialization.find(":") {
1763+
if self.serialization.len() > index + 1
1764+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1765+
{
1766+
let rest = &self.serialization[(index + ":/".len())..];
1767+
let host_part = rest.split('/').next().unwrap_or("");
1768+
path_empty = rest.is_empty();
1769+
!host_part.is_empty() && !host_part.contains('@')
1770+
} else {
1771+
false
1772+
}
1773+
} else {
1774+
false
1775+
};
1776+
1777+
// Ensure the path length is greater than 1 to account
1778+
// for cases where "/." is already appended from serialization
1779+
// If we set path, then we already checked the other two conditions:
1780+
// https://url.spec.whatwg.org/#url-serializing
1781+
// 1. The host is null
1782+
// 2. the first segment of the URL's path is an empty string
1783+
if self.path().len() + path.len() > 1 {
1784+
if let Some(index) = self.serialization.find(":") {
1785+
let removal_start = index + ":".len();
1786+
if self.serialization[removal_start..].starts_with("/.") {
1787+
self.path_start = removal_start as u32;
1788+
}
1789+
}
1790+
}
1791+
17591792
self.serialization.truncate(self.path_start as usize);
17601793
self.mutate(|parser| {
17611794
if cannot_be_a_base {
@@ -1765,14 +1798,33 @@ impl Url {
17651798
}
17661799
parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
17671800
} else {
1768-
let mut has_host = true; // FIXME
17691801
parser.parse_path_start(
17701802
scheme_type,
17711803
&mut has_host,
17721804
parser::Input::new_no_trim(path),
17731805
);
17741806
}
17751807
});
1808+
1809+
// For cases where normalization is applied across both the serialization and the path.
1810+
// Append "/." immediately after the scheme (up to ":")
1811+
// This is done if three conditions are met.
1812+
// https://url.spec.whatwg.org/#url-serializing
1813+
// 1. The host is null
1814+
// 2. The url's path length is greater than 1
1815+
// 3. the first segment of the URL's path is an empty string
1816+
if !has_host && path.len() > 1 && path_empty {
1817+
if let Some(index) = self.serialization.find(":") {
1818+
if self.serialization.len() > index + 2
1819+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1820+
&& self.serialization.as_bytes().get(index + 2) == Some(&b'/')
1821+
{
1822+
self.serialization.insert_str(index + ":".len(), "/.");
1823+
self.path_start += "/.".len() as u32;
1824+
}
1825+
}
1826+
}
1827+
17761828
self.restore_after_path(old_after_path_pos, &after_path);
17771829
}
17781830

url/tests/expected_failures.txt

-4
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,3 @@
4343
<file://monkey/> set pathname to <\\\\>
4444
<file:///unicorn> set pathname to <//\\/>
4545
<file:///unicorn> set pathname to <//monkey/..//>
46-
<non-spec:/> set pathname to </.//p>
47-
<non-spec:/> set pathname to </..//p>
48-
<non-spec:/> set pathname to <//p>
49-
<non-spec:/.//> set pathname to <p>

url/tests/unit.rs

+40
Original file line number | Diff line number | Diff line change
@@ -1387,3 +1387,43 @@ fn serde_error_message() {
13871387
r#"relative URL without a base: "§invalid#+#*Ä" at line 1 column 25"#
13881388
);
13891389
}
1390+
1391+
#[test]
1392+
fn test_fuzzing_uri_failures() {
1393+
use url::quirks;
1394+
let mut url = Url::parse("data:/.dummy.path").unwrap();
1395+
assert!(!url.cannot_be_a_base());
1396+
1397+
url.set_path(".dummy.path");
1398+
assert_eq!(url.as_str(), "data:/.dummy.path");
1399+
assert_eq!(url.path(), "/.dummy.path");
1400+
url.check_invariants().unwrap();
1401+
1402+
url.path_segments_mut()
1403+
.expect("should have path segments")
1404+
.push(".another.dummy.path");
1405+
assert_eq!(url.as_str(), "data:/.dummy.path/.another.dummy.path");
1406+
assert_eq!(url.path(), "/.dummy.path/.another.dummy.path");
1407+
url.check_invariants().unwrap();
1408+
1409+
url = Url::parse("web+demo:/").unwrap();
1410+
assert!(!url.cannot_be_a_base());
1411+
1412+
url.set_path("//.dummy.path");
1413+
assert_eq!(url.path(), "//.dummy.path");
1414+
1415+
let segments: Vec<_> = url
1416+
.path_segments()
1417+
.expect("should have path segments")
1418+
.collect();
1419+
assert_eq!(segments, vec!["", ".dummy.path"]);
1420+
assert_eq!(url.as_str(), "web+demo:/.//.dummy.path");
1421+
1422+
quirks::set_hostname(&mut url, ".dummy.host").unwrap();
1423+
assert_eq!(url.as_str(), "web+demo://.dummy.host//.dummy.path");
1424+
url.check_invariants().unwrap();
1425+
1426+
quirks::set_hostname(&mut url, "").unwrap();
1427+
assert_eq!(url.as_str(), "web+demo:////.dummy.path");
1428+
url.check_invariants().unwrap();
1429+
}

0 commit comments

Comments
 (0)