Skip to content

Commit c52b15d

Browse files
committed
Normalize URL paths: convert /.//p, /..//p, and //p to p
1 parent ca4b5dc commit c52b15d

File tree

2 files changed

+53
-4
lines changed

2 files changed

+53
-4
lines changed

url/src/lib.rs

+53
Original file line numberDiff line numberDiff line change
@@ -1757,6 +1757,39 @@ impl Url {
17571757
let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
17581758
let cannot_be_a_base = self.cannot_be_a_base();
17591759
let scheme_type = SchemeType::from(self.scheme());
1760+
let mut path_empty = false;
1761+
1762+
// Find the first ':' and check whether the character right after it is '/'
1763+
let mut has_host = if let Some(index) = self.serialization.find(":") {
1764+
if self.serialization.len() > index + 1
1765+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1766+
{
1767+
let rest = &self.serialization[(index + ":/".len())..];
1768+
let host_part = rest.split('/').next().unwrap_or("");
1769+
path_empty = rest.is_empty();
1770+
!host_part.is_empty() && !host_part.contains('@')
1771+
} else {
1772+
false
1773+
}
1774+
} else {
1775+
false
1776+
};
1777+
1778+
// Ensure the path length is greater than 1 to account
1779+
// for cases where the serialization already has "/." right after the ':'.
1780+
// If we set path, then we already checked the other two conditions:
1781+
// https://url.spec.whatwg.org/#url-serializing
1782+
// 1. The host is null
1783+
// 2. The first segment of the URL's path is an empty string
1784+
if path.len() > 1 {
1785+
if let Some(index) = self.serialization.find(":") {
1786+
let removal_start = index + ":".len();
1787+
if self.serialization[removal_start..].starts_with("/.") {
1788+
self.path_start -= "/.".len() as u32;
1789+
}
1790+
}
1791+
}
1792+
17601793
self.serialization.truncate(self.path_start as usize);
17611794
self.mutate(|parser| {
17621795
if cannot_be_a_base {
@@ -1774,6 +1807,26 @@ impl Url {
17741807
);
17751808
}
17761809
});
1810+
1811+
// For cases where normalization is applied across both the serialization and the path.
1812+
// Insert "/." immediately after the ':' that ends the scheme.
1813+
// This is done if three conditions are met.
1814+
// https://url.spec.whatwg.org/#url-serializing
1815+
// 1. The host is null
1816+
// 2. The URL's path length is greater than 1
1817+
// 3. The first segment of the URL's path is an empty string
1818+
if !has_host && path.len() > 1 && path_empty {
1819+
if let Some(index) = self.serialization.find(":") {
1820+
if self.serialization.len() > index + 2
1821+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1822+
&& self.serialization.as_bytes().get(index + 2) == Some(&b'/')
1823+
{
1824+
self.serialization.insert_str(index + ":".len(), "/.");
1825+
self.path_start += "/.".len() as u32;
1826+
}
1827+
}
1828+
}
1829+
17771830
self.restore_after_path(old_after_path_pos, &after_path);
17781831
}
17791832

url/tests/expected_failures.txt

-4
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,3 @@
4141
<file://monkey/> set pathname to <\\\\>
4242
<file:///unicorn> set pathname to <//\\/>
4343
<file:///unicorn> set pathname to <//monkey/..//>
44-
<non-spec:/> set pathname to </.//p>
45-
<non-spec:/> set pathname to </..//p>
46-
<non-spec:/> set pathname to <//p>
47-
<non-spec:/.//> set pathname to <p>

0 commit comments

Comments
 (0)