Skip to content

Commit 29237f0

Browse files
committed
Implement normalize lexically
1 parent 904d8f6 commit 29237f0

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed

library/std/src/path.rs

+71
Original file line number | Diff line number | Diff line change
@@ -2148,6 +2148,13 @@ pub struct Path {
21482148
#[stable(since = "1.7.0", feature = "strip_prefix")]
21492149
pub struct StripPrefixError(());
21502150

2151+
/// An error returned from [`Path::normalize_lexically`] if a `..` parent reference
2152+
/// would escape the path.
2153+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2154+
#[derive(Debug, PartialEq)]
2155+
#[non_exhaustive]
2156+
pub struct NormalizeError;
2157+
21512158
impl Path {
21522159
// The following (private!) function allows construction of a path from a u8
21532160
// slice, which is only safe when it is known to follow the OsStr encoding.
@@ -2962,6 +2969,61 @@ impl Path {
29622969
fs::canonicalize(self)
29632970
}
29642971

2972+
/// Normalize a path, including `..` without traversing the filesystem.
2973+
///
2974+
/// <div class="warning">
2975+
///
2976+
/// This function always resolves `..` to the "lexical" parent.
2977+
/// That is "a/b/../c" will always resolve to `a/c` which can change the meaning of the path.
2978+
/// In particular, `a/c` and `a/b/../c` are distinct on many systems because `b` may be a symbolic link, so its parent isn’t `a`.
2979+
///
2980+
/// </div>
2981+
///
2982+
/// [`path::absolute`](absolute) is an alternative that preserves `..`.
2983+
/// Or [`Path::canonicalize`] can be used to resolve any `..` by querying the filesystem.
2984+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2985+
pub fn normalize_lexically(&self) -> Result<PathBuf, NormalizeError> {
2986+
let mut lexical = PathBuf::new();
2987+
let mut iter = self.components().peekable();
2988+
2989+
// Find the root, if any.
2990+
let root = match iter.peek() {
2991+
Some(Component::ParentDir) => return Err(NormalizeError),
2992+
Some(p @ Component::RootDir) | Some(p @ Component::CurDir) => {
2993+
lexical.push(p);
2994+
iter.next();
2995+
lexical.as_os_str().len()
2996+
}
2997+
Some(Component::Prefix(prefix)) => {
2998+
lexical.push(prefix.as_os_str());
2999+
iter.next();
3000+
if let Some(p @ Component::RootDir) = iter.peek() {
3001+
lexical.push(p);
3002+
iter.next();
3003+
}
3004+
lexical.as_os_str().len()
3005+
}
3006+
None => return Ok(PathBuf::new()),
3007+
Some(Component::Normal(_)) => 0,
3008+
};
3009+
3010+
for component in iter {
3011+
match component {
3012+
Component::RootDir | Component::Prefix(_) => return Err(NormalizeError),
3013+
Component::CurDir => continue,
3014+
Component::ParentDir => {
3015+
if lexical.as_os_str().len() == root {
3016+
return Err(NormalizeError);
3017+
} else {
3018+
lexical.pop();
3019+
}
3020+
}
3021+
Component::Normal(path) => lexical.push(path),
3022+
}
3023+
}
3024+
Ok(lexical)
3025+
}
3026+
29653027
/// Reads a symbolic link, returning the file that the link points to.
29663028
///
29673029
/// This is an alias to [`fs::read_link`].
@@ -3503,6 +3565,15 @@ impl Error for StripPrefixError {
35033565
}
35043566
}
35053567

3568+
#[unstable(feature = "normalize_lexically", issue = "none")]
3569+
impl fmt::Display for NormalizeError {
3570+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3571+
f.write_str("parent reference `..` points outside of base directory")
3572+
}
3573+
}
3574+
#[unstable(feature = "normalize_lexically", issue = "none")]
3575+
impl Error for NormalizeError {}
3576+
35063577
/// Makes the path absolute without accessing the filesystem.
35073578
///
35083579
/// If the path is relative, the current directory is used as the base directory.

library/std/src/path/tests.rs

+48
Original file line number | Diff line number | Diff line change
@@ -2079,3 +2079,51 @@ fn clone_to_uninit() {
20792079
unsafe { a.clone_to_uninit(ptr::from_mut::<Path>(&mut b).cast()) };
20802080
assert_eq!(a, &*b);
20812081
}
2082+
2083+
#[test]
fn normalize_lexically() {
    // Asserts that lexically normalizing `a` produces `b`. `#[track_caller]` makes a
    // failure point at the calling `check(..)` line rather than at this helper.
    #[track_caller]
    fn check(a: &str, b: Result<&str, NormalizeError>) {
        assert_eq!(Path::new(a).normalize_lexically(), b.map(PathBuf::from));
    }

    // The empty path normalizes to the empty path.
    check("", Ok(""));

    // Relative paths
    check("a", Ok("a"));
    check("./a", Ok("./a"));
    check("a/b/c", Ok("a/b/c"));
    check("a/././b/./c/.", Ok("a/b/c"));
    check("a/../c", Ok("c"));
    check("./a/b", Ok("./a/b"));
    check("a/../b/c/..", Ok("b"));

    // A relative path may cancel out entirely; a leading `.` is preserved.
    check("a/..", Ok(""));
    check("./a/..", Ok("."));

    check("..", Err(NormalizeError));
    check("../..", Err(NormalizeError));
    check("a/../..", Err(NormalizeError));
    check("a/../../b", Err(NormalizeError));
    check("a/../../b/c", Err(NormalizeError));
    check("a/../b/../..", Err(NormalizeError));

    // A leading `.` cannot be escaped either.
    check("./..", Err(NormalizeError));

    // Check we don't escape the root or prefix
    #[cfg(unix)]
    {
        // Rooted paths that stay below the root normalize successfully.
        check("/", Ok("/"));
        check("/a/../b", Ok("/b"));
        check("/a/./b/..", Ok("/a"));

        check("/..", Err(NormalizeError));
        check("/a/../..", Err(NormalizeError));
    }
    #[cfg(windows)]
    {
        check(r"C:\..", Err(NormalizeError));
        check(r"C:\a\..\..", Err(NormalizeError));

        check(r"C:..", Err(NormalizeError));
        check(r"C:a\..\..", Err(NormalizeError));

        check(r"\\server\share\..", Err(NormalizeError));
        check(r"\\server\share\a\..\..", Err(NormalizeError));

        check(r"\..", Err(NormalizeError));
        check(r"\a\..\..", Err(NormalizeError));

        check(r"\\?\UNC\server\share\..", Err(NormalizeError));
        check(r"\\?\UNC\server\share\a\..\..", Err(NormalizeError));
    }
}

0 commit comments

Comments
 (0)