From 8a3fb66c99da3c55812628c2534ed3bc402f58f2 Mon Sep 17 00:00:00 2001 From: Chris Denton Date: Mon, 23 Dec 2024 15:09:51 +0000 Subject: [PATCH] Implement normalize lexically --- library/std/src/path.rs | 73 +++++++++++++++++++++++++++++++++++ library/std/src/path/tests.rs | 48 +++++++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 35e920ab34476..ff4f189f36d45 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -2148,6 +2148,13 @@ pub struct Path { #[stable(since = "1.7.0", feature = "strip_prefix")] pub struct StripPrefixError(()); +/// An error returned from [`Path::normalize_lexically`] if a `..` parent reference +/// would escape the path. +#[unstable(feature = "normalize_lexically", issue = "134694")] +#[derive(Debug, PartialEq)] +#[non_exhaustive] +pub struct NormalizeError; + impl Path { // The following (private!) function allows construction of a path from a u8 // slice, which is only safe when it is known to follow the OsStr encoding. @@ -2962,6 +2969,63 @@ impl Path { fs::canonicalize(self) } + /// Normalize a path, including `..` without traversing the filesystem. + /// + /// Returns an error if normalization would leave leading `..` components. + /// + /// <div class="warning">
+ /// + /// This function always resolves `..` to the "lexical" parent. + /// That is "a/b/../c" will always resolve to `a/c` which can change the meaning of the path. + /// In particular, `a/c` and `a/b/../c` are distinct on many systems because `b` may be a symbolic link, so its parent isn’t `a`. + /// + /// </div>
+ /// + /// [`path::absolute`](absolute) is an alternative that preserves `..`. + /// Or [`Path::canonicalize`] can be used to resolve any `..` by querying the filesystem. + #[unstable(feature = "normalize_lexically", issue = "134694")] + pub fn normalize_lexically(&self) -> Result<PathBuf, NormalizeError> { + let mut lexical = PathBuf::new(); + let mut iter = self.components().peekable(); + + // Find the root, if any. + let root = match iter.peek() { + Some(Component::ParentDir) => return Err(NormalizeError), + Some(p @ Component::RootDir) | Some(p @ Component::CurDir) => { + lexical.push(p); + iter.next(); + lexical.as_os_str().len() + } + Some(Component::Prefix(prefix)) => { + lexical.push(prefix.as_os_str()); + iter.next(); + if let Some(p @ Component::RootDir) = iter.peek() { + lexical.push(p); + iter.next(); + } + lexical.as_os_str().len() + } + None => return Ok(PathBuf::new()), + Some(Component::Normal(_)) => 0, + }; + + for component in iter { + match component { + Component::RootDir | Component::Prefix(_) => return Err(NormalizeError), + Component::CurDir => continue, + Component::ParentDir => { + if lexical.as_os_str().len() == root { + return Err(NormalizeError); + } else { + lexical.pop(); + } + } + Component::Normal(path) => lexical.push(path), + } + } + Ok(lexical) + } + /// Reads a symbolic link, returning the file that the link points to. /// /// This is an alias to [`fs::read_link`]. @@ -3503,6 +3567,15 @@ impl Error for StripPrefixError { } } +#[unstable(feature = "normalize_lexically", issue = "134694")] +impl fmt::Display for NormalizeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("parent reference `..` points outside of base directory") + } +} +#[unstable(feature = "normalize_lexically", issue = "134694")] +impl Error for NormalizeError {} + /// Makes the path absolute without accessing the filesystem. /// /// If the path is relative, the current directory is used as the base directory. 
diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index ff3f7151bb834..a346d8a43fc62 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -2079,3 +2079,51 @@ fn clone_to_uninit() { unsafe { a.clone_to_uninit(ptr::from_mut::<Path>(&mut b).cast()) }; assert_eq!(a, &*b); } + +#[test] +fn normalize_lexically() { + #[track_caller] + fn check(a: &str, b: Result<&str, NormalizeError>) { + assert_eq!(Path::new(a).normalize_lexically(), b.map(PathBuf::from)); + } + + // Relative paths + check("a", Ok("a")); + check("./a", Ok("./a")); + check("a/b/c", Ok("a/b/c")); + check("a/././b/./c/.", Ok("a/b/c")); + check("a/../c", Ok("c")); + check("./a/b", Ok("./a/b")); + check("a/../b/c/..", Ok("b")); + + check("..", Err(NormalizeError)); + check("../..", Err(NormalizeError)); + check("a/../..", Err(NormalizeError)); + check("a/../../b", Err(NormalizeError)); + check("a/../../b/c", Err(NormalizeError)); + check("a/../b/../..", Err(NormalizeError)); + + // Check we don't escape the root or prefix + #[cfg(unix)] + { + check("/..", Err(NormalizeError)); + check("/a/../..", Err(NormalizeError)); + } + #[cfg(windows)] + { + check(r"C:\..", Err(NormalizeError)); + check(r"C:\a\..\..", Err(NormalizeError)); + + check(r"C:..", Err(NormalizeError)); + check(r"C:a\..\..", Err(NormalizeError)); + + check(r"\\server\share\..", Err(NormalizeError)); + check(r"\\server\share\a\..\..", Err(NormalizeError)); + + check(r"\..", Err(NormalizeError)); + check(r"\a\..\..", Err(NormalizeError)); + + check(r"\\?\UNC\server\share\..", Err(NormalizeError)); + check(r"\\?\UNC\server\share\a\..\..", Err(NormalizeError)); + } +}