Skip to content

Commit c08079c

Browse files
committed
Implement normalize lexically
1 parent 904d8f6 commit c08079c

File tree

2 files changed

+121
-0
lines changed

2 files changed

+121
-0
lines changed

library/std/src/path.rs

+73
Original file line numberDiff line numberDiff line change
@@ -2148,6 +2148,13 @@ pub struct Path {
21482148
#[stable(since = "1.7.0", feature = "strip_prefix")]
21492149
pub struct StripPrefixError(());
21502150

2151+
/// An error returned from [`Path::normalize_lexically`] if a `..` parent reference
2152+
/// would escape the path, i.e. step above its root, its prefix, or the start of a relative path.
2153+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2154+
#[derive(Debug, PartialEq)]
// NOTE(review): `#[non_exhaustive]` stops other crates from constructing this unit struct
// directly; presumably it also leaves room to add fields later - confirm intent.
2155+
#[non_exhaustive]
2156+
pub struct NormalizeError;
2157+
21512158
impl Path {
21522159
// The following (private!) function allows construction of a path from a u8
21532160
// slice, which is only safe when it is known to follow the OsStr encoding.
@@ -2962,6 +2969,63 @@ impl Path {
29622969
fs::canonicalize(self)
29632970
}
29642971

2972+
/// Normalizes a path lexically, resolving `..` components without accessing the filesystem.
2973+
///
2974+
/// Returns an error if normalization would leave leading `..` components.
2975+
///
2976+
/// <div class="warning">
2977+
///
2978+
/// This function always resolves `..` to the "lexical" parent.
2979+
/// That is, `a/b/../c` will always resolve to `a/c`, which can change the meaning of the path.
2980+
/// In particular, `a/c` and `a/b/../c` are distinct on many systems because `b` may be a symbolic link, so its parent isn't `a`.
2981+
///
2982+
/// </div>
2983+
///
2984+
/// [`path::absolute`](absolute) is an alternative that preserves `..`.
2985+
/// Alternatively, [`Path::canonicalize`] can be used to resolve any `..` by querying the filesystem.
///
/// An empty path normalizes to an empty path. A leading `.` or root component is kept;
/// any later `.` component is dropped.
///
/// # Errors
///
/// Returns [`NormalizeError`] if the path starts with `..`, or if a `..` is reached when
/// nothing has been added after the leading root, prefix or `.` (or, for a path that starts
/// with a normal component, when everything pushed so far has already been popped).
2986+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2987+
pub fn normalize_lexically(&self) -> Result<PathBuf, NormalizeError> {
2988+
let mut lexical = PathBuf::new();
2989+
let mut iter = self.components().peekable();
2990+
2991+
// Find the root, if any, and push it onto `lexical`.
// `root` records the byte length of `lexical` once the leading root/prefix/`.` has been
// pushed (0 if there is none), so the loop below can detect a `..` that would climb above it.
2992+
let root = match iter.peek() {
// A leading `..` has nothing to pop, so it is rejected outright.
2993+
Some(Component::ParentDir) => return Err(NormalizeError),
2994+
Some(p @ Component::RootDir) | Some(p @ Component::CurDir) => {
2995+
lexical.push(p);
2996+
iter.next();
2997+
lexical.as_os_str().len()
2998+
}
2999+
Some(Component::Prefix(prefix)) => {
3000+
lexical.push(prefix.as_os_str());
3001+
iter.next();
// A prefix may be followed by a root component; if so, it counts as part of the root too.
3002+
if let Some(p @ Component::RootDir) = iter.peek() {
3003+
lexical.push(p);
3004+
iter.next();
3005+
}
3006+
lexical.as_os_str().len()
3007+
}
// An empty path normalizes to an empty path.
3008+
None => return Ok(PathBuf::new()),
// A leading normal component was only peeked, not consumed; the loop below pushes it.
3009+
Some(Component::Normal(_)) => 0,
3010+
};
3011+
3012+
for component in iter {
3013+
match component {
// A root or prefix anywhere after the leading position is treated as an error.
3014+
Component::RootDir | Component::Prefix(_) => return Err(NormalizeError),
// `.` does not change the location, so it is skipped.
3015+
Component::CurDir => continue,
3016+
Component::ParentDir => {
// `lexical` is back to the length recorded for the root portion, so there is
// nothing left to pop and this `..` would escape.
3017+
if lexical.as_os_str().len() == root {
3018+
return Err(NormalizeError);
3019+
} else {
3020+
lexical.pop();
3021+
}
3022+
}
3023+
Component::Normal(path) => lexical.push(path),
3024+
}
3025+
}
3026+
Ok(lexical)
3027+
}
3028+
29653029
/// Reads a symbolic link, returning the file that the link points to.
29663030
///
29673031
/// This is an alias to [`fs::read_link`].
@@ -3503,6 +3567,15 @@ impl Error for StripPrefixError {
35033567
}
35043568
}
35053569

3570+
// The tracking issue matches the one used for `NormalizeError` and `Path::normalize_lexically`,
// so every item gated by the `normalize_lexically` feature points at the same issue.
#[unstable(feature = "normalize_lexically", issue = "134694")]
3571+
impl fmt::Display for NormalizeError {
// Writes a fixed, human-readable description of the error to `f`.
3572+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3573+
f.write_str("parent reference `..` points outside of base directory")
3574+
}
3575+
}
3576+
// Same feature, same tracking issue as the `Display` impl above.
#[unstable(feature = "normalize_lexically", issue = "134694")]
3577+
impl Error for NormalizeError {}
3578+
35063579
/// Makes the path absolute without accessing the filesystem.
35073580
///
35083581
/// If the path is relative, the current directory is used as the base directory.

library/std/src/path/tests.rs

+48
Original file line numberDiff line numberDiff line change
@@ -2079,3 +2079,51 @@ fn clone_to_uninit() {
20792079
unsafe { a.clone_to_uninit(ptr::from_mut::<Path>(&mut b).cast()) };
20802080
assert_eq!(a, &*b);
20812081
}
2082+
2083+
#[test]
// Exercises `Path::normalize_lexically` on relative paths and, per platform, on rooted and
// prefixed paths.
2084+
fn normalize_lexically() {
// Asserts that normalizing `a` yields `b`; an `Ok` string is converted to a `PathBuf`
// so it can be compared with the method's return value.
// `#[track_caller]` makes a failing assertion report the calling `check(...)` line.
2085+
#[track_caller]
2086+
fn check(a: &str, b: Result<&str, NormalizeError>) {
2087+
assert_eq!(Path::new(a).normalize_lexically(), b.map(PathBuf::from));
2088+
}
2089+
2090+
// Relative paths
2091+
check("a", Ok("a"));
2092+
check("./a", Ok("./a"));
2093+
check("a/b/c", Ok("a/b/c"));
2094+
check("a/././b/./c/.", Ok("a/b/c"));
2095+
check("a/../c", Ok("c"));
2096+
check("./a/b", Ok("./a/b"));
2097+
check("a/../b/c/..", Ok("b"));
2098+
// Relative paths in which some `..` has nothing left to pop must be rejected.
2099+
check("..", Err(NormalizeError));
2100+
check("../..", Err(NormalizeError));
2101+
check("a/../..", Err(NormalizeError));
2102+
check("a/../../b", Err(NormalizeError));
2103+
check("a/../../b/c", Err(NormalizeError));
2104+
check("a/../b/../..", Err(NormalizeError));
2105+
2106+
// Check we don't escape the root or prefix
2107+
#[cfg(unix)]
2108+
{
2109+
check("/..", Err(NormalizeError));
2110+
check("/a/../..", Err(NormalizeError));
2111+
}
2112+
#[cfg(windows)]
2113+
{
// Drive prefix followed by a root.
2114+
check(r"C:\..", Err(NormalizeError));
2115+
check(r"C:\a\..\..", Err(NormalizeError));
2116+
// Drive prefix without a root.
2117+
check(r"C:..", Err(NormalizeError));
2118+
check(r"C:a\..\..", Err(NormalizeError));
2119+
// UNC server/share prefix.
2120+
check(r"\\server\share\..", Err(NormalizeError));
2121+
check(r"\\server\share\a\..\..", Err(NormalizeError));
2122+
// Root without any prefix.
2123+
check(r"\..", Err(NormalizeError));
2124+
check(r"\a\..\..", Err(NormalizeError));
2125+
// Verbatim (`\\?\`) UNC prefix.
2126+
check(r"\\?\UNC\server\share\..", Err(NormalizeError));
2127+
check(r"\\?\UNC\server\share\a\..\..", Err(NormalizeError));
2128+
}
2129+
}

0 commit comments

Comments
 (0)