Skip to content

Commit 1b080d6

Browse files
WeiZhang555eryugey
authored and committed
overlay: refactor first implementation
This commit refactors much of the previous implementation, including: * Modified layer trait * Lots of bugfixes to make xfstests happy. * Performance improvements. * Whiteout logic * Removal of the plugin & Direct layer logic. * A better inode allocator with inode consistency guarantee. Signed-off-by: Wei Zhang <[email protected]>
1 parent 7af0d74 commit 1b080d6

File tree

19 files changed

+3841
-3218
lines changed

19 files changed

+3841
-3218
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ libc = "0.2.68"
2727
log = "0.4.6"
2828
mio = { version = "0.8", features = ["os-poll", "os-ext"] }
2929
nix = "0.24"
30+
radix_trie = "0.2.1"
3031
tokio = { version = "1", optional = true }
3132
tokio-uring = { version = "0.4.0", optional = true }
3233
vmm-sys-util = { version = "0.11", optional = true }

src/api/filesystem/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ pub use async_io::{AsyncFileSystem, AsyncZeroCopyReader, AsyncZeroCopyWriter};
3030
mod sync_io;
3131
pub use sync_io::FileSystem;
3232

33+
#[cfg(all(any(feature = "fusedev", feature = "virtiofs"), target_os = "linux"))]
34+
mod overlay;
35+
#[cfg(all(any(feature = "fusedev", feature = "virtiofs"), target_os = "linux"))]
36+
pub use overlay::Layer;
37+
3338
/// Information about a path in the filesystem.
3439
#[derive(Copy, Clone, Debug)]
3540
pub struct Entry {

src/api/filesystem/overlay.rs

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
// Copyright (C) 2023 Ant Group. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE-BSD-3-Clause file.
4+
5+
#![allow(missing_docs)]
6+
7+
use std::ffi::{CStr, CString};
8+
use std::io::{Error, ErrorKind, Result};
9+
10+
use super::{Context, Entry, FileSystem, GetxattrReply};
11+
use crate::abi::fuse_abi::stat64;
12+
13+
/// Size, in bytes, passed to `getxattr` when probing a directory for one of the opaque xattrs.
pub const OPAQUE_XATTR_LEN: u32 = 16;
/// Opaque-directory xattr name specific to this implementation; this is the name that
/// `Layer::set_opaque` writes.
pub const OPAQUE_XATTR: &str = "user.fuseoverlayfs.opaque";
/// Opaque-directory xattr name in the unprivileged `user.` namespace.
pub const UNPRIVILEGED_OPAQUE_XATTR: &str = "user.overlay.opaque";
/// Opaque-directory xattr name in the privileged `trusted.` namespace.
pub const PRIVILEGED_OPAQUE_XATTR: &str = "trusted.overlay.opaque";
17+
18+
/// A filesystem must implement Layer trait, or it cannot be used as an OverlayFS layer.
19+
pub trait Layer: FileSystem {
20+
/// Return the root inode number
21+
fn root_inode(&self) -> Self::Inode;
22+
23+
/// Create whiteout file with name <name>.
24+
///
25+
/// If this call is successful then the lookup count of the `Inode` associated with the returned
26+
/// `Entry` must be increased by 1.
27+
fn create_whiteout(&self, ctx: &Context, parent: Self::Inode, name: &CStr) -> Result<Entry> {
28+
// Use temp value to avoid moved 'parent'.
29+
let ino: u64 = parent.into();
30+
match self.lookup(ctx, ino.into(), name) {
31+
Ok(v) => {
32+
// Find whiteout char dev.
33+
if is_whiteout(v.attr) {
34+
return Ok(v);
35+
}
36+
// Non-negative entry with inode larger than 0 indicates file exists.
37+
if v.inode != 0 {
38+
// Decrease the refcount.
39+
self.forget(ctx, v.inode.into(), 1);
40+
// File exists with same name, create whiteout file is not allowed.
41+
return Err(Error::from_raw_os_error(libc::EEXIST));
42+
}
43+
}
44+
Err(e) => match e.raw_os_error() {
45+
Some(raw_error) => {
46+
// We expect ENOENT error.
47+
if raw_error != libc::ENOENT {
48+
return Err(e);
49+
}
50+
}
51+
None => return Err(e),
52+
},
53+
}
54+
55+
// Try to create whiteout char device with 0/0 device number.
56+
let dev = libc::makedev(0, 0);
57+
let mode = libc::S_IFCHR | 0o777;
58+
self.mknod(ctx, ino.into(), name, mode, dev as u32, 0)
59+
}
60+
61+
/// Delete whiteout file with name <name>.
62+
fn delete_whiteout(&self, ctx: &Context, parent: Self::Inode, name: &CStr) -> Result<()> {
63+
// Use temp value to avoid moved 'parent'.
64+
let ino: u64 = parent.into();
65+
match self.lookup(ctx, ino.into(), name) {
66+
Ok(v) => {
67+
if v.inode != 0 {
68+
// Decrease the refcount since we make a lookup call.
69+
self.forget(ctx, v.inode.into(), 1);
70+
}
71+
72+
// Find whiteout so we can safely delete it.
73+
if is_whiteout(v.attr) {
74+
return self.unlink(ctx, ino.into(), name);
75+
}
76+
// Non-negative entry with inode larger than 0 indicates file exists.
77+
if v.inode != 0 {
78+
// File exists but not whiteout file.
79+
return Err(Error::from_raw_os_error(libc::EINVAL));
80+
}
81+
}
82+
Err(e) => match e.raw_os_error() {
83+
Some(raw_error) => {
84+
// ENOENT is acceptable.
85+
if raw_error != libc::ENOENT {
86+
return Err(e);
87+
}
88+
}
89+
None => return Err(e),
90+
},
91+
}
92+
Ok(())
93+
}
94+
95+
/// Check if the Inode is a whiteout file
96+
fn is_whiteout(&self, ctx: &Context, inode: Self::Inode) -> Result<bool> {
97+
let (st, _) = self.getattr(ctx, inode, None)?;
98+
99+
// Check attributes of the inode to see if it's a whiteout char device.
100+
Ok(is_whiteout(st))
101+
}
102+
103+
/// Set the directory to opaque.
104+
fn set_opaque(&self, ctx: &Context, inode: Self::Inode) -> Result<()> {
105+
// Use temp value to avoid moved 'parent'.
106+
let ino: u64 = inode.into();
107+
108+
// Get attributes and check if it's directory.
109+
let (st, _d) = self.getattr(ctx, ino.into(), None)?;
110+
if !is_dir(st) {
111+
// Only directory can be set to opaque.
112+
return Err(Error::from_raw_os_error(libc::ENOTDIR));
113+
}
114+
// A directory is made opaque by setting the xattr "trusted.overlay.opaque" to "y".
115+
// See ref: https://docs.kernel.org/filesystems/overlayfs.html#whiteouts-and-opaque-directories
116+
self.setxattr(
117+
ctx,
118+
ino.into(),
119+
to_cstring(OPAQUE_XATTR)?.as_c_str(),
120+
b"y",
121+
0,
122+
)
123+
}
124+
125+
/// Check if the directory is opaque.
126+
fn is_opaque(&self, ctx: &Context, inode: Self::Inode) -> Result<bool> {
127+
// Use temp value to avoid moved 'parent'.
128+
let ino: u64 = inode.into();
129+
130+
// Get attributes of the directory.
131+
let (st, _d) = self.getattr(ctx, ino.into(), None)?;
132+
if !is_dir(st) {
133+
return Err(Error::from_raw_os_error(libc::ENOTDIR));
134+
}
135+
136+
// Return Result<is_opaque>.
137+
let check_attr = |inode: Self::Inode, attr_name: &str, attr_size: u32| -> Result<bool> {
138+
let cname = CString::new(attr_name)?;
139+
match self.getxattr(ctx, inode, cname.as_c_str(), attr_size) {
140+
Ok(v) => {
141+
// xattr name exists and we get value.
142+
if let GetxattrReply::Value(buf) = v {
143+
if buf.len() == 1 && buf[0].to_ascii_lowercase() == b'y' {
144+
return Ok(true);
145+
}
146+
}
147+
// No value found, go on to next check.
148+
Ok(false)
149+
}
150+
Err(e) => {
151+
if let Some(raw_error) = e.raw_os_error() {
152+
if raw_error == libc::ENODATA {
153+
return Ok(false);
154+
}
155+
}
156+
157+
Err(e)
158+
}
159+
}
160+
};
161+
162+
// A directory is made opaque by setting some specific xattr to "y".
163+
// See ref: https://docs.kernel.org/filesystems/overlayfs.html#whiteouts-and-opaque-directories
164+
165+
// Check our customized version of the xattr "user.fuseoverlayfs.opaque".
166+
let is_opaque = check_attr(ino.into(), OPAQUE_XATTR, OPAQUE_XATTR_LEN)?;
167+
if is_opaque {
168+
return Ok(true);
169+
}
170+
171+
// Also check for the unprivileged version of the xattr "trusted.overlay.opaque".
172+
let is_opaque = check_attr(ino.into(), PRIVILEGED_OPAQUE_XATTR, OPAQUE_XATTR_LEN)?;
173+
if is_opaque {
174+
return Ok(true);
175+
}
176+
177+
// Also check for the unprivileged version of the xattr "user.overlay.opaque".
178+
let is_opaque = check_attr(ino.into(), UNPRIVILEGED_OPAQUE_XATTR, OPAQUE_XATTR_LEN)?;
179+
if is_opaque {
180+
return Ok(true);
181+
}
182+
183+
Ok(false)
184+
}
185+
}
186+
187+
pub(crate) fn is_dir(st: stat64) -> bool {
188+
st.st_mode & libc::S_IFMT == libc::S_IFDIR
189+
}
190+
191+
pub(crate) fn is_chardev(st: stat64) -> bool {
192+
st.st_mode & libc::S_IFMT == libc::S_IFCHR
193+
}
194+
195+
pub(crate) fn is_whiteout(st: stat64) -> bool {
196+
// A whiteout is created as a character device with 0/0 device number.
197+
// See ref: https://docs.kernel.org/filesystems/overlayfs.html#whiteouts-and-opaque-directories
198+
let major = unsafe { libc::major(st.st_rdev) };
199+
let minor = unsafe { libc::minor(st.st_rdev) };
200+
is_chardev(st) && major == 0 && minor == 0
201+
}
202+
203+
/// Convert `name` into an owned, NUL-terminated [`CString`].
///
/// # Errors
///
/// Returns an error of kind [`ErrorKind::InvalidData`] (wrapping the underlying `NulError`)
/// when `name` contains an interior NUL byte.
pub(crate) fn to_cstring(name: &str) -> Result<CString> {
    CString::new(name).map_err(|e| Error::new(ErrorKind::InvalidData, e))
}

src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,11 @@ pub type Result<T> = ::std::result::Result<T, Error>;
117117
pub mod abi;
118118
pub mod api;
119119

120+
#[cfg(all(any(feature = "fusedev", feature = "virtiofs"), target_os = "linux"))]
121+
pub mod overlayfs;
120122
#[cfg(all(any(feature = "fusedev", feature = "virtiofs"), target_os = "linux"))]
121123
pub mod passthrough;
122124
pub mod transport;
123-
pub mod overlayfs;
124125

125126
pub mod common;
126127
pub use self::common::*;

src/overlayfs/config.rs

Lines changed: 31 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,45 @@
1+
// Copyright (C) 2023 Ant Group. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
13

2-
use std::time::Duration;
34
use self::super::CachePolicy;
45
use std::fmt;
6+
use std::time::Duration;
57

68
#[derive(Default, Clone, Debug)]
79
pub struct Config {
8-
pub upper: String,
9-
pub lower: Vec<String>,
10-
pub work: String,
11-
pub mountpoint: String,
12-
pub do_import: bool,
13-
pub writeback: bool,
14-
pub no_open: bool,
15-
pub no_opendir: bool,
16-
pub killpriv_v2: bool,
17-
pub no_readdir: bool,
18-
pub xattr: bool,
19-
pub xattr_permissions: bool,
20-
pub perfile_dax: bool,
21-
pub cache_policy: CachePolicy,
22-
pub attr_timeout: Duration,
23-
pub entry_timeout: Duration,
24-
}
25-
26-
impl Default for CachePolicy {
27-
fn default() -> Self {
28-
CachePolicy::Auto
29-
}
10+
pub mountpoint: String,
11+
pub work: String,
12+
pub do_import: bool,
13+
// Filesystem options.
14+
pub writeback: bool,
15+
pub no_open: bool,
16+
pub no_opendir: bool,
17+
pub killpriv_v2: bool,
18+
pub no_readdir: bool,
19+
pub perfile_dax: bool,
20+
pub cache_policy: CachePolicy,
21+
pub attr_timeout: Duration,
22+
pub entry_timeout: Duration,
3023
}
3124

3225
impl Clone for CachePolicy {
33-
fn clone(&self) -> Self {
34-
match *self {
35-
CachePolicy::Never => CachePolicy::Never,
36-
CachePolicy::Always => CachePolicy::Always,
37-
CachePolicy::Auto => CachePolicy::Auto,
38-
}
39-
}
26+
fn clone(&self) -> Self {
27+
match *self {
28+
CachePolicy::Never => CachePolicy::Never,
29+
CachePolicy::Always => CachePolicy::Always,
30+
CachePolicy::Auto => CachePolicy::Auto,
31+
}
32+
}
4033
}
4134

4235
impl fmt::Debug for CachePolicy {
43-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
44-
let policy =
45-
match *self {
46-
CachePolicy::Never => "Never",
47-
CachePolicy::Always => "Always",
48-
CachePolicy::Auto => "Auto",
49-
};
36+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37+
let policy = match *self {
38+
CachePolicy::Never => "Never",
39+
CachePolicy::Always => "Always",
40+
CachePolicy::Auto => "Auto",
41+
};
5042

51-
write!(f, "CachePolicy: {}", policy)
52-
}
43+
write!(f, "CachePolicy: {}", policy)
44+
}
5345
}

src/overlayfs/datasource.rs

Lines changed: 0 additions & 4 deletions
This file was deleted.

0 commit comments

Comments
 (0)