From e28617714c987c1e84697b234b550b052a042136 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 30 Oct 2023 22:02:31 +0800 Subject: [PATCH 1/2] ptfs: switch to new implementation of unix credentials Switch to new implementation of unix credentials, with support of supplemental group ids. Signed-off-by: Jiang Liu --- src/passthrough/credentials.rs | 149 +++++++++++++++++++-------------- src/passthrough/file_handle.rs | 2 - src/passthrough/mod.rs | 88 +------------------ src/passthrough/sync_io.rs | 22 +++-- src/passthrough/util.rs | 45 ++++++++++ 5 files changed, 140 insertions(+), 166 deletions(-) diff --git a/src/passthrough/credentials.rs b/src/passthrough/credentials.rs index f75b80f73..80d298229 100644 --- a/src/passthrough/credentials.rs +++ b/src/passthrough/credentials.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: BSD-3-Clause -use crate::oslib; -use crate::passthrough::util::einval; use std::io; +use super::util::{dropsupgroups, seteffgid, seteffuid, setsupgroup}; + pub struct UnixCredentials { uid: libc::uid_t, gid: libc::gid_t, @@ -24,6 +24,7 @@ impl UnixCredentials { /// Set a supplementary group. Set `supported_extension` to `false` to signal that a /// supplementary group maybe required, but the guest was not able to tell us which, /// so we have to rely on keeping the DAC_OVERRIDE capability. + #[allow(dead_code)] pub fn supplementary_gid(self, supported_extension: bool, sup_gid: Option) -> Self { UnixCredentials { uid: self.uid, @@ -33,8 +34,9 @@ impl UnixCredentials { } } - /// Changes the effective uid/gid of the current thread to `val`. Changes - /// the thread's credentials back to root when the returned struct is dropped. + /// Changes the effective uid/gid of the current thread to `val`. + /// + /// Changes the thread's credentials back to root when the returned struct is dropped. pub fn set(self) -> io::Result> { let change_uid = self.uid != 0; let change_gid = self.gid != 0; @@ -43,15 +45,15 @@ impl UnixCredentials { // change the uid first then we lose the capability to change the gid. // However changing back can happen in any order. if let Some(sup_gid) = self.sup_gid { - oslib::setsupgroup(sup_gid)?; + setsupgroup(sup_gid)?; } if change_gid { - oslib::seteffgid(self.gid)?; + seteffgid(self.gid)?; } if change_uid { - oslib::seteffuid(self.uid)?; + seteffuid(self.uid)?; } if change_uid && self.keep_capability { @@ -61,7 +63,7 @@ impl UnixCredentials { // user ID, so we still have the 'DAC_OVERRIDE' in the permitted set. // After switching back to root the permitted set is copied to the effective set, // so no additional steps are required. - if let Err(e) = crate::util::add_cap_to_eff("DAC_OVERRIDE") { + if let Err(e) = add_cap_to_eff(caps::Capability::CAP_DAC_OVERRIDE) { warn!("failed to add 'DAC_OVERRIDE' to the effective set of capabilities: {e}"); } } @@ -87,19 +89,19 @@ pub struct UnixCredentialsGuard { impl Drop for UnixCredentialsGuard { fn drop(&mut self) { if self.reset_uid { - oslib::seteffuid(0).unwrap_or_else(|e| { + seteffuid(0).unwrap_or_else(|e| { error!("failed to change uid back to root: {e}"); }); } if self.reset_gid { - oslib::seteffgid(0).unwrap_or_else(|e| { + seteffgid(0).unwrap_or_else(|e| { error!("failed to change gid back to root: {e}"); }); } if self.drop_sup_gid { - oslib::dropsupgroups().unwrap_or_else(|e| { + dropsupgroups().unwrap_or_else(|e| { error!("failed to drop supplementary groups: {e}"); }); } @@ -107,68 +109,85 @@ impl Drop for UnixCredentialsGuard { } pub struct ScopedCaps { - cap: capng::Capability, + capability: caps::Capability, } -impl ScopedCaps { - fn new(cap_name: &str) -> io::Result> { - use capng::{Action, CUpdate, Set, Type}; - - let cap = capng::name_to_capability(cap_name).map_err(|_| { - let err = io::Error::last_os_error(); - error!( - "couldn't get the capability id for name {}: {:?}", - cap_name, err - ); - err - })?; - - if capng::have_capability(Type::EFFECTIVE, cap) { - let req = vec![CUpdate { - action: Action::DROP, - cap_type: Type::EFFECTIVE, - capability: cap, - }]; - capng::update(req).map_err(|e| { - error!("couldn't drop {} capability: {:?}", cap, e); - einval() - })?; - capng::apply(Set::CAPS).map_err(|e| { - error!( - "couldn't apply capabilities after dropping {}: {:?}", - cap, e - ); - einval() - })?; - Ok(Some(Self { cap })) - } else { - Ok(None) - } +impl Drop for ScopedCaps { + fn drop(&mut self) { + if let Err(e) = caps::raise(None, caps::CapSet::Effective, self.capability) { + error!("fail to restore thread cap_fsetid: {}", e); + }; } } -impl Drop for ScopedCaps { - fn drop(&mut self) { - use capng::{Action, CUpdate, Set, Type}; +pub fn scoped_drop_capability(capability: caps::Capability) -> io::Result> { + if !caps::has_cap(None, caps::CapSet::Effective, capability) + .map_err(|_e| io::Error::new(io::ErrorKind::PermissionDenied, "no capability"))? + { + return Ok(None); + } + caps::drop(None, caps::CapSet::Effective, capability).map_err(|_e| { + io::Error::new(io::ErrorKind::PermissionDenied, "failed to drop capability") + })?; + Ok(Some(ScopedCaps { capability })) +} - let req = vec![CUpdate { - action: Action::ADD, - cap_type: Type::EFFECTIVE, - capability: self.cap, - }]; +pub fn drop_cap_fssetid() -> io::Result> { + scoped_drop_capability(caps::Capability::CAP_FSETID) +} - if let Err(e) = capng::update(req) { - panic!("couldn't restore {} capability: {:?}", self.cap, e); - } - if let Err(e) = capng::apply(Set::CAPS) { - panic!( - "couldn't apply capabilities after restoring {}: {:?}", - self.cap, e - ); +/// Add a capability to the effective set +/// +/// # Errors +/// An error variant will be returned: +/// - if the input string does not match the name, without the 'CAP_' prefix, +/// of any of the capability defined in `linux/capabiliy.h`. +/// - if `capng::get_caps_process()` cannot get the capabilities and bounding set of the process. +/// - if `capng::update()` fails to update the internal posix capabilities settings. +/// - if `capng::apply()` fails to transfer the specified internal posix capabilities +/// settings to the kernel. +pub fn add_cap_to_eff(capability: caps::Capability) -> io::Result<()> { + caps::raise(None, caps::CapSet::Effective, capability).map_err(|_e| { + io::Error::new( + io::ErrorKind::PermissionDenied, + "failed to raise capability", + ) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use nix::unistd::getuid; + + #[test] + fn test_unix_credentials_set() { + if getuid().is_root() { + let cred = UnixCredentials::new(0, 0).set().unwrap(); + assert!(cred.is_none()); + drop(cred); + + let cred = UnixCredentials::new(1, 1); + let cred = cred.supplementary_gid(false, Some(2)); + let guard = cred.set().unwrap(); + assert!(guard.is_some()); + drop(guard); } } -} -pub fn drop_effective_cap(cap_name: &str) -> io::Result> { - ScopedCaps::new(cap_name) + #[test] + fn test_drop_cap_fssetid() { + let cap = drop_cap_fssetid().unwrap(); + let has_cap = + caps::has_cap(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID).unwrap(); + assert_eq!(has_cap, false); + drop(cap); + } + + #[test] + fn test_add_cap_to_eff() { + if getuid().is_root() { + add_cap_to_eff(caps::Capability::CAP_DAC_OVERRIDE).unwrap(); + } + } } diff --git a/src/passthrough/file_handle.rs b/src/passthrough/file_handle.rs index 5eb00c282..44c779dc2 100644 --- a/src/passthrough/file_handle.rs +++ b/src/passthrough/file_handle.rs @@ -320,9 +320,7 @@ impl OpenableFileHandle { #[cfg(test)] mod tests { use super::*; - use nix::unistd::getuid; use std::ffi::CString; - use std::io::Read; fn generate_c_file_handle( handle_bytes: usize, diff --git a/src/passthrough/mod.rs b/src/passthrough/mod.rs index 32efee1f7..c246328f2 100644 --- a/src/passthrough/mod.rs +++ b/src/passthrough/mod.rs @@ -48,6 +48,7 @@ use crate::api::{ #[cfg(feature = "async-io")] mod async_io; mod config; +mod credentials; mod file_handle; mod inode_store; mod mount_fd; @@ -888,93 +889,6 @@ impl BackendFileSystem for PassthroughFs } } -macro_rules! scoped_cred { - ($name:ident, $ty:ty, $syscall_nr:expr) => { - #[derive(Debug)] - pub(crate) struct $name; - - impl $name { - // Changes the effective uid/gid of the current thread to `val`. Changes - // the thread's credentials back to root when the returned struct is dropped. - fn new(val: $ty) -> io::Result> { - if val == 0 { - // Nothing to do since we are already uid 0. - return Ok(None); - } - - // We want credential changes to be per-thread because otherwise - // we might interfere with operations being carried out on other - // threads with different uids/gids. However, posix requires that - // all threads in a process share the same credentials. To do this - // libc uses signals to ensure that when one thread changes its - // credentials the other threads do the same thing. - // - // So instead we invoke the syscall directly in order to get around - // this limitation. Another option is to use the setfsuid and - // setfsgid systems calls. However since those calls have no way to - // return an error, it's preferable to do this instead. - - // This call is safe because it doesn't modify any memory and we - // check the return value. - let res = unsafe { libc::syscall($syscall_nr, -1, val, -1) }; - if res == 0 { - Ok(Some($name)) - } else { - Err(io::Error::last_os_error()) - } - } - } - - impl Drop for $name { - fn drop(&mut self) { - let res = unsafe { libc::syscall($syscall_nr, -1, 0, -1) }; - if res < 0 { - error!( - "fuse: failed to change credentials back to root: {}", - io::Error::last_os_error(), - ); - } - } - } - }; -} -scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid); -scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid); - -fn set_creds( - uid: libc::uid_t, - gid: libc::gid_t, -) -> io::Result<(Option, Option)> { - // We have to change the gid before we change the uid because if we change the uid first then we - // lose the capability to change the gid. However changing back can happen in any order. - ScopedGid::new(gid).and_then(|gid| Ok((ScopedUid::new(uid)?, gid))) -} - -struct CapFsetid {} - -impl Drop for CapFsetid { - fn drop(&mut self) { - if let Err(e) = caps::raise(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID) { - error!("fail to restore thread cap_fsetid: {}", e); - }; - } -} - -fn drop_cap_fsetid() -> io::Result> { - if !caps::has_cap(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID) - .map_err(|_e| io::Error::new(io::ErrorKind::PermissionDenied, "no CAP_FSETID capability"))? - { - return Ok(None); - } - caps::drop(None, caps::CapSet::Effective, caps::Capability::CAP_FSETID).map_err(|_e| { - io::Error::new( - io::ErrorKind::PermissionDenied, - "failed to drop CAP_FSETID capability", - ) - })?; - Ok(Some(CapFsetid {})) -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/passthrough/sync_io.rs b/src/passthrough/sync_io.rs index 97d24c563..7eb6520f1 100644 --- a/src/passthrough/sync_io.rs +++ b/src/passthrough/sync_io.rs @@ -14,6 +14,7 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::time::Duration; +use super::credentials::{drop_cap_fssetid, UnixCredentials}; use super::os_compat::LinuxDirent64; use super::util::stat_fd; use super::*; @@ -185,7 +186,7 @@ impl PassthroughFs { let killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && (fuse_flags & FOPEN_IN_KILL_SUIDGID != 0) { - self::drop_cap_fsetid()? + drop_cap_fssetid()? } else { None }; @@ -423,8 +424,7 @@ impl FileSystem for PassthroughFs { let data = self.inode_map.get(parent)?; let res = { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - + let _guard = UnixCredentials::new(ctx.uid, ctx.gid).set()?; let file = data.get_file()?; // Safe because this doesn't modify any memory and we check the return value. unsafe { libc::mkdirat(file.as_raw_fd(), name.as_ptr(), mode & !umask) } @@ -556,8 +556,7 @@ impl FileSystem for PassthroughFs { let dir_file = dir.get_file()?; let new_file = { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - + let _guard = UnixCredentials::new(ctx.uid, ctx.gid).set()?; let flags = self.get_writeback_open_flags(args.flags as i32); Self::create_file_excl(&dir_file, name, flags, args.mode & !(args.umask & 0o777))? }; @@ -573,12 +572,12 @@ impl FileSystem for PassthroughFs { let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && (args.fuse_flags & FOPEN_IN_KILL_SUIDGID != 0) { - self::drop_cap_fsetid()? + drop_cap_fssetid()? } else { None }; - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + let _guard = UnixCredentials::new(ctx.uid, ctx.gid).set()?; self.open_inode(entry.inode, args.flags as i32)? } }; @@ -704,7 +703,7 @@ impl FileSystem for PassthroughFs { // Cap restored when _killpriv is dropped let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && (fuse_flags & WRITE_KILL_PRIV != 0) { - self::drop_cap_fsetid()? + drop_cap_fssetid()? } else { None }; @@ -809,7 +808,7 @@ impl FileSystem for PassthroughFs { let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed) && valid.contains(SetattrValid::KILL_SUIDGID) { - self::drop_cap_fsetid()? + drop_cap_fssetid()? } else { None }; @@ -925,7 +924,7 @@ impl FileSystem for PassthroughFs { let file = data.get_file()?; let res = { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + let _guard = UnixCredentials::new(ctx.uid, ctx.gid).set()?; // Safe because this doesn't modify any memory and we check the return value. unsafe { @@ -990,8 +989,7 @@ impl FileSystem for PassthroughFs { let data = self.inode_map.get(parent)?; let res = { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - + let _guard = UnixCredentials::new(ctx.uid, ctx.gid).set()?; let file = data.get_file()?; // Safe because this doesn't modify any memory and we check the return value. unsafe { libc::symlinkat(linkname.as_ptr(), file.as_raw_fd(), name.as_ptr()) } diff --git a/src/passthrough/util.rs b/src/passthrough/util.rs index 08a31031f..f7a55c6ee 100644 --- a/src/passthrough/util.rs +++ b/src/passthrough/util.rs @@ -225,6 +225,51 @@ pub fn eperm() -> io::Error { io::Error::from_raw_os_error(libc::EPERM) } +// We want credential changes to be per-thread because otherwise +// we might interfere with operations being carried out on other +// threads with different uids/gids. However, posix requires that +// all threads in a process share the same credentials. To do this +// libc uses signals to ensure that when one thread changes its +// credentials the other threads do the same thing. +// +// So instead we invoke the syscall directly in order to get around +// this limitation. Another option is to use the setfsuid and +// setfsgid systems calls. However since those calls have no way to +// return an error, it's preferable to do this instead. +/// Set effective user ID +pub fn seteffuid(uid: libc::uid_t) -> io::Result<()> { + check_retval(unsafe { libc::syscall(libc::SYS_setresuid, -1, uid, -1) })?; + Ok(()) +} + +/// Set effective group ID +pub fn seteffgid(gid: libc::gid_t) -> io::Result<()> { + check_retval(unsafe { libc::syscall(libc::SYS_setresgid, -1, gid, -1) })?; + Ok(()) +} + +/// Set supplementary group +pub fn setsupgroup(gid: libc::gid_t) -> io::Result<()> { + check_retval(unsafe { libc::setgroups(1, &gid) })?; + Ok(()) +} + +/// Drop all supplementary groups +pub fn dropsupgroups() -> io::Result<()> { + check_retval(unsafe { libc::setgroups(0, std::ptr::null()) })?; + Ok(()) +} + +// A helper function that check the return value of a C function call +// and wraps it in a `Result` type, returning the `errno` code as `Err`. +fn check_retval + PartialEq>(t: T) -> io::Result { + if t == T::from(-1_i8) { + Err(io::Error::last_os_error()) + } else { + Ok(t) + } +} + #[cfg(test)] mod tests { use super::*; From b659ca79837f9b7e8240ba7a06b430cea09b67b9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Nov 2023 14:14:24 +0800 Subject: [PATCH 2/2] ptfs: introduce helper forget_one to reduce duplicated code Rename original forget_one() to forget_one_locked() and introduce new helper forget_one() to reduce duplicated code. Signed-off-by: Jiang Liu --- src/passthrough/mod.rs | 7 ++++++- src/passthrough/sync_io.rs | 12 ++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/passthrough/mod.rs b/src/passthrough/mod.rs index c246328f2..0567bf1e7 100644 --- a/src/passthrough/mod.rs +++ b/src/passthrough/mod.rs @@ -749,7 +749,7 @@ impl PassthroughFs { }) } - fn forget_one(&self, inodes: &mut InodeStore, inode: Inode, count: u64) { + fn forget_one_locked(&self, inodes: &mut InodeStore, inode: Inode, count: u64) { // ROOT_ID should not be forgotten, or we're not able to access to files any more. if inode == fuse::ROOT_ID { return; @@ -786,6 +786,11 @@ impl PassthroughFs { } } + fn forget_one(&self, inode: Inode, count: u64) { + let mut inodes = self.inode_map.get_map_mut(); + self.forget_one_locked(&mut inodes, inode, count) + } + fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> { self.handle_map.release(handle, inode) } diff --git a/src/passthrough/sync_io.rs b/src/passthrough/sync_io.rs index 7eb6520f1..9df92c61a 100644 --- a/src/passthrough/sync_io.rs +++ b/src/passthrough/sync_io.rs @@ -373,16 +373,14 @@ impl FileSystem for PassthroughFs { } fn forget(&self, _ctx: &Context, inode: Inode, count: u64) { - let mut inodes = self.inode_map.get_map_mut(); - - self.forget_one(&mut inodes, inode, count) + self.forget_one(inode, count) } fn batch_forget(&self, _ctx: &Context, requests: Vec<(Inode, u64)>) { let mut inodes = self.inode_map.get_map_mut(); for (inode, count) in requests { - self.forget_one(&mut inodes, inode, count) + self.forget_one_locked(&mut inodes, inode, count) } } @@ -465,8 +463,7 @@ impl FileSystem for PassthroughFs { }; let entry = self.do_lookup(inode, name)?; - let mut inodes = self.inode_map.get_map_mut(); - self.forget_one(&mut inodes, entry.inode, 1); + self.forget_one(entry.inode, 1); entry.inode }; @@ -503,8 +500,7 @@ impl FileSystem for PassthroughFs { // true when size is not large enough to hold entry. if r == 0 { // Release the refcount acquired by self.do_lookup(). - let mut inodes = self.inode_map.get_map_mut(); - self.forget_one(&mut inodes, ino, 1); + self.forget_one(ino, 1); } r })