Skip to content

Commit

Permalink
sandbox: support shared process namespace
Browse files Browse the repository at this point in the history
Signed-off-by: wackxu <[email protected]>
  • Loading branch information
wackxu committed Aug 5, 2024
1 parent 72706dd commit fdb2ff6
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 48 deletions.
1 change: 1 addition & 0 deletions vmm/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ pub const RESOLV_FILENAME: &str = "resolv.conf";
pub const SANDBOX_NS_PATH: &str = "/run/sandbox-ns";
pub const NET_NAMESPACE: &str = "network";
pub const IPC_NAMESPACE: &str = "ipc";
pub const PID_NAMESPACE: &str = "pid";
pub const UTS_NAMESPACE: &str = "uts";
pub const CGROUP_NAMESPACE: &str = "cgroup";
5 changes: 4 additions & 1 deletion vmm/sandbox/src/cloud_hypervisor/factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::{
devices::{console::Console, fs::Fs, pmem::Pmem, rng::Rng, vsock::Vsock},
CloudHypervisorVM,
},
sandbox::has_shared_pid_namespace,
utils::get_netns,
vm::VMFactory,
};
Expand All @@ -46,7 +47,9 @@ impl VMFactory for CloudHypervisorVMFactory {
) -> containerd_sandbox::error::Result<Self::VM> {
let netns = get_netns(&s.sandbox);
let mut vm = CloudHypervisorVM::new(id, &netns, &s.base_dir, &self.vm_config);

if has_shared_pid_namespace(&s.sandbox) {
vm.config.cmdline.push_str(" task.share_pidns")
}
// add image as a disk
if !self.vm_config.common.image_path.is_empty() {
let rootfs_device = Pmem::new("rootfs", &self.vm_config.common.image_path, true);
Expand Down
19 changes: 16 additions & 3 deletions vmm/sandbox/src/container/handler/ns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@ limitations under the License.

use async_trait::async_trait;
use containerd_sandbox::error::Result;
use vmm_common::{CGROUP_NAMESPACE, IPC_NAMESPACE, NET_NAMESPACE, SANDBOX_NS_PATH, UTS_NAMESPACE};
use vmm_common::{
CGROUP_NAMESPACE, IPC_NAMESPACE, NET_NAMESPACE, PID_NAMESPACE, SANDBOX_NS_PATH, UTS_NAMESPACE,
};

use crate::{container::handler::Handler, sandbox::KuasarSandbox, vm::VM};
use crate::{
container::handler::Handler,
sandbox::{has_shared_pid_namespace, KuasarSandbox},
vm::VM,
};

pub struct NamespaceHandler {
container_id: String,
Expand All @@ -38,6 +44,7 @@ where
T: VM + Sync + Send,
{
async fn handle(&self, sandbox: &mut KuasarSandbox<T>) -> Result<()> {
let share_pidns = has_shared_pid_namespace(&sandbox.data);
let container = sandbox.container_mut(&self.container_id)?;
let spec = if let Some(s) = &mut container.data.spec {
s
Expand All @@ -47,8 +54,14 @@ where
if let Some(l) = spec.linux.as_mut() {
l.namespaces
.retain(|n| n.r#type != NET_NAMESPACE && n.r#type != CGROUP_NAMESPACE);

l.namespaces.iter_mut().for_each(|n| {
n.path = if n.r#type == IPC_NAMESPACE || n.r#type == UTS_NAMESPACE {
// IPC and UTS namespaces are shared by default.
// The PID namespace is shared only if the pod config requests it.
n.path = if n.r#type == IPC_NAMESPACE
|| n.r#type == UTS_NAMESPACE
|| (n.r#type == PID_NAMESPACE && share_pidns)
{
format!("{}/{}", SANDBOX_NS_PATH, n.r#type)
} else {
"".to_string()
Expand Down
16 changes: 16 additions & 0 deletions vmm/sandbox/src/sandbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::{collections::HashMap, io::ErrorKind, path::Path, sync::Arc};
use anyhow::anyhow;
use async_trait::async_trait;
use containerd_sandbox::{
cri::api::v1::NamespaceMode,
data::SandboxData,
error::{Error, Result},
signal::ExitSignal,
Expand Down Expand Up @@ -667,6 +668,21 @@ fn parse_dnsoptions(servers: &[String], searches: &[String], options: &[String])
resolv_content
}

/// Reports whether the sandbox config asks for a pod-level PID namespace.
///
/// Walks `config -> linux -> security_context -> namespace_options` and
/// returns `true` only when the PID namespace mode found there is
/// `NamespaceMode::Pod`. Any missing level in that chain yields `false`.
pub fn has_shared_pid_namespace(data: &SandboxData) -> bool {
    data.config
        .as_ref()
        .and_then(|conf| conf.linux.as_ref())
        .and_then(|linux| linux.security_context.as_ref())
        .and_then(|security| security.namespace_options.as_ref())
        .map_or(false, |options| options.pid() == NamespaceMode::Pod)
}

#[derive(Default, Debug, Deserialize)]
pub struct SandboxConfig {
#[serde(default)]
Expand Down
4 changes: 4 additions & 0 deletions vmm/task/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use tokio::fs::read_to_string;
const SHAREFS_TYPE: &str = "task.sharefs_type";
const LOG_LEVEL: &str = "task.log_level";
const TASK_DEBUG: &str = "task.debug";
const SHARE_PIDNS: &str = "task.share_pidns";

macro_rules! parse_cmdline {
($param:ident, $key:ident, $field:expr) => {
Expand All @@ -41,6 +42,7 @@ macro_rules! parse_cmdline {
/// Options of the task server that `parse_cmdline!` fills in from `task.*` parameters.
pub struct TaskConfig {
/// Value of the `task.sharefs_type` parameter; the `Default` impl uses "9p".
pub(crate) sharefs_type: String,
/// Value of the `task.log_level` parameter; the `Default` impl uses "info".
pub(crate) log_level: String,
/// Set by the `task.share_pidns` parameter; `false` by default.
/// When set, the PID namespace is added to the sandbox namespaces prepared at startup.
pub(crate) share_pidns: bool,
/// Set by the `task.debug` parameter; `false` by default.
pub(crate) debug: bool,
}

Expand All @@ -49,6 +51,7 @@ impl Default for TaskConfig {
TaskConfig {
sharefs_type: "9p".to_string(),
log_level: "info".to_string(),
share_pidns: false,
debug: false,
}
}
Expand All @@ -66,6 +69,7 @@ impl TaskConfig {
parse_cmdline!(param, SHAREFS_TYPE, config.sharefs_type, String::from);
parse_cmdline!(param, LOG_LEVEL, config.log_level, String::from);
parse_cmdline!(param, TASK_DEBUG, config.debug);
parse_cmdline!(param, SHARE_PIDNS, config.share_pidns);
}
Ok(config)
}
Expand Down
1 change: 1 addition & 0 deletions vmm/task/src/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub async fn listen_debug_console(addr: &str) -> Result<()> {
tokio::spawn(async move {
let mut incoming = l.incoming();
while let Some(Ok(s)) = incoming.next().await {
debug!("get a debug console request");
if let Err(e) = debug_console(s).await {
error!("failed to open debug console {:?}", e);
}
Expand Down
120 changes: 76 additions & 44 deletions vmm/task/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ limitations under the License.
#![warn(clippy::expect_fun_call, clippy::expect_used)]

use std::{
collections::HashMap, convert::TryFrom, path::Path, process::exit, str::FromStr, sync::Arc,
thread,
collections::HashMap, convert::TryFrom, os::fd::AsRawFd, path::Path, process::exit,
str::FromStr, sync::Arc,
};

use containerd_shim::{
asynchronous::{monitor::monitor_notify_by_pid, util::asyncify},
error::Error,
io_error, other,
io_error, other, other_error,
protos::{shim::shim_ttrpc_async::create_task, ttrpc::asynchronous::Server},
util::{mkdir, IntoOption},
Result,
Expand All @@ -35,17 +35,15 @@ use log::{debug, error, info, warn, LevelFilter};
use nix::{
errno::Errno,
sched::{unshare, CloneFlags},
sys::{
wait,
wait::{WaitPidFlag, WaitStatus},
},
unistd::{getpid, gettid, Pid},
sys::wait::{self, WaitPidFlag, WaitStatus},
unistd::{fork, getpid, pause, pipe, ForkResult, Pid},
};
use signal_hook_tokio::Signals;
use tokio::fs::File;
use vmm_common::{
api::sandbox_ttrpc::create_sandbox_service, mount::mount, ETC_RESOLV, HOSTNAME_FILENAME,
IPC_NAMESPACE, KUASAR_STATE_DIR, RESOLV_FILENAME, SANDBOX_NS_PATH, UTS_NAMESPACE,
IPC_NAMESPACE, KUASAR_STATE_DIR, PID_NAMESPACE, RESOLV_FILENAME, SANDBOX_NS_PATH,
UTS_NAMESPACE,
};

use crate::{
Expand Down Expand Up @@ -137,6 +135,7 @@ lazy_static! {
static ref CLONE_FLAG_TABLE: HashMap<String, CloneFlags> = HashMap::from([
(String::from(IPC_NAMESPACE), CloneFlags::CLONE_NEWIPC),
(String::from(UTS_NAMESPACE), CloneFlags::CLONE_NEWUTS),
(String::from(PID_NAMESPACE), CloneFlags::CLONE_NEWPID),
]);
}

Expand Down Expand Up @@ -170,7 +169,7 @@ async fn start_task_server() -> anyhow::Result<()> {
}
}

late_init_call().await?;
late_init_call(&config).await?;

start_ttrpc_server().await?.start().await?;

Expand Down Expand Up @@ -305,7 +304,7 @@ async fn init_vm_rootfs() -> Result<()> {

// Continue the initialization that depends on the shared path,
// such as adding guest hooks and preparing sandbox files and namespaces.
async fn late_init_call() -> Result<()> {
async fn late_init_call(config: &TaskConfig) -> Result<()> {
// Setup DNS, bind mount to /etc/resolv.conf
let dns_file = Path::new(KUASAR_STATE_DIR).join(RESOLV_FILENAME);
if dns_file.exists() {
Expand All @@ -321,7 +320,7 @@ async fn late_init_call() -> Result<()> {
}

// Setup sandbox namespace
setup_sandbox_ns().await?;
setup_sandbox_ns(config.share_pidns).await?;

Ok(())
}
Expand Down Expand Up @@ -368,12 +367,12 @@ async fn start_ttrpc_server() -> anyhow::Result<Server> {
.register_service(sandbox_service))
}

async fn setup_sandbox_ns() -> Result<()> {
setup_persistent_ns(vec![
String::from(IPC_NAMESPACE),
String::from(UTS_NAMESPACE),
])
.await?;
/// Hands the list of sandbox namespace types to `setup_persistent_ns`.
///
/// IPC and UTS are always requested; the PID namespace is appended only when
/// `share_pidns` is true.
async fn setup_sandbox_ns(share_pidns: bool) -> Result<()> {
    let mut namespaces = vec![IPC_NAMESPACE, UTS_NAMESPACE];
    if share_pidns {
        namespaces.push(PID_NAMESPACE);
    }
    let namespaces = namespaces.into_iter().map(String::from).collect();
    setup_persistent_ns(namespaces).await
}

Expand All @@ -398,36 +397,69 @@ async fn setup_persistent_ns(ns_types: Vec<String>) -> Result<()> {
.ok_or(other!("bad ns type {}", ns_type))?;
}

let operator = move || -> anyhow::Result<()> {
unshare(clone_type)?;
fork_sandbox(ns_types, clone_type)?;

Ok(())
}

// Forks a helper process that unshares the namespaces in `clone_type` and
// hands them to `mount_ns` so they are bind-mounted for the sandbox.
//
// `ns_types` is the list of namespace type names; `clone_type` is the
// matching set of clone flags. The caller blocks until the helper is done
// (signalled by the pipe closing) and then returns `Ok(())`. An error is
// returned only if creating the pipe or the first fork fails.
//
// NOTE(review): every failure inside the forked processes (`unshare`, the
// second `fork`, and `mount_ns`, which returns nothing) is either an
// `unwrap()` panic in the child or invisible to the caller, because the
// parent ignores the result of the pipe read. Confirm this is intended.
fn fork_sandbox(ns_types: Vec<String>, clone_type: CloneFlags) -> Result<()> {
debug!("fork sandbox process {:?}, {:b}", ns_types, clone_type);
// The pipe carries no data; it is only used so the parent can wait until
// the write end is closed on the child side.
let (r, w) = pipe().map_err(other_error!(e, "create pipe when fork sandbox error"))?;
match unsafe { fork().map_err(other_error!(e, "failed to fork"))? } {
ForkResult::Parent { child } => {
debug!("forked process {} for the sandbox", child);
// Close our copy of the write end, otherwise the read below would
// never see the pipe closed.
drop(w);
let mut resp = [0u8; 4];
// Block until the pipe is closed on the child side; the bytes read and
// any read error are deliberately ignored.
nix::unistd::read(r.as_raw_fd(), &mut resp).unwrap_or_default();
Ok(())
}
ForkResult::Child => {
// The child never reads from the pipe.
drop(r);
// NOTE(review): a failure here panics inside the forked child.
unshare(clone_type).unwrap();
if !ns_types.iter().any(|n| n == PID_NAMESPACE) {
// No PID namespace requested: the namespaces unshared above belong to
// this process, so mount them by our own pid and exit.
debug!("mount namespaces in child");
mount_ns(getpid(), &ns_types);
exit(0);
}
// A shared PID namespace was requested: fork a process that only pauses,
// to act as pid 1 of the shared PID namespace.
match unsafe { fork().unwrap() } {
ForkResult::Parent { child } => {
// Mount the namespaces by the pid of the paused grandchild, then exit.
mount_ns(child, &ns_types);
exit(0);
}
ForkResult::Child => {
// NOTE(review): the message says "mount", but this branch only pauses;
// the mounting is done by the parent branch above.
debug!("mount namespaces in grand child");
// Close the grandchild's copy of the write end so the long-lived pause
// process does not keep the pipe open.
drop(w);
// Stay alive forever; `pause()` is re-entered each time it returns.
loop {
pause();
}
}
}
}
}
}

// set hostname
fn mount_ns(pid: Pid, ns_types: &Vec<String>) {
if ns_types.iter().any(|n| n == UTS_NAMESPACE) {
let hostname = std::fs::read_to_string(Path::new(KUASAR_STATE_DIR).join(HOSTNAME_FILENAME))
.map(|s| s.trim().to_string())
.unwrap_or_default();
if !hostname.is_empty() {
nix::unistd::sethostname(hostname)?;
}

for ns_type in &ns_types {
let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type);
let ns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), ns_type);
mount(
Some("none"),
Some(ns_path.as_str()),
&["bind".to_string()],
&sandbox_ns_path,
)?;
}
Ok(())
};

thread::spawn(move || {
if let Err(e) = operator() {
error!("setup persistent ns failed: {:?}", e);
exit(-1)
debug!("set hostname for sandbox: {}", hostname);
nix::unistd::sethostname(hostname).unwrap();
}
});

Ok(())
}
for ns_type in ns_types {
let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type);
let ns_path = format!("/proc/{}/ns/{}", pid, ns_type);
debug!("mount {} to {}", ns_path, sandbox_ns_path);
mount(
Some("none"),
Some(ns_path.as_str()),
&["bind".to_string()],
&sandbox_ns_path,
)
.unwrap();
}
}

0 comments on commit fdb2ff6

Please sign in to comment.