Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

runc: support new sandbox api #162

Merged
merged 2 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
508 changes: 469 additions & 39 deletions runc/Cargo.lock

Large diffs are not rendered by default.

15 changes: 10 additions & 5 deletions runc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[build-dependencies]
built = { version = "0.7.0", features = ["cargo-lock", "dependency-tree", "git2", "chrono", "semver"] }

[dependencies]
env_logger = "0.9.0"
anyhow = { version = "=1.0.66", default-features = false, features = ["std"] }
tokio = { version = "1.19.2", features = ["full"] }
signal-hook-tokio = { version = "0.3.1", features = ["futures-v0_3"] }
async-trait = "0.1.51"
nix = "0.25"
async-trait = "0.1.81"
nix = { version = "0.28.0", features = ["fs", "mount", "socket", "process", "sched", "signal", "term"] }
futures = { version = "0.3.21" }
log = { version = "0.4.17", features = ["std"] }
oci-spec = "0.5.4"
Expand All @@ -24,7 +27,9 @@ os_pipe = "1.1.4"
byteorder = "1.4.3"
go-flag = "0.1.0"
uuid = { version = "1.1.2", features = ["v4"] }
clap = { version = "4.5.4", features = ["derive"] }
built = { version = "0.7.0", features = ["cargo-lock", "dependency-tree", "git2", "chrono", "semver"] }

containerd-sandbox = { git = "https://github.com/kuasar-io/rust-extensions.git", rev = "6ae99540b754cd28c5389d5d6fdeff6ec7290ec5" }
containerd-shim = { git = "https://github.com/kuasar-io/rust-extensions.git", rev = "6ae99540b754cd28c5389d5d6fdeff6ec7290ec5", features = ["async"] }
runc = { git = "https://github.com/kuasar-io/rust-extensions.git", rev = "6ae99540b754cd28c5389d5d6fdeff6ec7290ec5", features = ["async"] }
containerd-sandbox = { git = "https://github.com/kuasar-io/rust-extensions.git" }
containerd-shim = { git = "https://github.com/kuasar-io/rust-extensions.git", features = ["async"] }
runc = { git = "https://github.com/kuasar-io/rust-extensions.git", features = ["async"] }
24 changes: 24 additions & 0 deletions runc/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
Copyright 2024 The Kuasar Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

use std::process::exit;

/// Build-script entry point: asks the `built` crate to write its build-time
/// information file.
///
/// On failure the error is reported on stderr and the script terminates with a
/// non-zero exit status.
fn main() {
    if let Err(e) = built::write_built_file() {
        // `eprintln!` (not `eprint!`) so the diagnostic is newline-terminated
        // and does not run into whatever is printed after it.
        eprintln!("Failed to acquire build-time information: {:?}", e);
        exit(-1)
    }
}
4 changes: 4 additions & 0 deletions runc/rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
newline_style = "Unix"
unstable_features = true
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
60 changes: 60 additions & 0 deletions runc/src/args.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
Copyright 2024 The Kuasar Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

use clap::Parser;

// Command-line options of the sandboxer, parsed with clap's derive API.
// NOTE(review): with clap derive the `///` comments on the fields are used as
// the `--help` text, so they are user-facing strings; reword them with care.
#[derive(Parser, Debug)]
#[command(author, about, long_about = None)]
pub struct Args {
// Boolean switch (`-v` / `--version`); `main` prints the version info and
// returns early when it is set.
/// Version info
#[arg(short, long)]
pub version: bool,

// Directory passed on to the task server and to the sandboxer (`-d` / `--dir`).
/// Sandboxer working directory, default is `/run/kuasar-runc`
#[arg(short, long, value_name = "DIR", default_value = "/run/kuasar-runc")]
pub dir: String,

// Listen address handed to `containerd_sandbox::run` (`-l` / `--listen`).
/// Address for sandboxer's server, default is `/run/runc-sandboxer.sock`
#[arg(
short,
long,
value_name = "FILE",
default_value = "/run/runc-sandboxer.sock"
)]
pub listen: String,

// `log_level` is optional and deliberately has no default value: when it is
// not given on the command line it can still be defined in the configuration
// file. It only has a long form (no `short`).
/// Logging level for sandboxer [trace, debug, info, warn, error, fatal, panic]
#[arg(long, value_name = "STRING")]
pub log_level: Option<String>,
}

#[cfg(test)]
mod tests {
    use clap::Parser;

    use crate::args::Args;

    /// With no options on the command line every field takes its declared default.
    #[test]
    fn test_args_parse_default() {
        // Parse an explicit argv (program name only). `Args::parse()` would read
        // the test binary's real command line, so any argument given to the test
        // harness (a test filter, `--nocapture`, ...) would make clap reject the
        // input and terminate the whole test process.
        let args = Args::try_parse_from(["runc-sandboxer"])
            .expect("an argv without options must parse");
        assert!(!args.version);
        assert_eq!(args.dir, "/run/kuasar-runc");
        assert_eq!(args.listen, "/run/runc-sandboxer.sock");
        assert!(args.log_level.is_none());
    }

    /// Explicitly supplied options override the defaults.
    #[test]
    fn test_args_parse_explicit() {
        let args = Args::try_parse_from([
            "runc-sandboxer",
            "--version",
            "--dir",
            "/tmp/kuasar",
            "--listen",
            "/tmp/sandboxer.sock",
            "--log-level",
            "debug",
        ])
        .expect("explicit options must parse");
        assert!(args.version);
        assert_eq!(args.dir, "/tmp/kuasar");
        assert_eq!(args.listen, "/tmp/sandboxer.sock");
        assert_eq!(args.log_level.as_deref(), Some("debug"));
    }
}
18 changes: 14 additions & 4 deletions runc/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

use std::{io::IoSliceMut, ops::Deref, os::unix::io::RawFd, path::Path, sync::Arc};
use std::{
io::IoSliceMut,
ops::Deref,
os::{
fd::{FromRawFd, OwnedFd},
unix::io::RawFd,
},
path::Path,
sync::Arc,
};

use anyhow::anyhow;
use containerd_shim::{
Expand Down Expand Up @@ -164,7 +173,7 @@ pub fn create_runc(
#[derive(Default)]
pub(crate) struct CreateConfig {}

pub fn receive_socket(stream_fd: RawFd) -> containerd_shim::Result<RawFd> {
pub fn receive_socket(stream_fd: RawFd) -> containerd_shim::Result<OwnedFd> {
let mut buf = [0u8; 4096];
let mut iovec = [IoSliceMut::new(&mut buf)];
let mut space = cmsg_space!([RawFd; 2]);
Expand Down Expand Up @@ -194,8 +203,9 @@ pub fn receive_socket(stream_fd: RawFd) -> containerd_shim::Result<RawFd> {
"copy_console: console socket get path: {}, fd: {}",
path, &fds[0]
);
tcgetattr(fds[0])?;
Ok(fds[0])
let fd = unsafe { OwnedFd::from_raw_fd(fds[0]) };
tcgetattr(&fd)?;
Ok(fd)
}

pub fn has_shared_pid_namespace(spec: &Spec) -> bool {
Expand Down
104 changes: 68 additions & 36 deletions runc/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,51 +14,71 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

use std::ffi::CString;
use std::os::fd::RawFd;
use std::process::exit;
use std::{
ffi::CString,
os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd},
path::Path,
process::exit,
str::FromStr,
};

use anyhow::anyhow;
use clap::Parser;
use containerd_shim::asynchronous::monitor::monitor_notify_by_pid;
use futures::StreamExt;
use log::{debug, error, warn};
use nix::fcntl::{fcntl, FcntlArg, FdFlag, OFlag};
use nix::sched::{setns, unshare, CloneFlags};
use nix::sys::signal::{sigaction, SaFlags, SigAction, SigHandler, SigSet, SIGCHLD};
use nix::sys::stat::Mode;
use nix::unistd::{close, fork, pause, pipe, read, write, ForkResult};
use log::{debug, error, warn, LevelFilter};
use nix::{
errno::Errno,
fcntl::{fcntl, FcntlArg, FdFlag, OFlag},
libc,
sched::{setns, unshare, CloneFlags},
sys::{
signal::{sigaction, SaFlags, SigAction, SigHandler, SigSet, SIGCHLD},
stat::Mode,
wait,
wait::{WaitPidFlag, WaitStatus},
},
unistd::Pid,
unistd::{fork, pause, pipe, read, write, ForkResult, Pid},
};
use prctl::PrctlMM;
use signal_hook_tokio::Signals;
use uuid::Uuid;

use crate::sandbox::{RuncSandboxer, SandboxParent};
use crate::task::fork_task_server;
use crate::{
sandbox::{RuncSandboxer, SandboxParent},
task::fork_task_server,
};

mod args;
mod common;
mod runc;
mod sandbox;
mod task;
mod version;

fn main() {
env_logger::builder().format_timestamp_micros().init();
let args = args::Args::parse();
if args.version {
version::print_version_info();
return;
}

// Use the log level given on the command line; fall back to `Info` when it is absent or cannot be parsed.
let log_level =
LevelFilter::from_str(&args.log_level.unwrap_or_default()).unwrap_or(LevelFilter::Info);
env_logger::Builder::from_default_env()
.format_timestamp_micros()
.filter_module("containerd_sandbox", log_level)
.filter_module("runc_sandboxer", log_level)
.init();

let sandbox_parent = fork_sandbox_parent().unwrap();
let os_args: Vec<_> = std::env::args_os().collect();
// TODO avoid parse args multiple times
let flags = containerd_sandbox::args::parse(&os_args[1..]).unwrap();
let task_socket = format!("{}/task-{}.sock", flags.dir, Uuid::new_v4());
fork_task_server(&task_socket, &flags.dir).unwrap();

let task_socket = format!("{}/task-{}.sock", &args.dir, Uuid::new_v4());
fork_task_server(&task_socket, &args.dir).unwrap();
let runtime = tokio::runtime::Runtime::new().unwrap();
runtime.block_on(async move {
start_sandboxer(sandbox_parent, task_socket, flags.dir)
start_sandboxer(sandbox_parent, task_socket, &args.listen, &args.dir)
.await
.unwrap();
});
Expand All @@ -75,12 +95,12 @@ fn fork_sandbox_parent() -> Result<SandboxParent, anyhow::Error> {
match unsafe { fork().map_err(|e| anyhow!("failed to fork sandbox parent {}", e))? } {
ForkResult::Parent { child } => {
debug!("forked process {} for the sandbox parent", child);
close(reqr).unwrap_or_default();
close(respw).unwrap_or_default();
drop(reqr);
drop(respw);
}
ForkResult::Child => {
close(reqw).unwrap_or_default();
close(respr).unwrap_or_default();
drop(reqw);
drop(respr);
prctl::set_child_subreaper(true).unwrap();
let comm = "[sandbox-parent]";
let comm_cstr = CString::new(comm).unwrap();
Expand All @@ -95,7 +115,7 @@ fn fork_sandbox_parent() -> Result<SandboxParent, anyhow::Error> {
sigaction(SIGCHLD, &sig_action).unwrap();
}
loop {
let buffer = read_count(reqr, 512).unwrap();
let buffer = read_count(reqr.as_raw_fd(), 512).unwrap();
let id = String::from_utf8_lossy(&buffer[0..64]).to_string();
let mut zero_index = 64;
for (i, &b) in buffer.iter().enumerate().take(512).skip(64) {
Expand All @@ -106,12 +126,12 @@ fn fork_sandbox_parent() -> Result<SandboxParent, anyhow::Error> {
}
let netns = String::from_utf8_lossy(&buffer[64..zero_index]).to_string();
let sandbox_pid = fork_sandbox(&id, &netns).unwrap();
write_all(respw, sandbox_pid.to_le_bytes().as_slice()).unwrap();
write_all(&respw, sandbox_pid.to_le_bytes().as_slice()).unwrap();
}
}
}
fcntl(reqw, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)).unwrap_or_default();
fcntl(respr, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)).unwrap_or_default();
fcntl(reqw.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)).unwrap_or_default();
fcntl(respr.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)).unwrap_or_default();
Ok(SandboxParent::new(reqw, respr))
}

Expand All @@ -136,7 +156,7 @@ pub fn read_count(fd: RawFd, count: usize) -> Result<Vec<u8>, anyhow::Error> {
}
}

pub fn write_all(fd: RawFd, buf: &[u8]) -> Result<(), anyhow::Error> {
pub fn write_all(fd: &OwnedFd, buf: &[u8]) -> Result<(), anyhow::Error> {
let mut idx = 0;
let count = buf.len();
loop {
Expand All @@ -162,21 +182,21 @@ fn fork_sandbox(id: &str, netns: &str) -> Result<i32, anyhow::Error> {
match unsafe { fork().map_err(|e| anyhow!("failed to fork sandbox {}", e))? } {
ForkResult::Parent { child } => {
debug!("forked process {} for the sandbox {}", child, id);
close(w).unwrap_or_default();
drop(w);
let mut resp = [0u8; 4];
let r = read_count(r, 4)?;
let r = read_count(r.as_raw_fd(), 4)?;
resp[..].copy_from_slice(r.as_slice());
let pid = i32::from_le_bytes(resp);
Ok(pid)
}
ForkResult::Child => {
close(r).unwrap_or_default();
drop(r);
unshare(CloneFlags::CLONE_NEWIPC | CloneFlags::CLONE_NEWUTS | CloneFlags::CLONE_NEWPID)
.unwrap();
match unsafe { fork().unwrap() } {
ForkResult::Parent { child } => {
debug!("forked process {} for the sandbox {}", child, id);
write_all(w, child.as_raw().to_le_bytes().as_slice()).unwrap();
write_all(&w, child.as_raw().to_le_bytes().as_slice()).unwrap();
exit(0);
}
ForkResult::Child => {
Expand All @@ -186,7 +206,8 @@ fn fork_sandbox(id: &str, netns: &str) -> Result<i32, anyhow::Error> {
set_process_comm(addr as u64, comm_cstr.as_bytes_with_nul().len() as u64);
if !netns.is_empty() {
let netns_fd =
nix::fcntl::open(netns, OFlag::O_CLOEXEC, Mode::empty()).unwrap();
safe_open_file(Path::new(&netns), OFlag::O_CLOEXEC, Mode::empty())
.unwrap();
setns(netns_fd, CloneFlags::CLONE_NEWNET).unwrap();
}
loop {
Expand All @@ -198,6 +219,16 @@ fn fork_sandbox(id: &str, netns: &str) -> Result<i32, anyhow::Error> {
}
}

/// Opens `path` with the given flags and mode and hands the resulting file
/// descriptor back as an [`OwnedFd`], so it is closed automatically when the
/// returned value is dropped.
///
/// # Errors
///
/// Returns the `nix::Error` reported by `nix::fcntl::open` when the open fails.
pub fn safe_open_file<P: ?Sized + nix::NixPath>(
    path: &P,
    oflag: OFlag,
    mode: Mode,
) -> Result<OwnedFd, nix::Error> {
    nix::fcntl::open(path, oflag, mode).map(|raw_fd| {
        // SAFETY: `raw_fd` was just returned by a successful `open`, so it is a
        // valid descriptor that nothing else owns; the `OwnedFd` takes over the
        // responsibility of closing it.
        unsafe { OwnedFd::from_raw_fd(raw_fd) }
    })
}

fn set_process_comm(addr: u64, len: u64) {
if prctl::set_mm(PrctlMM::PR_SET_MM_ARG_START, addr).is_err() {
prctl::set_mm(PrctlMM::PR_SET_MM_ARG_END, addr + len).unwrap();
Expand Down Expand Up @@ -230,12 +261,13 @@ extern "C" fn sandbox_parent_handle_signals(_: libc::c_int) {
async fn start_sandboxer(
sandbox_parent: SandboxParent,
task_socket: String,
dir: String,
listen: &str,
dir: &str,
) -> anyhow::Result<()> {
let task_address = format!("unix://{}", task_socket);
let sandboxer = RuncSandboxer::new(sandbox_parent, &task_address).await?;
sandboxer.recover(&dir).await?;
containerd_sandbox::run("runc-sandboxer", sandboxer).await?;
sandboxer.recover(dir).await?;
containerd_sandbox::run("kuasar-runc-sandboxer", listen, dir, sandboxer).await?;
Ok(())
}

Expand Down
Loading
Loading