Skip to content

Commit

Permalink
task: report more OCI runtime errors
Browse files Browse the repository at this point in the history
Signed-off-by: Zhang Tianyang <[email protected]>
  • Loading branch information
Burning1020 committed Jan 19, 2024
1 parent 9877880 commit 98dbb0b
Showing 1 changed file with 93 additions and 26 deletions.
119 changes: 93 additions & 26 deletions vmm/task/src/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,28 +245,44 @@ impl KuasarFactory {
}

// runtime_error reads the OCI runtime log file to retrieve the OCI runtime error
pub async fn runtime_error(bundle: &str, e: runc::error::Error, msg: &str) -> Error {
pub async fn runtime_error(bundle: &str, r_err: runc::error::Error, msg: &str) -> Error {
match get_last_runtime_error(bundle).await {
Err(e) => other!(
"{}: unable to retrieve OCI runtime error ({}): {}",
msg,
e,
r_err
),
Ok(rt_msg) => {
return if rt_msg.is_empty() {
other!("{}: empty msg in log file: {}", msg, r_err)
} else {
other!("{}: {}", msg, rt_msg)
}
}
}
}

async fn get_last_runtime_error(bundle: &str) -> Result<String> {
let log_path = Path::new(bundle).join("log.json");
let mut rt_msg = String::new();
match File::open(Path::new(bundle).join("log.json")).await {
Err(err) => other!("{}: unable to open OCI runtime log file){}", msg, err),
// Use match because `await` is not allowed inside lambda
match File::open(log_path).await {
Err(e) => Err(other!("unable to open OCI runtime log file: {}", e)),
Ok(file) => {
let mut lines = BufReader::new(file).lines();
while let Ok(Some(line)) = lines.next_line().await {
// Retrieve the last runtime error
match serde_json::from_str::<Log>(&line) {
Err(err) => return other!("{}: unable to parse log msg: {}", msg, err),
Err(e) => return Err(other!("unable to parse log msg: {}", e)),
Ok(log) => {
if log.level == "error" {
rt_msg = log.msg.trim().to_string();
}
}
}
}
if !rt_msg.is_empty() {
other!("{}: {}", msg, rt_msg)
} else {
other!("{}: (no OCI runtime error in logfile) {}", msg, e)
}
Ok(rt_msg)
}
}
}
Expand Down Expand Up @@ -305,10 +321,9 @@ impl ProcessFactory<ExecProcess> for KuasarExecFactory {
#[async_trait]
impl ProcessLifecycle<InitProcess> for KuasarInitLifecycle {
async fn start(&self, p: &mut InitProcess) -> containerd_shim::Result<()> {
self.runtime
.start(p.id.as_str())
.await
.map_err(other_error!(e, "failed start"))?;
if let Err(e) = self.runtime.start(p.id.as_str()).await {
return Err(runtime_error(&p.lifecycle.bundle, e, "OCI runtime start failed").await);
}
p.state = Status::RUNNING;
Ok(())
}
Expand All @@ -319,31 +334,37 @@ impl ProcessLifecycle<InitProcess> for KuasarInitLifecycle {
signal: u32,
all: bool,
) -> containerd_shim::Result<()> {
self.runtime
if let Err(r_err) = self
.runtime
.kill(
p.id.as_str(),
signal,
Some(&runc::options::KillOpts { all }),
)
.await
.map_err(|e| check_kill_error(e.to_string()))
{
let e = runtime_error(&p.lifecycle.bundle, r_err, "OCI runtime delete failed").await;

return Err(check_kill_error(e.to_string()));
}
Ok(())
}

async fn delete(&self, p: &mut InitProcess) -> containerd_shim::Result<()> {
self.runtime
if let Err(e) = self
.runtime
.delete(
p.id.as_str(),
Some(&runc::options::DeleteOpts { force: true }),
)
.await
.or_else(|e| {
if !e.to_string().to_lowercase().contains("does not exist") {
Err(e)
} else {
Ok(())
}
})
.map_err(other_error!(e, "failed delete"))?;
{
if !e.to_string().to_lowercase().contains("does not exist") {
return Err(
runtime_error(&p.lifecycle.bundle, e, "OCI runtime delete failed").await,
);
}
}
self.exit_signal.signal();
Ok(())
}
Expand Down Expand Up @@ -416,7 +437,8 @@ impl KuasarInitLifecycle {
impl ProcessLifecycle<ExecProcess> for KuasarExecLifecycle {
async fn start(&self, p: &mut ExecProcess) -> containerd_shim::Result<()> {
rescan_pci_bus().await?;
let pid_path = Path::new(self.bundle.as_str()).join(format!("{}.pid", &p.id));
let bundle = self.bundle.to_string();
let pid_path = Path::new(&bundle).join(format!("{}.pid", &p.id));
let mut exec_opts = runc::options::ExecOpts {
io: None,
pid_file: Some(pid_path.to_owned()),
Expand All @@ -441,7 +463,7 @@ impl ProcessLifecycle<ExecProcess> for KuasarExecLifecycle {
if let Some(s) = socket {
s.clean().await;
}
return Err(other!("failed to start runc exec: {}", e));
return Err(runtime_error(&bundle, e, "OCI runtime exec failed").await);
}
copy_io_or_console(p, socket, pio, p.lifecycle.exit_signal.clone()).await?;
let pid = read_file_to_str(pid_path).await?.parse::<i32>()?;
Expand Down Expand Up @@ -613,3 +635,48 @@ pub fn check_kill_error(emsg: String) -> Error {
other!("unknown error after kill {}", emsg)
}
}

#[cfg(test)]
mod tests {
    use std::path::Path;

    use containerd_shim::{
        other,
        util::{mkdir, write_str_to_file},
    };
    use tokio::fs::remove_dir_all;

    use crate::container::runtime_error;

    /// Writes a small `log.json` into a scratch bundle directory and checks that
    /// `runtime_error` surfaces the message of the last error-level entry.
    #[tokio::test]
    async fn test_runtime_error() {
        // NOTE(review): `runtime_error` declares `r_err: runc::error::Error`, while
        // `other!` is the macro it uses to build its own return value — confirm
        // this argument really has the parameter's type.
        let empty_err = other!("");
        // One info entry followed by two error entries; only the last error
        // ("panic") is expected to be reported.
        let log_json = "\
        {\"level\":\"info\",\"msg\":\"hello word\",\"time\":\"2022-11-25\"}\n\
        {\"level\":\"error\",\"msg\":\"failed error\",\"time\":\"2022-11-26\"}\n\
        {\"level\":\"error\",\"msg\":\"panic\",\"time\":\"2022-11-27\"}\n\
        ";
        let test_dir = "/tmp/kuasar-test_runtime_error";
        // Best effort: the directory may be left over from a previous run.
        let _ = mkdir(test_dir, 0o711).await;
        let test_log_file = Path::new(test_dir).join("log.json");
        write_str_to_file(test_log_file.as_path(), log_json)
            .await
            .expect("write log json should not be error");

        let expected_msg = "panic";
        // `runtime_error` takes the bundle directory and appends "log.json"
        // itself, so pass the directory rather than the path of the log file.
        let actual_err = runtime_error(test_dir, empty_err, "test_runtime_error failed").await;
        // Clean up before asserting so a failed assertion does not leave the
        // scratch directory behind.
        remove_dir_all(test_dir).await.expect("remove test dir");
        assert!(
            actual_err.to_string().contains(expected_msg),
            "actual error \"{}\" should contains \"{}\"",
            actual_err,
            expected_msg
        );
    }
}

0 comments on commit 98dbb0b

Please sign in to comment.