Skip to content

Commit 6575d56

Browse files
committed
Read all cgroup v2 metrics that can be read
This commit removes any arbitration of the cgroup v2 hierarchy for a given process. We instead read anything that can be read, looping over all cgroup files present but not following the hierarchy down. Signed-off-by: Brian L. Troutwine <[email protected]>
1 parent 63f1ce0 commit 6575d56

File tree

5 files changed

+105
-158
lines changed

5 files changed

+105
-158
lines changed

Cargo.lock

+2-28
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lading/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ uuid = { workspace = true }
8686
zstd = "0.13.1"
8787

8888
[target.'cfg(target_os = "linux")'.dependencies]
89-
cgroups-rs = { version = "0.3", default-features = false, features = [] }
9089
procfs = { version = "0.17", default-features = false, features = [] }
9190
async-pidfd = "0.1"
9291

lading/src/observer/linux.rs

+8-129
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
use std::{collections::VecDeque, io, path::Path, sync::atomic::Ordering};
1+
mod cgroup;
2+
3+
use std::{collections::VecDeque, io, sync::atomic::Ordering};
24

3-
use cgroups_rs::cgroup::Cgroup;
45
use metrics::gauge;
56
use nix::errno::Errno;
67
use procfs::ProcError::PermissionDenied;
@@ -9,6 +10,7 @@ use rustc_hash::{FxHashMap, FxHashSet};
910
use tracing::{error, warn};
1011

1112
use crate::observer::memory::{Regions, Rollup};
13+
use cgroup::v2;
1214

1315
use super::RSS_BYTES;
1416

@@ -27,8 +29,8 @@ pub enum Error {
2729
/// Wrapper for [`procfs::ProcError`]
2830
#[error("Unable to read procfs: {0}")]
2931
Proc(#[from] procfs::ProcError),
30-
#[error("Unable to read cgroups: {0}")]
31-
CGroups(#[from] cgroups_rs::error::Error),
32+
#[error("Unable to read cgroup: {0}")]
33+
CGroup(#[from] v2::Error),
3234
}
3335

3436
macro_rules! report_status_field {
@@ -401,96 +403,8 @@ impl Sampler {
401403
// if possible we compute the working set of the cgroup
402404
// using the same heuristic as kubernetes:
403405
// total_usage - inactive_file
404-
let cgroup = get_cgroup(pid as _)?;
405-
if let Some(memory_controller) =
406-
cgroup.controller_of::<cgroups_rs::memory::MemController>()
407-
{
408-
let mem_stat = memory_controller.memory_stat();
409-
410-
let inactive_file = if cgroup.v2() {
411-
mem_stat.stat.inactive_file
412-
} else {
413-
mem_stat.stat.total_inactive_file
414-
};
415-
let usage = mem_stat.usage_in_bytes;
416-
let working_set = if usage < inactive_file {
417-
0
418-
} else {
419-
usage - inactive_file
420-
};
421-
422-
gauge!("working_set_bytes").set(working_set as f64);
423-
gauge!("memory.working_set_bytes").set(working_set as f64);
424-
425-
gauge!("memory.fail_cnt").set(mem_stat.fail_cnt as f64);
426-
gauge!("memory.limit_bytes").set(mem_stat.limit_in_bytes as f64);
427-
gauge!("memory.usage_in_bytes").set(mem_stat.usage_in_bytes as f64);
428-
gauge!("memory.max_usage_in_bytes").set(mem_stat.max_usage_in_bytes as f64);
429-
gauge!("memory.soft_limit_in_bytes").set(mem_stat.soft_limit_in_bytes as f64);
430-
431-
gauge!("memory.stat.cache").set(mem_stat.stat.cache as f64);
432-
gauge!("memory.stat.rss").set(mem_stat.stat.rss as f64);
433-
gauge!("memory.stat.rss_huge").set(mem_stat.stat.rss_huge as f64);
434-
gauge!("memory.stat.shmem").set(mem_stat.stat.shmem as f64);
435-
gauge!("memory.stat.mapped_file").set(mem_stat.stat.mapped_file as f64);
436-
gauge!("memory.stat.dirty").set(mem_stat.stat.dirty as f64);
437-
gauge!("memory.stat.writeback").set(mem_stat.stat.writeback as f64);
438-
gauge!("memory.stat.swap").set(mem_stat.stat.swap as f64);
439-
gauge!("memory.stat.pgpgin").set(mem_stat.stat.pgpgin as f64);
440-
gauge!("memory.stat.pgpgout").set(mem_stat.stat.pgpgout as f64);
441-
gauge!("memory.stat.pgfault").set(mem_stat.stat.pgfault as f64);
442-
gauge!("memory.stat.pgmajfault").set(mem_stat.stat.pgmajfault as f64);
443-
gauge!("memory.stat.inactive_anon").set(mem_stat.stat.inactive_anon as f64);
444-
gauge!("memory.stat.active_anon").set(mem_stat.stat.active_anon as f64);
445-
gauge!("memory.stat.inactive_file").set(mem_stat.stat.inactive_file as f64);
446-
gauge!("memory.stat.active_file").set(mem_stat.stat.active_file as f64);
447-
gauge!("memory.stat.unevictable").set(mem_stat.stat.unevictable as f64);
448-
gauge!("memory.stat.hierarchical_memory_limit")
449-
.set(mem_stat.stat.hierarchical_memory_limit as f64);
450-
gauge!("memory.stat.hierarchical_memsw_limit")
451-
.set(mem_stat.stat.hierarchical_memsw_limit as f64);
452-
gauge!("memory.stat.total_cache").set(mem_stat.stat.total_cache as f64);
453-
gauge!("memory.stat.total_rss").set(mem_stat.stat.total_rss as f64);
454-
gauge!("memory.stat.total_rss_huge").set(mem_stat.stat.total_rss_huge as f64);
455-
gauge!("memory.stat.total_shmem").set(mem_stat.stat.total_shmem as f64);
456-
gauge!("memory.stat.total_mapped_file").set(mem_stat.stat.total_mapped_file as f64);
457-
gauge!("memory.stat.total_dirty").set(mem_stat.stat.total_dirty as f64);
458-
gauge!("memory.stat.total_writeback").set(mem_stat.stat.total_writeback as f64);
459-
gauge!("memory.stat.total_swap").set(mem_stat.stat.total_swap as f64);
460-
gauge!("memory.stat.total_pgpgin").set(mem_stat.stat.total_pgpgin as f64);
461-
gauge!("memory.stat.total_pgpgout").set(mem_stat.stat.total_pgpgout as f64);
462-
gauge!("memory.stat.total_pgfault").set(mem_stat.stat.total_pgfault as f64);
463-
gauge!("memory.stat.total_pgmajfault").set(mem_stat.stat.total_pgmajfault as f64);
464-
gauge!("memory.stat.total_inactive_anon")
465-
.set(mem_stat.stat.total_inactive_anon as f64);
466-
gauge!("memory.stat.total_active_anon").set(mem_stat.stat.total_active_anon as f64);
467-
gauge!("memory.stat.total_inactive_file")
468-
.set(mem_stat.stat.total_inactive_file as f64);
469-
gauge!("memory.stat.total_active_file").set(mem_stat.stat.total_active_file as f64);
470-
gauge!("memory.stat.total_unevictable").set(mem_stat.stat.total_unevictable as f64);
471-
}
472-
// Load the CPU controller and get the cpu.stat String out of the
473-
// cgroup, parse whatever fields are present and report them back
474-
// out as metrics.
475-
if let Some(cpu_controller) = cgroup.controller_of::<cgroups_rs::cpu::CpuController>() {
476-
let cpu = cpu_controller.cpu();
477-
for line in cpu.stat.lines() {
478-
let mut fields = line.split_whitespace();
479-
let metric_name = fields.next().unwrap_or_default();
480-
let value = fields.next().unwrap_or_default();
481-
gauge!(format!("cpu.{metric_name}"))
482-
.set(value.parse::<f64>().unwrap_or_default());
483-
}
484-
if let Ok(shares) = cpu_controller.shares() {
485-
gauge!("cpu.shares").set(shares as f64);
486-
}
487-
if let Ok(cfs_period) = cpu_controller.cfs_period() {
488-
gauge!("cpu.cfs_period").set(cfs_period as f64);
489-
}
490-
if let Ok(cfs_quota) = cpu_controller.cfs_quota() {
491-
gauge!("cpu.cfs_quota").set(cfs_quota as f64);
492-
}
493-
}
406+
let cgroup_path = v2::get_path(pid).await?;
407+
v2::poll(cgroup_path).await?;
494408
}
495409

496410
gauge!("num_processes").set(total_processes as f64);
@@ -608,38 +522,3 @@ fn percentage(delta_ticks: f64, delta_time: f64, num_cores: f64) -> f64 {
608522

609523
overall_percentage.clamp(0.0, 100.0 * num_cores)
610524
}
611-
612-
#[inline]
613-
fn get_cgroup(pid: u32) -> Result<Cgroup, Error> {
614-
let hierarchies = cgroups_rs::hierarchies::auto();
615-
if hierarchies.v2() {
616-
// for cgroups v2, we parse `/proc/<pid>/cgroup` looking for the main cgroup
617-
// relative path. We then use this to load the correct cgroup.
618-
// For unknown reasons, the cgroups_rs lib is not able to do this on its own.
619-
// Heavily inspired by
620-
// https://github.com/containerd/rust-extensions/blob/3d4de340d83aa06dff24fbf73d7d584ebe77c7ec/crates/shim/src/cgroup.rs#L178
621-
622-
let eof = || io::Error::from(io::ErrorKind::UnexpectedEof);
623-
let path = format!("/proc/{pid}/cgroup");
624-
let content = std::fs::read_to_string(path)?;
625-
626-
let first_line = content.lines().next().ok_or_else(eof)?;
627-
let (_, path_part) = first_line.split_once("::").ok_or_else(eof)?;
628-
629-
let mut path_parts = path_part.split('/').skip(1);
630-
let namespace = path_parts.next().ok_or_else(eof)?;
631-
let cgroup_name = path_parts.next().ok_or_else(eof)?;
632-
633-
Ok(Cgroup::load(
634-
hierarchies,
635-
format!("/sys/fs/cgroup/{namespace}/{cgroup_name}").as_str(),
636-
))
637-
} else {
638-
let relative_paths = cgroups_rs::cgroup::get_cgroups_relative_paths_by_pid(pid)?;
639-
Ok(Cgroup::load_with_relative_paths(
640-
hierarchies,
641-
Path::new("."),
642-
relative_paths,
643-
))
644-
}
645-
}

lading/src/observer/linux/cgroup.rs

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/// Code to read cgroup information.
2+
pub(crate) mod v2;
+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
use std::{io, path::PathBuf};
2+
3+
use metrics::gauge;
4+
use tokio::fs;
5+
6+
/// Errors produced while locating or reading cgroup v2 data.
#[derive(thiserror::Error, Debug)]
7+
pub enum Error {
8+
/// Wrapper for [`std::io::Error`], raised by the filesystem reads in this
/// module.
#[error("IO error: {0}")]
9+
Io(#[from] io::Error),
10+
/// Wrapper for [`std::num::ParseIntError`].
#[error("Parse int error: {0}")]
11+
ParseInt(#[from] std::num::ParseIntError),
12+
/// Wrapper for [`std::num::ParseFloatError`], raised when a cgroup value
/// cannot be parsed as an `f64`.
#[error("Parse float error: {0}")]
13+
ParseFloat(#[from] std::num::ParseFloatError),
14+
/// `/proc/{pid}/cgroup` had a line with fewer than three `:`-separated
/// fields, or no line with hierarchy ID `0` and an empty controller list.
#[error("Cgroup v2 not found")]
15+
CgroupV2NotFound,
16+
}
17+
18+
/// Determines the cgroup v2 path for a given PID.
19+
pub(crate) async fn get_path(pid: i32) -> Result<PathBuf, Error> {
20+
let path = format!("/proc/{pid}/cgroup");
21+
let content = fs::read_to_string(path).await?;
22+
23+
for line in content.lines() {
24+
let mut fields = line.split(':');
25+
let hierarchy_id = fields.next().ok_or(Error::CgroupV2NotFound)?;
26+
let controllers = fields.next().ok_or(Error::CgroupV2NotFound)?;
27+
let cgroup_path = fields.next().ok_or(Error::CgroupV2NotFound)?;
28+
29+
if hierarchy_id == "0" && controllers.is_empty() {
30+
// cgroup v2 detected
31+
let cgroup_mount_point = "/sys/fs/cgroup"; // Default mount point
32+
let full_cgroup_path = PathBuf::from(cgroup_mount_point)
33+
.join(cgroup_path.strip_prefix('/').unwrap_or(cgroup_path));
34+
return Ok(full_cgroup_path);
35+
}
36+
}
37+
38+
Err(Error::CgroupV2NotFound)
39+
}
40+
41+
/// Polls for any cgroup metrics that can be read, v2 version.
42+
pub(crate) async fn poll(path: PathBuf) -> Result<(), Error> {
43+
let mut entries = fs::read_dir(&path).await?;
44+
45+
while let Some(entry) = entries.next_entry().await? {
46+
let metadata = entry.metadata().await?;
47+
if metadata.is_file() {
48+
let file_name = entry.file_name();
49+
let metric_prefix = match file_name.to_str() {
50+
Some(s) => String::from(s),
51+
None => {
52+
// Skip files with non-UTF-8 names
53+
continue;
54+
}
55+
};
56+
let file_path = entry.path();
57+
58+
let content = fs::read_to_string(&file_path).await?;
59+
let content = content.trim();
60+
61+
// Cgroup files that have values are either single-valued or
62+
// key-value pairs. For single-valued files, we create a single
63+
// metric and for key-value pairs, we create metrics with the same
64+
// scheme as single-valued files but tack on the key to the metric
65+
// name.
66+
if let Ok(value) = content.parse::<f64>() {
67+
// Single-valued
68+
gauge!(metric_prefix).set(value);
69+
} else {
70+
// Key-value pairs
71+
if kv_pairs(content, &metric_prefix).is_err() {
72+
// File may fail to parse, for instance cgroup.controllers
73+
// is a list of strings.
74+
continue;
75+
}
76+
}
77+
}
78+
}
79+
80+
Ok(())
81+
}
82+
83+
fn kv_pairs(content: &str, metric_prefix: &str) -> Result<(), Error> {
84+
for line in content.lines() {
85+
let mut parts = line.split_whitespace();
86+
let key = parts.next().expect("malformed key-value pair");
87+
let value_str = parts.next().expect("malformed key-value pair");
88+
let value: f64 = value_str.parse()?;
89+
let metric_name = format!("{metric_prefix}.{key}");
90+
gauge!(metric_name).set(value);
91+
}
92+
Ok(())
93+
}

0 commit comments

Comments
 (0)