Skip to content

Commit ef43be7

Browse files
committed
bug fixes
Signed-off-by: Brian L. Troutwine <[email protected]>
1 parent 8f103b3 commit ef43be7

File tree

3 files changed

+84
-58
lines changed

3 files changed

+84
-58
lines changed

examples/lading-idle.yaml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,2 @@
11
generator: []
2-
3-
blackhole:
4-
- tcp:
5-
binding_addr: "0.0.0.0:8080"
2+
blackhole: []

lading/src/observer/linux/cgroup.rs

Lines changed: 61 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::{collections::VecDeque, io};
66
use nix::errno::Errno;
77
use procfs::process::Process;
88
use rustc_hash::FxHashSet;
9-
use tracing::{debug, error};
9+
use tracing::{debug, error, info};
1010

1111
#[derive(thiserror::Error, Debug)]
1212
/// Errors produced by functions in this module
@@ -44,61 +44,82 @@ impl Sampler {
4444
// Every sample run we collect all the child processes rooted at the
4545
// parent. As noted by the procfs documentation is this done by
4646
// dereferencing the `/proc/<pid>/root` symlink.
47+
debug!(
48+
"Polling procfs for child processes: {pid}",
49+
pid = self.parent.pid()
50+
);
4751
let mut pids: FxHashSet<i32> = FxHashSet::default();
4852
{
4953
let mut processes: VecDeque<Process> = VecDeque::with_capacity(16); // an arbitrary smallish number
50-
processes.push_back(Process::new(self.parent.pid())?);
51-
while let Some(process) = processes.pop_back() {
52-
// Search for child processes. This is done by querying for every
53-
// thread of `process` and inspecting each child of the thread. Note
54-
// that processes on linux are those threads that have their group
55-
// id equal to their pid. It's also possible for a pid to list
56-
// itself as a child so we reference the pid hashset above to avoid
57-
// infinite loops.
58-
if let Ok(tasks) = process.tasks() {
59-
for task in tasks.flatten() {
60-
if let Ok(mut children) = task.children() {
61-
for child in children
62-
.drain(..)
63-
.filter_map(|c| Process::new(c as i32).ok())
64-
{
65-
let pid = child.pid();
66-
if !pids.contains(&pid) {
67-
// We have not seen this process and do need to
68-
// record it for child scanning and sampling if
69-
// it proves to be a process.
70-
processes.push_back(child);
71-
pids.insert(pid);
54+
match Process::new(self.parent.pid()) {
55+
Ok(parent) => {
56+
pids.insert(parent.pid());
57+
processes.push_back(parent);
58+
while let Some(process) = processes.pop_back() {
59+
// Search for child processes. This is done by querying for every
60+
// thread of `process` and inspecting each child of the thread. Note
61+
// that processes on linux are those threads that have their group
62+
// id equal to their pid. It's also possible for a pid to list
63+
// itself as a child so we reference the pid hashset above to avoid
64+
// infinite loops.
65+
if let Ok(tasks) = process.tasks() {
66+
for task in tasks.flatten() {
67+
if let Ok(mut children) = task.children() {
68+
for child in children
69+
.drain(..)
70+
.filter_map(|c| Process::new(c as i32).ok())
71+
{
72+
let pid = child.pid();
73+
if !pids.contains(&pid) {
74+
// We have not seen this process and do need to
75+
// record it for child scanning and sampling if
76+
// it proves to be a process.
77+
processes.push_back(child);
78+
pids.insert(pid);
79+
}
80+
}
7281
}
7382
}
7483
}
7584
}
7685
}
86+
Err(err) => {
87+
error!(
88+
"Unable to read parent process {pid}: {err}",
89+
pid = self.parent.pid()
90+
);
91+
}
7792
}
78-
}
7993

80-
// Now iterate the pids and collect the unique names of the cgroups associated.
81-
let mut cgroups = FxHashSet::default();
82-
for pid in pids {
83-
match v2::get_path(pid).await {
84-
Ok(cgroup_path) => {
85-
cgroups.insert(cgroup_path);
86-
}
87-
Err(err) => {
88-
debug!("Unable to get cgroup path for pid {pid}: {err}");
94+
info!("Found {count} child processes", count = pids.len());
95+
// Now iterate the pids and collect the unique names of the cgroups associated.
96+
let mut cgroups = FxHashSet::default();
97+
for pid in pids {
98+
debug!("Polling cgroup for pid {pid}", pid = pid);
99+
match v2::get_path(pid).await {
100+
Ok(cgroup_path) => {
101+
cgroups.insert(cgroup_path);
102+
}
103+
Err(err) => {
104+
error!("Unable to get cgroup path for pid {pid}: {err}");
105+
}
89106
}
90107
}
91-
}
92108

93-
// Now iterate the cgroups and collect samples.
94-
for cgroup_path in cgroups {
95-
if let Err(err) = v2::poll(&cgroup_path, &self.labels).await {
96-
error!(
97-
"Unable to poll cgroup metrics for {path}: {err}",
109+
// Now iterate the cgroups and collect samples.
110+
for cgroup_path in cgroups {
111+
info!(
112+
"Polling cgroup metrics for {path}",
98113
path = cgroup_path.to_string_lossy()
99114
);
115+
if let Err(err) = v2::poll(&cgroup_path, &self.labels).await {
116+
error!(
117+
"Unable to poll cgroup metrics for {path}: {err}",
118+
path = cgroup_path.to_string_lossy()
119+
);
120+
}
100121
}
122+
Ok(())
101123
}
102-
Ok(())
103124
}
104125
}

lading/src/observer/linux/cgroup/v2.rs

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
use core::f64;
12
use std::{
23
io,
34
path::{Path, PathBuf},
45
};
56

67
use metrics::gauge;
78
use tokio::fs;
8-
use tracing::debug;
9+
use tracing::{debug, error};
910

1011
#[derive(thiserror::Error, Debug)]
1112
pub enum Error {
@@ -49,18 +50,18 @@ pub(crate) async fn poll(file_path: &Path, labels: &[(String, String)]) -> Resul
4950
// the maximal number of files to be read.
5051
match fs::read_dir(&file_path).await {
5152
Ok(mut entries) => {
52-
match entries.next_entry().await {
53-
Ok(maybe_entry) => {
54-
if let Some(entry) = maybe_entry {
53+
loop {
54+
match entries.next_entry().await {
55+
Ok(Some(entry)) => {
5556
match entry.metadata().await {
5657
Ok(metadata) => {
5758
if metadata.is_file() {
5859
let file_name = entry.file_name();
5960
let metric_prefix = match file_name.to_str() {
60-
Some(s) => String::from(s),
61+
Some(s) => format!("cgroup.v2.{s}"),
6162
None => {
6263
// Skip files with non-UTF-8 names
63-
return Ok(());
64+
continue;
6465
}
6566
};
6667
let file_path = entry.path();
@@ -74,7 +75,9 @@ pub(crate) async fn poll(file_path: &Path, labels: &[(String, String)]) -> Resul
7475
// metric and for key-value pairs, we create metrics with the same
7576
// scheme as single-valued files but tack on the key to the metric
7677
// name.
77-
if let Ok(value) = content.parse::<f64>() {
78+
if content == "max" {
79+
gauge!(metric_prefix, labels).set(f64::MAX);
80+
} else if let Ok(value) = content.parse::<f64>() {
7881
// Single-valued
7982
gauge!(metric_prefix, labels).set(value);
8083
} else {
@@ -89,7 +92,7 @@ pub(crate) async fn poll(file_path: &Path, labels: &[(String, String)]) -> Resul
8992
{
9093
// File may fail to parse, for instance cgroup.controllers
9194
// is a list of strings.
92-
return Ok(());
95+
continue;
9396
}
9497
}
9598
}
@@ -110,12 +113,15 @@ pub(crate) async fn poll(file_path: &Path, labels: &[(String, String)]) -> Resul
110113
}
111114
}
112115
}
113-
}
114-
Err(err) => {
115-
debug!(
116-
"[{path}] failed to read entry in cgroup directory: {err:?}",
117-
path = file_path.to_string_lossy()
118-
);
116+
Ok(None) => {
117+
break;
118+
}
119+
Err(err) => {
120+
debug!(
121+
"[{path}] failed to read entry in cgroup directory: {err:?}",
122+
path = file_path.to_string_lossy()
123+
);
124+
}
119125
}
120126
}
121127
}
@@ -145,12 +151,14 @@ fn kv_pairs(
145151
"[{path}] missing value in key/value pair, skipping",
146152
path = file_path.to_string_lossy(),
147153
);
154+
return Ok(());
148155
}
149156
} else {
150157
debug!(
151158
"[{path} missing key in key/value pair, skipping",
152159
path = file_path.to_string_lossy(),
153160
);
161+
return Ok(());
154162
}
155163
}
156164
Ok(())

0 commit comments

Comments
 (0)