Skip to content

Commit 4aed0fe

Browse files
committed
adds prometheus metrics for disk availability
1 parent 32b677c commit 4aed0fe

File tree

7 files changed

+185
-10
lines changed

7 files changed

+185
-10
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Notable exceptions to parsing in the library and listing in tooling are:
2929

3030
1. Prometheus metrics for:
3131

32+
- disk availability
3233
- disk pool size
3334
- disk pool group I/O
3435
- quotas

src/bin/mmoxi/cli.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,13 @@ pub fn build_prometheus() -> Command {
109109
.arg(arg_output())
110110
.after_long_help("Run on cluster manager only.");
111111

112+
let prom_disk = Command::new("disk")
113+
.about("Gather metrics from mmlsdisk.")
114+
.disable_help_flag(true)
115+
.disable_version_flag(true)
116+
.arg(arg_output())
117+
.after_long_help("Run on cluster manager only.");
118+
112119
let prom_fileset = Command::new("fileset")
113120
.about("Gather fileset metrics.")
114121
.disable_help_flag(true)
@@ -172,6 +179,7 @@ pub fn build_prometheus() -> Command {
172179
.subcommand_required(true)
173180
.arg_required_else_help(true)
174181
.subcommand(prom_df)
182+
.subcommand(prom_disk)
175183
.subcommand(prom_fileset)
176184
.subcommand(prom_pool)
177185
.subcommand(prom_quota)

src/bin/mmoxi/main.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use std::path::PathBuf;
88

99
use anyhow::{anyhow, Context, Result};
1010
use clap::ArgMatches;
11+
use mmoxi::prom::ToText;
1112

1213
mod cli;
1314

@@ -57,6 +58,7 @@ fn dispatch_list_manager(args: &ArgMatches) -> Result<()> {
5758
fn dispatch_prom(args: &ArgMatches) -> Result<()> {
5859
match args.subcommand() {
5960
Some(("df", args)) => run_prom_df(args),
61+
Some(("disk", args)) => run_prom_disk(args),
6062
Some(("fileset", args)) => run_prom_fileset(args),
6163
Some(("pool", args)) => dispatch_prom_pool(args),
6264
Some(("quota", args)) => run_prom_quota(args),
@@ -229,6 +231,21 @@ fn run_prom_df(args: &ArgMatches) -> Result<()> {
229231
Ok(())
230232
}
231233

234+
fn run_prom_disk(args: &ArgMatches) -> Result<()> {
235+
let mut output = output_to_bufwriter(args)?;
236+
237+
let mut all_disks = HashMap::new();
238+
239+
for fs in mmoxi::fs::names()? {
240+
let disks = mmoxi::disk::disks(&fs)?;
241+
all_disks.insert(fs, disks);
242+
}
243+
244+
all_disks.to_prom(&mut output)?;
245+
246+
Ok(())
247+
}
248+
232249
fn run_prom_fileset(args: &ArgMatches) -> Result<()> {
233250
let mut output = output_to_bufwriter(args)?;
234251

src/disk-example.in

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
mmlsdisk::HEADER:version:reserved:reserved:nsdName:driverType:sectorSize:failureGroup:metadata:data:status:availability:diskID:storagePool:remarks:numQuorumDisks:readQuorumValue:writeQuorumValue:diskSizeKB:diskUID:thinDiskType:replicaType:
22
mmlsdisk::0:1:::disk1:nsd:512:1:Yes:No:ready:up:42:system::3:2:2:1869606912:0A0A141A5AFAE37E:::
3-
mmlsdisk::0:1:::disk2:nsd:512:1:No:Yes:ready:up:43:nvme::3:2:2:1869606912:0A0A141A5AFAE386:::
4-
mmlsdisk::0:1:::disk3:nsd:512:1:No:Yes:ready:up:44:nlsas::3:2:2:1869606912:0A0A141A5AFAE382:::
3+
mmlsdisk::0:1:::disk2:nsd:512:1:No:Yes:ready:down:43:nvme::3:2:2:1869606912:0A0A141A5AFAE386:::
4+
mmlsdisk::0:1:::disk3:nsd:512:1:No:Yes:ready:recovering:44:nlsas::3:2:2:1869606912:0A0A141A5AFAE382:::
5+
mmlsdisk::0:1:::disk4:nsd:512:1:No:Yes:ready:unrecovered:45:nlsas::3:2:2:1869606912:0A0A141A5AFAE382:::

src/disk-example.prom

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# HELP gpfs_disk_availability GPFS disk state.
2+
# TYPE gpfs_disk_availability gauge
3+
gpfs_disk_availability{name="disk1",fs="gpfs1",pool="system",availability="up"} 0
4+
gpfs_disk_availability{name="disk2",fs="gpfs1",pool="nvme",availability="down"} 1
5+
gpfs_disk_availability{name="disk3",fs="gpfs1",pool="nlsas",availability="recovering"} 1
6+
gpfs_disk_availability{name="disk4",fs="gpfs1",pool="nlsas",availability="unrecovered"} 1

src/disk.rs

Lines changed: 139 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
//! `mmlsdisk` parsing.
22
3-
use std::io::BufRead;
3+
use std::collections::HashMap;
4+
use std::fmt::Display;
5+
use std::io::{BufRead, Write};
46
use std::process::Command;
7+
use std::str::FromStr;
58

69
use anyhow::{anyhow, Context, Result};
710

@@ -12,7 +15,7 @@ use crate::util::MMBool;
1215
/// # Errors
1316
///
1417
/// Returns an error if running `mmlsdisk` fails or if parsing its output fails.
15-
pub fn disks<S: AsRef<str>>(fs_name: S) -> Result<Disks> {
18+
pub fn disks(fs_name: impl AsRef<str>) -> Result<Disks> {
1619
let fs_name = fs_name.as_ref();
1720

1821
let mut cmd = Command::new("mmlsdisk");
@@ -102,12 +105,61 @@ impl FromIterator<Self> for Disks {
102105
}
103106
}
104107

108+
/// Availability state of a disk.
///
/// States that have no dedicated variant are carried as a string in
/// [`Availability::Unknown`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub enum Availability {
    /// The disk can serve I/O operations.
    Up,

    /// The disk cannot serve any I/O operations.
    Down,

    /// Transitional state of a disk that is coming up.
    Recovering,

    /// Bringing the disk up did not succeed.
    Unrecovered,

    /// A state without a dedicated variant, kept as a string.
    Unknown(String),
}
127+
128+
impl Display for Availability {
129+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
130+
let s = match self {
131+
Self::Up => "up",
132+
Self::Down => "down",
133+
Self::Recovering => "recovering",
134+
Self::Unrecovered => "unrecovered",
135+
Self::Unknown(s) => s.as_str(),
136+
};
137+
138+
write!(f, "{s}")
139+
}
140+
}
141+
142+
impl FromStr for Availability {
143+
type Err = anyhow::Error;
144+
145+
fn from_str(s: &str) -> Result<Self> {
146+
match s {
147+
"up" => Ok(Self::Up),
148+
"down" => Ok(Self::Down),
149+
"recovering" => Ok(Self::Recovering),
150+
"unrecovered" => Ok(Self::Unrecovered),
151+
unknown => Ok(Self::Unknown(unknown.into())),
152+
}
153+
}
154+
}
155+
105156
/// Disk data.
106157
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
107158
pub struct Disk {
108159
nsd_name: String,
109160
is_metadata: bool,
110161
is_objectdata: bool,
162+
availability: Availability,
111163
storage_pool: String,
112164
}
113165

@@ -159,6 +211,12 @@ impl Disk {
159211
let is_objectdata =
160212
tokens[is_objectdata_index].parse::<MMBool>()?.as_bool();
161213

214+
let availability_index = index
215+
.availability
216+
.ok_or_else(|| anyhow!("no availability index"))?;
217+
let availability =
218+
tokens[availability_index].parse::<Availability>()?;
219+
162220
let storage_pool_index = index
163221
.storage_pool
164222
.ok_or_else(|| anyhow!("no storage pool index"))?;
@@ -168,6 +226,7 @@ impl Disk {
168226
nsd_name,
169227
is_metadata,
170228
is_objectdata,
229+
availability,
171230
storage_pool,
172231
})
173232
}
@@ -178,6 +237,7 @@ struct Index {
178237
nsd_name: Option<usize>,
179238
is_metadata: Option<usize>,
180239
is_objectdata: Option<usize>,
240+
availability: Option<usize>,
181241
storage_pool: Option<usize>,
182242
}
183243

@@ -187,57 +247,128 @@ fn header_to_index(tokens: &[&str], index: &mut Index) {
187247
"nsdName" => index.nsd_name = Some(i),
188248
"metadata" => index.is_metadata = Some(i),
189249
"data" => index.is_objectdata = Some(i),
250+
"availability" => index.availability = Some(i),
190251
"storagePool" => index.storage_pool = Some(i),
191252
_ => {}
192253
}
193254
}
194255
}
195256

257+
// ----------------------------------------------------------------------------
258+
// prometheus
259+
// ----------------------------------------------------------------------------
260+
261+
impl<S: ::std::hash::BuildHasher> crate::prom::ToText
262+
for HashMap<String, Disks, S>
263+
{
264+
fn to_prom(&self, output: &mut impl Write) -> Result<()> {
265+
for (fs, disks) in self {
266+
writeln!(
267+
output,
268+
"# HELP gpfs_disk_availability GPFS disk state."
269+
)?;
270+
writeln!(output, "# TYPE gpfs_disk_availability gauge")?;
271+
272+
for disk in &disks.0 {
273+
let status = match disk.availability {
274+
Availability::Up => 0,
275+
_ => 1,
276+
};
277+
278+
writeln!(
279+
output,
280+
"gpfs_disk_availability{{name=\"{}\",fs=\"{}\",pool=\"{}\",availability=\"{}\"}} {}",
281+
disk.nsd_name,
282+
fs,
283+
disk.storage_pool,
284+
disk.availability,
285+
status,
286+
)?;
287+
}
288+
}
289+
290+
Ok(())
291+
}
292+
}
293+
196294
// ----------------------------------------------------------------------------
197295
// tests
198296
// ----------------------------------------------------------------------------
199297

200298
#[cfg(test)]
mod tests {
    use super::*;
    use crate::prom::ToText;

    /// Builds the `Disk` expected for one line of `disk-example.in`.
    fn expected_disk(
        nsd_name: &str,
        is_metadata: bool,
        is_objectdata: bool,
        availability: Availability,
        storage_pool: &str,
    ) -> Disk {
        Disk {
            nsd_name: nsd_name.into(),
            is_metadata,
            is_objectdata,
            availability,
            storage_pool: storage_pool.into(),
        }
    }

    /// Every line of the example input is parsed into the matching `Disk`,
    /// in input order, and nothing else is produced.
    #[test]
    fn parse() {
        let example = include_str!("disk-example.in");

        let parsed = Disks::from_reader(example.as_bytes()).unwrap();
        let mut parsed = parsed.0.into_iter();

        assert_eq!(
            parsed.next(),
            Some(expected_disk(
                "disk1",
                true,
                false,
                Availability::Up,
                "system",
            ))
        );

        assert_eq!(
            parsed.next(),
            Some(expected_disk(
                "disk2",
                false,
                true,
                Availability::Down,
                "nvme",
            ))
        );

        assert_eq!(
            parsed.next(),
            Some(expected_disk(
                "disk3",
                false,
                true,
                Availability::Recovering,
                "nlsas",
            ))
        );

        assert_eq!(
            parsed.next(),
            Some(expected_disk(
                "disk4",
                false,
                true,
                Availability::Unrecovered,
                "nlsas",
            ))
        );

        assert_eq!(parsed.next(), None);
    }

    /// The example input, registered under the file system `gpfs1`, renders
    /// exactly the metrics stored in `disk-example.prom`.
    #[test]
    fn prometheus() {
        let example = include_str!("disk-example.in");
        let parsed = Disks::from_reader(example.as_bytes()).unwrap();

        let mut by_fs = HashMap::new();
        by_fs.insert(String::from("gpfs1"), parsed);

        let mut buffer = vec![];
        by_fs.to_prom(&mut buffer).unwrap();

        let rendered = std::str::from_utf8(buffer.as_slice()).unwrap();

        assert_eq!(rendered, include_str!("disk-example.prom"));
    }
}

src/prom.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,17 @@ use crate::fileset::Fileset;
1010
use crate::nsd::FsPoolId;
1111
use crate::sysfs;
1212

13+
/// Convert `Data` to Prometheus metrics.
14+
pub trait ToText {
15+
/// Converts the `Data` to Prometheus' text-based format.
16+
///
17+
/// # Errors
18+
///
19+
/// This function uses [`writeln`] to write to `output`. It can only fail
20+
/// if any of these [`writeln`] fails.
21+
fn to_prom(&self, output: &mut impl Write) -> Result<()>;
22+
}
23+
1324
/// Writes the `mmdf` NSD data as prometheus metrics to `output`.
1425
///
1526
/// # Errors

0 commit comments

Comments
 (0)