Skip to content

Commit 6ca87f1

Browse files
authored
CA-399669: Do not exit with error when IPMI readings aren't available (#6261)
This error made toolstack restarts fail. Because not all drivers have ipmi available, exit gracefully instead. Also try to extract the reason from the command's output and log it. This means that now functions that use Utils.exec_cmd now red the stderr of the command as well. Fixes #6252
2 parents d603928 + 1463039 commit 6ca87f1

File tree

9 files changed

+95
-73
lines changed

9 files changed

+95
-73
lines changed

ocaml/xcp-rrdd/bin/rrdp-dcmi/dune

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
(modes exe)
33
(name rrdp_dcmi)
44
(libraries
5-
65
rrdd-plugin
6+
rrdd-plugin.base
77
rrdd_plugins_libs
88
xapi-idl.rrd
99
xapi-log

ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,31 +27,49 @@ let ipmitool_bin = "/usr/bin/ipmitool"
2727

2828
let ipmitool args =
2929
(* we connect to the local /dev/ipmi0 if available to read measurements from local BMC *)
30-
ipmitool_bin :: "-I" :: "open" :: args |> String.concat " "
30+
ipmitool_bin :: args |> String.concat " "
31+
32+
type discovery_error = Devices_missing
33+
34+
let discovery_error_to_string = function
35+
| Devices_missing ->
36+
"IPMI devices are missing"
3137

3238
let discover () =
39+
let read_out_line line =
40+
(* this code runs once on startup, logging all the output here will be useful for debugging *)
41+
D.debug "DCMI discover: %s" line ;
42+
let line = String.trim line in
43+
if String.equal line "Power management available" then
44+
Some ()
45+
else
46+
None
47+
in
48+
let read_err_line line =
49+
(* this code runs once on startup, logging all the output here will be useful for debugging *)
50+
D.debug "DCMI discover: %s" line ;
51+
let line = String.trim line in
52+
if String.starts_with ~prefix:"Could not open device at" line then
53+
Some Devices_missing
54+
else
55+
None
56+
in
3357
Utils.exec_cmd
3458
(module Process.D)
3559
~cmdstring:(ipmitool ["dcmi"; "discover"])
36-
~f:(fun line ->
37-
(* this code runs once on startup, logging all the output here will be useful for debugging *)
38-
D.debug "DCMI discover: %s" line ;
39-
if String.trim line = "Power management available" then
40-
Some ()
41-
else
42-
None
43-
)
60+
~read_out_line ~read_err_line
4461

4562
let get_dcmi_power_reading () =
63+
let read_out_line line =
64+
(* example line: ' Instantaneous power reading: 34 Watts' *)
65+
try Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some
66+
with _ -> None
67+
in
68+
let read_err_line _ = None in
4669
Utils.exec_cmd
4770
(module Process.D)
4871
~cmdstring:(ipmitool ["dcmi"; "power"; "reading"])
49-
~f:(fun line ->
50-
(* example line: ' Instantaneous power reading: 34 Watts' *)
51-
try
52-
Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some
53-
with Scanf.Scan_failure _ | End_of_file -> None
54-
)
72+
~read_out_line ~read_err_line
5573

5674
let gen_dcmi_power_reading value =
5775
( Rrd.Host
@@ -63,18 +81,23 @@ let gen_dcmi_power_reading value =
6381

6482
let generate_dss () =
6583
match get_dcmi_power_reading () with
66-
| watts :: _ ->
84+
| watts :: _, _ ->
6785
[gen_dcmi_power_reading watts]
6886
| _ ->
6987
[]
7088

7189
let _ =
7290
initialise () ;
7391
match discover () with
74-
| [] ->
75-
D.info "IPMI DCMI power reading is unavailable" ;
76-
exit 1
77-
| _ ->
92+
| () :: _, _ ->
7893
D.info "IPMI DCMI power reading is available" ;
7994
main_loop ~neg_shift:0.5 ~target:(Reporter.Local 1)
8095
~protocol:Rrd_interface.V2 ~dss_f:generate_dss
96+
| [], errs ->
97+
let reason =
98+
List.nth_opt errs 0
99+
|> Option.map discovery_error_to_string
100+
|> Option.value ~default:"unknown"
101+
in
102+
D.warn "IPMI DCMI power readings not available, stopping. Reason: %s"
103+
reason

ocaml/xcp-rrdd/bin/rrdp-iostat/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
mtime
1111
mtime.clock.os
1212
rrdd-plugin
13+
rrdd-plugin.base
1314
rrdd_plugin_xenctrl
1415
rrdd_plugins_libs
1516
str

ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ module Iostat = struct
124124

125125
(* Keep track of how many results headers we've seen so far *)
126126
let parsing_section = ref 0 in
127-
let process_line str =
127+
let read_out_line str =
128128
let res = Utils.cut str in
129129
(* Keep values from the second set of outputs *)
130130
( if !parsing_section = 2 then
@@ -151,7 +151,10 @@ module Iostat = struct
151151
(* 2 iterations; 1 second between them *)
152152

153153
(* Iterate through each line and populate dev_values_map *)
154-
let _ = Utils.exec_cmd (module Process.D) ~cmdstring ~f:process_line in
154+
let read_err_line _ = None in
155+
let _ =
156+
Utils.exec_cmd (module Process.D) ~cmdstring ~read_out_line ~read_err_line
157+
in
155158

156159
(* Now read the values out of dev_values_map for devices for which we have data *)
157160
List.filter_map
@@ -341,16 +344,19 @@ let exec_tap_ctl_list () : ((string * string) * int) list =
341344
D.error "Could not find device with physical path %s" phypath ;
342345
None
343346
in
344-
let process_line str =
347+
let read_out_line str =
345348
try Scanf.sscanf str "pid=%d minor=%d state=%s args=%s@:%s" extract_vdis
346349
with Scanf.Scan_failure _ | Failure _ | End_of_file ->
347350
D.warn {|"%s" returned a line that could not be parsed. Ignoring.|}
348351
tap_ctl ;
349352
D.warn "Offending line: %s" str ;
350353
None
351354
in
352-
let pid_and_minor_to_sr_and_vdi =
353-
Utils.exec_cmd (module Process.D) ~cmdstring:tap_ctl ~f:process_line
355+
let read_err_line _ = None in
356+
let pid_and_minor_to_sr_and_vdi, _ =
357+
Utils.exec_cmd
358+
(module Process.D)
359+
~cmdstring:tap_ctl ~read_out_line ~read_err_line
354360
in
355361
let sr_and_vdi_to_minor =
356362
List.map

ocaml/xcp-rrdd/bin/rrdp-xenpm/dune

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
(modes exe)
33
(name rrdp_xenpm)
44
(libraries
5-
65
rrdd-plugin
6+
rrdd-plugin.base
77
rrdd_plugins_libs
88
str
99
xapi-idl.rrd

ocaml/xcp-rrdd/bin/rrdp-xenpm/rrdp_xenpm.ml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,27 +57,30 @@ let gen_pm_cpu_averages cpu_id time =
5757

5858
let get_cpu_averages () : int64 list =
5959
let pattern = Str.regexp "average cpu frequency:[ \t]+\\([0-9]+\\)[ \t]*$" in
60-
let match_fun s =
60+
let read_out_line s =
6161
if Str.string_match pattern s 0 then
6262
Some (Int64.of_string (Str.matched_group 1 s))
6363
else
6464
None
6565
in
66+
let read_err_line _ = None in
6667
Utils.exec_cmd
6768
(module Process.D)
6869
~cmdstring:(Printf.sprintf "%s %s" xenpm_bin "get-cpufreq-average")
69-
~f:match_fun
70+
~read_out_line ~read_err_line
71+
|> fst
7072

7173
let get_states cpu_state : int64 list =
7274
let pattern =
7375
Str.regexp "[ \t]*residency[ \t]+\\[[ \t]*\\([0-9]+\\) ms\\][ \t]*"
7476
in
75-
let match_fun s =
77+
let read_out_line s =
7678
if Str.string_match pattern s 0 then
7779
Some (Int64.of_string (Str.matched_group 1 s))
7880
else
7981
None
8082
in
83+
let read_err_line _ = None in
8184
Utils.exec_cmd
8285
(module Process.D)
8386
~cmdstring:
@@ -89,7 +92,8 @@ let get_states cpu_state : int64 list =
8992
"get-cpufreq-states"
9093
)
9194
)
92-
~f:match_fun
95+
~read_out_line ~read_err_line
96+
|> fst
9397

9498
(* list_package [1;2;3;4] 2 = [[1;2];[3;4]] *)
9599
let list_package (l : 'a list) (n : int) : 'a list list =

ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,6 @@
1414

1515
(** Library to simplify writing an rrdd plugin. *)
1616

17-
(** Utility functions useful for rrdd plugins. *)
18-
module Utils : sig
19-
val now : unit -> float
20-
(** Return the current unix epoch as an int64. *)
21-
22-
val cut : string -> string list
23-
(** Split a string into a list of strings as separated by spaces and/or
24-
tabs. *)
25-
26-
val list_directory_unsafe : string -> string list
27-
(** List the contents of a directory, including . and .. *)
28-
29-
val list_directory_entries_unsafe : string -> string list
30-
(** List the contents of a directory, not including . and .. *)
31-
32-
val exec_cmd :
33-
(module Debug.DEBUG)
34-
-> cmdstring:string
35-
-> f:(string -> 'a option)
36-
-> 'a list
37-
(** [exec_cmd cmd f] executes [cmd], applies [f] on each of the lines which
38-
[cmd] outputs on stdout, and returns a list of resulting values for which
39-
applying [f] returns [Some value]. *)
40-
end
41-
4217
(** Asynchronous interface to create, cancel and query the state of stats
4318
reporting threads. *)
4419
module Reporter : sig

ocaml/xcp-rrdd/lib/plugin/utils.ml

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,15 @@ let list_directory_entries_unsafe dir =
3333
let dirlist = list_directory_unsafe dir in
3434
List.filter (fun x -> x <> "." && x <> "..") dirlist
3535

36-
let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
36+
let protect ~finally fn = Xapi_stdext_pervasives.Pervasiveext.finally fn finally
37+
38+
let exec_cmd (module D : Debug.DEBUG) ~cmdstring
39+
~(read_out_line : string -> 'a option) ~(read_err_line : string -> 'b option)
40+
=
3741
D.debug "Forking command %s" cmdstring ;
38-
(* create pipe for reading from the command's output *)
42+
(* create pipes for reading from the command's output *)
3943
let out_readme, out_writeme = Unix.pipe () in
44+
let err_readme, err_writeme = Unix.pipe () in
4045
let cmd, args =
4146
match Astring.String.cuts ~empty:false ~sep:" " cmdstring with
4247
| [] ->
@@ -45,19 +50,21 @@ let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
4550
(h, t)
4651
in
4752
let pid =
48-
Forkhelpers.safe_close_and_exec None (Some out_writeme) None [] cmd args
53+
Forkhelpers.safe_close_and_exec None (Some out_writeme) (Some err_writeme)
54+
[] cmd args
4955
in
5056
Unix.close out_writeme ;
51-
let in_channel = Unix.in_channel_of_descr out_readme in
52-
let vals = ref [] in
53-
let rec loop () =
54-
let line = input_line in_channel in
55-
let ret = f line in
56-
(match ret with None -> () | Some v -> vals := v :: !vals) ;
57-
loop ()
57+
Unix.close err_writeme ;
58+
59+
let read_and_close f fd =
60+
let in_channel = Unix.in_channel_of_descr fd in
61+
let f acc line = match f line with None -> acc | Some v -> v :: acc in
62+
protect
63+
~finally:(fun () -> Unix.close fd)
64+
(fun () -> Xapi_stdext_unix.Unixext.lines_fold f [] in_channel |> List.rev)
5865
in
59-
(try loop () with End_of_file -> ()) ;
60-
Unix.close out_readme ;
66+
let stdout = read_and_close read_out_line out_readme in
67+
let stderr = read_and_close read_err_line err_readme in
6168
let pid, status = Forkhelpers.waitpid pid in
6269
( match status with
6370
| Unix.WEXITED n ->
@@ -67,4 +74,4 @@ let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
6774
| Unix.WSTOPPED s ->
6875
D.debug "Process %d was stopped by signal %a" pid Debug.Pp.signal s
6976
) ;
70-
List.rev !vals
77+
(stdout, stderr)

ocaml/xcp-rrdd/lib/plugin/utils.mli

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,13 @@ val list_directory_entries_unsafe : string -> string list
2727
(** List the contents of a directory, not including . and .. *)
2828

2929
val exec_cmd :
30-
(module Debug.DEBUG) -> cmdstring:string -> f:(string -> 'a option) -> 'a list
31-
(** [exec_cmd cmd f] executes [cmd], applies [f] on each of the lines which
32-
[cmd] outputs on stdout, and returns a list of resulting values for which
33-
applying [f] returns [Some value]. *)
30+
(module Debug.DEBUG)
31+
-> cmdstring:string
32+
-> read_out_line:(string -> 'a option)
33+
-> read_err_line:(string -> 'b option)
34+
-> 'a list * 'b list
35+
(** [exec_cmd cmd out_line err_line] executes [cmd], applies [read_out_line] to
36+
each of the lines which [cmd] outputs on stdout, applies [read_err_line] to
37+
each of the lines which [cmd] outputs on stderr, and returns a tuple of
38+
list with each of the values that the [read_out_line] and [read_err_line]
39+
returned [Some value]. *)

0 commit comments

Comments
 (0)