Skip to content

Commit 46dec4b

Browse files
authored
Restructure valid allowed_operations computation (#6253)
In an effort to make the logic clearer, we restructure the part(s) of `Xapi_pool_helpers` responsible for determining which operations are "valid" (i.e. become members of `allowed_operations`). The previous code is somewhat tedious to understand because it is unconditionally ineffective - the logical delineation of parts of the code is implicit, as the code computes the valid operation table in order, but many of the operations will have no effect (as later code to populate an operation's entry in the validity table do nothing). This change makes the precedence of blocking and waiting operations more apparent by explicitly casing on them (in decreasing order of precedence). The differences are somewhat subtle and explained in the commit message. --- BVT+BST (211251) all green except for a few known issues. In a distant future, the relation between operations could be stated declaratively and the `allowed_operations` computed incrementally (based on ambient state changes in the database, etc.). The code populates a table with a tri-state value (allowed, unknown, and disallowed). However, absence from the table could signify the same (that the operation is valid), as every possible value is accounted for (therefore, `Unknown` is redundant).
2 parents 0242eea + eb06992 commit 46dec4b

File tree

1 file changed

+144
-84
lines changed

1 file changed

+144
-84
lines changed

ocaml/xapi/xapi_pool_helpers.ml

+144-84
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,38 @@ open Record_util
2020

2121
let finally = Xapi_stdext_pervasives.Pervasiveext.finally
2222

23+
type blocking_operations =
24+
[ `apply_updates
25+
| `cluster_create
26+
| `configure_repositories
27+
| `designate_new_master
28+
| `ha_disable
29+
| `ha_enable
30+
| `sync_bundle
31+
| `sync_updates
32+
| `tls_verification_enable ]
33+
34+
type waiting_operations =
35+
[ `cert_refresh
36+
| `copy_primary_host_certs
37+
| `eject
38+
| `exchange_ca_certificates_on_join
39+
| `exchange_certificates_on_join
40+
| `get_updates ]
41+
42+
type all_operations = [blocking_operations | waiting_operations]
43+
44+
(* Unused, ensure every API operation is statically partitioned here. *)
45+
let _id (op : API.pool_allowed_operations) : all_operations = op
46+
2347
(* psr is not included as a pool op because it can be considered in progress
2448
in between api calls (i.e. wrapping it inside with_pool_operation won't work) *)
2549

2650
(* these ops will:
2751
* a) throw an error if any other blocked op is in progress
2852
* b) wait if only a wait op is in progress
2953
*)
30-
let blocking_ops =
54+
let blocking_ops_table : (blocking_operations * string) list =
3155
[
3256
(`ha_enable, Api_errors.ha_enable_in_progress)
3357
; (`ha_disable, Api_errors.ha_disable_in_progress)
@@ -45,7 +69,7 @@ let blocking_ops =
4569
*
4670
* waiting is symmetric: if `ha_enable is in progress, and we want to perform
4771
* `copy_primary_host_certs, then we wait in this case too *)
48-
let wait_ops =
72+
let waiting_ops : waiting_operations list =
4973
[
5074
`cert_refresh
5175
; `exchange_certificates_on_join
@@ -55,107 +79,143 @@ let wait_ops =
5579
; `get_updates
5680
]
5781

58-
let all_operations = blocking_ops |> List.map fst |> List.append wait_ops
82+
(* Shadow with widening coercions to allow us to query using
83+
operations from either set, whilst maintaining the static guarantees
84+
of the original listings. *)
85+
let blocking_ops_table : (all_operations * string) list =
86+
List.map (fun (op, v) -> ((op :> all_operations), v)) blocking_ops_table
87+
88+
let blocking_ops : all_operations list = List.map fst blocking_ops_table
5989

60-
(* see [Helpers.retry]. this error code causes a 'wait' *)
61-
let wait_error = Api_errors.other_operation_in_progress
90+
let waiting_ops = List.map (fun op -> (op :> all_operations)) waiting_ops
6291

63-
(** Returns a table of operations -> API error options (None if the operation would be ok) *)
64-
let valid_operations ~__context record (pool : API.ref_pool) =
92+
let all_operations : all_operations list = blocking_ops @ waiting_ops
93+
94+
type validity = Unknown | Allowed | Disallowed of string * string list
95+
96+
(* Computes a function (all_operations -> validity) that maps each
97+
element of all_operations to a value indicating whether it would be
98+
valid for it to be executed in the inputted execution context. *)
99+
let compute_valid_operations ~__context record pool :
100+
API.pool_allowed_operations -> validity =
65101
let ref = Ref.string_of pool in
66102
let current_ops = List.map snd record.Db_actions.pool_current_operations in
67-
let table = Hashtbl.create 10 in
68-
all_operations |> List.iter (fun x -> Hashtbl.replace table x None) ;
69-
let set_errors (code : string) (params : string list)
70-
(ops : API.pool_allowed_operations_set) =
71-
List.iter
72-
(fun op ->
73-
if Hashtbl.find table op = None then
74-
Hashtbl.replace table op (Some (code, params))
75-
)
76-
ops
103+
let table = (Hashtbl.create 32 : (all_operations, validity) Hashtbl.t) in
104+
let set_validity = Hashtbl.replace table in
105+
(* Start by assuming all operations are allowed. *)
106+
List.iter (fun op -> set_validity op Allowed) all_operations ;
107+
(* Given a list of operations, map each to the given error. If an
108+
error has already been specified for a given operation, do
109+
nothing. *)
110+
let set_errors ops ((error, detail) : string * string list) =
111+
let populate op =
112+
match Hashtbl.find table op with
113+
| Allowed ->
114+
set_validity op (Disallowed (error, detail))
115+
| Disallowed _ | Unknown ->
116+
(* These cases should be impossible here. *)
117+
()
118+
in
119+
List.iter populate ops
77120
in
78-
if current_ops <> [] then (
79-
List.iter
80-
(fun (blocking_op, err) ->
81-
if List.mem blocking_op current_ops then (
82-
set_errors err [] (blocking_ops |> List.map fst) ;
83-
set_errors Api_errors.other_operation_in_progress
84-
[Datamodel_common._pool; ref]
85-
wait_ops
86-
)
87-
)
88-
blocking_ops ;
89-
List.iter
90-
(fun wait_op ->
91-
if List.mem wait_op current_ops then
92-
set_errors wait_error [Datamodel_common._pool; ref] all_operations
93-
)
94-
wait_ops
95-
) ;
96-
(* HA disable cannot run if HA is already disabled on a pool *)
97-
(* HA enable cannot run if HA is already enabled on a pool *)
98-
let ha_enabled =
99-
Db.Pool.get_ha_enabled ~__context ~self:(Helpers.get_pool ~__context)
121+
let other_operation_in_progress =
122+
(Api_errors.other_operation_in_progress, [Datamodel_common._pool; ref])
100123
in
101-
let current_stack =
102-
Db.Pool.get_ha_cluster_stack ~__context ~self:(Helpers.get_pool ~__context)
124+
let is_current_op = Fun.flip List.mem current_ops in
125+
let blocking =
126+
List.find_opt (fun (op, _) -> is_current_op op) blocking_ops_table
103127
in
104-
if ha_enabled then (
105-
set_errors Api_errors.ha_is_enabled [] [`ha_enable] ;
106-
(* TLS verification is not allowed to run if HA is enabled *)
107-
set_errors Api_errors.ha_is_enabled [] [`tls_verification_enable]
108-
) else
109-
set_errors Api_errors.ha_not_enabled [] [`ha_disable] ;
110-
(* cluster create cannot run during a rolling pool upgrade *)
111-
if Helpers.rolling_upgrade_in_progress ~__context then (
112-
set_errors Api_errors.not_supported_during_upgrade [] [`cluster_create] ;
113-
set_errors Api_errors.not_supported_during_upgrade []
114-
[`tls_verification_enable]
115-
) ;
116-
(* cluster create cannot run if a cluster already exists on the pool *)
117-
( match Db.Cluster.get_all ~__context with
118-
| [_] ->
119-
set_errors Api_errors.cluster_already_exists [] [`cluster_create]
120-
(* indicates a bug or a need to update this code (if we ever support multiple clusters in the pool *)
121-
| _ :: _ ->
122-
failwith "Multiple clusters exist in the pool"
123-
(* cluster create cannot run if ha is already enabled *)
124-
| [] ->
125-
if ha_enabled then
126-
set_errors Api_errors.incompatible_cluster_stack_active [current_stack]
127-
[`cluster_create]
128+
let waiting = List.find_opt is_current_op waiting_ops in
129+
( match (blocking, waiting) with
130+
| Some (_, reason), _ ->
131+
(* Mark all potentially blocking operations as invalid due
132+
to the specific blocking operation's "in progress" error. *)
133+
set_errors blocking_ops (reason, []) ;
134+
(* Mark all waiting operations as invalid for the generic
135+
"OTHER_OPERATION_IN_PROGRESS" reason. *)
136+
set_errors waiting_ops other_operation_in_progress
137+
(* Note that all_operations ⊆ blocking_ops ∪ waiting_ops, so this
138+
invalidates all operations (with the reason partitioned
139+
between whether the operation is blocking or waiting). *)
140+
| None, Some _ ->
141+
(* If there's no blocking operation in current operations, but
142+
there is a waiting operation, invalidate all operations for the
143+
generic reason. Again, this covers every operation. *)
144+
set_errors all_operations other_operation_in_progress
145+
| None, None -> (
146+
(* If there's no blocking or waiting operation in current
147+
operations (i.e. current operations is empty), we can report
148+
more precise reasons why operations would be invalid. *)
149+
let ha_enabled, current_stack =
150+
let self = Helpers.get_pool ~__context in
151+
Db.Pool.
152+
( get_ha_enabled ~__context ~self
153+
, get_ha_cluster_stack ~__context ~self
154+
)
155+
in
156+
if ha_enabled then (
157+
(* Can't enable HA if it's already enabled. *)
158+
let ha_is_enabled = (Api_errors.ha_is_enabled, []) in
159+
set_errors [`ha_enable] ha_is_enabled ;
160+
(* TLS verification is not allowed to run if HA is enabled. *)
161+
set_errors [`tls_verification_enable] ha_is_enabled
162+
) else (* Can't disable HA if it's not enabled. *)
163+
set_errors [`ha_disable] (Api_errors.ha_not_enabled, []) ;
164+
(* Cluster create cannot run during a rolling pool upgrade. *)
165+
if Helpers.rolling_upgrade_in_progress ~__context then (
166+
let not_supported_during_upgrade =
167+
(Api_errors.not_supported_during_upgrade, [])
168+
in
169+
set_errors [`cluster_create] not_supported_during_upgrade ;
170+
set_errors [`tls_verification_enable] not_supported_during_upgrade
171+
) ;
172+
(* Cluster create cannot run if a cluster already exists on the pool. *)
173+
match Db.Cluster.get_all ~__context with
174+
| [_] ->
175+
set_errors [`cluster_create] (Api_errors.cluster_already_exists, [])
176+
(* Indicates a bug or a need to update this code (if we ever support multiple clusters in the pool). *)
177+
| _ :: _ ->
178+
failwith "Multiple clusters exist in the pool"
179+
(* Cluster create cannot run if HA is already enabled. *)
180+
| [] ->
181+
if ha_enabled then
182+
let error =
183+
(Api_errors.incompatible_cluster_stack_active, [current_stack])
184+
in
185+
set_errors [`cluster_create] error
186+
)
128187
) ;
129-
table
130-
131-
let throw_error table op =
132-
match Hashtbl.find_opt table op with
133-
| None ->
134-
Helpers.internal_error
135-
"xapi_pool_helpers.assert_operation_valid unknown operation: %s"
136-
(pool_allowed_operations_to_string op)
137-
| Some (Some (code, params)) ->
138-
raise (Api_errors.Server_error (code, params))
139-
| Some None ->
140-
()
188+
fun op -> Hashtbl.find_opt table op |> Option.value ~default:Unknown
141189

142190
let assert_operation_valid ~__context ~self ~(op : API.pool_allowed_operations)
143191
=
144-
(* no pool operations allowed during a pending PSR *)
192+
(* No pool operations allowed during a pending PSR. *)
145193
if Db.Pool.get_is_psr_pending ~__context ~self:(Helpers.get_pool ~__context)
146194
then
147195
raise Api_errors.(Server_error (pool_secret_rotation_pending, [])) ;
148196
let all = Db.Pool.get_record_internal ~__context ~self in
149-
let table = valid_operations ~__context all self in
150-
throw_error table op
197+
let lookup = compute_valid_operations ~__context all self in
198+
match lookup op with
199+
| Allowed ->
200+
()
201+
| Disallowed (error, detail) ->
202+
raise (Api_errors.Server_error (error, detail))
203+
| Unknown ->
204+
(* This should never happen and implies our validity algorithm is incomplete. *)
205+
let detail =
206+
let op = pool_allowed_operations_to_string op in
207+
Printf.sprintf "%s.%s unknown operation: %s" __MODULE__ __FUNCTION__ op
208+
in
209+
raise Api_errors.(Server_error (internal_error, [detail]))
151210

152211
let update_allowed_operations ~__context ~self : unit =
153212
let all = Db.Pool.get_record_internal ~__context ~self in
154-
let valid = valid_operations ~__context all self in
155-
let keys =
156-
Hashtbl.fold (fun k v acc -> if v = None then k :: acc else acc) valid []
213+
let is_allowed_op =
214+
let lookup = compute_valid_operations ~__context all self in
215+
fun op -> lookup op = Allowed
157216
in
158-
Db.Pool.set_allowed_operations ~__context ~self ~value:keys
217+
let value = List.filter is_allowed_op all_operations in
218+
Db.Pool.set_allowed_operations ~__context ~self ~value
159219

160220
(** Add to the Pool's current operations, call a function and then remove from the
161221
current operations. Ensure the allowed_operations are kept up to date. *)

0 commit comments

Comments
 (0)