@@ -16,15 +16,17 @@
 
 -module(ekka_autoheal).
 
+-include_lib("snabbkaffe/include/trace.hrl").
+
 -export([ init/0
         , enabled/0
         , proc/1
         , handle_msg/2
         ]).
 
 -record(autoheal, {delay, role, proc, timer}).
 
--type(autoheal() :: #autoheal{}).
+-type autoheal() :: #autoheal{}.
 
 -export_type([autoheal/0]).
 
@@ -47,8 +49,7 @@ enabled() ->
     end.
 
 proc(undefined) -> undefined;
-proc(#autoheal{proc = Proc}) ->
-    Proc.
+proc(#autoheal{proc = Proc}) -> Proc.
 
 handle_msg(Msg, undefined) ->
     ?LOG(error, "Autoheal not enabled! Unexpected msg: ~p", [Msg]), undefined;
@@ -76,12 +77,24 @@ handle_msg(Msg = {create_splitview, Node}, Autoheal = #autoheal{delay = Delay, t
             Nodes = ekka_mnesia:cluster_nodes(all),
             case rpc:multicall(Nodes, ekka_mnesia, cluster_view, [], 30000) of
                 {Views, []} ->
-                    SplitView = lists:sort(fun compare_view/2, lists:usort(Views)),
-                    ekka_node_monitor:cast(coordinator(SplitView), {heal_partition, SplitView});
+                    SplitView = find_split_view(Nodes, Views),
+                    HealPlan = find_heal_plan(SplitView),
+                    case HealPlan of
+                        {Candidates = [_ | _], Minority} ->
+                            %% Non-empty list of candidates, choose a coordinator.
+                            CoordNode = pick_coordinator(Candidates),
+                            ekka_node_monitor:cast(CoordNode, {heal_cluster, Minority, SplitView});
+                        {[], Cluster} ->
+                            %% It is unlikely, but possible, to end up with an empty list of candidates.
+                            ekka_node_monitor:cast(node(), {heal_cluster, Cluster, SplitView});
+                        {} ->
+                            ignore
+                    end,
+                    Autoheal#autoheal{timer = undefined};
                 {_Views, BadNodes} ->
-                    ?LOG(critical, "Bad nodes found when autoheal: ~p", [BadNodes])
-            end,
-            Autoheal#autoheal{timer = undefined};
+                    ?LOG(critical, "Bad nodes found when autoheal: ~p", [BadNodes]),
+                    Autoheal#autoheal{timer = ekka_node_monitor:run_after(Delay, {autoheal, Msg})}
+            end;
         false ->
             Autoheal#autoheal{timer = ekka_node_monitor:run_after(Delay, {autoheal, Msg})}
     end;
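
A minimal sketch of what this branch computes, assuming a hypothetical three-node cluster n1, n2, n3 where only the link between n2 and n3 is broken (the node names and views below are illustrative, not part of the commit):

    %% Nodes = [n1, n2, n3], and cluster_view() yields {Running, Stopped} per node:
    %% Views = [{[n1,n2,n3], []},     %% n1 still sees everyone
    %%          {[n1,n2], [n3]},      %% n2 lost n3
    %%          {[n1,n3], [n2]}]      %% n3 lost n2
    %% find_split_view(Nodes, Views) -> [{n2, [n1,n2], [n3]}]
    %% find_heal_plan(...)           -> {[n1,n2], [n3]}
    %% so [n1,n2] pick a coordinator, which is told to reboot the minority [n3].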
@@ -91,51 +104,98 @@ handle_msg(Msg = {create_splitview, _Node}, Autoheal) ->
     Autoheal;
 
 handle_msg({heal_partition, SplitView}, Autoheal = #autoheal{proc = undefined}) ->
+    %% NOTE: Backward compatibility.
+    case SplitView of
+        %% No partitions.
+        [] -> Autoheal;
+        [{_, []}] -> Autoheal;
+        %% Partitions.
+        SplitView ->
+            Proc = spawn_link(fun() -> heal_partition(SplitView) end),
+            Autoheal#autoheal{role = coordinator, proc = Proc}
+    end;
+
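
For reference, the legacy {heal_partition, SplitView} message this clause keeps accepting carries the old-style view list directly; a hypothetical example:

    %% {heal_partition, [{[n1,n2], [n3]}, {[n3], [n1,n2]}]}
    %% An empty view ([] or [{_, []}]) means there is nothing to heal,
    %% so the state is returned unchanged and no healer process is spawned.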
+handle_msg({heal_cluster, Minority, SplitView}, Autoheal = #autoheal{proc = undefined}) ->
     Proc = spawn_link(fun() ->
-                          ?LOG(info, "Healing partition: ~p", [SplitView]),
-                          _ = heal_partition(SplitView)
-                      end),
+        ?tp(notice, "Healing cluster partition", #{
+            need_reboot => Minority,
+            split_view => SplitView
+        }),
+        reboot_minority(Minority -- [node()])
+    end),
     Autoheal#autoheal{role = coordinator, proc = Proc};
 
 handle_msg({heal_partition, SplitView}, Autoheal = #autoheal{proc = _Proc}) ->
     ?LOG(critical, "Unexpected heal_partition msg: ~p", [SplitView]),
     Autoheal;
 
 handle_msg({'EXIT', Pid, normal}, Autoheal = #autoheal{proc = Pid}) ->
     Autoheal#autoheal{proc = undefined};
 handle_msg({'EXIT', Pid, Reason}, Autoheal = #autoheal{proc = Pid}) ->
-    ?LOG(critical, "Autoheal process crashed: ~s", [Reason]),
+    ?LOG(critical, "Autoheal process crashed: ~p", [Reason]),
+    _Retry = ekka_node_monitor:run_after(1000, confirm_partition),
     Autoheal#autoheal{proc = undefined};
 
 handle_msg(Msg, Autoheal) ->
     ?LOG(critical, "Unexpected msg: ~p, state: ~p", [Msg, Autoheal]),
     Autoheal.
 
-compare_view({Running1, _}, {Running2, _}) ->
-    Len1 = length(Running1), Len2 = length(Running2),
-    if
-        Len1 > Len2 -> true;
-        Len1 == Len2 -> lists:member(node(), Running1);
-        true -> false
+find_split_view(Nodes, Views) ->
+    ClusterView = lists:zipwith(
+        fun(N, {Running, Stopped}) -> {N, Running, Stopped} end,
+        Nodes,
+        Views
+    ),
+    MajorityView = lists:sort(fun compare_node_views/2, ClusterView),
+    find_split_view(MajorityView).
+
+compare_node_views({_N1, Running1, _}, {_N2, Running2, _}) ->
+    Len1 = length(Running1),
+    Len2 = length(Running2),
+    case Len1 of
+        %% Prefer partitions with a higher number of surviving nodes.
+        L when L > Len2 -> true;
+        %% If the number of nodes is the same, order by the lists of running nodes.
+        Len2 -> Running1 < Running2;
+        L when L < Len2 -> false
     end.
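
Since ties between equal-sized partitions are broken by comparing the running lists themselves, the resulting order is deterministic; for instance, with hypothetical views:

    %% lists:sort(fun compare_node_views/2,
    %%            [{n3, [n3,n4], [n1,n2]}, {n1, [n1,n2], [n3,n4]}])
    %% -> [{n1, [n1,n2], [n3,n4]}, {n3, [n3,n4], [n1,n2]}]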
 
-coordinator([{Nodes, _} | _]) ->
-    ekka_membership:coordinator(Nodes).
-
--spec heal_partition(list()) -> list(node()).
-heal_partition([]) ->
-    [];
-%% All nodes connected.
-heal_partition([{_, []}]) ->
-    [];
-%% Partial partitions happened.
-heal_partition([{Nodes, []}|_]) ->
+find_split_view([{_Node, _Running, []} | Views]) ->
+    %% Node observes no partitions, ignore.
+    find_split_view(Views);
+find_split_view([View = {_Node, _Running, Partitioned} | Views]) ->
+    %% Node observes some nodes as partitioned from it.
+    %% These nodes need to be rebooted, and as such they should not be part of the split view.
+    Rest = lists:foldl(fun(N, Acc) -> lists:keydelete(N, 1, Acc) end, Views, Partitioned),
+    [View | find_split_view(Rest)];
+find_split_view([]) ->
+    [].
+
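
A sketch of the recursion on an assumed 4-node cluster split into {n1,n2} and {n3,n4} (sorted input, hypothetical names): each kept view prunes the nodes it reports as partitioned from the remainder, so only surviving views remain.

    %% find_split_view([{n1, [n1,n2], [n3,n4]}, {n2, [n1,n2], [n3,n4]},
    %%                  {n3, [n3,n4], [n1,n2]}, {n4, [n3,n4], [n1,n2]}])
    %% n1's view is kept and prunes n3 and n4; n2's view is kept the same way:
    %% -> [{n1, [n1,n2], [n3,n4]}, {n2, [n1,n2], [n3,n4]}]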
+find_heal_plan([{_Node, R0, P0} | Rest]) ->
+    %% If we have more than one partition in the split view, we need to reboot _all_ of the
+    %% nodes in each view's partition (i.e. ⋃(Partitions)) for better safety. But then we
+    %% need to find candidates to do it, as ⋃(Running) ∖ ⋃(Partitions).
+    {_Nodes, Rs, Ps} = lists:unzip3(Rest),
+    URunning = ordsets:union(lists:map(fun ordsets:from_list/1, [R0 | Rs])),
+    UPartitions = ordsets:union(lists:map(fun ordsets:from_list/1, [P0 | Ps])),
+    {ordsets:subtract(URunning, UPartitions), UPartitions};
+find_heal_plan([]) ->
+    {}.
+
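
Continuing the example above, the plan pairs the union of running sets minus the union of partitioned sets with that second union:

    %% find_heal_plan([{n1, [n1,n2], [n3,n4]}, {n2, [n1,n2], [n3,n4]}])
    %% URunning    = [n1,n2]
    %% UPartitions = [n3,n4]
    %% -> {[n1,n2], [n3,n4]}: n1/n2 are coordinator candidates, n3/n4 get rebooted.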
+pick_coordinator(Candidates) ->
+    case lists:member(node(), Candidates) of
+        true -> node();
+        false -> ekka_membership:coordinator(Candidates)
+    end.
+
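
Preferring the local node when it is itself a candidate avoids an extra hop, since the split view is already here; otherwise the cast goes to the membership coordinator (hypothetical candidates shown):

    %% pick_coordinator([n1, node(), n2]) -> node()
    %% pick_coordinator([n1, n2])         -> ekka_membership:coordinator([n1, n2])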
+heal_partition([{Nodes, []} | _] = SplitView) ->
+    %% Symmetric partition.
+    ?LOG(info, "Healing partition: ~p", [SplitView]),
     reboot_minority(Nodes -- [node()]);
-heal_partition([{Majority, Minority}, {Minority, Majority}]) ->
-    reboot_minority(Minority);
-heal_partition(SplitView) ->
-    ?LOG(critical, "Cannot heal the partitions: ~p", [SplitView]),
-    error({unknown_splitview, SplitView}).
+heal_partition([{Majority, Minority}, {Minority, Majority}] = SplitView) ->
+    %% Symmetric partition.
+    ?LOG(info, "Healing partition: ~p", [SplitView]),
+    reboot_minority(Minority).
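
Both surviving clauses expect a symmetric view; e.g. a plain two-sided split (hypothetical names):

    %% heal_partition([{[n1,n2], [n3]}, {[n3], [n1,n2]}])
    %% matches the second clause with Majority = [n1,n2], Minority = [n3],
    %% so only the minority side, [n3], is rebooted.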
 
 reboot_minority(Minority) ->
     lists:foreach(fun shutdown/1, Minority),
@@ -155,4 +215,3 @@ ensure_cancel_timer(undefined) ->
     ok;
 ensure_cancel_timer(TRef) ->
     catch erlang:cancel_timer(TRef).
-