From 9f12010d8edbe8af398542dd32d2be98e0337f1d Mon Sep 17 00:00:00 2001 From: woelki Date: Thu, 10 Sep 2020 00:54:43 +0200 Subject: [PATCH 1/5] reconstruct --- src/riak_core_vnode.erl | 1302 ++++++++++++++++++++------------------- 1 file changed, 678 insertions(+), 624 deletions(-) diff --git a/src/riak_core_vnode.erl b/src/riak_core_vnode.erl index 068c1d0a9..bc839ab35 100644 --- a/src/riak_core_vnode.erl +++ b/src/riak_core_vnode.erl @@ -230,18 +230,9 @@ -define(LOCK_RETRY_TIMEOUT, 10000). --record(state, - {index :: partition(), mod :: module(), - modstate :: term(), - forward :: node() | [{integer(), node()}], - handoff_target = none :: none | {integer(), node()}, - handoff_pid :: pid() | undefined, - handoff_type :: - riak_core_handoff_manager:ho_type() | undefined, - pool_pid :: pid() | undefined, - pool_config :: tuple() | undefined, - manager_event_timer :: reference() | undefined, - inactivity_timeout :: non_neg_integer()}). +%% ======== +%% API +%% ======== start_link(Mod, Index, Forward) -> start_link(Mod, Index, 0, Forward). @@ -252,352 +243,228 @@ start_link(Mod, Index, InitialInactivityTimeout, [Mod, Index, InitialInactivityTimeout, Forward], []). +%% #1 - State started +wait_for_init(Vnode) -> + gen_fsm_compat:sync_send_event(Vnode, wait_for_init, + infinity). + +%% #2 - %% Send a command message for the vnode module by Pid - %% typically to do some deferred processing after returning yourself send_command(Pid, Request) -> gen_fsm_compat:send_event(Pid, #riak_vnode_req_v1{request = Request}). -%% Sends a command to the FSM that called it after Time -%% has passed. --spec send_command_after(integer(), - term()) -> reference(). - -send_command_after(Time, Request) -> - gen_fsm_compat:send_event_after(Time, - #riak_vnode_req_v1{request = Request}). - -init([Mod, Index, InitialInactivityTimeout, Forward]) -> - process_flag(trap_exit, true), - State = #state{index = Index, mod = Mod, - forward = Forward, - inactivity_timeout = InitialInactivityTimeout}, - {ok, started, State, 0}. - -started(timeout, - State = #state{inactivity_timeout = - InitialInactivityTimeout}) -> - case do_init(State) of - {ok, State2} -> - {next_state, active, State2, InitialInactivityTimeout}; - {error, Reason} -> {stop, Reason} - end. - -started(wait_for_init, _From, - State = #state{inactivity_timeout = - InitialInactivityTimeout}) -> - case do_init(State) of - {ok, State2} -> - {reply, ok, active, State2, InitialInactivityTimeout}; - {error, Reason} -> {stop, Reason} - end. 
-
-do_init(State = #state{index = Index, mod = Mod,
-                       forward = Forward}) ->
-    {ModState, Props} = case Mod:init([Index]) of
-                          {ok, MS} -> {MS, []};
-                          {ok, MS, P} -> {MS, P};
-                          {error, R} -> {error, R}
-                        end,
-    case {ModState, Props} of
-      {error, Reason} -> {error, Reason};
-      _ ->
-          case lists:keyfind(pool, 1, Props) of
-            {pool, WorkerModule, PoolSize, WorkerArgs} =
-                PoolConfig ->
-                logger:debug("starting worker pool ~p with size of "
-                             "~p~n",
-                             [WorkerModule, PoolSize]),
-                {ok, PoolPid} =
-                    riak_core_vnode_worker_pool:start_link(WorkerModule,
-                                                           PoolSize, Index,
-                                                           WorkerArgs,
-                                                           worker_props);
-            _ -> PoolPid = PoolConfig = undefined
-          end,
-          riak_core_handoff_manager:remove_exclusion(Mod, Index),
-          Timeout = application:get_env(riak_core,
-                                        vnode_inactivity_timeout,
-                                        ?DEFAULT_TIMEOUT),
-          Timeout2 = Timeout + riak_core_rand:uniform(Timeout),
-          State2 = State#state{modstate = ModState,
-                               inactivity_timeout = Timeout2,
-                               pool_pid = PoolPid, pool_config = PoolConfig},
-          logger:debug("vnode :: ~p/~p :: ~p~n",
-                       [Mod, Index, Forward]),
-          State3 = mod_set_forwarding(Forward, State2),
-          {ok, State3}
-    end.
-
-wait_for_init(Vnode) ->
-    gen_fsm_compat:sync_send_event(Vnode, wait_for_init,
-                                   infinity).
-
+%% #3 -
 handoff_error(Vnode, Err, Reason) ->
     gen_fsm_compat:send_event(Vnode,
                               {handoff_error, Err, Reason}).
+%% #4 -
 get_mod_index(VNode) ->
     gen_fsm_compat:sync_send_all_state_event(VNode,
                                              get_mod_index).
+%% #5
 set_forwarding(VNode, ForwardTo) ->
     gen_fsm_compat:send_all_state_event(VNode,
                                         {set_forwarding, ForwardTo}).
+%% #6
 trigger_handoff(VNode, TargetIdx, TargetNode) ->
     gen_fsm_compat:send_all_state_event(VNode,
                                         {trigger_handoff, TargetIdx,
                                          TargetNode}).
+%% #7
 trigger_handoff(VNode, TargetNode) ->
     gen_fsm_compat:send_all_state_event(VNode,
                                         {trigger_handoff, TargetNode}).
+%% #8
 trigger_delete(VNode) ->
     gen_fsm_compat:send_all_state_event(VNode,
                                         trigger_delete).
+%% #9
 core_status(VNode) ->
     gen_fsm_compat:sync_send_all_state_event(VNode,
                                              core_status).
+%% #10
+%% Sends a command to the FSM that called it after Time
+%% has passed.
+-spec send_command_after(integer(),
+                         term()) -> reference().
+
+send_command_after(Time, Request) ->
+    gen_fsm_compat:send_event_after(Time,
+                                    #riak_vnode_req_v1{request = Request}).
+
 %%%%%%%
 %new APIs
-%% # - riak_core_vnode_manager - handle_vnode_event
+%% #11 - riak_core_vnode_manager - handle_vnode_event
 cast_finish_handoff(VNode) ->
     gen_fsm_compat:send_all_state_event(VNode,
                                         finish_handoff).
-%% # - riak_core_vnode_manager - handle_vnode_event
+%% #12 - riak_core_vnode_manager - handle_vnode_event
 cancel_handoff(VNode) ->
     gen_fsm_compat:send_all_state_event(VNode,
                                         cancel_handoff).
-%% # - riak_core_vnode_master - command2
-%send_req
-
-%% # - riak_core_vnode_master - send_an_event
+%% #13 - riak_core_vnode_master - send_an_event
 send_an_event(VNode, Event)->
     gen_fsm_compat:send_event(VNode, Event).
-%% # - riak_core_vnode_master - handle_cast/handle_call
+%% #14 - riak_core_vnode_master - handle_cast/handle_call
+    %riak_core_vnode_master - command2
+    %riak_core_vnode_proxy - handle_call
 send_req(VNode, Req)->
     gen_fsm_compat:send_event(VNode, Req).
-%% # - riak_core_vnode_master - handle_call
+%% #15 - riak_core_vnode_master - handle_call
 send_all_proxy_req(VNode, Req)->
     gen_fsm_compat:send_all_state_event(VNode, Req).
-%% # - riak:core_handoff_sender - start_fold_
+%% #16 - riak_core_handoff_sender - start_fold_
 handoff_complete(VNode) ->
     gen_fsm_compat:send_event(VNode, handoff_complete).
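For orientation only: the numbered wrappers above are thin shims over gen_fsm_compat. A minimal caller-side sketch, assuming a hypothetical `my_app_vnode` behaviour module (illustrative; not part of the patch itself):

    demo_vnode_api() ->
        %% start a vnode FSM for partition 0, no forwarding, immediate init timeout
        {ok, Pid} = riak_core_vnode:start_link(my_app_vnode, 0, 0, undefined),
        %% #1: blocks until do_init/1 has run (started -> active)
        ok = riak_core_vnode:wait_for_init(Pid),
        %% #2: fire-and-forget command, wrapped in #riak_vnode_req_v1{}
        ok = riak_core_vnode:send_command(Pid, ping),
        %% #9: synchronous status query
        {_Mode, _Status} = riak_core_vnode:core_status(Pid).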
-%% # - riak:core_handoff_sender - start_fold_
+%% #17 - riak_core_handoff_sender - start_fold_
 resize_transfer_complete(VNode, NotSentAcc) ->
     gen_fsm_compat:send_event(VNode,
                               {resize_transfer_complete, NotSentAcc}).
-%% # - riak_core_handoff_receiver - process_message
+%% #18 - riak_core_handoff_receiver - process_message
 handoff_data(VNode, MsgData, VNodeTimeout) ->
     gen_fsm_compat:sync_send_all_state_event(VNode,
                                              {handoff_data, MsgData},
                                              VNodeTimeout).
-%% # - riak_core_vnode_proxy - handle_cast
+%% #19 - riak_core_vnode_proxy - handle_cast
 unregistered(VNode) ->
     gen_fsm_compat:send_event(VNode, unregistered).
-%% # - riak_core_vnode_proxy - handle_call
-%send_vnode_req
-continue(State) ->
-    {next_state, active, State,
-     State#state.inactivity_timeout}.
+%% @doc Send a reply to a vnode request. If
+%% the Ref is undefined, just send the reply
+%% for compatibility with pre-0.12 requestors.
+%% If Ref is defined, send it along with the
+%% reply.
+%% NOTE: We *always* send the reply using unreliable delivery.
+%%
+-spec reply(sender(), term()) -> any().
-continue(State, NewModState) ->
-    continue(State#state{modstate = NewModState}).
+reply({fsm, undefined, From}, Reply) ->
+    riak_core_send_msg:send_event_unreliable(From, Reply);
+reply({fsm, Ref, From}, Reply) ->
+    riak_core_send_msg:send_event_unreliable(From,
+                                             {Ref, Reply});
+reply({server, undefined, From}, Reply) ->
+    riak_core_send_msg:reply_unreliable(From, Reply);
+reply({server, Ref, From}, Reply) ->
+    riak_core_send_msg:reply_unreliable(From, {Ref, Reply});
+reply({raw, Ref, From}, Reply) ->
+    riak_core_send_msg:bang_unreliable(From, {Ref, Reply});
+reply(ignore, _Reply) -> ok.
-%% Active vnodes operate in three states: normal, handoff, and forwarding.
-%%
-%% In the normal state, vnode commands are passed to handle_command. When
-%% a handoff is triggered, handoff_target is set and the vnode
-%% is said to be in the handoff state.
-%%
-%% In the handoff state, vnode commands are passed to handle_handoff_command.
-%% However, a vnode may be blocked during handoff (and therefore not servicing
-%% commands) if the handoff procedure is blocking (eg. in riak_kv when not
-%% using async fold).
-%%
-%% After handoff, a vnode may move into forwarding state. The forwarding state
-%% is a product of the new gossip/membership code and will not occur if the
-%% node is running in legacy mode. The forwarding state represents the case
-%% where the vnode has already handed its data off to the new owner, but the
-%% new owner is not yet listed as the current owner in the ring. This may occur
-%% because additional vnodes are still waiting to handoff their data to the
-%% new owner, or simply because the ring has yet to converge on the new owner.
-%% In the forwarding state, all vnode commands and coverage commands are
-%% forwarded to the new owner for processing.
-%%
-%% The above becomes a bit more complicated when the vnode takes part in resizing
-%% the ring, since several transfers with a single vnode as the source are necessary
-%% to complete the operation. A vnode will remain in the handoff state, for, potentially,
-%% more than one transfer and may be in the handoff state despite there being no active
-%% transfers with this vnode as the source. During this time requests that can be forwarded
-%% to a partition for which the transfer has already completed, are forwarded. All other
-%% requests are passed to handle_handoff_command.
-forward_or_vnode_command(Sender, Request, - State = #state{forward = Forward, mod = Mod, - index = Index}) -> - Resizing = is_list(Forward), - RequestHash = case Resizing of - true -> Mod:request_hash(Request); - false -> undefined - end, - case {Forward, RequestHash} of - %% typical vnode operation, no forwarding set, handle request locally - {undefined, _} -> vnode_command(Sender, Request, State); - %% implicit forwarding after ownership transfer/hinted handoff - {F, _} when not is_list(F) -> - vnode_forward(implicit, {Index, Forward}, Sender, - Request, State), - continue(State); - %% during resize we can't forward a request w/o request hash, always handle locally - {_, undefined} -> vnode_command(Sender, Request, State); - %% possible forwarding during ring resizing - {_, _} -> - {ok, R} = riak_core_ring_manager:get_my_ring(), - FutureIndex = riak_core_ring:future_index(RequestHash, - Index, R), - vnode_resize_command(Sender, Request, FutureIndex, - State) - end. +%% @doc Set up a monitor for the pid named by a {@type sender()} vnode +%% argument. If `Sender' was the atom `ignore', this function sets up +%% a monitor on `self()' in order to return a valid (if useless) +%% monitor reference. +-spec monitor(Sender :: sender()) -> Monitor :: + reference(). -vnode_command(_Sender, _Request, - State = #state{modstate = {deleted, _}}) -> - continue(State); -vnode_command(Sender, Request, - State = #state{mod = Mod, modstate = ModState, - pool_pid = Pool}) -> - case catch Mod:handle_command(Request, Sender, ModState) - of - {'EXIT', ExitReason} -> - reply(Sender, {vnode_error, ExitReason}), - logger:error("~p command failed ~p", [Mod, ExitReason]), - {stop, ExitReason, State#state{modstate = ModState}}; - continue -> continue(State, ModState); - {reply, Reply, NewModState} -> - reply(Sender, Reply), continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, Work, - From), - continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, State#state{modstate = NewModState}} - end. +monitor({fsm, _, From}) -> + erlang:monitor(process, From); +monitor({server, _, {Pid, _Ref}}) -> + erlang:monitor(process, Pid); +monitor({raw, _, From}) -> + erlang:monitor(process, From); +monitor(ignore) -> erlang:monitor(process, self()). 
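The `sender()` tuples that reply/2 and monitor/1 dispatch on have the shape {fsm | server | raw, Ref, From}. A hedged sketch of the raw round-trip, runnable from any process (illustrative; not part of the patch itself):

    raw_reply_demo(Request) ->
        Ref = make_ref(),
        Sender = {raw, Ref, self()},
        %% for a raw sender this monitors From, i.e. ourselves here
        MRef = riak_core_vnode:monitor(Sender),
        %% reply/2 delivers {Ref, Reply} to From via bang_unreliable/2
        riak_core_vnode:reply(Sender, {pong, Request}),
        receive
            {Ref, Reply} -> erlang:demonitor(MRef, [flush]), Reply
        after 5000 -> timeout
        end.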
-vnode_coverage(Sender, Request, KeySpaces, - State = #state{index = Index, mod = Mod, - modstate = ModState, pool_pid = Pool, - forward = Forward}) -> - %% Check if we should forward - case Forward of - undefined -> - Action = Mod:handle_coverage(Request, KeySpaces, Sender, - ModState); - %% handle coverage requests locally during ring resize - Forwards when is_list(Forwards) -> - Action = Mod:handle_coverage(Request, KeySpaces, Sender, - ModState); - NextOwner -> - logger:debug("Forwarding coverage ~p -> ~p: ~p~n", - [node(), NextOwner, Index]), - riak_core_vnode_master:coverage(Request, - {Index, NextOwner}, KeySpaces, Sender, - riak_core_vnode_master:reg_name(Mod)), - Action = continue - end, - case Action of - continue -> continue(State, ModState); - {reply, Reply, NewModState} -> - reply(Sender, Reply), continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, Work, - From), - continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, State#state{modstate = NewModState}} - end. -vnode_handoff_command(Sender, Request, ForwardTo, - State = #state{mod = Mod, modstate = ModState, - handoff_target = HOTarget, - handoff_type = HOType, pool_pid = Pool}) -> - case Mod:handle_handoff_command(Request, Sender, - ModState) - of - {reply, Reply, NewModState} -> - reply(Sender, Reply), continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, Work, - From), - continue(State, NewModState); - {forward, NewModState} -> - forward_request(HOType, Request, HOTarget, ForwardTo, - Sender, State), - continue(State, NewModState); - {forward, NewReq, NewModState} -> - forward_request(HOType, NewReq, HOTarget, ForwardTo, - Sender, State), - continue(State, NewModState); - {drop, NewModState} -> continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, State#state{modstate = NewModState}} - end. +%% ======================== +%% ======== +%% State, Mode, Init, Terminate +%% ======== +%% ======================== +-record(state, + {index :: partition(), mod :: module(), + modstate :: term(), + forward :: node() | [{integer(), node()}], + handoff_target = none :: none | {integer(), node()}, + handoff_pid :: pid() | undefined, + handoff_type :: + riak_core_handoff_manager:ho_type() | undefined, + pool_pid :: pid() | undefined, + pool_config :: tuple() | undefined, + manager_event_timer :: reference() | undefined, + inactivity_timeout :: non_neg_integer()}). -%% @private wrap the request for resize forwards, and use the resize -%% target. 
-forward_request(resize, Request, _HOTarget, - ResizeTarget, Sender, State) -> - %% resize op and transfer ongoing - vnode_forward(resize, ResizeTarget, Sender, - {resize_forward, Request}, State); -forward_request(undefined, Request, _HOTarget, - ResizeTarget, Sender, State) -> - %% resize op ongoing, no resize transfer ongoing, arrive here - %% via forward_or_vnode_command - vnode_forward(resize, ResizeTarget, Sender, - {resize_forward, Request}, State); -forward_request(_, Request, HOTarget, _ResizeTarget, - Sender, State) -> - %% normal explicit forwarding during owhership transfer - vnode_forward(explicit, HOTarget, Sender, Request, - State). -vnode_forward(Type, ForwardTo, Sender, Request, - State) -> - logger:debug("Forwarding (~p) {~p,~p} -> ~p~n", - [Type, State#state.index, node(), ForwardTo]), - riak_core_vnode_master:command_unreliable(ForwardTo, - Request, Sender, - riak_core_vnode_master:reg_name(State#state.mod)). +init([Mod, Index, InitialInactivityTimeout, Forward]) -> + process_flag(trap_exit, true), + State = #state{index = Index, mod = Mod, + forward = Forward, + inactivity_timeout = InitialInactivityTimeout}, + {ok, started, State, 0}. -%% @doc during ring resizing if we have completed a transfer to the index that will -%% handle request in future ring we forward to it. Otherwise we delegate -%% to the local vnode like other requests during handoff -vnode_resize_command(Sender, Request, FutureIndex, - State = #state{forward = Forward}) - when is_list(Forward) -> - case lists:keyfind(FutureIndex, 1, Forward) of - false -> vnode_command(Sender, Request, State); - {FutureIndex, FutureOwner} -> - vnode_handoff_command(Sender, Request, - {FutureIndex, FutureOwner}, State) + terminate(Reason, _StateName, + #state{mod = Mod, modstate = ModState, + pool_pid = Pool}) -> +%% Shutdown if the pool is still alive and a normal `Reason' is +%% given - there could be a race on delivery of the unregistered +%% event and successfully shutting down the pool. +try case is_pid(Pool) andalso + is_process_alive(Pool) andalso (?NORMAL_REASON(Reason)) + of + true -> + riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); + _ -> ok + end +catch +Type:Reason:Stacktrace -> + logger:error("Error while shutting down vnode worker " + "pool ~p:~p trace : ~p", + [Type, Reason, Stacktrace]) +after +case ModState of + %% Handoff completed, Mod:delete has been called, now terminate. + {deleted, ModState1} -> + Mod:terminate(Reason, ModState1); + _ -> Mod:terminate(Reason, ModState) +end +end. + +code_change(_OldVsn, StateName, State, _Extra) -> +{ok, StateName, State}. + +%% ======================== +%% ======== +%% States +%% ======== +%% ======================== + + +%% started +%% ======== +started(timeout, + State = #state{inactivity_timeout = + InitialInactivityTimeout}) -> + case do_init(State) of + {ok, State2} -> + {next_state, active, State2, InitialInactivityTimeout}; + {error, Reason} -> {stop, Reason} + end. + + +started(wait_for_init, _From, + State = #state{inactivity_timeout = + InitialInactivityTimeout}) -> + case do_init(State) of + {ok, State2} -> + {reply, ok, active, State2, InitialInactivityTimeout}; + {error, Reason} -> {stop, Reason} end. +%%active +%%%%%%%%%%%% active(timeout, State = #state{mod = Mod, index = Idx}) -> riak_core_vnode_manager:vnode_event(Mod, Idx, self(), @@ -706,254 +573,75 @@ active(_Event, _From, State) -> {reply, Reply, active, State, State#state.inactivity_timeout}. 
-%% This code lives in riak_core_vnode rather than riak_core_vnode_manager -%% because the ring_trans call is a synchronous call to the ring manager, -%% and it is better to block an individual vnode rather than the vnode -%% manager. Blocking the manager can impact all vnodes. This code is safe -%% to execute on multiple parallel vnodes because of the synchronization -%% afforded by having all ring changes go through the single ring manager. -mark_handoff_complete(SrcIdx, Target, SeenIdxs, Mod, - resize) -> - Prev = node(), - Source = {SrcIdx, Prev}, - TransFun = fun (Ring, _) -> - Owner = riak_core_ring:index_owner(Ring, SrcIdx), - Status = riak_core_ring:resize_transfer_status(Ring, - Source, - Target, - Mod), - case {Owner, Status} of - {Prev, awaiting} -> - F = fun (SeenIdx, RingAcc) -> - riak_core_ring:schedule_resize_transfer(RingAcc, - Source, - SeenIdx) - end, - Ring2 = lists:foldl(F, Ring, - ordsets:to_list(SeenIdxs)), - Ring3 = - riak_core_ring:resize_transfer_complete(Ring2, - Source, - Target, - Mod), - %% local ring optimization (see below) - {set_only, Ring3}; - _ -> ignore - end - end, - Result = riak_core_ring_manager:ring_trans(TransFun, - []), - case Result of - {ok, _NewRing} -> resize; - _ -> continue +%% handle_event +%%%%%%%%%%%%%%%% +handle_event({set_forwarding, undefined}, _StateName, + State = #state{modstate = {deleted, _ModState}}) -> + %% The vnode must forward requests when in the deleted state, therefore + %% ignore requests to stop forwarding. + continue(State); +handle_event({set_forwarding, ForwardTo}, _StateName, + State) -> + logger:debug("vnode fwd :: ~p/~p :: ~p -> ~p~n", + [State#state.mod, State#state.index, + State#state.forward, ForwardTo]), + State2 = mod_set_forwarding(ForwardTo, State), + continue(State2#state{forward = ForwardTo}); +handle_event(finish_handoff, _StateName, + State = #state{modstate = {deleted, _ModState}}) -> + stop_manager_event_timer(State), + continue(State#state{handoff_target = none}); +handle_event(finish_handoff, _StateName, + State = #state{mod = Mod, modstate = ModState, + handoff_target = Target}) -> + stop_manager_event_timer(State), + case Target of + none -> continue(State); + _ -> + {ok, NewModState} = Mod:handoff_finished(Target, + ModState), + finish_handoff(State#state{modstate = NewModState}) end; -mark_handoff_complete(Idx, {Idx, New}, [], Mod, _) -> - Prev = node(), - Result = riak_core_ring_manager:ring_trans(fun (Ring, - _) -> - Owner = - riak_core_ring:index_owner(Ring, - Idx), - {_, NextOwner, Status} = - riak_core_ring:next_owner(Ring, - Idx, - Mod), - NewStatus = - riak_core_ring:member_status(Ring, - New), - case {Owner, NextOwner, - NewStatus, Status} - of - {Prev, New, _, - awaiting} -> - Ring2 = - riak_core_ring:handoff_complete(Ring, - Idx, - Mod), - %% Optimization. Only alter the local ring without - %% triggering a gossip, thus implicitly coalescing - %% multiple vnode handoff completion events. In the - %% future we should decouple vnode handoff state from - %% the ring structure in order to make gossip independent - %% of ring size. - {set_only, Ring2}; - _ -> ignore - end - end, - []), - case Result of - {ok, NewRing} -> NewRing = NewRing; +handle_event(cancel_handoff, _StateName, + State = #state{mod = Mod, modstate = ModState}) -> + %% it would be nice to pass {Err, Reason} to the vnode but the + %% API doesn't currently allow for that. 
+ stop_manager_event_timer(State), + case State#state.handoff_target of + none -> continue(State); _ -> - {ok, NewRing} = riak_core_ring_manager:get_my_ring() - end, - Owner = riak_core_ring:index_owner(NewRing, Idx), - {_, NextOwner, Status} = - riak_core_ring:next_owner(NewRing, Idx, Mod), - NewStatus = riak_core_ring:member_status(NewRing, New), - case {Owner, NextOwner, NewStatus, Status} of - {_, _, invalid, _} -> - %% Handing off to invalid node, don't give-up data. - continue; - {Prev, New, _, _} -> forward; - {Prev, _, _, _} -> - %% Handoff wasn't to node that is scheduled in next, so no change. - continue; - {_, _, _, _} -> shutdown - end. + {ok, NewModState} = Mod:handoff_cancelled(ModState), + continue(State#state{handoff_target = none, + handoff_type = undefined, + modstate = NewModState}) + end; +handle_event({trigger_handoff, TargetNode}, StateName, + State) -> + handle_event({trigger_handoff, State#state.index, + TargetNode}, + StateName, State); +handle_event({trigger_handoff, _TargetIdx, _TargetNode}, + _StateName, + State = #state{modstate = {deleted, _ModState}}) -> + continue(State); +handle_event(R = {trigger_handoff, _TargetIdx, + _TargetNode}, + _StateName, State) -> + active(R, State); +handle_event(trigger_delete, _StateName, + State = #state{modstate = {deleted, _}}) -> + continue(State); +handle_event(trigger_delete, _StateName, State) -> + active(trigger_delete, State); +handle_event(R = #riak_vnode_req_v1{}, _StateName, + State) -> + active(R, State); +handle_event(R = #riak_coverage_req_v1{}, _StateName, + State) -> + active(R, State). -finish_handoff(State) -> finish_handoff([], State). - -finish_handoff(SeenIdxs, - State = #state{mod = Mod, modstate = ModState, - index = Idx, handoff_target = Target, - handoff_type = HOType}) -> - case mark_handoff_complete(Idx, Target, SeenIdxs, Mod, - HOType) - of - continue -> - continue(State#state{handoff_target = none, - handoff_type = undefined}); - resize -> - CurrentForwarding = resize_forwarding(State), - NewForwarding = [Target | CurrentForwarding], - State2 = mod_set_forwarding(NewForwarding, State), - continue(State2#state{handoff_target = none, - handoff_type = undefined, - forward = NewForwarding}); - Res when Res == forward; Res == shutdown -> - {_, HN} = Target, - %% Have to issue the delete now. Once unregistered the - %% vnode master will spin up a new vnode on demand. - %% Shutdown the async pool beforehand, don't want callbacks - %% running on non-existant data. - maybe_shutdown_pool(State), - {ok, NewModState} = Mod:delete(ModState), - logger:debug("~p ~p vnode finished handoff and deleted.", - [Idx, Mod]), - riak_core_vnode_manager:unregister_vnode(Idx, Mod), - logger:debug("vnode hn/fwd :: ~p/~p :: ~p -> ~p~n", - [State#state.mod, State#state.index, - State#state.forward, HN]), - State2 = mod_set_forwarding(HN, State), - continue(State2#state{modstate = - {deleted, - NewModState}, % like to fail if used - handoff_target = none, handoff_type = undefined, - forward = HN}) - end. - -maybe_shutdown_pool(#state{pool_pid = Pool}) -> - case is_pid(Pool) of - true -> - %% state.pool_pid will be cleaned up by handle_info message. - riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); - _ -> ok - end. - -resize_forwarding(#state{forward = F}) - when is_list(F) -> - F; -resize_forwarding(_) -> []. 
- -mark_delete_complete(Idx, Mod) -> - Result = riak_core_ring_manager:ring_trans(fun (Ring, - _) -> - Type = - riak_core_ring:vnode_type(Ring, - Idx), - {_, Next, Status} = - riak_core_ring:next_owner(Ring, - Idx), - case {Type, Next, Status} - of - {resized_primary, - '$delete', - awaiting} -> - Ring3 = - riak_core_ring:deletion_complete(Ring, - Idx, - Mod), - %% Use local ring optimization like mark_handoff_complete - {set_only, Ring3}; - {{fallback, _}, - '$delete', - awaiting} -> - Ring3 = - riak_core_ring:deletion_complete(Ring, - Idx, - Mod), - %% Use local ring optimization like mark_handoff_complete - {set_only, Ring3}; - _ -> ignore - end - end, - []), - Result. - -handle_event({set_forwarding, undefined}, _StateName, - State = #state{modstate = {deleted, _ModState}}) -> - %% The vnode must forward requests when in the deleted state, therefore - %% ignore requests to stop forwarding. - continue(State); -handle_event({set_forwarding, ForwardTo}, _StateName, - State) -> - logger:debug("vnode fwd :: ~p/~p :: ~p -> ~p~n", - [State#state.mod, State#state.index, - State#state.forward, ForwardTo]), - State2 = mod_set_forwarding(ForwardTo, State), - continue(State2#state{forward = ForwardTo}); -handle_event(finish_handoff, _StateName, - State = #state{modstate = {deleted, _ModState}}) -> - stop_manager_event_timer(State), - continue(State#state{handoff_target = none}); -handle_event(finish_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState, - handoff_target = Target}) -> - stop_manager_event_timer(State), - case Target of - none -> continue(State); - _ -> - {ok, NewModState} = Mod:handoff_finished(Target, - ModState), - finish_handoff(State#state{modstate = NewModState}) - end; -handle_event(cancel_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState}) -> - %% it would be nice to pass {Err, Reason} to the vnode but the - %% API doesn't currently allow for that. - stop_manager_event_timer(State), - case State#state.handoff_target of - none -> continue(State); - _ -> - {ok, NewModState} = Mod:handoff_cancelled(ModState), - continue(State#state{handoff_target = none, - handoff_type = undefined, - modstate = NewModState}) - end; -handle_event({trigger_handoff, TargetNode}, StateName, - State) -> - handle_event({trigger_handoff, State#state.index, - TargetNode}, - StateName, State); -handle_event({trigger_handoff, _TargetIdx, _TargetNode}, - _StateName, - State = #state{modstate = {deleted, _ModState}}) -> - continue(State); -handle_event(R = {trigger_handoff, _TargetIdx, - _TargetNode}, - _StateName, State) -> - active(R, State); -handle_event(trigger_delete, _StateName, - State = #state{modstate = {deleted, _}}) -> - continue(State); -handle_event(trigger_delete, _StateName, State) -> - active(trigger_delete, State); -handle_event(R = #riak_vnode_req_v1{}, _StateName, - State) -> - active(R, State); -handle_event(R = #riak_coverage_req_v1{}, _StateName, - State) -> - active(R, State). +%%handle_sync_event +%%%%%%%%%%%%%%%%%%%% handle_sync_event(current_state, _From, StateName, State) -> @@ -1010,6 +698,9 @@ handle_sync_event(core_status, _From, StateName, {reply, {Mode, Status}, StateName, State, State#state.inactivity_timeout}. 
+%%handle_info +%%%%%%%%%%%%%% + handle_info({'$vnode_proxy_ping', From, Ref, Msgs}, StateName, State) -> riak_core_vnode_proxy:cast(From, @@ -1067,48 +758,443 @@ handle_info({'EXIT', Pid, Reason}, StateName, catch _ErrorType:undef -> {stop, linked_process_crash, State} end; -handle_info(Info, StateName, - State = #state{mod = Mod, modstate = ModState}) -> - case erlang:function_exported(Mod, handle_info, 2) of - true -> - {ok, NewModState} = Mod:handle_info(Info, ModState), - {next_state, StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout}; - false -> - {next_state, StateName, State, - State#state.inactivity_timeout} +handle_info(Info, StateName, + State = #state{mod = Mod, modstate = ModState}) -> + case erlang:function_exported(Mod, handle_info, 2) of + true -> + {ok, NewModState} = Mod:handle_info(Info, ModState), + {next_state, StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout}; + false -> + {next_state, StateName, State, + State#state.inactivity_timeout} + end. + + + + +%% ======================== +%% ======== +%% Internal Helper Functions +%% ======== +%% ======================== +do_init(State = #state{index = Index, mod = Mod, + forward = Forward}) -> + {ModState, Props} = case Mod:init([Index]) of + {ok, MS} -> {MS, []}; + {ok, MS, P} -> {MS, P}; + {error, R} -> {error, R} + end, + case {ModState, Props} of + {error, Reason} -> {error, Reason}; + _ -> + case lists:keyfind(pool, 1, Props) of + {pool, WorkerModule, PoolSize, WorkerArgs} = + PoolConfig -> + logger:debug("starting worker pool ~p with size of " + "~p~n", + [WorkerModule, PoolSize]), + {ok, PoolPid} = + riak_core_vnode_worker_pool:start_link(WorkerModule, + PoolSize, Index, + WorkerArgs, + worker_props); + _ -> PoolPid = PoolConfig = undefined + end, + riak_core_handoff_manager:remove_exclusion(Mod, Index), + Timeout = application:get_env(riak_core, + vnode_inactivity_timeout, + ?DEFAULT_TIMEOUT), + Timeout2 = Timeout + riak_core_rand:uniform(Timeout), + State2 = State#state{modstate = ModState, + inactivity_timeout = Timeout2, + pool_pid = PoolPid, pool_config = PoolConfig}, + logger:debug("vnode :: ~p/~p :: ~p~n", + [Mod, Index, Forward]), + State3 = mod_set_forwarding(Forward, State2), + {ok, State3} + end. + + +continue(State) -> + {next_state, active, State, + State#state.inactivity_timeout}. + +continue(State, NewModState) -> + continue(State#state{modstate = NewModState}). + +%% Active vnodes operate in three states: normal, handoff, and forwarding. +%% +%% In the normal state, vnode commands are passed to handle_command. When +%% a handoff is triggered, handoff_target is set and the vnode +%% is said to be in the handoff state. +%% +%% In the handoff state, vnode commands are passed to handle_handoff_command. +%% However, a vnode may be blocked during handoff (and therefore not servicing +%% commands) if the handoff procedure is blocking (eg. in riak_kv when not +%% using async fold). +%% +%% After handoff, a vnode may move into forwarding state. The forwarding state +%% is a product of the new gossip/membership code and will not occur if the +%% node is running in legacy mode. The forwarding state represents the case +%% where the vnode has already handed its data off to the new owner, but the +%% new owner is not yet listed as the current owner in the ring. This may occur +%% because additional vnodes are still waiting to handoff their data to the +%% new owner, or simply because the ring has yet to converge on the new owner. 
+%% In the forwarding state, all vnode commands and coverage commands are +%% forwarded to the new owner for processing. +%% +%% The above becomes a bit more complicated when the vnode takes part in resizing +%% the ring, since several transfers with a single vnode as the source are necessary +%% to complete the operation. A vnode will remain in the handoff state, for, potentially, +%% more than one transfer and may be in the handoff state despite there being no active +%% transfers with this vnode as the source. During this time requests that can be forwarded +%% to a partition for which the transfer has already completed, are forwarded. All other +%% requests are passed to handle_handoff_command. +forward_or_vnode_command(Sender, Request, + State = #state{forward = Forward, mod = Mod, + index = Index}) -> + Resizing = is_list(Forward), + RequestHash = case Resizing of + true -> Mod:request_hash(Request); + false -> undefined + end, + case {Forward, RequestHash} of + %% typical vnode operation, no forwarding set, handle request locally + {undefined, _} -> vnode_command(Sender, Request, State); + %% implicit forwarding after ownership transfer/hinted handoff + {F, _} when not is_list(F) -> + vnode_forward(implicit, {Index, Forward}, Sender, + Request, State), + continue(State); + %% during resize we can't forward a request w/o request hash, always handle locally + {_, undefined} -> vnode_command(Sender, Request, State); + %% possible forwarding during ring resizing + {_, _} -> + {ok, R} = riak_core_ring_manager:get_my_ring(), + FutureIndex = riak_core_ring:future_index(RequestHash, + Index, R), + vnode_resize_command(Sender, Request, FutureIndex, + State) + end. + +vnode_command(_Sender, _Request, + State = #state{modstate = {deleted, _}}) -> + continue(State); +vnode_command(Sender, Request, + State = #state{mod = Mod, modstate = ModState, + pool_pid = Pool}) -> + case catch Mod:handle_command(Request, Sender, ModState) + of + {'EXIT', ExitReason} -> + reply(Sender, {vnode_error, ExitReason}), + logger:error("~p command failed ~p", [Mod, ExitReason]), + {stop, ExitReason, State#state{modstate = ModState}}; + continue -> continue(State, ModState); + {reply, Reply, NewModState} -> + reply(Sender, Reply), continue(State, NewModState); + {noreply, NewModState} -> continue(State, NewModState); + {async, Work, From, NewModState} -> + %% dispatch some work to the vnode worker pool + %% the result is sent back to 'From' + riak_core_vnode_worker_pool:handle_work(Pool, Work, + From), + continue(State, NewModState); + {stop, Reason, NewModState} -> + {stop, Reason, State#state{modstate = NewModState}} + end. 
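vnode_command/3 above dispatches on the shape of Mod:handle_command/3's return, and do_init/1 starts a worker pool when Mod:init/1 asks for one. A minimal behaviour-module sketch tying the two together (`my_app_vnode` and its worker module are hypothetical; the remaining behaviour callbacks are elided):

    -module(my_app_vnode).
    -export([init/1, handle_command/3]).

    %% Returning a {pool, ...} prop makes do_init/1 call
    %% riak_core_vnode_worker_pool:start_link/5 as shown above.
    init([_Index]) ->
        {ok, #{}, [{pool, my_app_vnode_worker, 10, []}]}.

    %% {reply, ...}: vnode_command/3 calls reply(Sender, Reply) for us.
    handle_command(ping, _Sender, State) -> {reply, pong, State};
    %% {async, ...}: the work is dispatched to the worker pool and the
    %% result is sent back to the given sender by the worker.
    handle_command({fold, Fun, Acc}, Sender, State) ->
        {async, {fold, Fun, Acc}, Sender, State};
    handle_command(_Other, _Sender, State) -> {noreply, State}.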
+
+vnode_coverage(Sender, Request, KeySpaces,
+               State = #state{index = Index, mod = Mod,
+                              modstate = ModState, pool_pid = Pool,
+                              forward = Forward}) ->
+    %% Check if we should forward
+    case Forward of
+      undefined ->
+          Action = Mod:handle_coverage(Request, KeySpaces, Sender,
+                                       ModState);
+      %% handle coverage requests locally during ring resize
+      Forwards when is_list(Forwards) ->
+          Action = Mod:handle_coverage(Request, KeySpaces, Sender,
+                                       ModState);
+      NextOwner ->
+          logger:debug("Forwarding coverage ~p -> ~p: ~p~n",
+                       [node(), NextOwner, Index]),
+          riak_core_vnode_master:coverage(Request,
+                                          {Index, NextOwner}, KeySpaces, Sender,
+                                          riak_core_vnode_master:reg_name(Mod)),
+          Action = continue
+    end,
+    case Action of
+      continue -> continue(State, ModState);
+      {reply, Reply, NewModState} ->
+          reply(Sender, Reply), continue(State, NewModState);
+      {noreply, NewModState} -> continue(State, NewModState);
+      {async, Work, From, NewModState} ->
+          %% dispatch some work to the vnode worker pool
+          %% the result is sent back to 'From'
+          riak_core_vnode_worker_pool:handle_work(Pool, Work,
+                                                  From),
+          continue(State, NewModState);
+      {stop, Reason, NewModState} ->
+          {stop, Reason, State#state{modstate = NewModState}}
+    end.
+
+vnode_handoff_command(Sender, Request, ForwardTo,
+                      State = #state{mod = Mod, modstate = ModState,
+                                     handoff_target = HOTarget,
+                                     handoff_type = HOType, pool_pid = Pool}) ->
+    case Mod:handle_handoff_command(Request, Sender,
+                                    ModState)
+    of
+      {reply, Reply, NewModState} ->
+          reply(Sender, Reply), continue(State, NewModState);
+      {noreply, NewModState} -> continue(State, NewModState);
+      {async, Work, From, NewModState} ->
+          %% dispatch some work to the vnode worker pool
+          %% the result is sent back to 'From'
+          riak_core_vnode_worker_pool:handle_work(Pool, Work,
+                                                  From),
+          continue(State, NewModState);
+      {forward, NewModState} ->
+          forward_request(HOType, Request, HOTarget, ForwardTo,
+                          Sender, State),
+          continue(State, NewModState);
+      {forward, NewReq, NewModState} ->
+          forward_request(HOType, NewReq, HOTarget, ForwardTo,
+                          Sender, State),
+          continue(State, NewModState);
+      {drop, NewModState} -> continue(State, NewModState);
+      {stop, Reason, NewModState} ->
+          {stop, Reason, State#state{modstate = NewModState}}
+    end.
+
+%% @private wrap the request for resize forwards, and use the resize
+%% target.
+forward_request(resize, Request, _HOTarget,
+                ResizeTarget, Sender, State) ->
+    %% resize op and transfer ongoing
+    vnode_forward(resize, ResizeTarget, Sender,
+                  {resize_forward, Request}, State);
+forward_request(undefined, Request, _HOTarget,
+                ResizeTarget, Sender, State) ->
+    %% resize op ongoing, no resize transfer ongoing, arrive here
+    %% via forward_or_vnode_command
+    vnode_forward(resize, ResizeTarget, Sender,
+                  {resize_forward, Request}, State);
+forward_request(_, Request, HOTarget, _ResizeTarget,
+                Sender, State) ->
+    %% normal explicit forwarding during ownership transfer
+    vnode_forward(explicit, HOTarget, Sender, Request,
+                  State).
+
+vnode_forward(Type, ForwardTo, Sender, Request,
+              State) ->
+    logger:debug("Forwarding (~p) {~p,~p} -> ~p~n",
+                 [Type, State#state.index, node(), ForwardTo]),
+    riak_core_vnode_master:command_unreliable(ForwardTo,
+                                              Request, Sender,
+                                              riak_core_vnode_master:reg_name(State#state.mod)).
+
+%% @doc during ring resizing, if we have completed a transfer to the index that will
+%% handle the request in the future ring, we forward to it.
Otherwise we delegate +%% to the local vnode like other requests during handoff +vnode_resize_command(Sender, Request, FutureIndex, + State = #state{forward = Forward}) + when is_list(Forward) -> + case lists:keyfind(FutureIndex, 1, Forward) of + false -> vnode_command(Sender, Request, State); + {FutureIndex, FutureOwner} -> + vnode_handoff_command(Sender, Request, + {FutureIndex, FutureOwner}, State) + end. + + +%% This code lives in riak_core_vnode rather than riak_core_vnode_manager +%% because the ring_trans call is a synchronous call to the ring manager, +%% and it is better to block an individual vnode rather than the vnode +%% manager. Blocking the manager can impact all vnodes. This code is safe +%% to execute on multiple parallel vnodes because of the synchronization +%% afforded by having all ring changes go through the single ring manager. +mark_handoff_complete(SrcIdx, Target, SeenIdxs, Mod, + resize) -> + Prev = node(), + Source = {SrcIdx, Prev}, + TransFun = fun (Ring, _) -> + Owner = riak_core_ring:index_owner(Ring, SrcIdx), + Status = riak_core_ring:resize_transfer_status(Ring, + Source, + Target, + Mod), + case {Owner, Status} of + {Prev, awaiting} -> + F = fun (SeenIdx, RingAcc) -> + riak_core_ring:schedule_resize_transfer(RingAcc, + Source, + SeenIdx) + end, + Ring2 = lists:foldl(F, Ring, + ordsets:to_list(SeenIdxs)), + Ring3 = + riak_core_ring:resize_transfer_complete(Ring2, + Source, + Target, + Mod), + %% local ring optimization (see below) + {set_only, Ring3}; + _ -> ignore + end + end, + Result = riak_core_ring_manager:ring_trans(TransFun, + []), + case Result of + {ok, _NewRing} -> resize; + _ -> continue + end; +mark_handoff_complete(Idx, {Idx, New}, [], Mod, _) -> + Prev = node(), + Result = riak_core_ring_manager:ring_trans(fun (Ring, + _) -> + Owner = + riak_core_ring:index_owner(Ring, + Idx), + {_, NextOwner, Status} = + riak_core_ring:next_owner(Ring, + Idx, + Mod), + NewStatus = + riak_core_ring:member_status(Ring, + New), + case {Owner, NextOwner, + NewStatus, Status} + of + {Prev, New, _, + awaiting} -> + Ring2 = + riak_core_ring:handoff_complete(Ring, + Idx, + Mod), + %% Optimization. Only alter the local ring without + %% triggering a gossip, thus implicitly coalescing + %% multiple vnode handoff completion events. In the + %% future we should decouple vnode handoff state from + %% the ring structure in order to make gossip independent + %% of ring size. + {set_only, Ring2}; + _ -> ignore + end + end, + []), + case Result of + {ok, NewRing} -> NewRing = NewRing; + _ -> + {ok, NewRing} = riak_core_ring_manager:get_my_ring() + end, + Owner = riak_core_ring:index_owner(NewRing, Idx), + {_, NextOwner, Status} = + riak_core_ring:next_owner(NewRing, Idx, Mod), + NewStatus = riak_core_ring:member_status(NewRing, New), + case {Owner, NextOwner, NewStatus, Status} of + {_, _, invalid, _} -> + %% Handing off to invalid node, don't give-up data. + continue; + {Prev, New, _, _} -> forward; + {Prev, _, _, _} -> + %% Handoff wasn't to node that is scheduled in next, so no change. + continue; + {_, _, _, _} -> shutdown end. -terminate(Reason, _StateName, - #state{mod = Mod, modstate = ModState, - pool_pid = Pool}) -> - %% Shutdown if the pool is still alive and a normal `Reason' is - %% given - there could be a race on delivery of the unregistered - %% event and successfully shutting down the pool. 
-    try case is_pid(Pool) andalso
-              is_process_alive(Pool) andalso (?NORMAL_REASON(Reason))
-        of
-          true ->
-              riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000);
-          _ -> ok
-        end
-    catch
-      Type:Reason:Stacktrace ->
-          logger:error("Error while shutting down vnode worker "
-                       "pool ~p:~p trace : ~p",
-                       [Type, Reason, Stacktrace])
-    after
-      case ModState of
-        %% Handoff completed, Mod:delete has been called, now terminate.
-        {deleted, ModState1} ->
-            Mod:terminate(Reason, ModState1);
-        _ -> Mod:terminate(Reason, ModState)
-      end
-    end.
-
-code_change(_OldVsn, StateName, State, _Extra) ->
-    {ok, StateName, State}.
+finish_handoff(State) -> finish_handoff([], State).
+
+finish_handoff(SeenIdxs,
+               State = #state{mod = Mod, modstate = ModState,
+                              index = Idx, handoff_target = Target,
+                              handoff_type = HOType}) ->
+    case mark_handoff_complete(Idx, Target, SeenIdxs, Mod,
+                               HOType)
+    of
+      continue ->
+          continue(State#state{handoff_target = none,
+                               handoff_type = undefined});
+      resize ->
+          CurrentForwarding = resize_forwarding(State),
+          NewForwarding = [Target | CurrentForwarding],
+          State2 = mod_set_forwarding(NewForwarding, State),
+          continue(State2#state{handoff_target = none,
+                                handoff_type = undefined,
+                                forward = NewForwarding});
+      Res when Res == forward; Res == shutdown ->
+          {_, HN} = Target,
+          %% Have to issue the delete now. Once unregistered the
+          %% vnode master will spin up a new vnode on demand.
+          %% Shutdown the async pool beforehand, don't want callbacks
+          %% running on non-existent data.
+          maybe_shutdown_pool(State),
+          {ok, NewModState} = Mod:delete(ModState),
+          logger:debug("~p ~p vnode finished handoff and deleted.",
+                       [Idx, Mod]),
+          riak_core_vnode_manager:unregister_vnode(Idx, Mod),
+          logger:debug("vnode hn/fwd :: ~p/~p :: ~p -> ~p~n",
+                       [State#state.mod, State#state.index,
+                        State#state.forward, HN]),
+          State2 = mod_set_forwarding(HN, State),
+          continue(State2#state{modstate =
+                                    {deleted,
+                                     NewModState}, % like to fail if used
+                                handoff_target = none, handoff_type = undefined,
+                                forward = HN})
+    end.
+
+maybe_shutdown_pool(#state{pool_pid = Pool}) ->
+    case is_pid(Pool) of
+      true ->
+          %% state.pool_pid will be cleaned up by handle_info message.
+          riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000);
+      _ -> ok
+    end.
+
+resize_forwarding(#state{forward = F})
+    when is_list(F) ->
+    F;
+resize_forwarding(_) -> [].
+
+mark_delete_complete(Idx, Mod) ->
+    Result = riak_core_ring_manager:ring_trans(fun (Ring,
+                                                    _) ->
+                                                       Type =
+                                                           riak_core_ring:vnode_type(Ring,
+                                                                                     Idx),
+                                                       {_, Next, Status} =
+                                                           riak_core_ring:next_owner(Ring,
+                                                                                     Idx),
+                                                       case {Type, Next, Status}
+                                                       of
+                                                         {resized_primary,
+                                                          '$delete',
+                                                          awaiting} ->
+                                                             Ring3 =
+                                                                 riak_core_ring:deletion_complete(Ring,
+                                                                                                  Idx,
+                                                                                                  Mod),
+                                                             %% Use local ring optimization like mark_handoff_complete
+                                                             {set_only, Ring3};
+                                                         {{fallback, _},
+                                                          '$delete',
+                                                          awaiting} ->
+                                                             Ring3 =
+                                                                 riak_core_ring:deletion_complete(Ring,
+                                                                                                  Idx,
+                                                                                                  Mod),
+                                                             %% Use local ring optimization like mark_handoff_complete
+                                                             {set_only, Ring3};
+                                                         _ -> ignore
+                                                       end
+                                               end,
+                                               []),
+    Result.
+
 maybe_handoff(_TargetIdx, _TargetNode,
               State = #state{modstate = {deleted, _}}) ->
@@ -1184,42 +1270,7 @@ start_outbound(HOType, TargetIdx, TargetNode, Opts,
           State#state{modstate = NewModState}
     end.

-%% @doc Send a reply to a vnode request. If
-%% the Ref is undefined just send the reply
-%% for compatibility with pre-0.12 requestors.
-%% If Ref is defined, send it along with the
-%% reply.
-%% NOTE: We *always* send the reply using unreliable delivery.
-%%
--spec reply(sender(), term()) -> any().
- -reply({fsm, undefined, From}, Reply) -> - riak_core_send_msg:send_event_unreliable(From, Reply); -reply({fsm, Ref, From}, Reply) -> - riak_core_send_msg:send_event_unreliable(From, - {Ref, Reply}); -reply({server, undefined, From}, Reply) -> - riak_core_send_msg:reply_unreliable(From, Reply); -reply({server, Ref, From}, Reply) -> - riak_core_send_msg:reply_unreliable(From, {Ref, Reply}); -reply({raw, Ref, From}, Reply) -> - riak_core_send_msg:bang_unreliable(From, {Ref, Reply}); -reply(ignore, _Reply) -> ok. - -%% @doc Set up a monitor for the pid named by a {@type sender()} vnode -%% argument. If `Sender' was the atom `ignore', this function sets up -%% a monitor on `self()' in order to return a valid (if useless) -%% monitor reference. --spec monitor(Sender :: sender()) -> Monitor :: - reference(). -monitor({fsm, _, From}) -> - erlang:monitor(process, From); -monitor({server, _, {Pid, _Ref}}) -> - erlang:monitor(process, Pid); -monitor({raw, _, From}) -> - erlang:monitor(process, From); -monitor(ignore) -> erlang:monitor(process, self()). %% Individual vnode processes and the vnode manager are tightly coupled. When %% vnode events occur, the vnode must ensure that the events are forwarded to @@ -1290,6 +1341,31 @@ current_state(Pid) -> gen_fsm_compat:sync_send_all_state_event(Pid, current_state). +wait_for_process_death(Pid) -> + wait_for_process_death(Pid, is_process_alive(Pid)). + +wait_for_process_death(Pid, true) -> + wait_for_process_death(Pid, is_process_alive(Pid)); +wait_for_process_death(_Pid, false) -> ok. + +wait_for_state_update(OriginalStateData, Pid) -> + {_, CurrentStateData} = (?MODULE):current_state(Pid), + wait_for_state_update(OriginalStateData, + CurrentStateData, Pid). + +wait_for_state_update(OriginalStateData, + OriginalStateData, Pid) -> + {_, CurrentStateData} = (?MODULE):current_state(Pid), + wait_for_state_update(OriginalStateData, + CurrentStateData, Pid); +wait_for_state_update(_OriginalState, _StateData, + _Pid) -> + ok. + +%% =================================================================== +%% Test +%% =================================================================== + pool_death_test() -> %% expect error log error_logger:tty(false), @@ -1317,26 +1393,4 @@ pool_death_test() -> wait_for_process_death(Pid), meck:validate(test_pool_mod), meck:validate(test_vnode). - -wait_for_process_death(Pid) -> - wait_for_process_death(Pid, is_process_alive(Pid)). - -wait_for_process_death(Pid, true) -> - wait_for_process_death(Pid, is_process_alive(Pid)); -wait_for_process_death(_Pid, false) -> ok. - -wait_for_state_update(OriginalStateData, Pid) -> - {_, CurrentStateData} = (?MODULE):current_state(Pid), - wait_for_state_update(OriginalStateData, - CurrentStateData, Pid). - -wait_for_state_update(OriginalStateData, - OriginalStateData, Pid) -> - {_, CurrentStateData} = (?MODULE):current_state(Pid), - wait_for_state_update(OriginalStateData, - CurrentStateData, Pid); -wait_for_state_update(_OriginalState, _StateData, - _Pid) -> - ok. - -endif. 
From 7f0aff7308f44248dce99919b07d52b58c7aa545 Mon Sep 17 00:00:00 2001 From: woelki Date: Thu, 10 Sep 2020 00:59:38 +0200 Subject: [PATCH 2/5] format --- src/chash.erl | 112 +- src/chashbin.erl | 125 +- src/gen_fsm_compat.erl | 1078 ++++++++----- src/gen_nb_server.erl | 262 ++-- src/riak_core.erl | 410 ++--- src/riak_core_apl.erl | 552 ++++--- src/riak_core_app.erl | 37 +- src/riak_core_base64url.erl | 19 +- src/riak_core_bucket.erl | 89 +- src/riak_core_bucket_props.erl | 281 ++-- src/riak_core_claim.erl | 598 +++---- src/riak_core_claim_util.erl | 452 +++--- src/riak_core_claimant.erl | 1097 +++++++------ src/riak_core_eventhandler_guard.erl | 26 +- src/riak_core_eventhandler_sup.erl | 56 +- src/riak_core_gossip.erl | 337 ++-- src/riak_core_handoff_listener.erl | 53 +- src/riak_core_handoff_listener_sup.erl | 8 +- src/riak_core_handoff_manager.erl | 931 ++++++----- src/riak_core_handoff_receiver.erl | 171 +- src/riak_core_handoff_receiver_sup.erl | 8 +- src/riak_core_handoff_sender.erl | 693 +++++---- src/riak_core_handoff_sender_sup.erl | 12 +- src/riak_core_handoff_sup.erl | 8 +- src/riak_core_node_watcher.erl | 606 +++---- src/riak_core_node_watcher_events.erl | 39 +- src/riak_core_priority_queue.erl | 148 +- src/riak_core_rand.erl | 15 +- src/riak_core_ring.erl | 1992 +++++++++++++----------- src/riak_core_ring_events.erl | 43 +- src/riak_core_ring_handler.erl | 218 +-- src/riak_core_ring_manager.erl | 689 ++++---- src/riak_core_ring_util.erl | 97 +- src/riak_core_send_msg.erl | 19 +- src/riak_core_status.erl | 176 ++- src/riak_core_sup.erl | 32 +- src/riak_core_test_util.erl | 79 +- src/riak_core_util.erl | 823 +++++----- src/riak_core_vnode.erl | 1588 ++++++++++--------- src/riak_core_vnode_manager.erl | 991 ++++++------ src/riak_core_vnode_master.erl | 197 ++- src/riak_core_vnode_proxy.erl | 523 ++++--- src/riak_core_vnode_proxy_sup.erl | 13 +- src/riak_core_vnode_sup.erl | 10 +- src/riak_core_vnode_worker.erl | 61 +- src/riak_core_vnode_worker_pool.erl | 273 ++-- src/vclock.erl | 218 +-- 47 files changed, 9121 insertions(+), 7144 deletions(-) diff --git a/src/chash.erl b/src/chash.erl index ec42297f2..d238dc6d0 100644 --- a/src/chash.erl +++ b/src/chash.erl @@ -36,15 +36,26 @@ -module(chash). --export([contains_name/2, fresh/2, lookup/2, key_of/1, - members/1, merge_rings/2, next_index/2, nodes/1, - predecessors/2, predecessors/3, ring_increment/1, - size/1, successors/2, successors/3, update/3]). +-export([contains_name/2, + fresh/2, + lookup/2, + key_of/1, + members/1, + merge_rings/2, + next_index/2, + nodes/1, + predecessors/2, + predecessors/3, + ring_increment/1, + size/1, + successors/2, + successors/3, + update/3]). -export_type([chash/0, index/0, index_as_int/0]). -define(RINGTOP, - trunc(math:pow(2, 160) - 1)). % SHA-1 space + trunc(math:pow(2, 160) - 1)). % SHA-1 space -ifdef(TEST). @@ -75,7 +86,7 @@ %% @doc Return true if named Node owns any partitions in the ring, else false. -spec contains_name(Name :: chash_node(), - CHash :: chash()) -> boolean(). + CHash :: chash()) -> boolean(). contains_name(Name, CHash) -> {_NumPartitions, Nodes} = CHash, @@ -86,7 +97,7 @@ contains_name(Name, CHash) -> %% is not much larger than the intended eventual number of %% participating nodes, then performance will suffer. -spec fresh(NumPartitions :: num_partitions(), - SeedNode :: chash_node()) -> chash(). + SeedNode :: chash_node()) -> chash(). 
 fresh(NumPartitions, SeedNode) ->
     Inc = ring_increment(NumPartitions),
@@ -96,7 +107,7 @@ fresh(NumPartitions, SeedNode) ->

 %% @doc Find the Node that owns the partition identified by IndexAsInt.
 -spec lookup(IndexAsInt :: index_as_int(),
-	     CHash :: chash()) -> chash_node().
+             CHash :: chash()) -> chash_node().

 lookup(IndexAsInt, CHash) ->
     {_NumPartitions, Nodes} = CHash,
@@ -123,7 +134,7 @@ members(CHash) ->
 %% If multiple nodes are actively claiming nodes in the same
 %% time period, churn will occur. Be prepared to live with it.
 -spec merge_rings(CHashA :: chash(),
-		  CHashB :: chash()) -> chash().
+                  CHashB :: chash()) -> chash().

 merge_rings(CHashA, CHashB) ->
     {NumPartitions, NodesA} = CHashA,
@@ -135,7 +146,7 @@ merge_rings(CHashA, CHashB) ->
 %% @doc Given the integer representation of a chash key,
 %%      return the next ring index integer value.
 -spec next_index(IntegerKey :: integer(),
-		 CHash :: chash()) -> index_as_int().
+                 CHash :: chash()) -> index_as_int().

 next_index(IntegerKey, {NumPartitions, _}) ->
     Inc = ring_increment(NumPartitions),
@@ -144,11 +155,13 @@ next_index(IntegerKey, {NumPartitions, _}) ->
 %% @doc Return the entire set of NodeEntries in the ring.
 -spec nodes(CHash :: chash()) -> [node_entry()].

-nodes(CHash) -> {_NumPartitions, Nodes} = CHash, Nodes.
+nodes(CHash) ->
+    {_NumPartitions, Nodes} = CHash,
+    Nodes.

 %% @doc Given an object key, return all NodeEntries in order starting at Index.
 -spec ordered_from(Index :: index(),
-		   CHash :: chash()) -> [node_entry()].
+                   CHash :: chash()) -> [node_entry()].

 ordered_from(Index, {NumPartitions, Nodes}) ->
     <<IndexAsInt:160/integer>> = Index,
     Inc = ring_increment(NumPartitions),
@@ -159,7 +172,7 @@ ordered_from(Index, {NumPartitions, Nodes}) ->
 %% @doc Given an object key, return all NodeEntries in reverse order
 %%      starting at Index.
 -spec predecessors(Index :: index() | index_as_int(),
-		   CHash :: chash()) -> [node_entry()].
+                   CHash :: chash()) -> [node_entry()].

 predecessors(Index, CHash) ->
     {NumPartitions, _Nodes} = CHash,
@@ -168,20 +181,20 @@ predecessors(Index, CHash) ->
 %% @doc Given an object key, return the next N NodeEntries in reverse order
 %%      starting at Index.
 -spec predecessors(Index :: index() | index_as_int(),
-		   CHash :: chash(), N :: integer()) -> [node_entry()].
+                   CHash :: chash(), N :: integer()) -> [node_entry()].

 predecessors(Index, CHash, N) when is_integer(Index) ->
     predecessors(<<Index:160/integer>>, CHash, N);
 predecessors(Index, CHash, N) ->
     Num = max_n(N, CHash),
     {Res, _} = lists:split(Num,
-			   lists:reverse(ordered_from(Index, CHash))),
+                           lists:reverse(ordered_from(Index, CHash))),
     Res.

 %% @doc Return increment between ring indexes given
 %%      the number of ring partitions.
 -spec ring_increment(NumPartitions ::
-			 pos_integer()) -> pos_integer().
+                         pos_integer()) -> pos_integer().

 ring_increment(NumPartitions) ->
     (?RINGTOP) div NumPartitions.
@@ -190,11 +203,12 @@ ring_increment(NumPartitions) ->
 -spec size(CHash :: chash()) -> integer().

 size(CHash) ->
-    {_NumPartitions, Nodes} = CHash, length(Nodes).
+    {_NumPartitions, Nodes} = CHash,
+    length(Nodes).

 %% @doc Given an object key, return all NodeEntries in order starting at Index.
 -spec successors(Index :: index(),
-		 CHash :: chash()) -> [node_entry()].
+                 CHash :: chash()) -> [node_entry()].

 successors(Index, CHash) ->
     {NumPartitions, _Nodes} = CHash,
@@ -203,24 +217,28 @@ successors(Index, CHash) ->
 %% @doc Given an object key, return the next N NodeEntries in order
 %%      starting at Index.
 -spec successors(Index :: index(), CHash :: chash(),
-		 N :: integer()) -> [node_entry()].
+                 N :: integer()) -> [node_entry()].
successors(Index, CHash, N) -> Num = max_n(N, CHash), Ordered = ordered_from(Index, CHash), {NumPartitions, _Nodes} = CHash, if Num =:= NumPartitions -> Ordered; - true -> {Res, _} = lists:split(Num, Ordered), Res + true -> + {Res, _} = lists:split(Num, Ordered), + Res end. %% @doc Make the partition beginning at IndexAsInt owned by Name'd node. -spec update(IndexAsInt :: index_as_int(), - Name :: chash_node(), CHash :: chash()) -> chash(). + Name :: chash_node(), CHash :: chash()) -> chash(). update(IndexAsInt, Name, CHash) -> {NumPartitions, Nodes} = CHash, - NewNodes = lists:keyreplace(IndexAsInt, 1, Nodes, - {IndexAsInt, Name}), + NewNodes = lists:keyreplace(IndexAsInt, + 1, + Nodes, + {IndexAsInt, Name}), {NumPartitions, NewNodes}. %% ==================================================================== @@ -231,14 +249,14 @@ update(IndexAsInt, Name, CHash) -> %% @doc Return either N or the number of partitions in the ring, whichever %% is lesser. -spec max_n(N :: integer(), - CHash :: chash()) -> integer(). + CHash :: chash()) -> integer(). max_n(N, {NumPartitions, _Nodes}) -> erlang:min(N, NumPartitions). %% @private -spec random_node(NodeA :: chash_node(), - NodeB :: chash_node()) -> chash_node(). + NodeB :: chash_node()) -> chash_node(). random_node(NodeA, NodeA) -> NodeA; random_node(NodeA, NodeB) -> @@ -255,25 +273,34 @@ update_test() -> % Create a fresh ring... CHash = chash:fresh(5, Node), GetNthIndex = fun (N, {_, Nodes}) -> - {Index, _} = lists:nth(N, Nodes), Index - end, + {Index, _} = lists:nth(N, Nodes), + Index + end, % Test update... FirstIndex = GetNthIndex(1, CHash), ThirdIndex = GetNthIndex(3, CHash), {5, - [{_, NewNode}, {_, Node}, {_, Node}, {_, Node}, - {_, Node}, {_, Node}]} = - update(FirstIndex, NewNode, CHash), + [{_, NewNode}, + {_, Node}, + {_, Node}, + {_, Node}, + {_, Node}, + {_, Node}]} = + update(FirstIndex, NewNode, CHash), {5, - [{_, Node}, {_, Node}, {_, NewNode}, {_, Node}, - {_, Node}, {_, Node}]} = - update(ThirdIndex, NewNode, CHash). + [{_, Node}, + {_, Node}, + {_, NewNode}, + {_, Node}, + {_, Node}, + {_, Node}]} = + update(ThirdIndex, NewNode, CHash). contains_test() -> CHash = chash:fresh(8, the_node), ?assertEqual(true, (contains_name(the_node, CHash))), ?assertEqual(false, - (contains_name(some_other_node, CHash))). + (contains_name(some_other_node, CHash))). max_n_test() -> CHash = chash:fresh(8, the_node), @@ -282,26 +309,27 @@ max_n_test() -> simple_size_test() -> ?assertEqual(8, - (length(chash:nodes(chash:fresh(8, the_node))))). + (length(chash:nodes(chash:fresh(8, the_node))))). successors_length_test() -> ?assertEqual(8, - (length(chash:successors(chash:key_of(0), - chash:fresh(8, the_node))))). + (length(chash:successors(chash:key_of(0), + chash:fresh(8, the_node))))). inverse_pred_test() -> CHash = chash:fresh(8, the_node), S = [I - || {I, _} <- chash:successors(chash:key_of(4), CHash)], + || {I, _} <- chash:successors(chash:key_of(4), CHash)], P = [I - || {I, _} - <- chash:predecessors(chash:key_of(4), CHash)], + || {I, _} + <- chash:predecessors(chash:key_of(4), CHash)], ?assertEqual(S, (lists:reverse(P))). merge_test() -> CHashA = chash:fresh(8, node_one), - CHashB = chash:update(0, node_one, - chash:fresh(8, node_two)), + CHashB = chash:update(0, + node_one, + chash:fresh(8, node_two)), CHash = chash:merge_rings(CHashA, CHashB), ?assertEqual(node_one, (chash:lookup(0, CHash))). 
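The tests above double as usage documentation. A compact end-to-end sketch of the chash API shown in this diff (illustrative only):

    chash_demo() ->
        CHash0 = chash:fresh(8, node_a),           %% 8 partitions, all owned by node_a
        CHash1 = chash:update(0, node_b, CHash0),  %% reassign the partition at index 0
        node_b = chash:lookup(0, CHash1),
        Key = chash:key_of(<<"some object">>),     %% 160-bit SHA-1 key for any term
        %% first 3 {Index, Owner} entries clockwise from Key
        [{_Idx, _Owner} | _] = chash:successors(Key, CHash1, 3).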
diff --git a/src/chashbin.erl b/src/chashbin.erl index 6404ae0fd..689cde58c 100644 --- a/src/chashbin.erl +++ b/src/chashbin.erl @@ -23,13 +23,21 @@ %% ------------------------------------------------------------------- -module(chashbin). --export([create/1, to_chash/1, to_list/1, - to_list_filter/2, responsible_index/2, - responsible_position/2, index_owner/2, - num_partitions/1]). - --export([iterator/2, exact_iterator/2, itr_value/1, - itr_pop/2, itr_next/1, itr_next_while/2]). +-export([create/1, + to_chash/1, + to_list/1, + to_list_filter/2, + responsible_index/2, + responsible_position/2, + index_owner/2, + num_partitions/1]). + +-export([iterator/2, + exact_iterator/2, + itr_value/1, + itr_pop/2, + itr_next/1, + itr_next_while/2]). -export_type([chashbin/0]). @@ -44,29 +52,32 @@ -type index() :: chash:index_as_int(). -type pred_fun() :: fun(({index(), - node()}) -> boolean()). + node()}) -> boolean()). -type chash_key() :: index() | chash:index(). -ifndef(namespaced_types). -record(chashbin, - {size :: pos_integer(), owners :: owners_bin(), - nodes :: erlang:tuple(node())}). + {size :: pos_integer(), + owners :: owners_bin(), + nodes :: erlang:tuple(node())}). -else. -record(chashbin, - {size :: pos_integer(), owners :: owners_bin(), - nodes :: erlang:tuple(node())}). + {size :: pos_integer(), + owners :: owners_bin(), + nodes :: erlang:tuple(node())}). -endif. -type chashbin() :: #chashbin{}. -record(iterator, - {pos :: non_neg_integer(), start :: non_neg_integer(), - chbin :: chashbin()}). + {pos :: non_neg_integer(), + start :: non_neg_integer(), + chbin :: chashbin()}). -type iterator() :: #iterator{}. @@ -81,16 +92,17 @@ create({Size, Owners}) -> Nodes1 = [Node || {_, Node} <- Owners], Nodes2 = lists:usort(Nodes1), Nodes3 = lists:zip(Nodes2, - lists:seq(1, length(Nodes2))), + lists:seq(1, length(Nodes2))), Bin = create_bin(Owners, Nodes3, <<>>), #chashbin{size = Size, owners = Bin, - nodes = list_to_tuple(Nodes2)}. + nodes = list_to_tuple(Nodes2)}. %% @doc Convert a `chashbin' back to a `chash' -spec to_chash(chashbin()) -> chash:chash(). to_chash(CHBin = #chashbin{size = Size}) -> - L = to_list(CHBin), {Size, L}. + L = to_list(CHBin), + {Size, L}. %% @doc Convert a `chashbin' to a list of `{Index, Owner}' pairs -spec to_list(chashbin()) -> [{index(), node()}]. @@ -103,17 +115,17 @@ to_list(#chashbin{owners = Bin, nodes = Nodes}) -> %% Convert a `chashbin' to a list of `{Index, Owner}' pairs for %% which `Pred({Index, Owner})' returns `true' -spec to_list_filter(pred_fun(), - chashbin()) -> [{index(), node()}]. + chashbin()) -> [{index(), node()}]. to_list_filter(Pred, - #chashbin{owners = Bin, nodes = Nodes}) -> + #chashbin{owners = Bin, nodes = Nodes}) -> [{Idx, element(Id, Nodes)} || <<Idx:160/integer, Id:16/integer>> <= Bin, - Pred({Idx, element(Id, Nodes)})]. + Pred({Idx, element(Id, Nodes)})]. %% @doc Determine the ring index responsible for a given chash key -spec responsible_index(chash_key(), - chashbin()) -> index(). + chashbin()) -> index(). responsible_index(<<HashKey:160/integer>>, CHBin) -> responsible_index(HashKey, CHBin); @@ -123,7 +135,7 @@ responsible_index(HashKey, #chashbin{size = Size}) -> %% @doc Determine the ring position responsible for a given chash key -spec responsible_position(chash_key(), - chashbin()) -> non_neg_integer(). + chashbin()) -> non_neg_integer().
responsible_position(<<HashKey:160/integer>>, CHBin) -> responsible_position(HashKey, CHBin); @@ -136,10 +148,10 @@ responsible_position(HashKey, #chashbin{size = Size}) -> index_owner(Idx, CHBin) -> case itr_value(exact_iterator(Idx, CHBin)) of - {Idx, Owner} -> Owner; - _ -> - %% Match the behavior for riak_core_ring:index_owner/2 - exit({badmatch, false}) + {Idx, Owner} -> Owner; + _ -> + %% Match the behavior for riak_core_ring:index_owner/2 + exit({badmatch, false}) end. %% @doc Return the number of partitions in a given `chashbin' @@ -154,7 +166,7 @@ num_partitions(#chashbin{size = Size}) -> Size. %% @doc %% Return an iterator pointing to the index responsible for the given chash key -spec iterator(first | chash_key(), - chashbin()) -> iterator(). + chashbin()) -> iterator(). iterator(first, CHBin) -> #iterator{pos = 0, start = 0, chbin = CHBin}; @@ -168,10 +180,10 @@ iterator(HashKey, CHBin) -> -spec itr_value(iterator()) -> {index(), node()}. itr_value(#iterator{pos = Pos, - chbin = #chashbin{owners = Bin, nodes = Nodes}}) -> + chbin = #chashbin{owners = Bin, nodes = Nodes}}) -> <<_:Pos/binary-unit:176, Idx:160/integer, Id:16/integer, _/binary>> = - Bin, + Bin, Owner = element(Id, Nodes), {Idx, Owner}. @@ -179,52 +191,52 @@ itr_value(#iterator{pos = Pos, -spec itr_next(iterator()) -> iterator() | done. itr_next(Itr = #iterator{pos = Pos, start = Start, - chbin = CHBin}) -> + chbin = CHBin}) -> Pos2 = (Pos + 1) rem CHBin#chashbin.size, case Pos2 of - Start -> done; - _ -> Itr#iterator{pos = Pos2} + Start -> done; + _ -> Itr#iterator{pos = Pos2} end. %% @doc %% Advance the iterator `N' times, returning a list of the traversed %% `{Index, Owner}' pairs as well as the new iterator state -spec itr_pop(pos_integer(), iterator()) -> {[{index(), - node()}], - iterator()}. + node()}], + iterator()}. itr_pop(N, Itr = #iterator{pos = Pos, chbin = CHBin}) -> #chashbin{size = Size, owners = Bin, nodes = Nodes} = - CHBin, + CHBin, L = case Bin of - <<_:Pos/binary-unit:176, Bin2:N/binary-unit:176, - _/binary>> -> - [{Idx, element(Id, Nodes)} - || <<Idx:160/integer, Id:16/integer>> <= Bin2]; - _ -> - Left = N + Pos - Size, - Skip = Pos - Left, - <<Bin3:Left/binary-unit:176, _:Skip/binary-unit:176, Bin2/binary>> = - Bin, - L1 = [{Idx, element(Id, Nodes)} - || <<Idx:160/integer, Id:16/integer>> <= Bin2], - L2 = [{Idx, element(Id, Nodes)} - || <<Idx:160/integer, Id:16/integer>> <= Bin3], - L1 ++ L2 - end, + <<_:Pos/binary-unit:176, Bin2:N/binary-unit:176, + _/binary>> -> + [{Idx, element(Id, Nodes)} + || <<Idx:160/integer, Id:16/integer>> <= Bin2]; + _ -> + Left = N + Pos - Size, + Skip = Pos - Left, + <<Bin3:Left/binary-unit:176, _:Skip/binary-unit:176, Bin2/binary>> = + Bin, + L1 = [{Idx, element(Id, Nodes)} + || <<Idx:160/integer, Id:16/integer>> <= Bin2], + L2 = [{Idx, element(Id, Nodes)} + || <<Idx:160/integer, Id:16/integer>> <= Bin3], + L1 ++ L2 + end, Pos2 = (Pos + N) rem Size, Itr2 = Itr#iterator{pos = Pos2}, {L, Itr2}. %% @doc Advance the iterator while `Pred({Index, Owner})' returns `true' -spec itr_next_while(pred_fun(), - iterator()) -> iterator(). + iterator()) -> iterator(). itr_next_while(Pred, Itr) -> case Pred(itr_value(Itr)) of - false -> Itr; - true -> itr_next_while(Pred, itr_next(Itr)) + false -> Itr; + true -> itr_next_while(Pred, itr_next(Itr)) end. %% =================================================================== @@ -233,7 +245,7 @@ itr_next_while(Pred, Itr) -> %% Convert list of {Index, Owner} pairs into `chashbin' binary representation -spec create_bin([{index(), node()}], - [{node(), pos_integer()}], binary()) -> owners_bin(). + [{node(), pos_integer()}], binary()) -> owners_bin().
create_bin([], _, Bin) -> Bin; create_bin([{Idx, Owner} | Owners], Nodes, Bin) -> @@ -245,7 +257,8 @@ create_bin([{Idx, Owner} | Owners], Nodes, Bin) -> index_position(<<Idx:160/integer>>, CHBin) -> index_position(Idx, CHBin); index_position(Idx, #chashbin{size = Size}) -> - Inc = chash:ring_increment(Size), Idx div Inc rem Size. + Inc = chash:ring_increment(Size), + Idx div Inc rem Size. %% Return iterator pointing to the given index exact_iterator(<<Idx:160/integer>>, CHBin) -> diff --git a/src/gen_fsm_compat.erl b/src/gen_fsm_compat.erl index 56330dd68..5bfed9577 100644 --- a/src/gen_fsm_compat.erl +++ b/src/gen_fsm_compat.erl @@ -105,19 +105,35 @@ %%% %%% --------------------------------------------------- --export([start/3, start/4, start_link/3, start_link/4, - stop/1, stop/3, send_event/2, sync_send_event/2, - sync_send_event/3, send_all_state_event/2, - sync_send_all_state_event/2, - sync_send_all_state_event/3, reply/2, start_timer/2, - send_event_after/2, cancel_timer/1, enter_loop/4, - enter_loop/5, enter_loop/6, wake_hib/7]). +-export([start/3, + start/4, + start_link/3, + start_link/4, + stop/1, + stop/3, + send_event/2, + sync_send_event/2, + sync_send_event/3, + send_all_state_event/2, + sync_send_all_state_event/2, + sync_send_all_state_event/3, + reply/2, + start_timer/2, + send_event_after/2, + cancel_timer/1, + enter_loop/4, + enter_loop/5, + enter_loop/6, + wake_hib/7]). %% Internal exports --export([init_it/6, system_continue/3, - system_terminate/4, system_code_change/4, - system_get_state/1, system_replace_state/2, - format_status/2]). +-export([init_it/6, + system_continue/3, + system_terminate/4, + system_code_change/4, + system_get_state/1, + system_replace_state/2, + format_status/2]). -import(error_logger, [format/2]). @@ -126,77 +142,82 @@ %%% --------------------------------------------------- -callback init(Args :: term()) -> {ok, - StateName :: atom(), StateData :: term()} | - {ok, StateName :: atom(), StateData :: term(), - timeout() | hibernate} | - {stop, Reason :: term()} | ignore. + StateName :: atom(), StateData :: term()} | + {ok, StateName :: atom(), StateData :: term(), + timeout() | hibernate} | + {stop, Reason :: term()} | + ignore. -callback handle_event(Event :: term(), - StateName :: atom(), - StateData :: term()) -> {next_state, - NextStateName :: atom(), - NewStateData :: term()} | - {next_state, - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {stop, Reason :: term(), - NewStateData :: term()}. + StateName :: atom(), + StateData :: term()) -> {next_state, + NextStateName :: atom(), + NewStateData :: term()} | + {next_state, + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {stop, Reason :: term(), + NewStateData :: term()}. -callback handle_sync_event(Event :: term(), - From :: {pid(), Tag :: term()}, StateName :: atom(), - StateData :: term()) -> {reply, Reply :: term(), - NextStateName :: atom(), - NewStateData :: term()} | - {reply, Reply :: term(), - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {next_state, - NextStateName :: atom(), - NewStateData :: term()} | - {next_state, - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {stop, Reason :: term(), - Reply :: term(), - NewStateData :: term()} | - {stop, Reason :: term(), - NewStateData :: term()}.
+ From :: {pid(), Tag :: term()}, StateName :: atom(), + StateData :: term()) -> {reply, Reply :: term(), + NextStateName :: atom(), + NewStateData :: term()} | + {reply, Reply :: term(), + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {next_state, + NextStateName :: atom(), + NewStateData :: term()} | + {next_state, + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {stop, Reason :: term(), + Reply :: term(), + NewStateData :: term()} | + {stop, Reason :: term(), + NewStateData :: term()}. -callback handle_info(Info :: term(), - StateName :: atom(), - StateData :: term()) -> {next_state, - NextStateName :: atom(), - NewStateData :: term()} | - {next_state, - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {stop, Reason :: normal | term(), - NewStateData :: term()}. - --callback terminate(Reason :: normal | shutdown | - {shutdown, term()} | term(), - StateName :: atom(), StateData :: term()) -> term(). + StateName :: atom(), + StateData :: term()) -> {next_state, + NextStateName :: atom(), + NewStateData :: term()} | + {next_state, + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {stop, Reason :: normal | term(), + NewStateData :: term()}. + +-callback terminate(Reason :: normal | + shutdown | + {shutdown, term()} | + term(), + StateName :: atom(), StateData :: term()) -> term(). -callback code_change(OldVsn :: term() | {down, term()}, - StateName :: atom(), StateData :: term(), - Extra :: term()) -> {ok, NextStateName :: atom(), - NewStateData :: term()}. + StateName :: atom(), StateData :: term(), + Extra :: term()) -> {ok, NextStateName :: atom(), + NewStateData :: term()}. -callback format_status(Opt, - StatusData) -> Status when Opt :: normal | terminate, - StatusData :: [PDict | - State], - PDict :: [{Key :: term(), - Value :: term()}], - State :: term(), - Status :: term(). + StatusData) -> Status when Opt :: normal | terminate, + StatusData :: [PDict | + State], + PDict :: [{Key :: term(), + Value :: term()}], + State :: term(), + Status :: term(). --optional_callbacks([handle_info/3, terminate/3, - code_change/4, format_status/2]). +-optional_callbacks([handle_info/3, + terminate/3, + code_change/4, + format_status/2]). %%% --------------------------------------------------- %%% Starts a generic state machine. @@ -232,59 +253,69 @@ stop(Name, Reason, Timeout) -> gen:stop(Name, Reason, Timeout). send_event({global, Name}, Event) -> - catch global:send(Name, {'$gen_event', Event}), ok; + catch global:send(Name, {'$gen_event', Event}), + ok; send_event({via, Mod, Name}, Event) -> - catch Mod:send(Name, {'$gen_event', Event}), ok; + catch Mod:send(Name, {'$gen_event', Event}), + ok; send_event(Name, Event) -> - Name ! {'$gen_event', Event}, ok. + Name ! {'$gen_event', Event}, + ok. sync_send_event(Name, Event) -> case catch gen:call(Name, '$gen_sync_event', Event) of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_event, [Name, Event]}}) + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_event, [Name, Event]}}) end. 
sync_send_event(Name, Event, Timeout) -> - case catch gen:call(Name, '$gen_sync_event', Event, - Timeout) - of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_event, [Name, Event, Timeout]}}) + case catch gen:call(Name, + '$gen_sync_event', + Event, + Timeout) + of + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_event, [Name, Event, Timeout]}}) end. send_all_state_event({global, Name}, Event) -> catch global:send(Name, - {'$gen_all_state_event', Event}), + {'$gen_all_state_event', Event}), ok; send_all_state_event({via, Mod, Name}, Event) -> catch Mod:send(Name, {'$gen_all_state_event', Event}), ok; send_all_state_event(Name, Event) -> - Name ! {'$gen_all_state_event', Event}, ok. + Name ! {'$gen_all_state_event', Event}, + ok. sync_send_all_state_event(Name, Event) -> - case catch gen:call(Name, '$gen_sync_all_state_event', - Event) - of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_all_state_event, [Name, Event]}}) + case catch gen:call(Name, + '$gen_sync_all_state_event', + Event) + of + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_all_state_event, [Name, Event]}}) end. sync_send_all_state_event(Name, Event, Timeout) -> - case catch gen:call(Name, '$gen_sync_all_state_event', - Event, Timeout) - of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_all_state_event, - [Name, Event, Timeout]}}) + case catch gen:call(Name, + '$gen_sync_all_state_event', + Event, + Timeout) + of + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, + sync_send_all_state_event, + [Name, Event, Timeout]}}) end. %% Designed to be only callable within one of the callbacks @@ -306,9 +337,9 @@ send_event_after(Time, Event) -> %% an active timer/send_event_after, false otherwise. cancel_timer(Ref) -> case erlang:cancel_timer(Ref) of - false -> - receive {timeout, Ref, _} -> 0 after 0 -> false end; - RemainingTime -> RemainingTime + false -> + receive {timeout, Ref, _} -> 0 after 0 -> false end; + RemainingTime -> RemainingTime end. %% enter_loop/4,5,6 @@ -320,31 +351,53 @@ cancel_timer(Ref) -> %% The user is responsible for any initialization of the process, %% including registering a name for it. enter_loop(Mod, Options, StateName, StateData) -> - enter_loop(Mod, Options, StateName, StateData, self(), - infinity). + enter_loop(Mod, + Options, + StateName, + StateData, + self(), + infinity). enter_loop(Mod, Options, StateName, StateData, - {Scope, _} = ServerName) + {Scope, _} = ServerName) when Scope == local; Scope == global -> - enter_loop(Mod, Options, StateName, StateData, - ServerName, infinity); + enter_loop(Mod, + Options, + StateName, + StateData, + ServerName, + infinity); enter_loop(Mod, Options, StateName, StateData, - {via, _, _} = ServerName) -> - enter_loop(Mod, Options, StateName, StateData, - ServerName, infinity); + {via, _, _} = ServerName) -> + enter_loop(Mod, + Options, + StateName, + StateData, + ServerName, + infinity); enter_loop(Mod, Options, StateName, StateData, - Timeout) -> - enter_loop(Mod, Options, StateName, StateData, self(), - Timeout). + Timeout) -> + enter_loop(Mod, + Options, + StateName, + StateData, + self(), + Timeout). 
enter_loop(Mod, Options, StateName, StateData, - ServerName, Timeout) -> + ServerName, Timeout) -> Name = gen:get_proc_name(ServerName), Parent = gen:get_parent(), Debug = gen:debug_options(Name, Options), HibernateAfterTimeout = gen:hibernate_after(Options), - loop(Parent, Name, StateName, StateData, Mod, Timeout, - HibernateAfterTimeout, Debug). + loop(Parent, + Name, + StateName, + StateData, + Mod, + Timeout, + HibernateAfterTimeout, + Debug). %%% --------------------------------------------------- %%% Initiate the new process. @@ -360,30 +413,42 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> Debug = gen:debug_options(Name, Options), HibernateAfterTimeout = gen:hibernate_after(Options), case catch Mod:init(Args) of - {ok, StateName, StateData} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, Name, StateName, StateData, Mod, infinity, - HibernateAfterTimeout, Debug); - {ok, StateName, StateData, Timeout} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, Name, StateName, StateData, Mod, Timeout, - HibernateAfterTimeout, Debug); - {stop, Reason} -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - ignore -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, ignore), - exit(normal); - {'EXIT', Reason} -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - Else -> - Error = {bad_return_value, Else}, - proc_lib:init_ack(Starter, {error, Error}), - exit(Error) + {ok, StateName, StateData} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, + Name, + StateName, + StateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug); + {ok, StateName, StateData, Timeout} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, + Name, + StateName, + StateData, + Mod, + Timeout, + HibernateAfterTimeout, + Debug); + {stop, Reason} -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + ignore -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, ignore), + exit(normal); + {'EXIT', Reason} -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + Else -> + Error = {bad_return_value, Else}, + proc_lib:init_ack(Starter, {error, Error}), + exit(Error) end. 
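The init_it/6 clauses above ack the starter and enter loop/8 for each legal Mod:init/1 return shape. A minimal callback module exercising the {ok, StateName, StateData} path; the module, state names, and events are illustrative, not part of this patch:

    -module(blink_fsm).
    -behaviour(gen_fsm_compat).
    -export([start_link/0, init/1, off/2, on/2,
             handle_event/3, handle_sync_event/4]).

    start_link() -> gen_fsm_compat:start_link(?MODULE, [], []).

    init([]) -> {ok, off, 0}.    % enters loop/8 with StateName=off, Timeout=infinity

    off(toggle, N) -> {next_state, on, N + 1};
    off(_Event, N) -> {next_state, off, N}.

    on(toggle, N) -> {next_state, off, N + 1};
    on(_Event, N) -> {next_state, on, N}.

    handle_event(_Event, StateName, N) -> {next_state, StateName, N}.
    handle_sync_event(_Event, _From, StateName, N) -> {reply, N, StateName, N}.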
%%----------------------------------------------------------------- @@ -391,96 +456,202 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> %%----------------------------------------------------------------- loop(Parent, Name, StateName, StateData, Mod, hibernate, HibernateAfterTimeout, Debug) -> - proc_lib:hibernate(?MODULE, wake_hib, - [Parent, Name, StateName, StateData, Mod, - HibernateAfterTimeout, Debug]); + proc_lib:hibernate(?MODULE, + wake_hib, + [Parent, + Name, + StateName, + StateData, + Mod, + HibernateAfterTimeout, + Debug]); loop(Parent, Name, StateName, StateData, Mod, infinity, HibernateAfterTimeout, Debug) -> receive - Msg -> - decode_msg(Msg, Parent, Name, StateName, StateData, Mod, - infinity, HibernateAfterTimeout, Debug, false) - after HibernateAfterTimeout -> - loop(Parent, Name, StateName, StateData, Mod, hibernate, - HibernateAfterTimeout, Debug) + Msg -> + decode_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug, + false) + after HibernateAfterTimeout -> + loop(Parent, + Name, + StateName, + StateData, + Mod, + hibernate, + HibernateAfterTimeout, + Debug) end; loop(Parent, Name, StateName, StateData, Mod, Time, HibernateAfterTimeout, Debug) -> Msg = receive - Input -> Input after Time -> {'$gen_event', timeout} - end, - decode_msg(Msg, Parent, Name, StateName, StateData, Mod, - Time, HibernateAfterTimeout, Debug, false). + Input -> Input after Time -> {'$gen_event', timeout} + end, + decode_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout, + Debug, + false). wake_hib(Parent, Name, StateName, StateData, Mod, - HibernateAfterTimeout, Debug) -> + HibernateAfterTimeout, Debug) -> Msg = receive Input -> Input end, - decode_msg(Msg, Parent, Name, StateName, StateData, Mod, - hibernate, HibernateAfterTimeout, Debug, true). + decode_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + hibernate, + HibernateAfterTimeout, + Debug, + true). decode_msg(Msg, Parent, Name, StateName, StateData, Mod, - Time, HibernateAfterTimeout, Debug, Hib) -> + Time, HibernateAfterTimeout, Debug, Hib) -> case Msg of - {system, From, Req} -> - sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, - [Name, StateName, StateData, Mod, Time, - HibernateAfterTimeout], - Hib); - {'EXIT', Parent, Reason} -> - terminate(Reason, Name, Msg, Mod, StateName, StateData, - Debug); - _Msg when Debug =:= [] -> - handle_msg(Msg, Parent, Name, StateName, StateData, Mod, - Time, HibernateAfterTimeout); - _Msg -> - Debug1 = sys:handle_debug(Debug, fun print_event/3, - {Name, StateName}, {in, Msg}), - handle_msg(Msg, Parent, Name, StateName, StateData, Mod, - Time, HibernateAfterTimeout, Debug1) + {system, From, Req} -> + sys:handle_system_msg(Req, + From, + Parent, + ?MODULE, + Debug, + [Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout], + Hib); + {'EXIT', Parent, Reason} -> + terminate(Reason, + Name, + Msg, + Mod, + StateName, + StateData, + Debug); + _Msg when Debug =:= [] -> + handle_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout); + _Msg -> + Debug1 = sys:handle_debug(Debug, + fun print_event/3, + {Name, StateName}, + {in, Msg}), + handle_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout, + Debug1) end. %%----------------------------------------------------------------- %% Callback functions for system messages handling. 
%%----------------------------------------------------------------- system_continue(Parent, Debug, - [Name, StateName, StateData, Mod, Time, - HibernateAfterTimeout]) -> - loop(Parent, Name, StateName, StateData, Mod, Time, - HibernateAfterTimeout, Debug). + [Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout]) -> + loop(Parent, + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout, + Debug). -spec system_terminate(term(), _, _, - [term(), ...]) -> no_return(). + [term(), ...]) -> no_return(). system_terminate(Reason, _Parent, Debug, - [Name, StateName, StateData, Mod, _Time, - _HibernateAfterTimeout]) -> - terminate(Reason, Name, [], Mod, StateName, StateData, - Debug). - -system_code_change([Name, StateName, StateData, Mod, - Time, HibernateAfterTimeout], - _Module, OldVsn, Extra) -> - case catch Mod:code_change(OldVsn, StateName, StateData, - Extra) - of - {ok, NewStateName, NewStateData} -> - {ok, - [Name, NewStateName, NewStateData, Mod, Time, - HibernateAfterTimeout]}; - Else -> Else + [Name, + StateName, + StateData, + Mod, + _Time, + _HibernateAfterTimeout]) -> + terminate(Reason, + Name, + [], + Mod, + StateName, + StateData, + Debug). + +system_code_change([Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout], + _Module, OldVsn, Extra) -> + case catch Mod:code_change(OldVsn, + StateName, + StateData, + Extra) + of + {ok, NewStateName, NewStateData} -> + {ok, + [Name, + NewStateName, + NewStateData, + Mod, + Time, + HibernateAfterTimeout]}; + Else -> Else end. -system_get_state([_Name, StateName, StateData, _Mod, - _Time, _HibernateAfterTimeout]) -> +system_get_state([_Name, + StateName, + StateData, + _Mod, + _Time, + _HibernateAfterTimeout]) -> {ok, {StateName, StateData}}. system_replace_state(StateFun, - [Name, StateName, StateData, Mod, Time, - HibernateAfterTimeout]) -> + [Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout]) -> Result = {NStateName, NStateData} = StateFun({StateName, - StateData}), - {ok, Result, - [Name, NStateName, NStateData, Mod, Time, + StateData}), + {ok, + Result, + [Name, + NStateName, + NStateData, + Mod, + Time, HibernateAfterTimeout]}. 
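The system_* callbacks above are what let the standard sys tooling inspect and mutate a live FSM. Assuming Pid is a running gen_fsm_compat process, these stock sys calls route through them:

    {StateName, StateData} = sys:get_state(Pid),    % served by system_get_state/1
    sys:replace_state(Pid,                          % served by system_replace_state/2
                      fun ({SN, SD}) -> {SN, SD} end),
    sys:suspend(Pid),                               % parks the FSM in the sys loop
    sys:resume(Pid).                                % system_continue/3 re-enters loop/8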
%%----------------------------------------------------------------- @@ -489,137 +660,242 @@ system_replace_state(StateFun, %%----------------------------------------------------------------- print_event(Dev, {in, Msg}, {Name, StateName}) -> case Msg of - {'$gen_event', Event} -> - io:format(Dev, "*DBG* ~tp got event ~tp in state ~tw~n", - [Name, Event, StateName]); - {'$gen_all_state_event', Event} -> - io:format(Dev, - "*DBG* ~tp got all_state_event ~tp in " - "state ~tw~n", - [Name, Event, StateName]); - {timeout, Ref, {'$gen_timer', Message}} -> - io:format(Dev, "*DBG* ~tp got timer ~tp in state ~tw~n", - [Name, {timeout, Ref, Message}, StateName]); - {timeout, _Ref, {'$gen_event', Event}} -> - io:format(Dev, "*DBG* ~tp got timer ~tp in state ~tw~n", - [Name, Event, StateName]); - _ -> - io:format(Dev, "*DBG* ~tp got ~tp in state ~tw~n", - [Name, Msg, StateName]) + {'$gen_event', Event} -> + io:format(Dev, + "*DBG* ~tp got event ~tp in state ~tw~n", + [Name, Event, StateName]); + {'$gen_all_state_event', Event} -> + io:format(Dev, + "*DBG* ~tp got all_state_event ~tp in " + "state ~tw~n", + [Name, Event, StateName]); + {timeout, Ref, {'$gen_timer', Message}} -> + io:format(Dev, + "*DBG* ~tp got timer ~tp in state ~tw~n", + [Name, {timeout, Ref, Message}, StateName]); + {timeout, _Ref, {'$gen_event', Event}} -> + io:format(Dev, + "*DBG* ~tp got timer ~tp in state ~tw~n", + [Name, Event, StateName]); + _ -> + io:format(Dev, + "*DBG* ~tp got ~tp in state ~tw~n", + [Name, Msg, StateName]) end; print_event(Dev, {out, Msg, To, StateName}, Name) -> io:format(Dev, - "*DBG* ~tp sent ~tp to ~tw~n and " - "switched to state ~tw~n", - [Name, Msg, To, StateName]); + "*DBG* ~tp sent ~tp to ~tw~n and " + "switched to state ~tw~n", + [Name, Msg, To, StateName]); print_event(Dev, return, {Name, StateName}) -> - io:format(Dev, "*DBG* ~tp switched to state ~tw~n", - [Name, StateName]). + io:format(Dev, + "*DBG* ~tp switched to state ~tw~n", + [Name, StateName]). 
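print_event/3 above only runs when debug hooks are installed; otherwise decode_msg/10 takes the Debug =:= [] fast path. Two illustrative ways to switch the *DBG* output on (my_fsm is a made-up callback module):

    {ok, Pid} = gen_fsm_compat:start_link(my_fsm, [], [{debug, [trace]}]),
    %% or later, on an already-running process:
    sys:trace(Pid, true).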
handle_msg(Msg, Parent, Name, StateName, StateData, Mod, - _Time, HibernateAfterTimeout) -> + _Time, HibernateAfterTimeout) -> %No debug here From = from(Msg), case catch dispatch(Msg, Mod, StateName, StateData) of - {next_state, NStateName, NStateData} -> - loop(Parent, Name, NStateName, NStateData, Mod, - infinity, HibernateAfterTimeout, []); - {next_state, NStateName, NStateData, Time1} -> - loop(Parent, Name, NStateName, NStateData, Mod, Time1, - HibernateAfterTimeout, []); - {reply, Reply, NStateName, NStateData} - when From =/= undefined -> - reply(From, Reply), - loop(Parent, Name, NStateName, NStateData, Mod, - infinity, HibernateAfterTimeout, []); - {reply, Reply, NStateName, NStateData, Time1} - when From =/= undefined -> - reply(From, Reply), - loop(Parent, Name, NStateName, NStateData, Mod, Time1, - HibernateAfterTimeout, []); - {stop, Reason, NStateData} -> - terminate(Reason, Name, Msg, Mod, StateName, NStateData, - []); - {stop, Reason, Reply, NStateData} - when From =/= undefined -> - {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, - StateName, NStateData, [])), - reply(From, Reply), - exit(R); - {'EXIT', - {undef, [{Mod, handle_info, [_, _, _], _} | _]}} -> - error_logger:warning_msg("** Undefined handle_info in ~p~n** Unhandled " - "message: ~tp~n", - [Mod, Msg]), - loop(Parent, Name, StateName, StateData, Mod, infinity, - HibernateAfterTimeout, []); - {'EXIT', What} -> - terminate(What, Name, Msg, Mod, StateName, StateData, - []); - Reply -> - terminate({bad_return_value, Reply}, Name, Msg, Mod, - StateName, StateData, []) + {next_state, NStateName, NStateData} -> + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + []); + {next_state, NStateName, NStateData, Time1} -> + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + []); + {reply, Reply, NStateName, NStateData} + when From =/= undefined -> + reply(From, Reply), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + []); + {reply, Reply, NStateName, NStateData, Time1} + when From =/= undefined -> + reply(From, Reply), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + []); + {stop, Reason, NStateData} -> + terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + []); + {stop, Reason, Reply, NStateData} + when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + [])), + reply(From, Reply), + exit(R); + {'EXIT', + {undef, [{Mod, handle_info, [_, _, _], _} | _]}} -> + error_logger:warning_msg("** Undefined handle_info in ~p~n** Unhandled " + "message: ~tp~n", + [Mod, Msg]), + loop(Parent, + Name, + StateName, + StateData, + Mod, + infinity, + HibernateAfterTimeout, + []); + {'EXIT', What} -> + terminate(What, + Name, + Msg, + Mod, + StateName, + StateData, + []); + Reply -> + terminate({bad_return_value, Reply}, + Name, + Msg, + Mod, + StateName, + StateData, + []) end. 
handle_msg(Msg, Parent, Name, StateName, StateData, Mod, - _Time, HibernateAfterTimeout, Debug) -> + _Time, HibernateAfterTimeout, Debug) -> From = from(Msg), case catch dispatch(Msg, Mod, StateName, StateData) of - {next_state, NStateName, NStateData} -> - Debug1 = sys:handle_debug(Debug, fun print_event/3, - {Name, NStateName}, return), - loop(Parent, Name, NStateName, NStateData, Mod, - infinity, HibernateAfterTimeout, Debug1); - {next_state, NStateName, NStateData, Time1} -> - Debug1 = sys:handle_debug(Debug, fun print_event/3, - {Name, NStateName}, return), - loop(Parent, Name, NStateName, NStateData, Mod, Time1, - HibernateAfterTimeout, Debug1); - {reply, Reply, NStateName, NStateData} - when From =/= undefined -> - Debug1 = reply(Name, From, Reply, Debug, NStateName), - loop(Parent, Name, NStateName, NStateData, Mod, - infinity, HibernateAfterTimeout, Debug1); - {reply, Reply, NStateName, NStateData, Time1} - when From =/= undefined -> - Debug1 = reply(Name, From, Reply, Debug, NStateName), - loop(Parent, Name, NStateName, NStateData, Mod, Time1, - HibernateAfterTimeout, Debug1); - {stop, Reason, NStateData} -> - terminate(Reason, Name, Msg, Mod, StateName, NStateData, - Debug); - {stop, Reason, Reply, NStateData} - when From =/= undefined -> - {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, - StateName, NStateData, Debug)), - _ = reply(Name, From, Reply, Debug, StateName), - exit(R); - {'EXIT', What} -> - terminate(What, Name, Msg, Mod, StateName, StateData, - Debug); - Reply -> - terminate({bad_return_value, Reply}, Name, Msg, Mod, - StateName, StateData, Debug) + {next_state, NStateName, NStateData} -> + Debug1 = sys:handle_debug(Debug, + fun print_event/3, + {Name, NStateName}, + return), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug1); + {next_state, NStateName, NStateData, Time1} -> + Debug1 = sys:handle_debug(Debug, + fun print_event/3, + {Name, NStateName}, + return), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + Debug1); + {reply, Reply, NStateName, NStateData} + when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug1); + {reply, Reply, NStateName, NStateData, Time1} + when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + Debug1); + {stop, Reason, NStateData} -> + terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + Debug); + {stop, Reason, Reply, NStateData} + when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + Debug)), + _ = reply(Name, From, Reply, Debug, StateName), + exit(R); + {'EXIT', What} -> + terminate(What, + Name, + Msg, + Mod, + StateName, + StateData, + Debug); + Reply -> + terminate({bad_return_value, Reply}, + Name, + Msg, + Mod, + StateName, + StateData, + Debug) end. 
dispatch({'$gen_event', Event}, Mod, StateName, - StateData) -> + StateData) -> Mod:StateName(Event, StateData); dispatch({'$gen_all_state_event', Event}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:handle_event(Event, StateName, StateData); dispatch({'$gen_sync_event', From, Event}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:StateName(Event, From, StateData); dispatch({'$gen_sync_all_state_event', From, Event}, - Mod, StateName, StateData) -> - Mod:handle_sync_event(Event, From, StateName, - StateData); + Mod, StateName, StateData) -> + Mod:handle_sync_event(Event, + From, + StateName, + StateData); dispatch({timeout, Ref, {'$gen_timer', Msg}}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:StateName({timeout, Ref, Msg}, StateData); dispatch({timeout, _Ref, {'$gen_event', Event}}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:StateName(Event, StateData); dispatch(Info, Mod, StateName, StateData) -> Mod:handle_info(Info, StateName, StateData). @@ -634,67 +910,83 @@ reply({To, Tag}, Reply) -> catch To ! {Tag, Reply}. reply(Name, {To, Tag}, Reply, Debug, StateName) -> reply({To, Tag}, Reply), - sys:handle_debug(Debug, fun print_event/3, Name, - {out, Reply, To, StateName}). + sys:handle_debug(Debug, + fun print_event/3, + Name, + {out, Reply, To, StateName}). %%% --------------------------------------------------- %%% Terminate the server. %%% --------------------------------------------------- -spec terminate(term(), _, _, atom(), _, _, - _) -> no_return(). + _) -> no_return(). terminate(Reason, Name, Msg, Mod, StateName, StateData, - Debug) -> + Debug) -> case erlang:function_exported(Mod, terminate, 3) of - true -> - case catch Mod:terminate(Reason, StateName, StateData) - of - {'EXIT', R} -> - FmtStateData = format_status(terminate, Mod, get(), - StateData), - error_info(R, Name, Msg, StateName, FmtStateData, - Debug), - exit(R); - _ -> ok - end; - false -> ok + true -> + case catch Mod:terminate(Reason, StateName, StateData) + of + {'EXIT', R} -> + FmtStateData = format_status(terminate, + Mod, + get(), + StateData), + error_info(R, + Name, + Msg, + StateName, + FmtStateData, + Debug), + exit(R); + _ -> ok + end; + false -> ok end, case Reason of - normal -> exit(normal); - shutdown -> exit(shutdown); - {shutdown, _} = Shutdown -> exit(Shutdown); - _ -> - FmtStateData1 = format_status(terminate, Mod, get(), - StateData), - error_info(Reason, Name, Msg, StateName, FmtStateData1, - Debug), - exit(Reason) + normal -> exit(normal); + shutdown -> exit(shutdown); + {shutdown, _} = Shutdown -> exit(Shutdown); + _ -> + FmtStateData1 = format_status(terminate, + Mod, + get(), + StateData), + error_info(Reason, + Name, + Msg, + StateName, + FmtStateData1, + Debug), + exit(Reason) end. 
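dispatch/4 above is the routing table of the whole module. Tracing one asynchronous event end to end (Pid, toggle, off, and Data are illustrative):

    gen_fsm_compat:send_event(Pid, toggle).
    %% Pid receives {'$gen_event', toggle}
    %%   -> dispatch({'$gen_event', toggle}, Mod, off, Data)
    %%   -> Mod:off(toggle, Data)
    %% Sync events carry From and land in Mod:off(Event, From, Data);
    %% all-state variants go to Mod:handle_event/3 / handle_sync_event/4;
    %% anything unrecognised falls through to Mod:handle_info/3.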
error_info(Reason, Name, Msg, StateName, StateData, - Debug) -> + Debug) -> Reason1 = case Reason of - {undef, [{M, F, A, L} | MFAs]} -> - case code:is_loaded(M) of - false -> - {'module could not be loaded', [{M, F, A, L} | MFAs]}; - _ -> - case erlang:function_exported(M, F, length(A)) of - true -> Reason; - false -> - {'function not exported', [{M, F, A, L} | MFAs]} - end - end; - _ -> Reason - end, + {undef, [{M, F, A, L} | MFAs]} -> + case code:is_loaded(M) of + false -> + {'module could not be loaded', + [{M, F, A, L} | MFAs]}; + _ -> + case erlang:function_exported(M, F, length(A)) of + true -> Reason; + false -> + {'function not exported', + [{M, F, A, L} | MFAs]} + end + end; + _ -> Reason + end, Str = "** State machine ~tp terminating \n" ++ - get_msg_str(Msg) ++ - "** When State == ~tp~n** Data " - "== ~tp~n** Reason for termination = " - "~n** ~tp~n", + get_msg_str(Msg) ++ + "** When State == ~tp~n** Data " + "== ~tp~n** Reason for termination = " + "~n** ~tp~n", format(Str, - [Name, get_msg(Msg), StateName, StateData, Reason1]), + [Name, get_msg(Msg), StateName, StateData, Reason1]), sys:print_log(Debug), ok. @@ -730,62 +1022,80 @@ get_msg(Msg) -> Msg. -if((?OTP_RELEASE) >= 22). format_status(Opt, StatusData) -> - [PDict, SysState, Parent, Debug, - [Name, StateName, StateData, Mod, _Time, + [PDict, + SysState, + Parent, + Debug, + [Name, + StateName, + StateData, + Mod, + _Time, _HibernateAfterTimeout]] = - StatusData, + StatusData, Header = - gen:format_status_header("Status for state machine", - Name), + gen:format_status_header("Status for state machine", + Name), Log = sys:get_log(Debug), Specfic = format_status(Opt, Mod, PDict, StateData), Specfic = case format_status(Opt, Mod, PDict, StateData) - of - S when is_list(S) -> S; - S -> [S] - end, + of + S when is_list(S) -> S; + S -> [S] + end, [{header, Header}, {data, - [{"Status", SysState}, {"Parent", Parent}, - {"Logged events", Log}, {"StateName", StateName}]} + [{"Status", SysState}, + {"Parent", Parent}, + {"Logged events", Log}, + {"StateName", StateName}]} | Specfic]. -elif((?OTP_RELEASE) >= 21). format_status(Opt, StatusData) -> - [PDict, SysState, Parent, Debug, - [Name, StateName, StateData, Mod, _Time, + [PDict, + SysState, + Parent, + Debug, + [Name, + StateName, + StateData, + Mod, + _Time, _HibernateAfterTimeout]] = - StatusData, + StatusData, Header = - gen:format_status_header("Status for state machine", - Name), + gen:format_status_header("Status for state machine", + Name), %% Log = sys:get_log(Debug), Log = sys:get_debug(log, Debug, []), Specfic = format_status(Opt, Mod, PDict, StateData), Specfic = case format_status(Opt, Mod, PDict, StateData) - of - S when is_list(S) -> S; - S -> [S] - end, + of + S when is_list(S) -> S; + S -> [S] + end, [{header, Header}, {data, - [{"Status", SysState}, {"Parent", Parent}, - {"Logged events", Log}, {"StateName", StateName}]} + [{"Status", SysState}, + {"Parent", Parent}, + {"Logged events", Log}, + {"StateName", StateName}]} | Specfic]. -endif. format_status(Opt, Mod, PDict, State) -> DefStatus = case Opt of - terminate -> State; - _ -> [{data, [{"StateData", State}]}] - end, + terminate -> State; + _ -> [{data, [{"StateData", State}]}] + end, case erlang:function_exported(Mod, format_status, 2) of - true -> - case catch Mod:format_status(Opt, [PDict, State]) of - {'EXIT', _} -> DefStatus; - Else -> Else - end; - _ -> DefStatus + true -> + case catch Mod:format_status(Opt, [PDict, State]) of + {'EXIT', _} -> DefStatus; + Else -> Else + end; + _ -> DefStatus end. 
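The format_status/4 helper above falls back to {"StateData", State} unless the callback module exports format_status/2. A sketch of that optional callback, assuming map-based state with a hypothetical secret field to redact:

    %% Optional; probed via erlang:function_exported(Mod, format_status, 2).
    format_status(terminate, [_PDict, State]) ->
        maps:without([secret], State);   % scrub crash-report state
    format_status(_Opt, [_PDict, State]) ->
        [{data, [{"StateData", maps:without([secret], State)}]}].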
diff --git a/src/gen_nb_server.erl b/src/gen_nb_server.erl index db14b0fd8..90c884ba8 100644 --- a/src/gen_nb_server.erl +++ b/src/gen_nb_server.erl @@ -28,67 +28,71 @@ -export([start_link/4]). %% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -define(SERVER, ?MODULE). -record(state, {cb, sock, server_state}). -callback init(InitArgs :: list()) -> {ok, - State :: term()} | - {error, Reason :: term()}. + State :: term()} | + {error, Reason :: term()}. -callback handle_call(Msg :: term(), - From :: {pid(), term()}, State :: term()) -> {reply, - Reply :: - term(), - State :: - term()} | - {reply, - Reply :: - term(), - State :: - term(), - number() | - hibernate} | - {noreply, - State :: - term()} | - {noreply, - State :: - term(), - number() | - hibernate} | - {stop, - Reason :: - term(), - State :: - term()}. + From :: {pid(), term()}, State :: term()) -> {reply, + Reply :: + term(), + State :: + term()} | + {reply, + Reply :: + term(), + State :: + term(), + number() | + hibernate} | + {noreply, + State :: + term()} | + {noreply, + State :: + term(), + number() | + hibernate} | + {stop, + Reason :: + term(), + State :: + term()}. -callback handle_cast(Msg :: term(), - State :: term()) -> {noreply, State :: term()} | - {noreply, State :: term(), - number() | hibernate} | - {stop, Reason :: term(), - State :: term()}. + State :: term()) -> {noreply, State :: term()} | + {noreply, State :: term(), + number() | hibernate} | + {stop, Reason :: term(), + State :: term()}. -callback handle_info(Msg :: term(), - State :: term()) -> {noreply, State :: term()} | - {noreply, State :: term(), - number() | hibernate} | - {stop, Reason :: term(), - State :: term()}. + State :: term()) -> {noreply, State :: term()} | + {noreply, State :: term(), + number() | hibernate} | + {stop, Reason :: term(), + State :: term()}. -callback terminate(Reason :: term(), - State :: term()) -> ok. + State :: term()) -> ok. -callback sock_opts() -> [gen_tcp:listen_option()]. -callback new_connection(inet:socket(), - State :: term()) -> {ok, NewState :: term()} | - {stop, Reason :: term(), - NewState :: term()}. + State :: term()) -> {ok, NewState :: term()} | + {stop, Reason :: term(), + NewState :: term()}. %% @spec start_link(CallbackModule, IpAddr, Port, InitParams) -> Result %% CallbackModule = atom() @@ -99,98 +103,114 @@ %% @doc Start server listening on IpAddr:Port start_link(CallbackModule, IpAddr, Port, InitParams) -> gen_server:start_link(?MODULE, - [CallbackModule, IpAddr, Port, InitParams], []). + [CallbackModule, IpAddr, Port, InitParams], + []). %% @hidden init([CallbackModule, IpAddr, Port, InitParams]) -> case CallbackModule:init(InitParams) of - {ok, ServerState} -> - case listen_on(CallbackModule, IpAddr, Port) of - {ok, Sock} -> - {ok, - #state{cb = CallbackModule, sock = Sock, - server_state = ServerState}}; - Error -> - CallbackModule:terminate(Error, ServerState), Error - end; - Err -> Err + {ok, ServerState} -> + case listen_on(CallbackModule, IpAddr, Port) of + {ok, Sock} -> + {ok, + #state{cb = CallbackModule, sock = Sock, + server_state = ServerState}}; + Error -> + CallbackModule:terminate(Error, ServerState), + Error + end; + Err -> Err end. 
%% @hidden handle_call(Request, From, - #state{cb = Callback, server_state = ServerState} = - State) -> + #state{cb = Callback, server_state = ServerState} = + State) -> case Callback:handle_call(Request, From, ServerState) of - {reply, Reply, NewServerState} -> - {reply, Reply, - State#state{server_state = NewServerState}}; - {reply, Reply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {reply, Reply, - State#state{server_state = NewServerState}, Arg}; - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, Reason, - State#state{server_state = NewServerState}}; - {stop, Reason, Reply, NewServerState} -> - {stop, Reason, Reply, - State#state{server_state = NewServerState}} + {reply, Reply, NewServerState} -> + {reply, + Reply, + State#state{server_state = NewServerState}}; + {reply, Reply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {reply, + Reply, + State#state{server_state = NewServerState}, + Arg}; + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, + State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}}; + {stop, Reason, Reply, NewServerState} -> + {stop, + Reason, + Reply, + State#state{server_state = NewServerState}} end. %% @hidden handle_cast(Msg, - #state{cb = Callback, server_state = ServerState} = - State) -> + #state{cb = Callback, server_state = ServerState} = + State) -> case Callback:handle_cast(Msg, ServerState) of - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, Reason, - State#state{server_state = NewServerState}} + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, + State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}} end. 
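handle_call/3 and handle_cast/2 above (and handle_info/2 below) all follow the same shape: unwrap server_state, delegate to the callback module, re-wrap the result. A minimal callback module satisfying the behaviour declared at the top of this file, sketched as an echo acceptor with illustrative names:

    -module(echo_nb_server).
    -behaviour(gen_nb_server).
    -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
             terminate/2, sock_opts/0, new_connection/2]).

    init([]) -> {ok, #{}}.

    sock_opts() -> [binary, {active, once}, {reuseaddr, true}].

    new_connection(Sock, State) ->
        ok = inet:setopts(Sock, [{active, once}]),
        {ok, State}.

    handle_info({tcp, Sock, Data}, State) ->
        ok = gen_tcp:send(Sock, Data),               % echo the payload back
        ok = inet:setopts(Sock, [{active, once}]),   % re-arm active-once
        {noreply, State};
    handle_info({tcp_closed, _Sock}, State) -> {noreply, State};
    handle_info(_Info, State) -> {noreply, State}.

    handle_call(_Msg, _From, State) -> {reply, ok, State}.
    handle_cast(_Msg, State) -> {noreply, State}.
    terminate(_Reason, _State) -> ok.

Such a module would be started with gen_nb_server:start_link(echo_nb_server, "127.0.0.1", 5555, []).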
%% @hidden -handle_info({inet_async, ListSock, _Ref, - {ok, CliSocket}}, - #state{cb = Callback, server_state = ServerState} = - State) -> +handle_info({inet_async, + ListSock, + _Ref, + {ok, CliSocket}}, + #state{cb = Callback, server_state = ServerState} = + State) -> inet_db:register_socket(CliSocket, inet_tcp), case Callback:new_connection(CliSocket, ServerState) of - {ok, NewServerState} -> - {ok, _} = prim_inet:async_accept(ListSock, -1), - {noreply, State#state{server_state = NewServerState}}; - {stop, Reason, NewServerState} -> - {stop, Reason, - State#state{server_state = NewServerState}} + {ok, NewServerState} -> + {ok, _} = prim_inet:async_accept(ListSock, -1), + {noreply, State#state{server_state = NewServerState}}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}} end; handle_info(Info, - #state{cb = Callback, server_state = ServerState} = - State) -> + #state{cb = Callback, server_state = ServerState} = + State) -> case Callback:handle_info(Info, ServerState) of - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, Reason, - State#state{server_state = NewServerState}} + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, + State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}} end. %% @hidden terminate(Reason, - #state{cb = Callback, sock = Sock, - server_state = ServerState}) -> + #state{cb = Callback, sock = Sock, + server_state = ServerState}) -> gen_tcp:close(Sock), Callback:terminate(Reason, ServerState), ok. @@ -208,21 +228,21 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% Result = {ok, port()} | {error, any()} listen_on(CallbackModule, IpAddr, Port) when is_tuple(IpAddr) andalso - (8 =:= size(IpAddr) orelse 4 =:= size(IpAddr)) -> + (8 =:= size(IpAddr) orelse 4 =:= size(IpAddr)) -> SockOpts = [{ip, IpAddr} | CallbackModule:sock_opts()], case gen_tcp:listen(Port, SockOpts) of - {ok, LSock} -> - {ok, _Ref} = prim_inet:async_accept(LSock, -1), - {ok, LSock}; - Err -> Err + {ok, LSock} -> + {ok, _Ref} = prim_inet:async_accept(LSock, -1), + {ok, LSock}; + Err -> Err end; listen_on(CallbackModule, IpAddrStr, Port) -> case inet_parse:address(IpAddrStr) of - {ok, IpAddr} -> listen_on(CallbackModule, IpAddr, Port); - Err -> - logger:critical("Cannot start listener for ~p\n " - " on invalid address " - "~p:~p", - [CallbackModule, IpAddrStr, Port]), - Err + {ok, IpAddr} -> listen_on(CallbackModule, IpAddr, Port); + Err -> + logger:critical("Cannot start listener for ~p\n " + " on invalid address " + "~p:~p", + [CallbackModule, IpAddrStr, Port]), + Err end. diff --git a/src/riak_core.erl b/src/riak_core.erl index d3ae6cae2..fd3670f2b 100644 --- a/src/riak_core.erl +++ b/src/riak_core.erl @@ -21,18 +21,27 @@ %% ------------------------------------------------------------------- -module(riak_core). --export([stop/0, stop/1, join/1, join/4, staged_join/1, - remove/1, down/1, leave/0, remove_from_cluster/1]). +-export([stop/0, + stop/1, + join/1, + join/4, + staged_join/1, + remove/1, + down/1, + leave/0, + remove_from_cluster/1]). -export([vnode_modules/0, health_check/1]). 
--export([register/1, register/2, bucket_fixups/0, - bucket_validators/0]). +-export([register/1, + register/2, + bucket_fixups/0, + bucket_validators/0]). -export([stat_mods/0, stat_prefix/0]). -export([add_guarded_event_handler/3, - add_guarded_event_handler/4]). + add_guarded_event_handler/4]). -export([delete_guarded_event_handler/3]). @@ -86,22 +95,24 @@ join(_, Node, Auto) -> join(node(), Node, false, Auto). join(_, Node, Rejoin, Auto) -> case net_adm:ping(Node) of - pang -> {error, not_reachable}; - pong -> standard_join(Node, Rejoin, Auto) + pang -> {error, not_reachable}; + pong -> standard_join(Node, Rejoin, Auto) end. get_other_ring(Node) -> - riak_core_util:safe_rpc(Node, riak_core_ring_manager, - get_raw_ring, []). + riak_core_util:safe_rpc(Node, + riak_core_ring_manager, + get_raw_ring, + []). standard_join(Node, Rejoin, Auto) when is_atom(Node) -> case net_adm:ping(Node) of - pong -> - case get_other_ring(Node) of - {ok, Ring} -> standard_join(Node, Ring, Rejoin, Auto); - _ -> {error, unable_to_get_join_ring} - end; - pang -> {error, not_reachable} + pong -> + case get_other_ring(Node) of + {ok, Ring} -> standard_join(Node, Ring, Rejoin, Auto); + _ -> {error, unable_to_get_join_ring} + end; + pang -> {error, not_reachable} end. %% `init:get_status/0' will return a 2-tuple reflecting the init @@ -116,100 +127,106 @@ standard_join(Node, Ring, Rejoin, Auto) -> {ok, MyRing} = riak_core_ring_manager:get_raw_ring(), InitComplete = init_complete(init:get_status()), SameSize = riak_core_ring:num_partitions(MyRing) =:= - riak_core_ring:num_partitions(Ring), + riak_core_ring:num_partitions(Ring), Singleton = [node()] =:= - riak_core_ring:all_members(MyRing), + riak_core_ring:all_members(MyRing), case {InitComplete, Rejoin or Singleton, SameSize} of - {false, _, _} -> {error, node_still_starting}; - {_, false, _} -> {error, not_single_node}; - {_, _, false} -> {error, different_ring_sizes}; - _ -> - Ring2 = riak_core_ring:add_member(node(), Ring, node()), - Ring3 = riak_core_ring:set_owner(Ring2, node()), - Ring4 = riak_core_ring:update_member_meta(node(), Ring3, - node(), gossip_vsn, 2), - Ring5 = Ring4, - Ring6 = maybe_auto_join(Auto, node(), Ring5), - riak_core_ring_manager:set_my_ring(Ring6), - riak_core_gossip:send_ring(Node, node()) + {false, _, _} -> {error, node_still_starting}; + {_, false, _} -> {error, not_single_node}; + {_, _, false} -> {error, different_ring_sizes}; + _ -> + Ring2 = riak_core_ring:add_member(node(), Ring, node()), + Ring3 = riak_core_ring:set_owner(Ring2, node()), + Ring4 = riak_core_ring:update_member_meta(node(), + Ring3, + node(), + gossip_vsn, + 2), + Ring5 = Ring4, + Ring6 = maybe_auto_join(Auto, node(), Ring5), + riak_core_ring_manager:set_my_ring(Ring6), + riak_core_gossip:send_ring(Node, node()) end. maybe_auto_join(false, _Node, Ring) -> Ring; maybe_auto_join(true, Node, Ring) -> - riak_core_ring:update_member_meta(Node, Ring, Node, - '$autojoin', true). + riak_core_ring:update_member_meta(Node, + Ring, + Node, + '$autojoin', + true). remove(Node) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - _ -> standard_remove(Node) + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + _ -> standard_remove(Node) end. 
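join/4 and standard_join/4 above gate on net_adm:ping/1 before pulling the remote raw ring via safe_rpc. From a console on the joining node (the node name is illustrative):

    riak_core:join('riak@host1').         %% {error, not_reachable} on ping failure
    riak_core:staged_join('riak@host1').  %% variant used by the staged plan/commit workflow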
standard_remove(Node) -> riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:remove_member(node(), - Ring2, - Node), - Ring4 = - riak_core_ring:ring_changed(node(), - Ring3), - {new_ring, Ring4} - end, - []), + Ring3 = + riak_core_ring:remove_member(node(), + Ring2, + Node), + Ring4 = + riak_core_ring:ring_changed(node(), + Ring3), + {new_ring, Ring4} + end, + []), ok. down(Node) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case net_adm:ping(Node) of - pong -> {error, is_up}; - pang -> - case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - _ -> - riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:down_member(node(), - Ring2, - Node), - Ring4 = - riak_core_ring:ring_changed(node(), - Ring3), - {new_ring, Ring4} - end, - []), - ok - end + pong -> {error, is_up}; + pang -> + case {riak_core_ring:all_members(Ring), + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + _ -> + riak_core_ring_manager:ring_trans(fun (Ring2, _) -> + Ring3 = + riak_core_ring:down_member(node(), + Ring2, + Node), + Ring4 = + riak_core_ring:ring_changed(node(), + Ring3), + {new_ring, Ring4} + end, + []), + ok + end end. leave() -> Node = node(), {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - {_, valid} -> standard_leave(Node); - {_, _} -> {error, already_leaving} + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + {_, valid} -> standard_leave(Node); + {_, _} -> {error, already_leaving} end. standard_leave(Node) -> riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:leave_member(Node, - Ring2, - Node), - {new_ring, Ring3} - end, - []), + Ring3 = + riak_core_ring:leave_member(Node, + Ring2, + Node), + {new_ring, Ring3} + end, + []), ok. %% @spec remove_from_cluster(ExitingNode :: atom()) -> term() @@ -221,46 +238,46 @@ remove_from_cluster(ExitingNode) vnode_modules() -> case application:get_env(riak_core, vnode_modules) of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. bucket_fixups() -> case application:get_env(riak_core, bucket_fixups) of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. bucket_validators() -> case application:get_env(riak_core, bucket_validators) - of - undefined -> []; - {ok, Mods} -> Mods + of + undefined -> []; + {ok, Mods} -> Mods end. stat_mods() -> case application:get_env(riak_core, stat_mods) of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. health_check(App) -> case application:get_env(riak_core, health_checks) of - undefined -> undefined; - {ok, Mods} -> - case lists:keyfind(App, 1, Mods) of - false -> undefined; - {App, MFA} -> MFA - end + undefined -> undefined; + {ok, Mods} -> + case lists:keyfind(App, 1, Mods) of + false -> undefined; + {App, MFA} -> MFA + end end. 
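vnode_modules/0 through health_check/1 are all reads of the same registry pattern: a riak_core application-env key holding {App, Value} pairs, maintained by the register_* helpers that follow. Illustratively, with a made-up application myapp:

    %% after riak_core:register(myapp, [{vnode_module, myapp_vnode}]):
    {ok, [{myapp, myapp_vnode}]} =
        application:get_env(riak_core, vnode_modules),
    [{myapp, myapp_vnode}] = riak_core:vnode_modules().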
%% Get the application name if not supplied, first by get_application %% then by searching by module name get_app(undefined, Module) -> {ok, App} = case application:get_application(self()) of - {ok, AppName} -> {ok, AppName}; - undefined -> app_for_module(Module) - end, + {ok, AppName} -> {ok, AppName}; + undefined -> app_for_module(Module) + end, App; get_app(App, _Module) -> App. @@ -273,35 +290,41 @@ register(_App, []) -> %% to ensure the new fixups are run against %% the ring. {ok, _R} = riak_core_ring_manager:ring_trans(fun (R, - _A) -> - {new_ring, R} - end, - undefined), + _A) -> + {new_ring, R} + end, + undefined), riak_core_ring_events:force_sync_update(), ok; register(App, [{bucket_fixup, FixupMod} | T]) -> - register_mod(get_app(App, FixupMod), FixupMod, - bucket_fixups), + register_mod(get_app(App, FixupMod), + FixupMod, + bucket_fixups), register(App, T); register(App, [{repl_helper, FixupMod} | T]) -> - register_mod(get_app(App, FixupMod), FixupMod, - repl_helper), + register_mod(get_app(App, FixupMod), + FixupMod, + repl_helper), register(App, T); register(App, [{vnode_module, VNodeMod} | T]) -> - register_mod(get_app(App, VNodeMod), VNodeMod, - vnode_modules), + register_mod(get_app(App, VNodeMod), + VNodeMod, + vnode_modules), register(App, T); register(App, [{health_check, HealthMFA} | T]) -> - register_metadata(get_app(App, HealthMFA), HealthMFA, - health_checks), + register_metadata(get_app(App, HealthMFA), + HealthMFA, + health_checks), register(App, T); register(App, - [{bucket_validator, ValidationMod} | T]) -> - register_mod(get_app(App, ValidationMod), ValidationMod, - bucket_validators), + [{bucket_validator, ValidationMod} | T]) -> + register_mod(get_app(App, ValidationMod), + ValidationMod, + bucket_validators), register(App, T); register(App, [{stat_mod, StatMod} | T]) -> - register_mod(App, StatMod, stat_mods), register(App, T); + register_mod(App, StatMod, stat_mods), + register(App, T); register(App, [{permissions, Permissions} | T]) -> register_mod(App, Permissions, permissions), register(App, T); @@ -311,39 +334,42 @@ register(App, [{auth_mod, {AuthType, AuthMod}} | T]) -> register_mod(App, Module, Type) when is_atom(Type) -> case Type of - vnode_modules -> - riak_core_vnode_proxy_sup:start_proxies(Module); - stat_mods -> - %% STATS - %% riak_core_stats_sup:start_server(Module); - logger:warning("Metric collection disabled"), - ok; - _ -> ok + vnode_modules -> + riak_core_vnode_proxy_sup:start_proxies(Module); + stat_mods -> + %% STATS + %% riak_core_stats_sup:start_server(Module); + logger:warning("Metric collection disabled"), + ok; + _ -> ok end, case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{App, Module}]); - {ok, Mods} -> - application:set_env(riak_core, Type, - lists:usort([{App, Module} | Mods])) + undefined -> + application:set_env(riak_core, Type, [{App, Module}]); + {ok, Mods} -> + application:set_env(riak_core, + Type, + lists:usort([{App, Module} | Mods])) end. register_metadata(App, Value, Type) -> case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{App, Value}]); - {ok, Values} -> - application:set_env(riak_core, Type, - lists:usort([{App, Value} | Values])) + undefined -> + application:set_env(riak_core, Type, [{App, Value}]); + {ok, Values} -> + application:set_env(riak_core, + Type, + lists:usort([{App, Value} | Values])) end. 
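Because register_mod/3 and register_metadata/3 store entries with lists:usort/1, repeated registration is idempotent. A sketch, assuming a running riak_core node (myapp and its health-check MFA are illustrative):

    ok = riak_core:register(myapp,
                            [{health_check, {myapp, check_health, []}}]),
    ok = riak_core:register(myapp,
                            [{health_check, {myapp, check_health, []}}]),
    %% one registry entry, not two:
    {myapp, check_health, []} = riak_core:health_check(myapp).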
register_proplist({Key, Value}, Type) -> case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{Key, Value}]); - {ok, Values} -> - application:set_env(riak_core, Type, - lists:keystore(Key, 1, Values, {Key, Value})) + undefined -> + application:set_env(riak_core, Type, [{Key, Value}]); + {ok, Values} -> + application:set_env(riak_core, + Type, + lists:keystore(Key, 1, Values, {Key, Value})) end. %% @spec add_guarded_event_handler(HandlerMod, Handler, Args) -> AddResult @@ -352,8 +378,10 @@ register_proplist({Key, Value}, Type) -> %% Args = list() %% AddResult = ok | {error, Reason::term()} add_guarded_event_handler(HandlerMod, Handler, Args) -> - add_guarded_event_handler(HandlerMod, Handler, Args, - undefined). + add_guarded_event_handler(HandlerMod, + Handler, + Args, + undefined). %% @spec add_guarded_event_handler(HandlerMod, Handler, Args, ExitFun) -> AddResult %% HandlerMod = module() @@ -368,9 +396,11 @@ add_guarded_event_handler(HandlerMod, Handler, Args) -> %% init() callback and exits when the handler crashes so it can be %% restarted by the supervisor. add_guarded_event_handler(HandlerMod, Handler, Args, - ExitFun) -> + ExitFun) -> riak_core_eventhandler_sup:start_guarded_handler(HandlerMod, - Handler, Args, ExitFun). + Handler, + Args, + ExitFun). %% @spec delete_guarded_event_handler(HandlerMod, Handler, Args) -> Result %% HandlerMod = module() @@ -389,9 +419,10 @@ add_guarded_event_handler(HandlerMod, Handler, Args, %% {error,module_not_found}. If the callback function fails with Reason, %% the function returns {'EXIT',Reason}. delete_guarded_event_handler(HandlerMod, Handler, - Args) -> + Args) -> riak_core_eventhandler_sup:stop_guarded_handler(HandlerMod, - Handler, Args). + Handler, + Args). app_for_module(Mod) -> app_for_module(application:which_applications(), Mod). @@ -400,37 +431,38 @@ app_for_module([], _Mod) -> {ok, undefined}; app_for_module([{App, _, _} | T], Mod) -> {ok, Mods} = application:get_key(App, modules), case lists:member(Mod, Mods) of - true -> {ok, App}; - false -> app_for_module(T, Mod) + true -> {ok, App}; + false -> app_for_module(T, Mod) end. wait_for_application(App) -> wait_for_application(App, 0). wait_for_application(App, Elapsed) -> - case lists:keymember(App, 1, - application:which_applications()) - of - true when Elapsed == 0 -> ok; - true when Elapsed > 0 -> - logger:info("Wait complete for application ~p (~p " - "seconds)", - [App, Elapsed div 1000]), - ok; - false -> - %% Possibly print a notice. - ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, - case ShouldPrint of - true -> - logger:info("Waiting for application ~p to start\n " - " " - "(~p seconds).", - [App, Elapsed div 1000]); - false -> skip - end, - timer:sleep(?WAIT_POLL_INTERVAL), - wait_for_application(App, - Elapsed + (?WAIT_POLL_INTERVAL)) + case lists:keymember(App, + 1, + application:which_applications()) + of + true when Elapsed == 0 -> ok; + true when Elapsed > 0 -> + logger:info("Wait complete for application ~p (~p " + "seconds)", + [App, Elapsed div 1000]), + ok; + false -> + %% Possibly print a notice. + ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, + case ShouldPrint of + true -> + logger:info("Waiting for application ~p to start\n " + " " + "(~p seconds).", + [App, Elapsed div 1000]); + false -> skip + end, + timer:sleep(?WAIT_POLL_INTERVAL), + wait_for_application(App, + Elapsed + (?WAIT_POLL_INTERVAL)) end. 
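add_guarded_event_handler/3,4 and delete_guarded_event_handler/3 above run a gen_event handler under a supervised guard process, per their docblocks: the guard exits when the handler crashes so the supervisor can restart it. A usage sketch (my_ring_watcher is a hypothetical gen_event callback module):

%% Install under supervision:
ok = riak_core:add_guarded_event_handler(riak_core_ring_events,
                                         my_ring_watcher, []),
%% Removal passes Args to my_ring_watcher:terminate/2; the return value
%% is the handler's terminate result, so don't pattern-match ok here:
_ = riak_core:delete_guarded_event_handler(riak_core_ring_events,
                                           my_ring_watcher, []).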
wait_for_service(Service) -> @@ -438,27 +470,27 @@ wait_for_service(Service) -> wait_for_service(Service, Elapsed) -> case lists:member(Service, - riak_core_node_watcher:services(node())) - of - true when Elapsed == 0 -> ok; - true when Elapsed > 0 -> - logger:info("Wait complete for service ~p (~p seconds)", - [Service, Elapsed div 1000]), - ok; - false -> - %% Possibly print a notice. - ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, - case ShouldPrint of - true -> - logger:info("Waiting for service ~p to start\n " - " (~p " - "seconds)", - [Service, Elapsed div 1000]); - false -> skip - end, - timer:sleep(?WAIT_POLL_INTERVAL), - wait_for_service(Service, - Elapsed + (?WAIT_POLL_INTERVAL)) + riak_core_node_watcher:services(node())) + of + true when Elapsed == 0 -> ok; + true when Elapsed > 0 -> + logger:info("Wait complete for service ~p (~p seconds)", + [Service, Elapsed div 1000]), + ok; + false -> + %% Possibly print a notice. + ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, + case ShouldPrint of + true -> + logger:info("Waiting for service ~p to start\n " + " (~p " + "seconds)", + [Service, Elapsed div 1000]); + false -> skip + end, + timer:sleep(?WAIT_POLL_INTERVAL), + wait_for_service(Service, + Elapsed + (?WAIT_POLL_INTERVAL)) end. stat_prefix() -> diff --git a/src/riak_core_apl.erl b/src/riak_core_apl.erl index f2c63531c..3366fd93d 100644 --- a/src/riak_core_apl.erl +++ b/src/riak_core_apl.erl @@ -24,14 +24,24 @@ %% ------------------------------------------------------------------- -module(riak_core_apl). --export([active_owners/1, active_owners/2, get_apl/3, - get_apl/4, get_apl_ann/2, get_apl_ann/3, get_apl_ann/4, - get_apl_ann_with_pnum/1, get_primary_apl/3, - get_primary_apl/4, get_primary_apl_chbin/4, first_up/2, - offline_owners/1, offline_owners/2]). - --export_type([preflist/0, preflist_ann/0, - preflist_with_pnum_ann/0]). +-export([active_owners/1, + active_owners/2, + get_apl/3, + get_apl/4, + get_apl_ann/2, + get_apl_ann/3, + get_apl_ann/4, + get_apl_ann_with_pnum/1, + get_primary_apl/3, + get_primary_apl/4, + get_primary_apl_chbin/4, + first_up/2, + offline_owners/1, + offline_owners/2]). + +-export_type([preflist/0, + preflist_ann/0, + preflist_with_pnum_ann/0]). -ifdef(TEST). @@ -48,15 +58,15 @@ -type preflist() :: [{index(), node()}]. -type preflist_ann() :: [{{index(), node()}, - primary | fallback}]. + primary | fallback}]. %% @type preflist_with_pnum_ann(). %% Annotated preflist where the partition value is an id/number %% (0 to ring_size-1) instead of a hash. -type preflist_with_pnum_ann() :: [{{riak_core_ring:partition_id(), - node()}, - primary | fallback}]. + node()}, + primary | fallback}]. -type iterator() :: term(). @@ -72,7 +82,7 @@ active_owners(Service) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), active_owners(Ring, - riak_core_node_watcher:nodes(Service)). + riak_core_node_watcher:nodes(Service)). -spec active_owners(ring(), [node()]) -> preflist_ann(). @@ -87,28 +97,30 @@ active_owners(Ring, UpNodes) -> get_apl(DocIdx, N, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), - get_apl_chbin(DocIdx, N, CHBin, - riak_core_node_watcher:nodes(Service)). + get_apl_chbin(DocIdx, + N, + CHBin, + riak_core_node_watcher:nodes(Service)). %% @doc Get the active preflist taking account of which nodes are up %% for a given chash/upnodes list. -spec get_apl_chbin(docidx(), n_val(), - chashbin:chashbin(), [node()]) -> preflist(). + chashbin:chashbin(), [node()]) -> preflist(). 
get_apl_chbin(DocIdx, N, CHBin, UpNodes) -> [{Partition, Node} || {{Partition, Node}, _Type} - <- get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes)]. + <- get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes)]. %% @doc Get the active preflist taking account of which nodes are up %% for a given ring/upnodes list. -spec get_apl(docidx(), n_val(), ring(), - [node()]) -> preflist(). + [node()]) -> preflist(). get_apl(DocIdx, N, Ring, UpNodes) -> [{Partition, Node} || {{Partition, Node}, _Type} - <- get_apl_ann(DocIdx, N, Ring, UpNodes)]. + <- get_apl_ann(DocIdx, N, Ring, UpNodes)]. %% @doc Get the active preflist taking account of which nodes are up for a given %% chash/upnodes list and annotate each node with type of primary/fallback. @@ -120,7 +132,7 @@ get_apl_ann(DocIdx, N, UpNodes) -> %% for a given ring/upnodes list and annotate each node with type of %% primary/fallback. -spec get_apl_ann(binary(), n_val(), ring(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_apl_ann(DocIdx, N, Ring, UpNodes) -> UpNodes1 = UpNodes, @@ -132,7 +144,7 @@ get_apl_ann(DocIdx, N, Ring, UpNodes) -> %% @doc Get the active preflist for a given {bucket, key} and list of nodes %% and annotate each node with type of primary/fallback. -spec get_apl_ann(riak_core_bucket:bucket(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_apl_ann({Bucket, Key}, UpNodes) -> BucketProps = riak_core_bucket:get_bucket(Bucket), @@ -157,7 +169,7 @@ get_apl_ann_with_pnum(BKey) -> %% for a given chash/upnodes list and annotate each node with type of %% primary/fallback. -spec get_apl_ann_chbin(binary(), n_val(), chashbin(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes) -> UpNodes1 = UpNodes, @@ -168,16 +180,18 @@ get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes) -> %% @doc Same as get_apl, but returns only the primaries. -spec get_primary_apl(binary(), n_val(), - atom()) -> preflist_ann(). + atom()) -> preflist_ann(). get_primary_apl(DocIdx, N, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), - get_primary_apl_chbin(DocIdx, N, CHBin, - riak_core_node_watcher:nodes(Service)). + get_primary_apl_chbin(DocIdx, + N, + CHBin, + riak_core_node_watcher:nodes(Service)). %% @doc Same as get_apl, but returns only the primaries. -spec get_primary_apl_chbin(binary(), n_val(), - chashbin(), [node()]) -> preflist_ann(). + chashbin(), [node()]) -> preflist_ann(). get_primary_apl_chbin(DocIdx, N, CHBin, UpNodes) -> UpNodes1 = UpNodes, @@ -188,7 +202,7 @@ get_primary_apl_chbin(DocIdx, N, CHBin, UpNodes) -> %% @doc Same as get_apl, but returns only the primaries. -spec get_primary_apl(binary(), n_val(), ring(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_primary_apl(DocIdx, N, Ring, UpNodes) -> UpNodes1 = UpNodes, @@ -203,11 +217,11 @@ first_up(DocIdx, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), Itr = chashbin:iterator(DocIdx, CHBin), UpSet = - ordsets:from_list(riak_core_node_watcher:nodes(Service)), + ordsets:from_list(riak_core_node_watcher:nodes(Service)), Itr2 = chashbin:itr_next_while(fun ({_P, Node}) -> - not ordsets:is_element(Node, UpSet) - end, - Itr), + not ordsets:is_element(Node, UpSet) + end, + Itr), chashbin:itr_value(Itr2). 
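For callers, the typical entry into the lookup functions above is: hash the bucket/key, then fetch the preference list. A representative sequence (using riak_kv as the service name is an assumption):

DocIdx = riak_core_util:chash_key({<<"bucket">>, <<"key">>}),
%% Up to N active vnodes, with fallbacks standing in for down primaries:
Preflist = riak_core_apl:get_apl(DocIdx, 3, riak_kv),
%% Primaries only, annotated {{Index, Node}, primary}; may hold fewer
%% than N entries when primaries are down:
Primaries = riak_core_apl:get_primary_apl(DocIdx, 3, riak_kv).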
offline_owners(Service) -> @@ -216,71 +230,77 @@ offline_owners(Service) -> offline_owners(Service, CHBin) when is_atom(Service) -> UpSet = - ordsets:from_list(riak_core_node_watcher:nodes(Service)), + ordsets:from_list(riak_core_node_watcher:nodes(Service)), offline_owners(UpSet, CHBin); offline_owners(UpSet, CHBin) when is_list(UpSet) -> %% UpSet is an ordset of available nodes DownVNodes = chashbin:to_list_filter(fun ({_Index, - Node}) -> - not is_up(Node, UpSet) - end, - CHBin), + Node}) -> + not is_up(Node, UpSet) + end, + CHBin), DownVNodes. %% @doc Split a preference list into up and down lists. -spec check_up(preflist(), [node()], preflist_ann(), - preflist()) -> {preflist_ann(), preflist()}. + preflist()) -> {preflist_ann(), preflist()}. check_up([], _UpNodes, Up, Pangs) -> {lists:reverse(Up), lists:reverse(Pangs)}; check_up([{Partition, Node} | Rest], UpNodes, Up, - Pangs) -> + Pangs) -> case is_up(Node, UpNodes) of - true -> - check_up(Rest, UpNodes, - [{{Partition, Node}, primary} | Up], Pangs); - false -> - check_up(Rest, UpNodes, Up, [{Partition, Node} | Pangs]) + true -> + check_up(Rest, + UpNodes, + [{{Partition, Node}, primary} | Up], + Pangs); + false -> + check_up(Rest, UpNodes, Up, [{Partition, Node} | Pangs]) end. %% @doc Find fallbacks for downed nodes in the preference list. -spec find_fallbacks(preflist(), preflist(), [node()], - preflist_ann()) -> preflist_ann(). + preflist_ann()) -> preflist_ann(). find_fallbacks(_Pangs, [], _UpNodes, Secondaries) -> lists:reverse(Secondaries); find_fallbacks([], _Fallbacks, _UpNodes, Secondaries) -> lists:reverse(Secondaries); find_fallbacks([{Partition, _Node} | Rest] = Pangs, - [{_, FN} | Fallbacks], UpNodes, Secondaries) -> + [{_, FN} | Fallbacks], UpNodes, Secondaries) -> case is_up(FN, UpNodes) of - true -> - find_fallbacks(Rest, Fallbacks, UpNodes, - [{{Partition, FN}, fallback} | Secondaries]); - false -> - find_fallbacks(Pangs, Fallbacks, UpNodes, Secondaries) + true -> + find_fallbacks(Rest, + Fallbacks, + UpNodes, + [{{Partition, FN}, fallback} | Secondaries]); + false -> + find_fallbacks(Pangs, Fallbacks, UpNodes, Secondaries) end. %% @doc Find fallbacks for downed nodes in the preference list. -spec find_fallbacks_chbin(preflist(), iterator(), - [node()], preflist_ann()) -> preflist_ann(). + [node()], preflist_ann()) -> preflist_ann(). find_fallbacks_chbin([], _Fallbacks, _UpNodes, - Secondaries) -> + Secondaries) -> lists:reverse(Secondaries); find_fallbacks_chbin(_, done, _UpNodes, Secondaries) -> lists:reverse(Secondaries); find_fallbacks_chbin([{Partition, _Node} | Rest] = - Pangs, - Itr, UpNodes, Secondaries) -> + Pangs, + Itr, UpNodes, Secondaries) -> {_, FN} = chashbin:itr_value(Itr), Itr2 = chashbin:itr_next(Itr), case is_up(FN, UpNodes) of - true -> - find_fallbacks_chbin(Rest, Itr2, UpNodes, - [{{Partition, FN}, fallback} | Secondaries]); - false -> - find_fallbacks_chbin(Pangs, Itr2, UpNodes, Secondaries) + true -> + find_fallbacks_chbin(Rest, + Itr2, + UpNodes, + [{{Partition, FN}, fallback} | Secondaries]); + false -> + find_fallbacks_chbin(Pangs, Itr2, UpNodes, Secondaries) end. %% @doc Return true if a node is up. @@ -288,7 +308,7 @@ is_up(Node, UpNodes) -> lists:member(Node, UpNodes). %% @doc Return annotated preflist with partition ids/nums instead of hashes. -spec apl_with_partition_nums(preflist_ann(), - riak_core_ring:ring_size()) -> preflist_with_pnum_ann(). + riak_core_ring:ring_size()) -> preflist_with_pnum_ann(). 
apl_with_partition_nums(Apl, Size) -> [{{riak_core_ring_util:hash_to_partition_id(Hash, Size), @@ -301,39 +321,41 @@ apl_with_partition_nums(Apl, Size) -> smallest_test() -> Ring = riak_core_ring:fresh(1, node()), ?assertEqual([{0, node()}], - (get_apl(last_in_ring(), 1, Ring, [node()]))). + (get_apl(last_in_ring(), 1, Ring, [node()]))). four_node_test() -> Nodes = [nodea, nodeb, nodec, noded], Ring = perfect_ring(8, Nodes), ?assertEqual([{0, nodea}, - {182687704666362864775460604089535377456991567872, - nodeb}, - {365375409332725729550921208179070754913983135744, - nodec}], - (get_apl(last_in_ring(), 3, Ring, Nodes))), + {182687704666362864775460604089535377456991567872, + nodeb}, + {365375409332725729550921208179070754913983135744, + nodec}], + (get_apl(last_in_ring(), 3, Ring, Nodes))), %% With a node down ?assertEqual([{182687704666362864775460604089535377456991567872, - nodeb}, - {365375409332725729550921208179070754913983135744, - nodec}, - {0, noded}], - (get_apl(last_in_ring(), 3, Ring, - [nodeb, nodec, noded]))), + nodeb}, + {365375409332725729550921208179070754913983135744, + nodec}, + {0, noded}], + (get_apl(last_in_ring(), + 3, + Ring, + [nodeb, nodec, noded]))), %% With two nodes down ?assertEqual([{365375409332725729550921208179070754913983135744, - nodec}, - {0, noded}, - {182687704666362864775460604089535377456991567872, - nodec}], - (get_apl(last_in_ring(), 3, Ring, [nodec, noded]))), + nodec}, + {0, noded}, + {182687704666362864775460604089535377456991567872, + nodec}], + (get_apl(last_in_ring(), 3, Ring, [nodec, noded]))), %% With the other two nodes down ?assertEqual([{0, nodea}, - {182687704666362864775460604089535377456991567872, - nodeb}, - {365375409332725729550921208179070754913983135744, - nodea}], - (get_apl(last_in_ring(), 3, Ring, [nodea, nodeb]))). + {182687704666362864775460604089535377456991567872, + nodeb}, + {365375409332725729550921208179070754913983135744, + nodea}], + (get_apl(last_in_ring(), 3, Ring, [nodea, nodeb]))). %% Create a perfect ring - RingSize must be a multiple of nodes perfect_ring(RingSize, Nodes) @@ -341,12 +363,13 @@ perfect_ring(RingSize, Nodes) Ring = riak_core_ring:fresh(RingSize, node()), Owners = riak_core_ring:all_owners(Ring), TransferNode = fun ({Idx, _CurOwner}, - {Ring0, [NewOwner | Rest]}) -> - {riak_core_ring:transfer_node(Idx, NewOwner, Ring0), - Rest ++ [NewOwner]} - end, + {Ring0, [NewOwner | Rest]}) -> + {riak_core_ring:transfer_node(Idx, NewOwner, Ring0), + Rest ++ [NewOwner]} + end, {PerfectRing, _} = lists:foldl(TransferNode, - {Ring, Nodes}, Owners), + {Ring, Nodes}, + Owners), PerfectRing. last_in_ring() -> @@ -358,187 +381,222 @@ six_node_test() -> {ok, [Ring]} = file:consult("test/my_ring"), %DocIdx = riak_core_util:chash_key({<<"foo">>, <<"bar">>}), DocIdx = <<73, 212, 27, 234, 104, 13, 150, 207, 0, 82, - 86, 183, 125, 225, 172, 154, 135, 46, 6, 112>>, - Nodes = ['dev1@127.0.0.1', 'dev2@127.0.0.1', - 'dev3@127.0.0.1', 'dev4@127.0.0.1', 'dev5@127.0.0.1', - 'dev6@127.0.0.1'], + 86, 183, 125, 225, 172, 154, 135, 46, 6, 112>>, + Nodes = ['dev1@127.0.0.1', + 'dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1', + 'dev5@127.0.0.1', + 'dev6@127.0.0.1'], %% Fallbacks should be selected by finding the next-highest partition after %% the DocIdx of the key, in this case the 433883 partition. The N %% partitions at that point are the primary partitions. If any of the primaries %% are down, the next up node found by walking the preflist is used as the %% fallback for that partition. 
?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev3@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, Nodes))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev3@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev4@127.0.0.1'}], + (get_apl(DocIdx, 3, Ring, Nodes))), ?assertEqual([{456719261665907161938651510223838443642478919680, - 'dev3@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}, - {433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- ['dev2@127.0.0.1']))), + 'dev3@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev4@127.0.0.1'}, + {433883298582611803841718934712646521460354973696, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev2@127.0.0.1']))), ?assertEqual([{479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}, - {433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev6@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- ['dev2@127.0.0.1', 'dev3@127.0.0.1']))), + 'dev4@127.0.0.1'}, + {433883298582611803841718934712646521460354973696, + 'dev5@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev6@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev2@127.0.0.1', 'dev3@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev6@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev1@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- - ['dev2@127.0.0.1', 'dev3@127.0.0.1', - 'dev4@127.0.0.1']))), + 'dev5@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev6@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev1@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev6@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- - ['dev2@127.0.0.1', 'dev3@127.0.0.1', - 'dev4@127.0.0.1', 'dev1@127.0.0.1']))), + 'dev5@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev6@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1', + 'dev1@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev3@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- ['dev4@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev3@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev4@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev5@127.0.0.1'}, - 
{479555224749202520035584085735030365824602865664, - 'dev6@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- ['dev4@127.0.0.1', 'dev3@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev6@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev4@127.0.0.1', 'dev3@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev5@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev1@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- - ['dev4@127.0.0.1', 'dev3@127.0.0.1', - 'dev6@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev1@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev4@127.0.0.1', + 'dev3@127.0.0.1', + 'dev6@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev5@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev2@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- - ['dev4@127.0.0.1', 'dev3@127.0.0.1', - 'dev6@127.0.0.1', 'dev1@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev2@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev4@127.0.0.1', + 'dev3@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev2@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev2@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- - ['dev4@127.0.0.1', 'dev3@127.0.0.1', - 'dev6@127.0.0.1', 'dev1@127.0.0.1', - 'dev5@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev2@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev2@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev4@127.0.0.1', + 'dev3@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev5@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, - Nodes -- ['dev3@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev4@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev3@127.0.0.1']))), ok. 
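The fallback behaviour exercised by six_node_test above follows directly from check_up/4 and find_fallbacks/4; a hand-evaluated toy trace (partition and node atoms invented for illustration, since both helpers are internal):

%% Preflist p0->n1, p1->n2, p2->n3, with n2 down:
%% check_up([{p0,n1}, {p1,n2}, {p2,n3}], [n1, n3], [], [])
%%   -> {[{{p0,n1}, primary}, {{p2,n3}, primary}],  %% Up
%%       [{p1,n2}]}                                 %% Pangs
%% find_fallbacks/4 then walks the remaining ring positions and assigns
%% p1 to the first up node it meets, e.g. [{{p1,n4}, fallback}] when the
%% next candidate position is owned by an up node n4.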
six_node_bucket_key_ann_test() -> {ok, [Ring]} = file:consult("test/my_ring"), - Nodes = ['dev1@127.0.0.1', 'dev2@127.0.0.1', - 'dev3@127.0.0.1', 'dev4@127.0.0.1', 'dev5@127.0.0.1', - 'dev6@127.0.0.1'], + Nodes = ['dev1@127.0.0.1', + 'dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1', + 'dev5@127.0.0.1', + 'dev6@127.0.0.1'], Bucket = <<"favorite">>, Key = <<"jethrotull">>, - application:set_env(riak_core, default_bucket_props, - [{n_val, 3}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}]), + application:set_env(riak_core, + default_bucket_props, + [{n_val, 3}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}]), riak_core_ring_manager:setup_ets(test), riak_core_ring_manager:set_ring_global(Ring), Size = riak_core_ring:num_partitions(Ring), ?assertEqual([{{34, 'dev5@127.0.0.1'}, primary}, - {{35, 'dev6@127.0.0.1'}, primary}, - {{36, 'dev1@127.0.0.1'}, primary}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes), - Size))), + {{35, 'dev6@127.0.0.1'}, primary}, + {{36, 'dev1@127.0.0.1'}, primary}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes), + Size))), ?assertEqual([{{35, 'dev6@127.0.0.1'}, primary}, - {{36, 'dev1@127.0.0.1'}, primary}, - {{34, 'dev2@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1']), - Size))), + {{36, 'dev1@127.0.0.1'}, primary}, + {{34, 'dev2@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1']), + Size))), ?assertEqual([{{36, 'dev1@127.0.0.1'}, primary}, - {{34, 'dev2@127.0.0.1'}, fallback}, - {{35, 'dev3@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1']), - Size))), + {{34, 'dev2@127.0.0.1'}, fallback}, + {{35, 'dev3@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1']), + Size))), ?assertEqual([{{34, 'dev2@127.0.0.1'}, fallback}, - {{35, 'dev3@127.0.0.1'}, fallback}, - {{36, 'dev4@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1']), - Size))), + {{35, 'dev3@127.0.0.1'}, fallback}, + {{36, 'dev4@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1']), + Size))), ?assertEqual([{{34, 'dev3@127.0.0.1'}, fallback}, - {{35, 'dev4@127.0.0.1'}, fallback}, - {{36, 'dev3@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev2@127.0.0.1']), - Size))), + {{35, 'dev4@127.0.0.1'}, fallback}, + {{36, 'dev3@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev2@127.0.0.1']), + Size))), ?assertEqual([{{34, 'dev4@127.0.0.1'}, fallback}, - {{35, 'dev4@127.0.0.1'}, fallback}, - {{36, 'dev4@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev2@127.0.0.1', - 'dev3@127.0.0.1']), - Size))), + {{35, 'dev4@127.0.0.1'}, fallback}, + {{36, 'dev4@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev2@127.0.0.1', + 'dev3@127.0.0.1']), + Size))), ?assertEqual([{{34, 
'dev5@127.0.0.1'}, primary},
- {{35, 'dev6@127.0.0.1'}, primary},
- {{36, 'dev3@127.0.0.1'}, fallback}],
- (apl_with_partition_nums(get_apl_ann({Bucket, Key},
- Nodes --
- ['dev1@127.0.0.1',
- 'dev2@127.0.0.1']),
- Size))),
+ {{35, 'dev6@127.0.0.1'}, primary},
+ {{36, 'dev3@127.0.0.1'}, fallback}],
+ (apl_with_partition_nums(get_apl_ann({Bucket, Key},
+ Nodes --
+ ['dev1@127.0.0.1',
+ 'dev2@127.0.0.1']),
+ Size))),
riak_core_ring_manager:cleanup_ets(test),
ok.

@@ -548,7 +606,7 @@ chbin_test_() ->
chbin_test_scenario() ->
[chbin_test_scenario(Size, NumNodes)
|| Size <- [32, 64, 128],
- NumNodes <- [1, 2, 3, 4, 5, 8, Size div 4]],
+ NumNodes <- [1, 2, 3, 4, 5, 8, Size div 4]],
ok.

chbin_test_scenario(Size, NumNodes) ->
@@ -559,21 +617,23 @@ chbin_test_scenario(Size, NumNodes) ->
CHBin = chashbin:create(CHash),
Inc = chash:ring_increment(Size),
HashKeys = [<<X:160/integer>>
- || X <- lists:seq(0, RingTop, Inc div 2)],
+ || X <- lists:seq(0, RingTop, Inc div 2)],
Shuffled = riak_core_util:shuffle(Nodes),
_ = CHBin,
[begin
- Up = max(0, NumNodes - Down),
- UpNodes = lists:sublist(Shuffled, Up),
- ?assertEqual((get_apl(HashKey, N, Ring, UpNodes)),
- (get_apl_chbin(HashKey, N, CHBin, UpNodes))),
- ?assertEqual((get_primary_apl(HashKey, N, Ring,
- UpNodes)),
- (get_primary_apl_chbin(HashKey, N, CHBin, UpNodes))),
- ok
+ Up = max(0, NumNodes - Down),
+ UpNodes = lists:sublist(Shuffled, Up),
+ ?assertEqual((get_apl(HashKey, N, Ring, UpNodes)),
+ (get_apl_chbin(HashKey, N, CHBin, UpNodes))),
+ ?assertEqual((get_primary_apl(HashKey,
+ N,
+ Ring,
+ UpNodes)),
+ (get_primary_apl_chbin(HashKey, N, CHBin, UpNodes))),
+ ok
end
|| HashKey <- HashKeys, N <- [1, 2, 3, 4],
- Down <- [0, 1, 2, Size div 2, Size - 1, Size]],
+ Down <- [0, 1, 2, Size div 2, Size - 1, Size]],
ok.

-endif.
diff --git a/src/riak_core_app.erl b/src/riak_core_app.erl
index 646887bbb..dea09a9e3 100644
--- a/src/riak_core_app.erl
+++ b/src/riak_core_app.erl
@@ -36,37 +36,38 @@ start(_StartType, _StartArgs) ->
start_riak_core_sup().

stop(_State) ->
- logger:info("Stopped application riak_core", []), ok.
+ logger:info("Stopped application riak_core", []),
+ ok.

validate_ring_state_directory_exists() ->
riak_core_util:start_app_deps(riak_core),
{ok, RingStateDir} = application:get_env(riak_core,
- ring_state_dir),
+ ring_state_dir),
case filelib:ensure_dir(filename:join(RingStateDir,
- "dummy"))
- of
- ok -> ok;
- {error, RingReason} ->
- logger:critical("Ring state directory ~p does not exist, "
- "and could not be created: ~p",
- [RingStateDir,
- riak_core_util:posix_error(RingReason)]),
- throw({error, invalid_ring_state_dir})
+ "dummy"))
+ of
+ ok -> ok;
+ {error, RingReason} ->
+ logger:critical("Ring state directory ~p does not exist, "
+ "and could not be created: ~p",
+ [RingStateDir,
+ riak_core_util:posix_error(RingReason)]),
+ throw({error, invalid_ring_state_dir})
end.

start_riak_core_sup() ->
%% Spin up the supervisor; prune ring files as necessary
case riak_core_sup:start_link() of
- {ok, Pid} ->
- ok = register_applications(),
- ok = add_ring_event_handler(),
- {ok, Pid};
- {error, Reason} -> {error, Reason}
+ {ok, Pid} ->
+ ok = register_applications(),
+ ok = add_ring_event_handler(),
+ {ok, Pid};
+ {error, Reason} -> {error, Reason}
end.

register_applications() -> ok.

add_ring_event_handler() ->
ok =
- riak_core_ring_events:add_guarded_handler(riak_core_ring_handler,
- []).
+ riak_core_ring_events:add_guarded_handler(riak_core_ring_handler,
+ []).
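One idiom in riak_core_app above deserves a note: filelib:ensure_dir/1 only creates the parent directories of the name it is given, so validate_ring_state_directory_exists/0 joins a throwaway "dummy" filename onto RingStateDir to force creation of the ring directory itself. A standalone sketch (the path is illustrative):

%% Creates data/ring/ if missing; no file named "dummy" is written.
ok = filelib:ensure_dir(filename:join("data/ring", "dummy")).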
diff --git a/src/riak_core_base64url.erl b/src/riak_core_base64url.erl
index b3c6d9993..7337c0742 100644
--- a/src/riak_core_base64url.erl
+++ b/src/riak_core_base64url.erl
@@ -27,9 +27,12 @@
-module(riak_core_base64url).

--export([decode/1, decode_to_string/1, encode/1,
- encode_to_string/1, mime_decode/1,
- mime_decode_to_string/1]).
+-export([decode/1,
+ decode_to_string/1,
+ encode/1,
+ encode_to_string/1,
+ mime_decode/1,
+ mime_decode_to_string/1]).

-spec decode(iodata()) -> binary().

@@ -65,7 +68,7 @@ urlencode(Base64) when is_list(Base64) ->
string:strip(Padded, both, $=);
urlencode(Base64) when is_binary(Base64) ->
Padded = << <<(urlencode_digit(D))>>
- || <<D>> <= Base64 >>,
+ || <<D>> <= Base64 >>,
binary:replace(Padded, <<"=">>, <<"">>, [global]).

urldecode(Base64url) when is_list(Base64url) ->
@@ -74,15 +77,15 @@ urldecode(Base64url) when is_list(Base64url) ->
Prepad ++ Padding;
urldecode(Base64url) when is_binary(Base64url) ->
Prepad = << <<(urldecode_digit(D))>>
- || <<D>> <= Base64url >>,
+ || <<D>> <= Base64url >>,
Padding = padding(Prepad),
<<Prepad/binary, Padding/binary>>.

padding(Base64) when is_binary(Base64) ->
case byte_size(Base64) rem 4 of
- 2 -> <<"==">>;
- 3 -> <<"=">>;
- _ -> <<"">>
+ 2 -> <<"==">>;
+ 3 -> <<"=">>;
+ _ -> <<"">>
end;
padding(Base64) when is_list(Base64) ->
binary_to_list(padding(list_to_binary(Base64))).
diff --git a/src/riak_core_bucket.erl b/src/riak_core_bucket.erl
index 39c437ebb..b688dbaca 100644
--- a/src/riak_core_bucket.erl
+++ b/src/riak_core_bucket.erl
@@ -23,10 +23,18 @@
%% @doc Functions for manipulating bucket properties.
-module(riak_core_bucket).

--export([append_bucket_defaults/1, set_bucket/2,
- get_bucket/1, get_bucket/2, reset_bucket/1,
- get_buckets/1, bucket_nval_map/1, default_object_nval/0,
- merge_props/2, name/1, n_val/1, get_value/2]).
+-export([append_bucket_defaults/1,
+ set_bucket/2,
+ get_bucket/1,
+ get_bucket/2,
+ reset_bucket/1,
+ get_buckets/1,
+ bucket_nval_map/1,
+ default_object_nval/0,
+ merge_props/2,
+ name/1,
+ n_val/1,
+ get_value/2]).

-ifdef(TEST).

@@ -35,7 +43,7 @@

-endif.

-type property() :: {PropName :: atom(),
- PropValue :: any()}.
+ PropValue :: any()}.

-type properties() :: [property()].

@@ -48,8 +56,10 @@

-type bucket() :: binary() | {bucket_type(), binary()}.

--export_type([property/0, properties/0, bucket/0,
- nval_set/0]).
+-export_type([property/0,
+ properties/0,
+ bucket/0,
+ nval_set/0]).

%% @doc Add a list of defaults to global list of defaults for new
%% buckets. If any item is in Items is already set in the
@@ -63,38 +73,42 @@ append_bucket_defaults(Items) when is_list(Items) ->

%% @doc Set the given BucketProps in Bucket or {BucketType, Bucket}. If BucketType does not
%% exist, or is not active, {error, no_type} is returned.
-spec set_bucket(bucket(), [{atom(), any()}]) -> ok |
- {error,
- no_type | [{atom(), atom()}]}.
+ {error,
+ no_type | [{atom(), atom()}]}.

set_bucket({<<"default">>, Name}, BucketProps) ->
set_bucket(Name, BucketProps);
set_bucket(Name, BucketProps0) ->
- set_bucket(fun set_bucket_in_ring/2, Name,
- BucketProps0).
+ set_bucket(fun set_bucket_in_ring/2,
+ Name,
+ BucketProps0).
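Before the internals below, a quick usage sketch of the bucket-property API in this module (bucket name and n_val are illustrative):

%% Props are validated via riak_core_bucket_props:validate/4, merged
%% with the existing props, and written into the ring metadata:
ok = riak_core_bucket:set_bucket(<<"images">>, [{n_val, 5}]),
Props = riak_core_bucket:get_bucket(<<"images">>),
5 = riak_core_bucket:n_val(Props).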
set_bucket(StoreFun, Bucket, BucketProps0) -> OldBucket = get_bucket(Bucket), - case riak_core_bucket_props:validate(update, Bucket, - OldBucket, BucketProps0) - of - {ok, BucketProps} -> - NewBucket = merge_props(BucketProps, OldBucket), - StoreFun(Bucket, NewBucket); - {error, Details} -> - logger:error("Bucket properties validation failed " - "~p~n", - [Details]), - {error, Details} + case riak_core_bucket_props:validate(update, + Bucket, + OldBucket, + BucketProps0) + of + {ok, BucketProps} -> + NewBucket = merge_props(BucketProps, OldBucket), + StoreFun(Bucket, NewBucket); + {error, Details} -> + logger:error("Bucket properties validation failed " + "~p~n", + [Details]), + {error, Details} end. set_bucket_in_ring(Bucket, BucketMeta) -> F = fun (Ring, _Args) -> - {new_ring, - riak_core_ring:update_meta(bucket_key(Bucket), - BucketMeta, Ring)} - end, + {new_ring, + riak_core_ring:update_meta(bucket_key(Bucket), + BucketMeta, + Ring)} + end, {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), + undefined), ok. %% @spec merge_props(list(), list()) -> list() @@ -138,11 +152,11 @@ reset_bucket({<<"default">>, Name}) -> reset_bucket(Name); reset_bucket(Bucket) -> F = fun (Ring, _Args) -> - {new_ring, - riak_core_ring:remove_meta(bucket_key(Bucket), Ring)} - end, + {new_ring, + riak_core_ring:remove_meta(bucket_key(Bucket), Ring)} + end, {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), + undefined), ok. %% @doc Get bucket properties `Props' for all the buckets in the given @@ -152,12 +166,12 @@ reset_bucket(Bucket) -> get_buckets(Ring) -> RingNames = riak_core_ring:get_buckets(Ring), RingBuckets = [get_bucket(Name, Ring) - || Name <- RingNames], + || Name <- RingNames], RingBuckets. %% @doc returns a proplist containing all buckets and their respective N values -spec bucket_nval_map(riak_core_ring()) -> [{binary(), - integer()}]. + integer()}]. bucket_nval_map(Ring) -> [{riak_core_bucket:name(B), riak_core_bucket:n_val(B)} @@ -178,8 +192,8 @@ n_val(BProps) -> get_value(n_val, BProps). get_value(Key, Proplist) -> case lists:keyfind(Key, 1, Proplist) of - {Key, Value} -> Value; - _ -> undefined + {Key, Value} -> Value; + _ -> undefined end. bucket_key({<<"default">>, Name}) -> bucket_key(Name); @@ -193,8 +207,9 @@ bucket_key(Name) -> {bucket, Name}. simple_set_test() -> application:load(riak_core), - application:set_env(riak_core, ring_state_dir, - "_build/test/tmp"), + application:set_env(riak_core, + ring_state_dir, + "_build/test/tmp"), %% appending an empty list of defaults makes up for the fact that %% riak_core_app:start/2 is not called during eunit runs %% (that's where the usual defaults are set at startup), diff --git a/src/riak_core_bucket_props.erl b/src/riak_core_bucket_props.erl index dd5ee8495..19021fe2f 100644 --- a/src/riak_core_bucket_props.erl +++ b/src/riak_core_bucket_props.erl @@ -19,8 +19,11 @@ %% ------------------------------------------------------------------- -module(riak_core_bucket_props). --export([merge/2, validate/4, resolve/2, defaults/0, - append_defaults/1]). +-export([merge/2, + validate/4, + resolve/2, + defaults/0, + append_defaults/1]). -ifdef(TEST). @@ -29,85 +32,100 @@ -endif. -spec merge([{atom(), any()}], - [{atom(), any()}]) -> [{atom(), any()}]. + [{atom(), any()}]) -> [{atom(), any()}]. merge(Overriding, Other) -> - lists:ukeymerge(1, lists:ukeysort(1, Overriding), - lists:ukeysort(1, Other)). + lists:ukeymerge(1, + lists:ukeysort(1, Overriding), + lists:ukeysort(1, Other)). 
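merge/2 above prefers the overriding list on key collisions, since lists:ukeymerge/3 keeps the element from its first argument. A shell illustration:

1> riak_core_bucket_props:merge([{n_val, 5}], [{n_val, 3}, {allow_mult, true}]).
[{allow_mult,true},{n_val,5}]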
-spec validate(create | update, - {riak_core_bucket:bucket_type(), undefined | binary()} | - binary(), - undefined | [{atom(), any()}], - [{atom(), any()}]) -> {ok, [{atom(), any()}]} | - {error, [{atom(), atom()}]}. + {riak_core_bucket:bucket_type(), undefined | binary()} | + binary(), + undefined | [{atom(), any()}], + [{atom(), any()}]) -> {ok, [{atom(), any()}]} | + {error, [{atom(), atom()}]}. validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps) -> + BucketProps) -> ReservedErrors = validate_reserved_names(Bucket), - CoreErrors = validate_core_props(CreateOrUpdate, Bucket, - ExistingProps, BucketProps), - validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps, riak_core:bucket_validators(), - [ReservedErrors, CoreErrors]). + CoreErrors = validate_core_props(CreateOrUpdate, + Bucket, + ExistingProps, + BucketProps), + validate(CreateOrUpdate, + Bucket, + ExistingProps, + BucketProps, + riak_core:bucket_validators(), + [ReservedErrors, CoreErrors]). validate(_CreateOrUpdate, _Bucket, _ExistingProps, - Props, [], ErrorLists) -> + Props, [], ErrorLists) -> case lists:flatten(ErrorLists) of - [] -> {ok, Props}; - Errors -> {error, Errors} + [] -> {ok, Props}; + Errors -> {error, Errors} end; validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps0, [{_App, Validator} | T], Errors0) -> + BucketProps0, [{_App, Validator} | T], Errors0) -> {BucketProps, Errors} = - Validator:validate(CreateOrUpdate, Bucket, - ExistingProps, BucketProps0), - validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps, T, [Errors | Errors0]). + Validator:validate(CreateOrUpdate, + Bucket, + ExistingProps, + BucketProps0), + validate(CreateOrUpdate, + Bucket, + ExistingProps, + BucketProps, + T, + [Errors | Errors0]). validate_core_props(CreateOrUpdate, Bucket, - ExistingProps, BucketProps) -> + ExistingProps, BucketProps) -> lists:foldl(fun (Prop, Errors) -> - case validate_core_prop(CreateOrUpdate, Bucket, - ExistingProps, Prop) - of - true -> Errors; - Error -> [Error | Errors] - end - end, - [], BucketProps). + case validate_core_prop(CreateOrUpdate, + Bucket, + ExistingProps, + Prop) + of + true -> Errors; + Error -> [Error | Errors] + end + end, + [], + BucketProps). 
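validate/6 above folds the proposed props through every module registered under bucket_validators, accumulating errors; as the fold shows, each validator must return {PossiblyRewrittenProps, Errors}. A minimal sketch of such a module (myapp_bucket_validator is hypothetical):

-module(myapp_bucket_validator).

-export([validate/4]).

%% Accept any props whose n_val, when present, is a positive integer.
validate(_CreateOrUpdate, _Bucket, _ExistingProps, BucketProps) ->
    case proplists:get_value(n_val, BucketProps) of
        undefined -> {BucketProps, []};
        N when is_integer(N), N >= 1 -> {BucketProps, []};
        _ -> {BucketProps, [{n_val, must_be_a_positive_integer}]}
    end.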
validate_core_prop(create, {_Bucket, undefined}, - undefined, {claimant, Claimant}) + undefined, {claimant, Claimant}) when Claimant =:= node() -> %% claimant valid on first call to create if claimant is this node true; validate_core_prop(create, {_Bucket, undefined}, - undefined, {claimant, _BadClaimant}) -> + undefined, {claimant, _BadClaimant}) -> %% claimant not valid on first call to create if claimant is not this node {claimant, "Invalid claimant"}; validate_core_prop(create, {_Bucket, undefined}, - Existing, {claimant, Claimant}) -> + Existing, {claimant, Claimant}) -> %% subsequent creation calls cannot modify claimant and it should exist case lists:keyfind(claimant, 1, Existing) of - false -> - {claimant, - "No claimant details found in existing " - "properties"}; - {claimant, Claimant} -> true; - {claimant, _Other} -> - {claimant, "Cannot modify claimant property"} + false -> + {claimant, + "No claimant details found in existing " + "properties"}; + {claimant, Claimant} -> true; + {claimant, _Other} -> + {claimant, "Cannot modify claimant property"} end; validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {claimant, _Claimant}) -> + _Existing, {claimant, _Claimant}) -> %% cannot update claimant {claimant, "Cannot update claimant property"}; validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {ddl, _DDL}) -> + _Existing, {ddl, _DDL}) -> %% cannot update time series DDL {ddl, "Cannot update time series data definition"}; validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {table_def, _DDL}) -> + _Existing, {table_def, _DDL}) -> %% cannot update time series DDL (or, if it slips past riak_kv_console, %% the table_def SQL(ish) code that is parsed to make a DDL) %% @@ -115,19 +133,19 @@ validate_core_prop(update, {_Bucket, _BucketName}, %% errors from existing_atom functions {ddl, "Cannot update time series data definition"}; validate_core_prop(create, {_Bucket, undefined}, - undefined, {active, false}) -> + undefined, {active, false}) -> %% first creation call that sets active to false is always valid true; validate_core_prop(create, {_Bucket, undefined}, - _Existing, {active, false}) -> + _Existing, {active, false}) -> %% subsequent creation calls that leaves active false is valid true; validate_core_prop(update, {_Bucket, _}, _Existing, - {active, true}) -> + {active, true}) -> %% calls to update that do not modify active are valid true; validate_core_prop(_, {_Bucket, _}, _Existing, - {active, _}) -> + {active, _}) -> %% subsequent creation calls or update calls cannot modify active (it is modified directly %% by riak_core_claimant) {active, "Cannot modify active property"}; @@ -137,8 +155,8 @@ validate_core_prop(_, _, _, _) -> validate_reserved_names(Bucket) -> case validate_reserved_name(Bucket) of - ok -> []; - ErrStr -> [{reserved_name, ErrStr}] + ok -> []; + ErrStr -> [{reserved_name, ErrStr}] end. validate_reserved_name({<<"any">>, _}) -> @@ -149,32 +167,36 @@ validate_reserved_name(_) -> ok. -spec defaults() -> [{atom(), any()}]. defaults() -> - application:get_env(riak_core, default_bucket_props, - undefined). + application:get_env(riak_core, + default_bucket_props, + undefined). -spec append_defaults([{atom(), any()}]) -> ok. 
append_defaults(Items) when is_list(Items) -> OldDefaults = application:get_env(riak_core, - default_bucket_props, []), + default_bucket_props, + []), NewDefaults = merge(OldDefaults, Items), FixedDefaults = case riak_core:bucket_fixups() of - [] -> NewDefaults; - Fixups -> - riak_core_ring_manager:run_fixups(Fixups, default, - NewDefaults) - end, - application:set_env(riak_core, default_bucket_props, - FixedDefaults), + [] -> NewDefaults; + Fixups -> + riak_core_ring_manager:run_fixups(Fixups, + default, + NewDefaults) + end, + application:set_env(riak_core, + default_bucket_props, + FixedDefaults), %% do a noop transform on the ring, to make the fixups re-run catch riak_core_ring_manager:ring_trans(fun (Ring, _) -> - {new_ring, Ring} - end, - undefined), + {new_ring, Ring} + end, + undefined), ok. -spec resolve([{atom(), any()}], - [{atom(), any()}]) -> [{atom(), any()}]. + [{atom(), any()}]) -> [{atom(), any()}]. %%noinspection ErlangUnusedVariable resolve(PropsA, PropsB) @@ -182,36 +204,37 @@ resolve(PropsA, PropsB) PropsASorted = lists:ukeysort(1, PropsA), PropsBSorted = lists:ukeysort(1, PropsB), {_, Resolved} = lists:foldl(fun ({KeyA, _} = PropA, - {[{KeyA, _} = PropB | RestB], Acc}) -> - {RestB, - [{KeyA, resolve_prop(PropA, PropB)} - | Acc]}; - (PropA, {RestB, Acc}) -> - {RestB, [PropA | Acc]} - end, - {PropsBSorted, []}, PropsASorted), + {[{KeyA, _} = PropB | RestB], Acc}) -> + {RestB, + [{KeyA, resolve_prop(PropA, PropB)} + | Acc]}; + (PropA, {RestB, Acc}) -> + {RestB, [PropA | Acc]} + end, + {PropsBSorted, []}, + PropsASorted), Resolved. resolve_prop({allow_mult, Mult1}, - {allow_mult, Mult2}) -> + {allow_mult, Mult2}) -> Mult1 orelse - Mult2; %% assumes allow_mult=true is default + Mult2; %% assumes allow_mult=true is default resolve_prop({basic_quorum, Basic1}, - {basic_quorum, Basic2}) -> + {basic_quorum, Basic2}) -> Basic1 andalso Basic2; resolve_prop({big_vclock, Big1}, {big_vclock, Big2}) -> max(Big1, Big2); resolve_prop({chash_keyfun, KeyFun1}, - {chash_keyfun, _KeyFun2}) -> + {chash_keyfun, _KeyFun2}) -> KeyFun1; %% arbitrary choice resolve_prop({dw, DW1}, {dw, DW2}) -> %% 'quorum' wins over set numbers max(DW1, DW2); resolve_prop({last_write_wins, LWW1}, - {last_write_wins, LWW2}) -> + {last_write_wins, LWW2}) -> LWW1 andalso LWW2; resolve_prop({linkfun, LinkFun1}, - {linkfun, _LinkFun2}) -> + {linkfun, _LinkFun2}) -> LinkFun1; %% arbitrary choice resolve_prop({n_val, N1}, {n_val, N2}) -> max(N1, N2); resolve_prop({notfound_ok, NF1}, {notfound_ok, NF2}) -> @@ -227,11 +250,11 @@ resolve_prop({pw, PW1}, {pw, PW2}) -> max(PW1, PW2); resolve_prop({r, R1}, {r, R2}) -> max(R1, R2); resolve_prop({rw, RW1}, {rw, RW2}) -> max(RW1, RW2); resolve_prop({small_vclock, Small1}, - {small_vclock, Small2}) -> + {small_vclock, Small2}) -> max(Small1, Small2); resolve_prop({w, W1}, {w, W2}) -> max(W1, W2); resolve_prop({young_vclock, Young1}, - {young_vclock, Young2}) -> + {young_vclock, Young2}) -> max(Young1, Young2); resolve_prop({_, V1}, {_, _V2}) -> V1. @@ -245,37 +268,69 @@ resolve_hooks(Hooks1, Hooks2) -> -ifdef(TEST). 
simple_resolve_test() -> - Props1 = [{name, <<"test">>}, {allow_mult, false}, - {basic_quorum, false}, {big_vclock, 50}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, quorum}, {last_write_wins, false}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 3}, {notfound_ok, true}, {old_vclock, 86400}, - {postcommit, []}, {pr, 0}, {precommit, [{a, b}]}, - {pw, 0}, {r, quorum}, {rw, quorum}, {small_vclock, 50}, - {w, quorum}, {young_vclock, 20}], - Props2 = [{name, <<"test">>}, {allow_mult, true}, - {basic_quorum, true}, {big_vclock, 60}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, 3}, {last_write_wins, true}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 5}, {notfound_ok, false}, {old_vclock, 86401}, - {postcommit, [{a, b}]}, {pr, 1}, {precommit, [{c, d}]}, - {pw, 3}, {r, 3}, {rw, 3}, {w, 1}, {young_vclock, 30}], - Expected = [{name, <<"test">>}, {allow_mult, true}, - {basic_quorum, false}, {big_vclock, 60}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, quorum}, {last_write_wins, false}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 5}, {notfound_ok, true}, {old_vclock, 86401}, - {postcommit, [{a, b}]}, {pr, 1}, - {precommit, [{a, b}, {c, d}]}, {pw, 3}, {r, quorum}, - {rw, quorum}, {small_vclock, 50}, {w, quorum}, - {young_vclock, 30}], + Props1 = [{name, <<"test">>}, + {allow_mult, false}, + {basic_quorum, false}, + {big_vclock, 50}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}, + {dw, quorum}, + {last_write_wins, false}, + {linkfun, + {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, + {n_val, 3}, + {notfound_ok, true}, + {old_vclock, 86400}, + {postcommit, []}, + {pr, 0}, + {precommit, [{a, b}]}, + {pw, 0}, + {r, quorum}, + {rw, quorum}, + {small_vclock, 50}, + {w, quorum}, + {young_vclock, 20}], + Props2 = [{name, <<"test">>}, + {allow_mult, true}, + {basic_quorum, true}, + {big_vclock, 60}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}, + {dw, 3}, + {last_write_wins, true}, + {linkfun, + {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, + {n_val, 5}, + {notfound_ok, false}, + {old_vclock, 86401}, + {postcommit, [{a, b}]}, + {pr, 1}, + {precommit, [{c, d}]}, + {pw, 3}, + {r, 3}, + {rw, 3}, + {w, 1}, + {young_vclock, 30}], + Expected = [{name, <<"test">>}, + {allow_mult, true}, + {basic_quorum, false}, + {big_vclock, 60}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}, + {dw, quorum}, + {last_write_wins, false}, + {linkfun, + {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, + {n_val, 5}, + {notfound_ok, true}, + {old_vclock, 86401}, + {postcommit, [{a, b}]}, + {pr, 1}, + {precommit, [{a, b}, {c, d}]}, + {pw, 3}, + {r, quorum}, + {rw, quorum}, + {small_vclock, 50}, + {w, quorum}, + {young_vclock, 30}], ?assertEqual((lists:ukeysort(1, Expected)), - (lists:ukeysort(1, resolve(Props1, Props2)))). + (lists:ukeysort(1, resolve(Props1, Props2)))). -endif. diff --git a/src/riak_core_claim.erl b/src/riak_core_claim.erl index 7a411d8c7..c6aba2b6d 100644 --- a/src/riak_core_claim.erl +++ b/src/riak_core_claim.erl @@ -53,20 +53,34 @@ -module(riak_core_claim). --export([claim/1, claim/3, claim_until_balanced/2, - claim_until_balanced/4]). - --export([default_wants_claim/1, default_wants_claim/2, - default_choose_claim/1, default_choose_claim/2, - default_choose_claim/3, never_wants_claim/1, - never_wants_claim/2, random_choose_claim/1, - random_choose_claim/2, random_choose_claim/3]). 
- --export([wants_claim_v2/1, wants_claim_v2/2, - choose_claim_v2/1, choose_claim_v2/2, choose_claim_v2/3, - claim_rebalance_n/2, claim_diversify/3, - claim_diagonal/3, wants/1, wants_owns_diff/2, - meets_target_n/2, diagonal_stripe/2]). +-export([claim/1, + claim/3, + claim_until_balanced/2, + claim_until_balanced/4]). + +-export([default_wants_claim/1, + default_wants_claim/2, + default_choose_claim/1, + default_choose_claim/2, + default_choose_claim/3, + never_wants_claim/1, + never_wants_claim/2, + random_choose_claim/1, + random_choose_claim/2, + random_choose_claim/3]). + +-export([wants_claim_v2/1, + wants_claim_v2/2, + choose_claim_v2/1, + choose_claim_v2/2, + choose_claim_v2/3, + claim_rebalance_n/2, + claim_diversify/3, + claim_diagonal/3, + wants/1, + wants_owns_diff/2, + meets_target_n/2, + diagonal_stripe/2]). -define(DEF_TARGET_N, 4). @@ -75,9 +89,10 @@ claim(Ring) -> claim(Ring, want, choose). claim(Ring, _, _) -> Members = riak_core_ring:claiming_members(Ring), lists:foldl(fun (Node, Ring0) -> - claim_until_balanced(Ring0, Node, want, choose) - end, - Ring, Members). + claim_until_balanced(Ring0, Node, want, choose) + end, + Ring, + Members). claim_until_balanced(Ring, Node) -> claim_until_balanced(Ring, Node, want, choose). @@ -85,10 +100,10 @@ claim_until_balanced(Ring, Node) -> claim_until_balanced(Ring, Node, want, choose) -> NeedsIndexes = wants_claim_v2(Ring, Node), case NeedsIndexes of - no -> Ring; - {yes, _NumToClaim} -> - NewRing = choose_claim_v2(Ring, Node), - claim_until_balanced(NewRing, Node, want, choose) + no -> Ring; + {yes, _NumToClaim} -> + NewRing = choose_claim_v2(Ring, Node), + claim_until_balanced(NewRing, Node, want, choose) end. %% =================================================================== @@ -125,8 +140,8 @@ wants_claim_v2(Ring, Node) -> Avg = RingSize div NodeCount, Count = proplists:get_value(Node, Counts, 0), case Count < Avg of - false -> no; - true -> {yes, Avg - Count} + false -> no; + true -> {yes, Avg - Count} end. %% Provide default choose parameters if none given @@ -134,11 +149,12 @@ default_choose_params() -> default_choose_params([]). default_choose_params(Params) -> case proplists:get_value(target_n_val, Params) of - undefined -> - TN = application:get_env(riak_core, target_n_val, - ?DEF_TARGET_N), - [{target_n_val, TN} | Params]; - _ -> Params + undefined -> + TN = application:get_env(riak_core, + target_n_val, + ?DEF_TARGET_N), + [{target_n_val, TN} | Params]; + _ -> Params end. choose_claim_v2(Ring) -> choose_claim_v2(Ring, node()). @@ -158,68 +174,79 @@ choose_claim_v2(Ring, Node, Params0) -> RingSize = riak_core_ring:num_partitions(Ring), NodeCount = erlang:length(Active), %% Deltas::[node(), integer()] - Deltas = get_deltas(RingSize, NodeCount, Owners, - Counts), + Deltas = get_deltas(RingSize, + NodeCount, + Owners, + Counts), {_, Want} = lists:keyfind(Node, 1, Deltas), TargetN = proplists:get_value(target_n_val, Params), AllIndices = lists:zip(lists:seq(0, length(Owners) - 1), - [Idx || {Idx, _} <- Owners]), + [Idx || {Idx, _} <- Owners]), EnoughNodes = (NodeCount > TargetN) or - (NodeCount == TargetN) and (RingSize rem TargetN =:= 0), + (NodeCount == TargetN) and (RingSize rem TargetN =:= 0), case EnoughNodes of - true -> - %% If we have enough nodes to meet target_n, then we prefer to - %% claim indices that are currently causing violations, and then - %% fallback to indices in linear order. The filtering steps below - %% will ensure no new violations are introduced. 
- Violated = lists:flatten(find_violations(Ring, - TargetN)), - Violated2 = [lists:keyfind(Idx, 2, AllIndices) - || Idx <- Violated], - Indices = Violated2 ++ AllIndices -- Violated2; - false -> - %% If we do not have enough nodes to meet target_n, then we prefer - %% claiming the same indices that would occur during a - %% re-diagonalization of the ring with target_n nodes, falling - %% back to linear offsets off these preferred indices when the - %% number of indices desired is less than the computed set. - Padding = lists:duplicate(TargetN, undefined), - Expanded = lists:sublist(Active ++ Padding, TargetN), - PreferredClaim = riak_core_claim:diagonal_stripe(Ring, - Expanded), - PreferredNth = [begin - {Nth, Idx} = lists:keyfind(Idx, 2, AllIndices), Nth - end - || {Idx, Owner} <- PreferredClaim, Owner =:= Node], - Offsets = lists:seq(0, - RingSize div length(PreferredNth)), - AllNth = lists:sublist([(X + Y) rem RingSize - || Y <- Offsets, X <- PreferredNth], - RingSize), - Indices = [lists:keyfind(Nth, 1, AllIndices) - || Nth <- AllNth] + true -> + %% If we have enough nodes to meet target_n, then we prefer to + %% claim indices that are currently causing violations, and then + %% fallback to indices in linear order. The filtering steps below + %% will ensure no new violations are introduced. + Violated = lists:flatten(find_violations(Ring, + TargetN)), + Violated2 = [lists:keyfind(Idx, 2, AllIndices) + || Idx <- Violated], + Indices = Violated2 ++ AllIndices -- Violated2; + false -> + %% If we do not have enough nodes to meet target_n, then we prefer + %% claiming the same indices that would occur during a + %% re-diagonalization of the ring with target_n nodes, falling + %% back to linear offsets off these preferred indices when the + %% number of indices desired is less than the computed set. 
+ Padding = lists:duplicate(TargetN, undefined), + Expanded = lists:sublist(Active ++ Padding, TargetN), + PreferredClaim = riak_core_claim:diagonal_stripe(Ring, + Expanded), + PreferredNth = [begin + {Nth, Idx} = lists:keyfind(Idx, 2, AllIndices), + Nth + end + || {Idx, Owner} <- PreferredClaim, Owner =:= Node], + Offsets = lists:seq(0, + RingSize div length(PreferredNth)), + AllNth = lists:sublist([(X + Y) rem RingSize + || Y <- Offsets, X <- PreferredNth], + RingSize), + Indices = [lists:keyfind(Nth, 1, AllIndices) + || Nth <- AllNth] end, %% Filter out indices that conflict with the node's existing ownership - Indices2 = prefilter_violations(Ring, Node, AllIndices, - Indices, TargetN, RingSize), + Indices2 = prefilter_violations(Ring, + Node, + AllIndices, + Indices, + TargetN, + RingSize), %% Claim indices from the remaining candidate set - Claim = select_indices(Owners, Deltas, Indices2, - TargetN, RingSize), + Claim = select_indices(Owners, + Deltas, + Indices2, + TargetN, + RingSize), Claim2 = lists:sublist(Claim, Want), NewRing = lists:foldl(fun (Idx, Ring0) -> - riak_core_ring:transfer_node(Idx, Node, Ring0) - end, - Ring, Claim2), + riak_core_ring:transfer_node(Idx, Node, Ring0) + end, + Ring, + Claim2), RingChanged = [] /= Claim2, RingMeetsTargetN = meets_target_n(NewRing, TargetN), case {RingChanged, EnoughNodes, RingMeetsTargetN} of - {false, _, _} -> - %% Unable to claim, fallback to re-diagonalization - sequential_claim(Ring, Node, TargetN); - {_, true, false} -> - %% Failed to meet target_n, fallback to re-diagonalization - sequential_claim(Ring, Node, TargetN); - _ -> NewRing + {false, _, _} -> + %% Unable to claim, fallback to re-diagonalization + sequential_claim(Ring, Node, TargetN); + {_, true, false} -> + %% Failed to meet target_n, fallback to re-diagonalization + sequential_claim(Ring, Node, TargetN); + _ -> NewRing end. %% @private for each node in owners return a tuple of owner and delta @@ -228,21 +255,23 @@ choose_claim_v2(Ring, Node, Params0) -> %% that many more partitions, a negative means the owner can lose that %% many paritions. -spec get_deltas(RingSize :: pos_integer(), - NodeCount :: pos_integer(), - Owners :: [{Index :: non_neg_integer(), node()}], - Counts :: [{node(), non_neg_integer()}]) -> Deltas :: - [{node(), - integer()}]. + NodeCount :: pos_integer(), + Owners :: [{Index :: non_neg_integer(), node()}], + Counts :: [{node(), non_neg_integer()}]) -> Deltas :: + [{node(), + integer()}]. get_deltas(RingSize, NodeCount, Owners, Counts) -> Avg = RingSize / NodeCount, %% the most any node should own Max = ceiling(RingSize / NodeCount), - ActiveDeltas = [{Member, Count, - normalise_delta(Avg - Count)} - || {Member, Count} <- Counts], - BalancedDeltas = rebalance_deltas(ActiveDeltas, Max, - RingSize), + ActiveDeltas = [{Member, + Count, + normalise_delta(Avg - Count)} + || {Member, Count} <- Counts], + BalancedDeltas = rebalance_deltas(ActiveDeltas, + Max, + RingSize), add_default_deltas(Owners, BalancedDeltas, 0). %% @private a node can only claim whole partitions, but if RingSize @@ -264,53 +293,55 @@ normalise_delta(Delta) -> %% 6}, {n4, 8}, {n5,6} we rebalance the deltas so that select_indices %% doesn't leave some node not giving up enough partitions -spec rebalance_deltas([{node(), integer()}], - pos_integer(), pos_integer()) -> [{node(), integer()}]. + pos_integer(), pos_integer()) -> [{node(), integer()}]. 
rebalance_deltas(NodeDeltas, Max, RingSize) -> AppliedDeltas = [Own + Delta - || {_, Own, Delta} <- NodeDeltas], + || {_, Own, Delta} <- NodeDeltas], case lists:sum(AppliedDeltas) - RingSize of - 0 -> - [{Node, Delta} || {Node, _Cnt, Delta} <- NodeDeltas]; - N when N < 0 -> increase_keeps(NodeDeltas, N, Max, []) + 0 -> + [{Node, Delta} || {Node, _Cnt, Delta} <- NodeDeltas]; + N when N < 0 -> increase_keeps(NodeDeltas, N, Max, []) end. %% @private increases the delta for (some) nodes giving away %% partitions to the max they can keep -spec increase_keeps(Deltas :: [{node(), integer()}], - WantsError :: integer(), Max :: pos_integer(), - Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), - integer()}]. + WantsError :: integer(), Max :: pos_integer(), + Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), + integer()}]. increase_keeps(Rest, 0, _Max, Acc) -> [{Node, Delta} || {Node, _Own, Delta} - <- lists:usort(lists:append(Rest, Acc))]; + <- lists:usort(lists:append(Rest, Acc))]; increase_keeps([], N, Max, Acc) when N < 0 -> increase_takes(lists:reverse(Acc), N, Max, []); increase_keeps([{Node, Own, Delta} | Rest], N, Max, Acc) when Delta < 0 -> WouldOwn = Own + Delta, Additive = case WouldOwn + 1 =< Max of - true -> 1; - false -> 0 - end, - increase_keeps(Rest, N + Additive, Max, - [{Node, Own + Delta + Additive} | Acc]); + true -> 1; + false -> 0 + end, + increase_keeps(Rest, + N + Additive, + Max, + [{Node, Own + Delta + Additive} | Acc]); increase_keeps([NodeDelta | Rest], N, Max, Acc) -> increase_keeps(Rest, N, Max, [NodeDelta | Acc]). %% @private increases the delta for (some) nodes taking partitions to the max %% they can ask for -spec increase_takes(Deltas :: [{node(), integer()}], - WantsError :: integer(), Max :: pos_integer(), - Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), - integer()}]. + WantsError :: integer(), Max :: pos_integer(), + Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), + integer()}]. increase_takes(Rest, 0, _Max, Acc) -> [{Node, Delta} || {Node, _Own, Delta} - <- lists:usort(lists:append(Rest, Acc))]; + <- lists:usort(lists:append(Rest, Acc))]; increase_takes([], N, _Max, Acc) when N < 0 -> [{Node, Delta} || {Node, _Own, Delta} <- lists:usort(Acc)]; @@ -318,45 +349,53 @@ increase_takes([{Node, Own, Delta} | Rest], N, Max, Acc) when Delta > 0 -> WouldOwn = Own + Delta, Additive = case WouldOwn + 1 =< Max of - true -> 1; - false -> 0 - end, - increase_takes(Rest, N + Additive, Max, - [{Node, Own, Delta + Additive} | Acc]); + true -> 1; + false -> 0 + end, + increase_takes(Rest, + N + Additive, + Max, + [{Node, Own, Delta + Additive} | Acc]); increase_takes([NodeDelta | Rest], N, Max, Acc) -> increase_takes(Rest, N, Max, [NodeDelta | Acc]). meets_target_n(Ring, TargetN) -> Owners = lists:keysort(1, - riak_core_ring:all_owners(Ring)), + riak_core_ring:all_owners(Ring)), meets_target_n(Owners, TargetN, 0, [], []). 
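%% Illustrative shell sketch of the delta arithmetic above (the node
%% names and counts are assumptions, and trunc/1 stands in for the
%% whole-partition rounding normalise_delta/1 performs): a 16-partition
%% ring over three claiming nodes has Avg = 16/3 ~ 5.33 and
%% Max = ceiling(16/3) = 6.
1> Counts = [{n1, 8}, {n2, 5}, {n3, 3}].
2> Avg = 16 / 3.
3> [{N, trunc(Avg - C)} || {N, C} <- Counts].
[{n1,-2},{n2,0},{n3,2}]
%% n1 can give up two partitions and n3 wants two more; the applied
%% deltas already sum back to the ring size, so rebalance_deltas/3
%% would leave them unchanged in this case.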
meets_target_n([{Part, Node} | Rest], TargetN, Index, - First, Last) -> + First, Last) -> case lists:keytake(Node, 1, Last) of - {value, {Node, LastIndex, _}, NewLast} -> - if Index - LastIndex >= TargetN -> - %% node repeat respects TargetN - meets_target_n(Rest, TargetN, Index + 1, First, - [{Node, Index, Part} | NewLast]); - true -> - %% violation of TargetN - false - end; - false -> - %% haven't seen this node yet - meets_target_n(Rest, TargetN, Index + 1, - [{Node, Index} | First], [{Node, Index, Part} | Last]) + {value, {Node, LastIndex, _}, NewLast} -> + if Index - LastIndex >= TargetN -> + %% node repeat respects TargetN + meets_target_n(Rest, + TargetN, + Index + 1, + First, + [{Node, Index, Part} | NewLast]); + true -> + %% violation of TargetN + false + end; + false -> + %% haven't seen this node yet + meets_target_n(Rest, + TargetN, + Index + 1, + [{Node, Index} | First], + [{Node, Index, Part} | Last]) end; meets_target_n([], TargetN, Index, First, Last) -> %% start through end guarantees TargetN %% compute violations at wrap around, but don't fail %% because of them: handle during reclaim Violations = lists:filter(fun ({Node, L, _}) -> - {Node, F} = proplists:lookup(Node, First), - Index - L + F < TargetN - end, - Last), + {Node, F} = proplists:lookup(Node, First), + Index - L + F < TargetN + end, + Last), {true, [Part || {_, _, Part} <- Violations]}. %% Claim diversify tries to build a perfectly diverse ownership list that meets @@ -365,20 +404,23 @@ meets_target_n([], TargetN, Index, First, Last) -> %% list, updating the adjacency matrix needed to compute the diversity score as each %% node is added and uses it to drive the selection of the next nodes. claim_diversify(Wants, Owners, Params) -> - TN = proplists:get_value(target_n_val, Params, - ?DEF_TARGET_N), + TN = proplists:get_value(target_n_val, + Params, + ?DEF_TARGET_N), Q = length(Owners), Claiming = [N || {N, W} <- Wants, W > 0], {ok, NewOwners, _AM} = - riak_core_claim_util:construct(riak_core_claim_util:gen_complete_len(Q), - Claiming, TN), + riak_core_claim_util:construct(riak_core_claim_util:gen_complete_len(Q), + Claiming, + TN), {NewOwners, [diversified]}. %% Claim nodes in seq a,b,c,a,b,c trying to handle the wraparound %% case to meet target N claim_diagonal(Wants, Owners, Params) -> - TN = proplists:get_value(target_n_val, Params, - ?DEF_TARGET_N), + TN = proplists:get_value(target_n_val, + Params, + ?DEF_TARGET_N), Claiming = lists:sort([N || {N, W} <- Wants, W > 0]), S = length(Claiming), Q = length(Owners), @@ -388,10 +430,10 @@ claim_diagonal(Wants, Owners, Params) -> %% are available. Tail = Q - Reps * S, Last = case S >= TN + Tail of - true -> % If number wanted can be filled excluding first TN nodes - lists:sublist(lists:nthtail(TN - Tail, Claiming), Tail); - _ -> lists:sublist(Claiming, Tail) - end, + true -> % If number wanted can be filled excluding first TN nodes + lists:sublist(lists:nthtail(TN - Tail, Claiming), Tail); + _ -> lists:sublist(Claiming, Tail) + end, {lists:flatten([lists:duplicate(Reps, Claiming), Last]), [diagonalized]}. @@ -401,39 +443,41 @@ claim_diagonal(Wants, Owners, Params) -> %% attempts to eliminate tail violations (for example a ring that %% starts/ends n1 | n2 | ...| n3 | n4 | n1) -spec sequential_claim(riak_core_ring:riak_core_ring(), - node(), integer()) -> riak_core_ring:riak_core_ring(). + node(), integer()) -> riak_core_ring:riak_core_ring(). 
sequential_claim(Ring, Node, TargetN) -> Nodes = lists:usort([Node - | riak_core_ring:claiming_members(Ring)]), + | riak_core_ring:claiming_members(Ring)]), NodeCount = length(Nodes), RingSize = riak_core_ring:num_partitions(Ring), Overhang = RingSize rem NodeCount, HasTailViolation = Overhang > 0 andalso - Overhang < TargetN, + Overhang < TargetN, Shortfall = TargetN - Overhang, CompleteSequences = RingSize div NodeCount, MaxFetchesPerSeq = NodeCount - TargetN, MinFetchesPerSeq = ceiling(Shortfall / - CompleteSequences), + CompleteSequences), CanSolveViolation = CompleteSequences * MaxFetchesPerSeq - >= Shortfall, + >= Shortfall, Zipped = case HasTailViolation andalso CanSolveViolation - of - true -> - Partitions = lists:sort([I - || {I, _} - <- riak_core_ring:all_owners(Ring)]), - Nodelist = solve_tail_violations(RingSize, Nodes, - Shortfall, - MinFetchesPerSeq), - lists:zip(Partitions, lists:flatten(Nodelist)); - false -> diagonal_stripe(Ring, Nodes) - end, + of + true -> + Partitions = lists:sort([I + || {I, _} + <- riak_core_ring:all_owners(Ring)]), + Nodelist = solve_tail_violations(RingSize, + Nodes, + Shortfall, + MinFetchesPerSeq), + lists:zip(Partitions, lists:flatten(Nodelist)); + false -> diagonal_stripe(Ring, Nodes) + end, lists:foldl(fun ({P, N}, Acc) -> - riak_core_ring:transfer_node(P, N, Acc) - end, - Ring, Zipped). + riak_core_ring:transfer_node(P, N, Acc) + end, + Ring, + Zipped). %% @private every module has a ceiling function -spec ceiling(float()) -> integer(). @@ -441,87 +485,104 @@ sequential_claim(Ring, Node, TargetN) -> ceiling(F) -> T = trunc(F), case F - T == 0 of - true -> T; - false -> T + 1 + true -> T; + false -> T + 1 end. %% @private rem_fill increase the tail so that there is no wrap around %% preflist violation, by taking a `Shortfall' number nodes from %% earlier in the preflist -spec solve_tail_violations(integer(), [node()], - integer(), integer()) -> [node()]. + integer(), integer()) -> [node()]. solve_tail_violations(RingSize, Nodes, Shortfall, - MinFetchesPerSeq) -> + MinFetchesPerSeq) -> StartingNode = RingSize rem length(Nodes) + 1, - build_nodelist(RingSize, Nodes, Shortfall, StartingNode, - MinFetchesPerSeq, []). + build_nodelist(RingSize, + Nodes, + Shortfall, + StartingNode, + MinFetchesPerSeq, + []). %% @private build the node list by building tail to satisfy TargetN, then removing %% the added nodes from earlier segments -spec build_nodelist(integer(), [node()], integer(), - integer(), integer(), [node()]) -> [node()]. + integer(), integer(), [node()]) -> [node()]. 
build_nodelist(RingSize, Nodes, _Shortfall = 0, - _NodeCounter, _MinFetchesPerSeq, Acc) -> + _NodeCounter, _MinFetchesPerSeq, Acc) -> %% Finished shuffling, backfill if required ShuffledRing = lists:flatten(Acc), - backfill_ring(RingSize, Nodes, - (RingSize - length(ShuffledRing)) div length(Nodes), - Acc); + backfill_ring(RingSize, + Nodes, + (RingSize - length(ShuffledRing)) div length(Nodes), + Acc); build_nodelist(RingSize, Nodes, Shortfall, NodeCounter, - MinFetchesPerSeq, _Acc = []) -> + MinFetchesPerSeq, _Acc = []) -> %% Build the tail with sufficient nodes to satisfy TargetN NodeCount = length(Nodes), LastSegLength = RingSize rem NodeCount + Shortfall, NewSeq = lists:sublist(Nodes, 1, LastSegLength), - build_nodelist(RingSize, Nodes, Shortfall, NodeCounter, - MinFetchesPerSeq, NewSeq); + build_nodelist(RingSize, + Nodes, + Shortfall, + NodeCounter, + MinFetchesPerSeq, + NewSeq); build_nodelist(RingSize, Nodes, Shortfall, NodeCounter, - MinFetchesPerSeq, Acc) -> + MinFetchesPerSeq, Acc) -> %% Build rest of list, subtracting minimum of MinFetchesPerSeq, Shortfall %% or (NodeCount - NodeCounter) each time NodeCount = length(Nodes), NodesToRemove = min(min(MinFetchesPerSeq, Shortfall), - NodeCount - NodeCounter), - RemovalList = lists:sublist(Nodes, NodeCounter, - NodesToRemove), + NodeCount - NodeCounter), + RemovalList = lists:sublist(Nodes, + NodeCounter, + NodesToRemove), NewSeq = lists:subtract(Nodes, RemovalList), NewNodeCounter = NodeCounter + NodesToRemove, - build_nodelist(RingSize, Nodes, - Shortfall - NodesToRemove, NewNodeCounter, - MinFetchesPerSeq, [NewSeq | Acc]). + build_nodelist(RingSize, + Nodes, + Shortfall - NodesToRemove, + NewNodeCounter, + MinFetchesPerSeq, + [NewSeq | Acc]). %% @private Backfill the ring with full sequences -spec backfill_ring(integer(), [node()], integer(), - [node()]) -> [node()]. + [node()]) -> [node()]. backfill_ring(_RingSize, _Nodes, _Remaining = 0, Acc) -> Acc; backfill_ring(RingSize, Nodes, Remaining, Acc) -> - backfill_ring(RingSize, Nodes, Remaining - 1, - [Nodes | Acc]). + backfill_ring(RingSize, + Nodes, + Remaining - 1, + [Nodes | Acc]). claim_rebalance_n(Ring, Node) -> Nodes = lists:usort([Node - | riak_core_ring:claiming_members(Ring)]), + | riak_core_ring:claiming_members(Ring)]), Zipped = diagonal_stripe(Ring, Nodes), lists:foldl(fun ({P, N}, Acc) -> - riak_core_ring:transfer_node(P, N, Acc) - end, - Ring, Zipped). + riak_core_ring:transfer_node(P, N, Acc) + end, + Ring, + Zipped). diagonal_stripe(Ring, Nodes) -> %% diagonal stripes guarantee most disperse data Partitions = lists:sort([I - || {I, _} <- riak_core_ring:all_owners(Ring)]), + || {I, _} <- riak_core_ring:all_owners(Ring)]), Zipped = lists:zip(Partitions, - lists:sublist(lists:flatten(lists:duplicate(1 + - length(Partitions) - div - length(Nodes), - Nodes)), - 1, length(Partitions))), + lists:sublist(lists:flatten(lists:duplicate(1 + + length(Partitions) + div + length(Nodes), + Nodes)), + 1, + length(Partitions))), Zipped. random_choose_claim(Ring) -> @@ -532,7 +593,8 @@ random_choose_claim(Ring, Node) -> random_choose_claim(Ring, Node, _Params) -> riak_core_ring:transfer_node(riak_core_ring:random_other_index(Ring), - Node, Ring). + Node, + Ring). %% @spec never_wants_claim(riak_core_ring()) -> no %% @doc For use by nodes that should not claim any partitions. 
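%% Worked example of the tail-violation arithmetic above (assumed
%% inputs: ring size 16, nodes [n1..n5], target_n 4). A plain diagonal
%% stripe leaves Overhang = 16 rem 5 = 1 with 0 < 1 < 4, so the
%% wraparound preflists would repeat nodes. Then Shortfall = 4 - 1 = 3,
%% CompleteSequences = 16 div 5 = 3 and MaxFetchesPerSeq = 5 - 4 = 1,
%% so CanSolveViolation holds (3 * 1 >= 3) and solve_tail_violations/4
%% produces
%%   [n1,n2,n3,n5, n1,n2,n4,n5, n1,n3,n4,n5, n1,n2,n3,n4]
%% in which every window of four consecutive partitions, including the
%% wrap back to the head, holds four distinct nodes. With three nodes
%% and target_n 3 the same check fails (MaxFetchesPerSeq = 0) and the
%% claim falls back to the plain diagonal stripe.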
@@ -554,33 +616,36 @@ find_violations(Ring, TargetN) -> Owners2 = Owners ++ Suffix, %% Use a sliding window to determine violations {Bad, _} = lists:foldl(fun (P = {Idx, Owner}, - {Out, Window}) -> - Window2 = lists:sublist([P | Window], - TargetN - 1), - case lists:keyfind(Owner, 2, Window) of - {PrevIdx, Owner} -> - {[[PrevIdx, Idx] | Out], Window2}; - false -> {Out, Window2} - end - end, - {[], []}, Owners2), + {Out, Window}) -> + Window2 = lists:sublist([P | Window], + TargetN - 1), + case lists:keyfind(Owner, 2, Window) of + {PrevIdx, Owner} -> + {[[PrevIdx, Idx] | Out], Window2}; + false -> {Out, Window2} + end + end, + {[], []}, + Owners2), lists:reverse(Bad). %% @private %% %% @doc Counts up the number of partitions owned by each node. -spec get_counts([node()], - [{integer(), _}]) -> [{node(), non_neg_integer()}]. + [{integer(), _}]) -> [{node(), non_neg_integer()}]. get_counts(Nodes, Ring) -> Empty = [{Node, 0} || Node <- Nodes], Counts = lists:foldl(fun ({_Idx, Node}, Counts) -> - case lists:member(Node, Nodes) of - true -> dict:update_counter(Node, 1, Counts); - false -> Counts - end - end, - dict:from_list(Empty), Ring), + case lists:member(Node, Nodes) of + true -> + dict:update_counter(Node, 1, Counts); + false -> Counts + end + end, + dict:from_list(Empty), + Ring), dict:to_list(Counts). %% @private @@ -595,16 +660,16 @@ add_default_deltas(IdxOwners, Deltas, Default) -> %% @doc Filter out candidate indices that would violate target_n given %% a node's current partition ownership. prefilter_violations(Ring, Node, AllIndices, Indices, - TargetN, RingSize) -> + TargetN, RingSize) -> CurrentIndices = riak_core_ring:indices(Ring, Node), CurrentNth = [lists:keyfind(Idx, 2, AllIndices) - || Idx <- CurrentIndices], + || Idx <- CurrentIndices], [{Nth, Idx} || {Nth, Idx} <- Indices, - lists:all(fun ({CNth, _}) -> - spaced_by_n(CNth, Nth, TargetN, RingSize) - end, - CurrentNth)]. + lists:all(fun ({CNth, _}) -> + spaced_by_n(CNth, Nth, TargetN, RingSize) + end, + CurrentNth)]. %% @private %% @@ -619,10 +684,10 @@ prefilter_violations(Ring, Node, AllIndices, Indices, %% the desired ownership is 3, then we try to claim at most 2 partitions %% from A. select_indices(_Owners, _Deltas, [], _TargetN, - _RingSize) -> + _RingSize) -> []; select_indices(Owners, Deltas, Indices, TargetN, - RingSize) -> + RingSize) -> OwnerDT = dict:from_list(Owners), {FirstNth, _} = hd(Indices), %% The `First' symbol indicates whether or not this is the first @@ -632,28 +697,34 @@ select_indices(Owners, Deltas, Indices, TargetN, %% is willing to part with. It's the subsequent partitions %% claimed by this node that must not break the target_n invariant. 
{Claim, _, _, _} = lists:foldl(fun ({Nth, Idx}, - {Out, LastNth, DeltaDT, First}) -> - Owner = dict:fetch(Idx, OwnerDT), - Delta = dict:fetch(Owner, DeltaDT), - MeetsTN = spaced_by_n(LastNth, Nth, - TargetN, - RingSize), - case (Delta < 0) and - (First or MeetsTN) - of - true -> - NextDeltaDT = - dict:update_counter(Owner, - 1, - DeltaDT), - {[Idx | Out], Nth, NextDeltaDT, - false}; - false -> - {Out, LastNth, DeltaDT, First} - end - end, - {[], FirstNth, dict:from_list(Deltas), true}, - Indices), + {Out, LastNth, DeltaDT, First}) -> + Owner = dict:fetch(Idx, OwnerDT), + Delta = dict:fetch(Owner, DeltaDT), + MeetsTN = spaced_by_n(LastNth, + Nth, + TargetN, + RingSize), + case (Delta < 0) and + (First or MeetsTN) + of + true -> + NextDeltaDT = + dict:update_counter(Owner, + 1, + DeltaDT), + {[Idx | Out], + Nth, + NextDeltaDT, + false}; + false -> + {Out, + LastNth, + DeltaDT, + First} + end + end, + {[], FirstNth, dict:from_list(Deltas), true}, + Indices), lists:reverse(Claim). %% @private @@ -661,10 +732,12 @@ select_indices(Owners, Deltas, Indices, TargetN, %% @doc Determine if two positions in the ring meet target_n spacing. spaced_by_n(NthA, NthB, TargetN, RingSize) -> case NthA > NthB of - true -> - NFwd = NthA - NthB, NBack = NthB - NthA + RingSize; - false -> - NFwd = NthA - NthB + RingSize, NBack = NthB - NthA + true -> + NFwd = NthA - NthB, + NBack = NthB - NthA + RingSize; + false -> + NFwd = NthA - NthB + RingSize, + NBack = NthB - NthA end, (NFwd >= TargetN) and (NBack >= TargetN). @@ -672,8 +745,8 @@ spaced_by_n(NthA, NthB, TargetN, RingSize) -> %% overloaded by (negative) compared to what it owns. wants_owns_diff(Wants, Owns) -> [case lists:keyfind(N, 1, Owns) of - {N, O} -> {N, W - O}; - false -> {N, W} + {N, O} -> {N, W - O}; + false -> {N, W} end || {N, W} <- Wants]. @@ -681,11 +754,11 @@ wants_owns_diff(Wants, Owns) -> %% considered balanced wants(Ring) -> Active = - lists:sort(riak_core_ring:claiming_members(Ring)), + lists:sort(riak_core_ring:claiming_members(Ring)), Inactive = riak_core_ring:all_members(Ring) -- Active, Q = riak_core_ring:num_partitions(Ring), ActiveWants = lists:zip(Active, - wants_counts(length(Active), Q)), + wants_counts(length(Active), Q)), InactiveWants = [{N, 0} || N <- Inactive], lists:sort(ActiveWants ++ InactiveWants). @@ -695,19 +768,19 @@ wants(Ring) -> wants_counts(S, Q) -> Max = roundup(Q / S), case S * Max - Q of - 0 -> lists:duplicate(S, Max); - X -> - lists:duplicate(X, Max - 1) ++ - lists:duplicate(S - X, Max) + 0 -> lists:duplicate(S, Max); + X -> + lists:duplicate(X, Max - 1) ++ + lists:duplicate(S - X, Max) end. %% Round up to next whole integer - ceil roundup(I) when I >= 0 -> T = erlang:trunc(I), case I - T of - Neg when Neg < 0 -> T; - Pos when Pos > 0 -> T + 1; - _ -> T + Neg when Neg < 0 -> T; + Pos when Pos > 0 -> T + 1; + _ -> T end. %% =================================================================== @@ -730,17 +803,18 @@ wants_claim_test() -> %% @private console helper function to return node lists for claiming %% partitions -spec gen_diag(pos_integer(), pos_integer()) -> [Node :: - atom()]. + atom()]. 
gen_diag(RingSize, NodeCount) -> Nodes = [list_to_atom(lists:concat(["n_", N])) - || N <- lists:seq(1, NodeCount)], + || N <- lists:seq(1, NodeCount)], {HeadNode, RestNodes} = {hd(Nodes), tl(Nodes)}, R0 = riak_core_ring:fresh(RingSize, HeadNode), RAdded = lists:foldl(fun (Node, Racc) -> - riak_core_ring:add_member(HeadNode, Racc, Node) - end, - R0, RestNodes), + riak_core_ring:add_member(HeadNode, Racc, Node) + end, + R0, + RestNodes), Diag = diagonal_stripe(RAdded, Nodes), {_P, N} = lists:unzip(Diag), N. @@ -754,6 +828,6 @@ has_violations(Diag) -> NC = length(lists:usort(Diag)), Overhang = RS rem NC, Overhang > 0 andalso - Overhang < 4. %% hardcoded target n of 4 + Overhang < 4. %% hardcoded target n of 4 -endif. diff --git a/src/riak_core_claim_util.erl b/src/riak_core_claim_util.erl index ac7378f04..a209223e0 100644 --- a/src/riak_core_claim_util.erl +++ b/src/riak_core_claim_util.erl @@ -24,32 +24,53 @@ -module(riak_core_claim_util). --export([ring_stats/2, violation_stats/2, - balance_stats/1, diversity_stats/2]). - --export([node_load/3, print_analysis/1, - print_analysis/2, sort_by_down_fbmax/1]). - --export([adjacency_matrix/1, summarize_am/1, - adjacency_matrix_from_al/1, adjacency_list/1, - fixup_dam/2, score_am/2, count/2, rms/1]). - --export([make_ring/1, gen_complete_diverse/1, - gen_complete_len/1, construct/3]). - --export([num_perms/2, num_combs/2, fac/1, perm_gen/1, - down_combos/2, rotations/1, substitutions/2]). +-export([ring_stats/2, + violation_stats/2, + balance_stats/1, + diversity_stats/2]). + +-export([node_load/3, + print_analysis/1, + print_analysis/2, + sort_by_down_fbmax/1]). + +-export([adjacency_matrix/1, + summarize_am/1, + adjacency_matrix_from_al/1, + adjacency_list/1, + fixup_dam/2, + score_am/2, + count/2, + rms/1]). + +-export([make_ring/1, + gen_complete_diverse/1, + gen_complete_len/1, + construct/3]). + +-export([num_perms/2, + num_combs/2, + fac/1, + perm_gen/1, + down_combos/2, + rotations/1, + substitutions/2]). -record(load, - {node, % Node name - num_pri, % Number of primaries - num_fb, % Number of fallbacks - norm_fb}). % Normalised fallbacks - ratio of how many there are + {node, % Node name + num_pri, % Number of primaries + num_fb, % Number of fallbacks + norm_fb}). % Normalised fallbacks - ratio of how many there are -record(failure, - {down = [], % List of downed nodes - load = [], % List of #load{} records per up node - fbmin, fbmean, fbstddev, fb10, fb90, fbmax}). + {down = [], % List of downed nodes + load = [], % List of #load{} records per up node + fbmin, + fbmean, + fbstddev, + fb10, + fb90, + fbmax}). %% ------------------------------------------------------------------- %% Ring statistics @@ -57,7 +78,7 @@ ring_stats(R, TN) -> violation_stats(R, TN) ++ - balance_stats(R) ++ diversity_stats(R, TN). + balance_stats(R) ++ diversity_stats(R, TN). %% TargetN violations violation_stats(R, TN) -> @@ -69,14 +90,15 @@ balance_stats(R) -> M = length(riak_core_ring:claiming_members(R)), AllOwners = riak_core_ring:all_owners(R), Counts = lists:foldl(fun ({_, N}, A) -> - orddict:update_counter(N, 1, A) - end, - [], AllOwners), + orddict:update_counter(N, 1, A) + end, + [], + AllOwners), Avg = Q / M, Balance = lists:sum([begin - Delta = trunc(Avg - Count), Delta * Delta - end - || {_, Count} <- Counts]), + Delta = trunc(Avg - Count), Delta * Delta + end + || {_, Count} <- Counts]), [{balance, Balance}, {ownership, Counts}]. 
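%% Quick shell check of the balance metric above (the ownership counts
%% are assumed): an 8-partition ring split 4/3/1 across three members
%% scores 2, while a 3/3/2 split scores 0, i.e. perfectly balanced.
1> Avg = 8 / 3.
2> lists:sum([begin D = trunc(Avg - C), D * D end || C <- [4, 3, 1]]).
2
3> lists:sum([begin D = trunc(Avg - C), D * D end || C <- [3, 3, 2]]).
0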
diversity_stats(R, TN) -> @@ -84,7 +106,7 @@ diversity_stats(R, TN) -> AM = adjacency_matrix(Owners), try [{diversity, riak_core_claim_util:score_am(AM, TN)}] catch - _:empty_list -> [{diversity, undefined}] + _:empty_list -> [{diversity, undefined}] end. %% ------------------------------------------------------------------- @@ -101,25 +123,27 @@ node_load(R, NVal, DownNodes) -> VL = vnode_load(R, NVal, DownNodes), TotFBs = lists:sum([NumFBs || {_N, _, NumFBs} <- VL]), [#load{node = N, num_pri = NumPris, num_fb = NumFBs, - norm_fb = norm_fb(NumFBs, TotFBs)} + norm_fb = norm_fb(NumFBs, TotFBs)} || {N, NumPris, NumFBs} <- VL]. vnode_load(R, NVal, DownNodes) -> UpNodes = riak_core_ring:all_members(R) -- DownNodes, Keys = [<<(I + 1):160/integer>> - || {I, _Owner} <- riak_core_ring:all_owners(R)], + || {I, _Owner} <- riak_core_ring:all_owners(R)], %% NValParts = Nval * riak_core_ring:num_partitions(R), - AllPLs = [riak_core_apl:get_apl_ann(Key, NVal, R, - UpNodes) - || Key <- Keys], + AllPLs = [riak_core_apl:get_apl_ann(Key, + NVal, + R, + UpNodes) + || Key <- Keys], FlatPLs = lists:flatten(AllPLs), [begin - Pris = lists:usort([Idx - || {{Idx, PN}, primary} <- FlatPLs, PN == N]), - FBs = lists:usort([Idx - || {{Idx, FN}, fallback} <- FlatPLs, FN == N]) - -- Pris, - {N, length(Pris), length(FBs)} + Pris = lists:usort([Idx + || {{Idx, PN}, primary} <- FlatPLs, PN == N]), + FBs = lists:usort([Idx + || {{Idx, FN}, fallback} <- FlatPLs, FN == N]) + -- Pris, + {N, length(Pris), length(FBs)} end || N <- UpNodes]. @@ -140,27 +164,34 @@ print_analysis(LoadAnalysis) -> print_analysis(IoDev, LoadAnalysis) -> io:format(IoDev, - " Min Mean/ SD 10th 90th Max DownNodes/" - "Worst\n", - []), + " Min Mean/ SD 10th 90th Max DownNodes/" + "Worst\n", + []), print_analysis1(IoDev, LoadAnalysis). %% @private print_analysis1(_IoDev, []) -> ok; print_analysis1(IoDev, - [#failure{down = Down, load = Load, fbmin = FBMin, - fbmean = FBMean, fbstddev = FBStdDev, fb10 = FB10, - fb90 = FB90, fbmax = FBMax} - | Rest]) -> + [#failure{down = Down, load = Load, fbmin = FBMin, + fbmean = FBMean, fbstddev = FBStdDev, fb10 = FB10, + fb90 = FB90, fbmax = FBMax} + | Rest]) -> %% Find the 3 worst FBmax Worst = [{N, NumFB} - || #load{node = N, num_fb = NumFB} - <- lists:sublist(lists:reverse(lists:keysort(#load.num_fb, - Load)), - 3)], - io:format(IoDev, "~4b ~4b/~4b ~4b ~4b ~4b ~w/~w\n", - [FBMin, toint(FBMean), toint(FBStdDev), toint(FB10), - toint(FB90), FBMax, Down, Worst]), + || #load{node = N, num_fb = NumFB} + <- lists:sublist(lists:reverse(lists:keysort(#load.num_fb, + Load)), + 3)], + io:format(IoDev, + "~4b ~4b/~4b ~4b ~4b ~4b ~w/~w\n", + [FBMin, + toint(FBMean), + toint(FBStdDev), + toint(FB10), + toint(FB90), + FBMax, + Down, + Worst]), print_analysis1(IoDev, Rest). %% @private round to nearest int @@ -170,20 +201,20 @@ toint(X) -> X. 
%% Order failures by number of nodes down ascending, then fbmax, then down list sort_by_down_fbmax(Failures) -> Cmp = fun (#failure{down = DownA, fbmax = FBMaxA}, - #failure{down = DownB, fbmax = FBMaxB}) -> - %% length(DownA) =< length(DownB) andalso - %% FBMaxA >= FBMaxB andalso - %% DownA =< DownB - case {length(DownA), length(DownB)} of - {DownALen, DownBLen} when DownALen < DownBLen -> true; - {DownALen, DownBLen} when DownALen > DownBLen -> false; - _ -> - if FBMaxA > FBMaxB -> true; - FBMaxA < FBMaxB -> false; - true -> DownA >= DownB - end - end - end, + #failure{down = DownB, fbmax = FBMaxB}) -> + %% length(DownA) =< length(DownB) andalso + %% FBMaxA >= FBMaxB andalso + %% DownA =< DownB + case {length(DownA), length(DownB)} of + {DownALen, DownBLen} when DownALen < DownBLen -> true; + {DownALen, DownBLen} when DownALen > DownBLen -> false; + _ -> + if FBMaxA > FBMaxB -> true; + FBMaxA < FBMaxB -> false; + true -> DownA >= DownB + end + end + end, lists:sort(Cmp, Failures). %% ------------------------------------------------------------------- @@ -242,11 +273,13 @@ sort_by_down_fbmax(Failures) -> adjacency_matrix(Owners) -> M = lists:usort(Owners), Tid = ets:new(am, [private, duplicate_bag]), - try adjacency_matrix_populate(Tid, M, Owners, - Owners ++ Owners), - adjacency_matrix_result(Tid, ets:first(Tid), []) + try adjacency_matrix_populate(Tid, + M, + Owners, + Owners ++ Owners), + adjacency_matrix_result(Tid, ets:first(Tid), []) after - ets:delete(Tid) + ets:delete(Tid) end. %% @private extract the adjacency matrix from the duplicate bag @@ -255,32 +288,42 @@ adjacency_matrix_result(_Tid, '$end_of_table', Acc) -> adjacency_matrix_result(Tid, NodePair, Acc) -> ALs = ets:lookup(Tid, NodePair), Ds = [D || {_, D} <- ALs], - adjacency_matrix_result(Tid, ets:next(Tid, NodePair), - [{NodePair, Ds} | Acc]). + adjacency_matrix_result(Tid, + ets:next(Tid, NodePair), + [{NodePair, Ds} | Acc]). adjacency_matrix_populate(_Tid, _M, [], _OwnersCycle) -> ok; adjacency_matrix_populate(Tid, M, [Node | Owners], - [Node | OwnersCycle]) -> - adjacency_matrix_add_dist(Tid, Node, M -- [Node], - OwnersCycle, 0), + [Node | OwnersCycle]) -> + adjacency_matrix_add_dist(Tid, + Node, + M -- [Node], + OwnersCycle, + 0), adjacency_matrix_populate(Tid, M, Owners, OwnersCycle). %% @private Compute the distance from node to the next of M nodes adjacency_matrix_add_dist(_Tid, _Node, _M, [], _) -> ok; adjacency_matrix_add_dist(_Tid, _Node, [], _OwnersCycle, - _) -> + _) -> ok; adjacency_matrix_add_dist(Tid, Node, M, - [OtherNode | OwnersCycle], Distance) -> + [OtherNode | OwnersCycle], Distance) -> case lists:member(OtherNode, M) of - true -> % haven't seen this node yet, add distance - ets:insert(Tid, {{Node, OtherNode}, Distance}), - adjacency_matrix_add_dist(Tid, Node, M -- [OtherNode], - OwnersCycle, Distance + 1); - _ -> % already passed OtherNode - adjacency_matrix_add_dist(Tid, Node, M, OwnersCycle, - Distance + 1) + true -> % haven't seen this node yet, add distance + ets:insert(Tid, {{Node, OtherNode}, Distance}), + adjacency_matrix_add_dist(Tid, + Node, + M -- [OtherNode], + OwnersCycle, + Distance + 1); + _ -> % already passed OtherNode + adjacency_matrix_add_dist(Tid, + Node, + M, + OwnersCycle, + Distance + 1) end. 
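%% Illustration of the distance bookkeeping above (the six-entry
%% ownership list is an assumption): in a perfectly diagonal ring each
%% ordered pair of nodes occurs at one constant distance.
1> riak_core_claim_util:adjacency_matrix([n1, n2, n3, n1, n2, n3]).
%% ...yields distances [0,0] for {n1,n2} and {n2,n3} (the second node
%% always directly follows the first) and [1,1] for {n1,n3} and
%% {n2,n1}; a less regular ownership spreads each pair over many
%% distances, which is what the diversity scoring below consumes.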
%% Make adjacency summary by working out counts of each distance @@ -294,11 +337,11 @@ count_distances([]) -> []; count_distances(Ds) -> MaxD = lists:max(Ds), PosCounts = lists:foldl(fun (D, Acc) -> - orddict:update_counter(D, 1, Acc) - end, - orddict:from_list([{D, 0} - || D <- lists:seq(0, MaxD)]), - Ds), + orddict:update_counter(D, 1, Acc) + end, + orddict:from_list([{D, 0} + || D <- lists:seq(0, MaxD)]), + Ds), %% PosCounts orddict must be initialized to make sure no distances %% are missing in the list comprehension [Count || {_Pos, Count} <- PosCounts]. @@ -307,9 +350,10 @@ count_distances(Ds) -> adjacency_matrix_from_al(AL) -> %% Make a count by distance of N1,N2 dict:to_list(lists:foldl(fun ({NPair, D}, Acc) -> - dict:append_list(NPair, [D], Acc) - end, - dict:new(), AL)). + dict:append_list(NPair, [D], Acc) + end, + dict:new(), + AL)). %% Create a pair of node names and a list of distances adjacency_list(Owners) -> @@ -318,41 +362,46 @@ adjacency_list(Owners) -> adjacency_list(_M, [], _OwnersCycle, Acc) -> Acc; adjacency_list(M, [Node | Owners], [Node | OwnersCycle], - Acc) -> - adjacency_list(M, Owners, OwnersCycle, - distances(Node, M -- [Node], OwnersCycle, 0, Acc)). + Acc) -> + adjacency_list(M, + Owners, + OwnersCycle, + distances(Node, M -- [Node], OwnersCycle, 0, Acc)). %% Compute the distance from node to the next of M nodes distances(_Node, _M, [], _, Distances) -> Distances; distances(_Node, [], _OwnersCycle, _, Distances) -> Distances; distances(Node, M, [OtherNode | OwnersCycle], Distance, - Distances) -> + Distances) -> case lists:member(OtherNode, M) of - true -> % haven't seen this node yet, add distance - distances(Node, M -- [OtherNode], OwnersCycle, - Distance + 1, - [{{Node, OtherNode}, Distance} | Distances]); - _ -> % already passed OtherNode - distances(Node, M, OwnersCycle, Distance + 1, Distances) + true -> % haven't seen this node yet, add distance + distances(Node, + M -- [OtherNode], + OwnersCycle, + Distance + 1, + [{{Node, OtherNode}, Distance} | Distances]); + _ -> % already passed OtherNode + distances(Node, M, OwnersCycle, Distance + 1, Distances) end. %% For each pair, get the count of distances < NVal score_am([], _NVal) -> undefined; score_am(AM, NVal) -> Cs = lists:flatten([begin - [C || {D, C} <- count(Ds, NVal), D < NVal] - end - || {_Pair, Ds} <- AM]), + [C || {D, C} <- count(Ds, NVal), D < NVal] + end + || {_Pair, Ds} <- AM]), rms(Cs). count(L, NVal) -> Acc0 = orddict:from_list([{D, 0} - || D <- lists:seq(0, NVal - 1)]), + || D <- lists:seq(0, NVal - 1)]), lists:foldl(fun (E, A) -> - orddict:update_counter(E, 1, A) - end, - Acc0, L). + orddict:update_counter(E, 1, A) + end, + Acc0, + L). rms([]) -> throw(empty_list); rms(L) -> @@ -369,23 +418,25 @@ make_ring(Nodes) -> Idxs = [I || {I, _} <- riak_core_ring:all_owners(R0)], NewOwners = lists:zip(Idxs, Nodes), R1 = lists:foldl(fun (N, R) -> - riak_core_ring:add_member(hd(Nodes), R, N) - end, - R0, Nodes), + riak_core_ring:add_member(hd(Nodes), R, N) + end, + R0, + Nodes), lists:foldl(fun ({I, N}, R) -> - riak_core_ring:transfer_node(I, N, R) - end, - R1, NewOwners). + riak_core_ring:transfer_node(I, N, R) + end, + R1, + NewOwners). 
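%% Shell sketch of the scoring primitives above (the input list is an
%% assumption): count/2 tallies how often each distance occurs,
%% pre-seeding zeros for every distance below NVal so none is skipped.
1> riak_core_claim_util:count([0, 0, 1, 3], 2).
[{0,2},{1,1},{3,1}]
%% score_am/2 then keeps only the tallies whose distance is below NVal
%% and takes their RMS, so a lower score means fewer near-repeats of a
%% node within a preflist, i.e. better diversity.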
%% Generate a completion test function that makes sure all required %% distances are created gen_complete_diverse(RequiredDs) -> fun (Owners, DAM) -> - OwnersLen = length(Owners), - NextPow2 = next_pow2(OwnersLen), - {met_required(Owners, DAM, RequiredDs) andalso - OwnersLen == NextPow2, - NextPow2} + OwnersLen = length(Owners), + NextPow2 = next_pow2(OwnersLen), + {met_required(Owners, DAM, RequiredDs) andalso + OwnersLen == NextPow2, + NextPow2} end. %% Generate until a fixed length has been hit @@ -401,39 +452,47 @@ construct(Complete, M, NVal) -> %% Make an empty adjacency matrix for all pairs of members empty_adjacency_matrix(M) -> lists:foldl(fun (Pair, AM0) -> - dict:append_list(Pair, [], AM0) - end, - dict:new(), [{F, T} || F <- M, T <- M, F /= T]). + dict:append_list(Pair, [], AM0) + end, + dict:new(), + [{F, T} || F <- M, T <- M, F /= T]). construct(Complete, M, Owners, DAM, NVal) -> %% Work out which pairs do not have the requiredDs case Complete(Owners, DAM) of - {true, _DesiredLen} -> {ok, Owners, DAM}; - {false, DesiredLen} -> - %% Easy ones - restrict the eligible list to not include the N-1 - %% previous nodes. If within NVal-1 of possibly closing the ring - %% then restrict in that direction as well. - Eligible0 = M -- lists:sublist(Owners, NVal - 1), - Eligible = case DesiredLen - length(Owners) of - Left when Left >= NVal -> - Eligible0; % At least Nval lest, no restriction - Left -> - Eligible0 -- - lists:sublist(lists:reverse(Owners), NVal - Left) - end, - case Eligible of - [] -> - %% No eligible nodes - not enough to meet NVal, use any node - logger:debug("construct -- unable to construct without " - "violating NVal"), - {Owners1, DAM1} = prepend_next_owner(M, M, Owners, DAM, - NVal), - construct(Complete, M, Owners1, DAM1, NVal); - _ -> - {Owners1, DAM1} = prepend_next_owner(M, Eligible, - Owners, DAM, NVal), - construct(Complete, M, Owners1, DAM1, NVal) - end + {true, _DesiredLen} -> {ok, Owners, DAM}; + {false, DesiredLen} -> + %% Easy ones - restrict the eligible list to not include the N-1 + %% previous nodes. If within NVal-1 of possibly closing the ring + %% then restrict in that direction as well. + Eligible0 = M -- lists:sublist(Owners, NVal - 1), + Eligible = case DesiredLen - length(Owners) of + Left when Left >= NVal -> + Eligible0; % At least Nval lest, no restriction + Left -> + Eligible0 -- + lists:sublist(lists:reverse(Owners), + NVal - Left) + end, + case Eligible of + [] -> + %% No eligible nodes - not enough to meet NVal, use any node + logger:debug("construct -- unable to construct without " + "violating NVal"), + {Owners1, DAM1} = prepend_next_owner(M, + M, + Owners, + DAM, + NVal), + construct(Complete, M, Owners1, DAM1, NVal); + _ -> + {Owners1, DAM1} = prepend_next_owner(M, + Eligible, + Owners, + DAM, + NVal), + construct(Complete, M, Owners1, DAM1, NVal) + end end. %% Returns true only when we have met all required distances across all @@ -441,11 +500,11 @@ construct(Complete, M, Owners, DAM, NVal) -> met_required(Owners, DAM, RequiredDs) -> FixupDAM = fixup_dam(Owners, DAM), case [Pair - || {Pair, Ds} <- dict:to_list(FixupDAM), - RequiredDs -- Ds /= []] - of - [] -> true; - _ -> false + || {Pair, Ds} <- dict:to_list(FixupDAM), + RequiredDs -- Ds /= []] + of + [] -> true; + _ -> false end. %% Return next greatest power of 2 @@ -458,39 +517,44 @@ next_pow2(X, R) -> next_pow2(X, R * 2). 
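%% Hypothetical use of the constructor above, mirroring the call made
%% by claim_diversify/3: grow a 16-entry ownership list over five
%% stand-in nodes while keeping every pair at NVal = 3 spacing where
%% possible.
1> {ok, Owners, _AM} =
       riak_core_claim_util:construct(
           riak_core_claim_util:gen_complete_len(16),
           [n1, n2, n3, n4, n5],
           3).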
%% Take the AM scores and cap by TargetN and find the node that %% improves the RMS prepend_next_owner(M, [Node], Owners, DAM, - _TN) -> % only one node, not a lot of decisions to make + _TN) -> % only one node, not a lot of decisions to make prepend(M, Node, Owners, DAM); prepend_next_owner(M, Eligible, Owners, DAM, TN) -> {_BestScore, Owners2, DAM2} = lists:foldl(fun (Node, - {RunningScore, _RunningO, - _RunningDAM} = - Acc) -> - {Owners1, DAM1} = - prepend(M, Node, - Owners, DAM), - case - score_am(dict:to_list(DAM1), - TN) - of - BetterScore - when BetterScore < - RunningScore -> - {BetterScore, - Owners1, DAM1}; - _ -> Acc - end - end, - {undefined, undefined, undefined}, - Eligible), + {RunningScore, + _RunningO, + _RunningDAM} = + Acc) -> + {Owners1, DAM1} = + prepend(M, + Node, + Owners, + DAM), + case + score_am(dict:to_list(DAM1), + TN) + of + BetterScore + when BetterScore < + RunningScore -> + {BetterScore, + Owners1, + DAM1}; + _ -> Acc + end + end, + {undefined, undefined, undefined}, + Eligible), {Owners2, DAM2}. %% Prepend N to the front of Owners, and update AM prepend(M, N, Owners, DAM) -> Ds = distances2(M -- [N], Owners), DAM2 = lists:foldl(fun ({T, D}, DAM1) -> - dict:append_list({N, T}, [D], DAM1) - end, - DAM, Ds), + dict:append_list({N, T}, [D], DAM1) + end, + DAM, + Ds), {[N | Owners], DAM2}. %% Calculate the distances to each of the M nodes until @@ -501,16 +565,19 @@ distances2([], _Owners, _D, Acc) -> Acc; distances2(_M, [], _D, Acc) -> Acc; distances2(M, [T | Owners], D, Acc) -> case lists:member(T, M) of - true -> - distances2(M -- [T], Owners, D + 1, [{T, D} | Acc]); - false -> distances2(M, Owners, D + 1, Acc) + true -> + distances2(M -- [T], Owners, D + 1, [{T, D} | Acc]); + false -> distances2(M, Owners, D + 1, Acc) end. %% Fix up the dictionary AM adding in entries for the end of the owners list %% wrapping around to the start. fixup_dam(Owners, DAM) -> - fixup_dam(lists:usort(Owners), lists:reverse(Owners), - Owners, 0, DAM). + fixup_dam(lists:usort(Owners), + lists:reverse(Owners), + Owners, + 0, + DAM). fixup_dam([], _ToFix, _Owners, _D, DAM) -> DAM; fixup_dam(_M, [], _Owners, _D, DAM) -> DAM; @@ -518,9 +585,10 @@ fixup_dam(M, [N | ToFix], Owners, D, DAM) -> M2 = M -- [N], Ds = distances2(M2, Owners, D, []), DAM2 = lists:foldl(fun ({T, D0}, DAM1) -> - dict:append_list({N, T}, [D0], DAM1) - end, - DAM, Ds), + dict:append_list({N, T}, [D0], DAM1) + end, + DAM, + Ds), fixup_dam(M2, ToFix, Owners, D + 1, DAM2). %% ------------------------------------------------------------------- @@ -542,9 +610,9 @@ fac(N) when N > 0 -> N * fac(N - 1). perm_gen([E]) -> [[E]]; perm_gen(L) -> lists:append([begin - [[X | Y] || Y <- perm_gen(lists:delete(X, L))] - end - || X <- L]). + [[X | Y] || Y <- perm_gen(lists:delete(X, L))] + end + || X <- L]). %% Pick all combinations of Depth nodes from the MemFbers list %% 0 = [] diff --git a/src/riak_core_claimant.erl b/src/riak_core_claimant.erl index 015f0a4b4..efb3bb7df 100644 --- a/src/riak_core_claimant.erl +++ b/src/riak_core_claimant.erl @@ -25,19 +25,31 @@ %% API -export([start_link/0]). --export([leave_member/1, remove_member/1, - force_replace/2, replace/2, resize_ring/1, - abort_resize/0, plan/0, commit/0, clear/0, - ring_changed/2]). +-export([leave_member/1, + remove_member/1, + force_replace/2, + replace/2, + resize_ring/1, + abort_resize/0, + plan/0, + commit/0, + clear/0, + ring_changed/2]). -export([reassign_indices/1]). 
% helpers for claim sim %% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). - --type action() :: leave | remove | {replace, node()} | - {force_replace, node()}. +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +-type action() :: leave | + remove | + {replace, node()} | + {force_replace, node()}. -type riak_core_ring() :: riak_core_ring:riak_core_ring(). @@ -45,25 +57,25 @@ %% A tuple representing a given cluster transition: %% {Ring, NewRing} where NewRing = f(Ring) -type ring_transition() :: {riak_core_ring(), - riak_core_ring()}. + riak_core_ring()}. -record(state, - {last_ring_id, - %% The set of staged cluster changes - changes :: [{node(), action()}], - %% Ring computed during the last planning stage based on - %% applying a set of staged cluster changes. When commiting - %% changes, the computed ring must match the previous planned - %% ring to be allowed. - next_ring :: riak_core_ring() | undefined, - %% Random number seed passed to remove_node to ensure the - %% current randomized remove algorithm is deterministic - %% between plan and commit phases - seed}). + {last_ring_id, + %% The set of staged cluster changes + changes :: [{node(), action()}], + %% Ring computed during the last planning stage based on + %% applying a set of staged cluster changes. When commiting + %% changes, the computed ring must match the previous planned + %% ring to be allowed. + next_ring :: riak_core_ring() | undefined, + %% Random number seed passed to remove_node to ensure the + %% current randomized remove algorithm is deterministic + %% between plan and commit phases + seed}). -define(ROUT(S, A), - ok).%%-define(ROUT(S,A),?debugFmt(S,A)). - %%-define(ROUT(S,A),io:format(S,A)). + ok).%%-define(ROUT(S,A),?debugFmt(S,A)). + %%-define(ROUT(S,A),io:format(S,A)). %%%=================================================================== %%% API @@ -71,8 +83,10 @@ %% @doc Spawn and register the riak_core_claimant server start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], - []). + gen_server:start_link({local, ?MODULE}, + ?MODULE, + [], + []). %% @doc Determine how the cluster will be affected by the staged changes, %% returning the set of pending changes as well as a list of ring @@ -80,7 +94,7 @@ start_link() -> %% (eg. the initial transition that applies the staged changes, and %% any additional transitions triggered by later rebalancing). -spec plan() -> {error, term()} | - {ok, [action()], [ring_transition()]}. + {ok, [action()], [ring_transition()]}. plan() -> gen_server:call(claimant(), plan, infinity). @@ -157,16 +171,19 @@ ring_changed(Node, Ring) -> %%%=================================================================== reassign_indices(CState) -> - reassign_indices(CState, [], riak_core_rand:rand_seed(), - fun no_log/2). + reassign_indices(CState, + [], + riak_core_rand:rand_seed(), + fun no_log/2). %%%=================================================================== %%% Internal API helpers %%%=================================================================== stage(Node, Action) -> - gen_server:call(claimant(), {stage, Node, Action}, - infinity). + gen_server:call(claimant(), + {stage, Node, Action}, + infinity). claimant() -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), @@ -180,34 +197,40 @@ init([]) -> schedule_tick(), {ok, #state{changes = [], - seed = riak_core_rand:rand_seed()}}. + seed = riak_core_rand:rand_seed()}}. 
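%% The staged-change lifecycle this server drives, as a hypothetical
%% console session (the node name is a placeholder): stage a change,
%% plan to inspect the computed transitions, then commit or clear.
1> riak_core_claimant:leave_member('n2@127.0.0.1').
2> {ok, Changes, NextRings} = riak_core_claimant:plan().
3> riak_core_claimant:commit().
%% As handle_call/3 below enforces, plan/0 answers {error,
%% ring_not_ready} until the ring has converged, and commit/0 only
%% succeeds if the ring it would install still matches the one
%% computed by the last plan/0.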
handle_call(clear, _From, State) -> - State2 = clear_staged(State), {reply, ok, State2}; + State2 = clear_staged(State), + {reply, ok, State2}; handle_call({stage, Node, Action}, _From, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), - {Reply, State2} = maybe_stage(Node, Action, Ring, - State), + {Reply, State2} = maybe_stage(Node, + Action, + Ring, + State), {reply, Reply, State2}; handle_call(plan, _From, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case riak_core_ring:ring_ready(Ring) of - false -> - Reply = {error, ring_not_ready}, {reply, Reply, State}; - true -> - {Reply, State2} = generate_plan(Ring, State), - {reply, Reply, State2} + false -> + Reply = {error, ring_not_ready}, + {reply, Reply, State}; + true -> + {Reply, State2} = generate_plan(Ring, State), + {reply, Reply, State2} end; handle_call(commit, _From, State) -> {Reply, State2} = commit_staged(State), {reply, Reply, State2}; handle_call(_Request, _From, State) -> - Reply = ok, {reply, Reply, State}. + Reply = ok, + {reply, Reply, State}. handle_cast(_Msg, State) -> {noreply, State}. handle_info(tick, State) -> - State2 = tick(State), {noreply, State2}; + State2 = tick(State), + {noreply, State2}; handle_info(reset_ring_id, State) -> State2 = State#state{last_ring_id = undefined}, {noreply, State2}; @@ -225,24 +248,24 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% @doc Verify that a cluster change request is valid and add it to %% the list of staged changes. maybe_stage(Node, Action, Ring, - State = #state{changes = Changes}) -> + State = #state{changes = Changes}) -> case valid_request(Node, Action, Changes, Ring) of - true -> - Changes2 = orddict:store(Node, Action, Changes), - Changes3 = filter_changes(Changes2, Ring), - State2 = State#state{changes = Changes3}, - {ok, State2}; - Error -> {Error, State} + true -> + Changes2 = orddict:store(Node, Action, Changes), + Changes3 = filter_changes(Changes2, Ring), + State2 = State#state{changes = Changes3}, + {ok, State2}; + Error -> {Error, State} end. %% @private %% @doc Determine how the staged set of cluster changes will affect %% the cluster. See {@link plan/0} for additional details. generate_plan(Ring, - State = #state{changes = Changes}) -> + State = #state{changes = Changes}) -> Changes2 = filter_changes(Changes, Ring), Joining = [{Node, join} - || Node <- riak_core_ring:members(Ring, [joining])], + || Node <- riak_core_ring:members(Ring, [joining])], AllChanges = lists:ukeysort(1, Changes2 ++ Joining), State2 = State#state{changes = Changes2}, generate_plan(AllChanges, Ring, State2). @@ -251,15 +274,15 @@ generate_plan([], _, State) -> %% There are no changes to apply {{ok, [], []}, State}; generate_plan(Changes, Ring, - State = #state{seed = Seed}) -> + State = #state{seed = Seed}) -> case compute_all_next_rings(Changes, Seed, Ring) of - {error, invalid_resize_claim} -> - {{error, invalid_resize_claim}, State}; - {ok, NextRings} -> - {_, NextRing} = hd(NextRings), - State2 = State#state{next_ring = NextRing}, - Reply = {ok, Changes, NextRings}, - {Reply, State2} + {error, invalid_resize_claim} -> + {{error, invalid_resize_claim}, State}; + {ok, NextRings} -> + {_, NextRing} = hd(NextRings), + State2 = State#state{next_ring = NextRing}, + Reply = {ok, Changes, NextRings}, + {Reply, State2} end. 
%% @private @@ -269,45 +292,46 @@ commit_staged(State = #state{next_ring = undefined}) -> {{error, nothing_planned}, State}; commit_staged(State) -> case maybe_commit_staged(State) of - {ok, _} -> - State2 = State#state{next_ring = undefined, - changes = [], seed = riak_core_rand:rand_seed()}, - {ok, State2}; - not_changed -> {error, State}; - {not_changed, Reason} -> {{error, Reason}, State} + {ok, _} -> + State2 = State#state{next_ring = undefined, + changes = [], + seed = riak_core_rand:rand_seed()}, + {ok, State2}; + not_changed -> {error, State}; + {not_changed, Reason} -> {{error, Reason}, State} end. %% @private maybe_commit_staged(State) -> riak_core_ring_manager:ring_trans(fun maybe_commit_staged/2, - State). + State). %% @private maybe_commit_staged(Ring, - State = #state{changes = Changes, seed = Seed}) -> + State = #state{changes = Changes, seed = Seed}) -> Changes2 = filter_changes(Changes, Ring), case compute_next_ring(Changes2, Seed, Ring) of - {error, invalid_resize_claim} -> - {ignore, invalid_resize_claim}; - {ok, NextRing} -> - maybe_commit_staged(Ring, NextRing, State) + {error, invalid_resize_claim} -> + {ignore, invalid_resize_claim}; + {ok, NextRing} -> + maybe_commit_staged(Ring, NextRing, State) end. %% @private maybe_commit_staged(Ring, NextRing, - #state{next_ring = PlannedRing}) -> + #state{next_ring = PlannedRing}) -> Claimant = riak_core_ring:claimant(Ring), IsReady = riak_core_ring:ring_ready(Ring), IsClaimant = Claimant == node(), IsSamePlan = same_plan(PlannedRing, NextRing), case {IsReady, IsClaimant, IsSamePlan} of - {false, _, _} -> {ignore, ring_not_ready}; - {_, false, _} -> ignore; - {_, _, false} -> {ignore, plan_changed}; - _ -> - NewRing = riak_core_ring:increment_vclock(Claimant, - NextRing), - {new_ring, NewRing} + {false, _, _} -> {ignore, ring_not_ready}; + {_, false, _} -> ignore; + {_, _, false} -> {ignore, plan_changed}; + _ -> + NewRing = riak_core_ring:increment_vclock(Claimant, + NextRing), + {new_ring, NewRing} end. %% @private @@ -319,12 +343,12 @@ maybe_commit_staged(Ring, NextRing, clear_staged(State) -> remove_joining_nodes(), State#state{changes = [], - seed = riak_core_rand:rand_seed()}. + seed = riak_core_rand:rand_seed()}. %% @private remove_joining_nodes() -> riak_core_ring_manager:ring_trans(fun remove_joining_nodes/2, - ok). + ok). %% @private remove_joining_nodes(Ring, _) -> @@ -333,125 +357,134 @@ remove_joining_nodes(Ring, _) -> Joining = riak_core_ring:members(Ring, [joining]), AreJoining = Joining /= [], case IsClaimant and AreJoining of - false -> ignore; - true -> - NewRing = remove_joining_nodes_from_ring(Claimant, - Joining, Ring), - {new_ring, NewRing} + false -> ignore; + true -> + NewRing = remove_joining_nodes_from_ring(Claimant, + Joining, + Ring), + {new_ring, NewRing} end. %% @private remove_joining_nodes_from_ring(Claimant, Joining, - Ring) -> + Ring) -> NewRing = lists:foldl(fun (Node, RingAcc) -> - riak_core_ring:set_member(Claimant, RingAcc, - Node, invalid, - same_vclock) - end, - Ring, Joining), + riak_core_ring:set_member(Claimant, + RingAcc, + Node, + invalid, + same_vclock) + end, + Ring, + Joining), NewRing2 = riak_core_ring:increment_vclock(Claimant, - NewRing), + NewRing), NewRing2. 
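%% Both maybe_commit_staged/2 and remove_joining_nodes/2 above follow
%% the riak_core_ring_manager:ring_trans/2 callback contract: return
%% {new_ring, NewRing} to install a ring, ignore to leave it
%% untouched, or {ignore, Reason} to surface why nothing happened. A
%% minimal callback of the same shape (illustrative only, not part of
%% this change):
1> Noop = fun (Ring, _Arg) ->
              case riak_core_ring:ring_ready(Ring) of
                  true -> ignore;
                  false -> {ignore, ring_not_ready}
              end
          end.
2> riak_core_ring_manager:ring_trans(Noop, ok).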
%% @private valid_request(Node, Action, Changes, Ring) -> case Action of - leave -> valid_leave_request(Node, Ring); - remove -> valid_remove_request(Node, Ring); - {replace, NewNode} -> - valid_replace_request(Node, NewNode, Changes, Ring); - {force_replace, NewNode} -> - valid_force_replace_request(Node, NewNode, Changes, - Ring); - {resize, NewRingSize} -> - valid_resize_request(NewRingSize, Changes, Ring); - abort_resize -> valid_resize_abort_request(Ring) + leave -> valid_leave_request(Node, Ring); + remove -> valid_remove_request(Node, Ring); + {replace, NewNode} -> + valid_replace_request(Node, NewNode, Changes, Ring); + {force_replace, NewNode} -> + valid_force_replace_request(Node, + NewNode, + Changes, + Ring); + {resize, NewRingSize} -> + valid_resize_request(NewRingSize, Changes, Ring); + abort_resize -> valid_resize_abort_request(Ring) end. %% @private valid_leave_request(Node, Ring) -> case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - {_, valid} -> true; - {_, joining} -> true; - {_, _} -> {error, already_leaving} + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + {_, valid} -> true; + {_, joining} -> true; + {_, _} -> {error, already_leaving} end. %% @private valid_remove_request(Node, Ring) -> IsClaimant = Node == riak_core_ring:claimant(Ring), - case {IsClaimant, riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {true, _, _} -> {error, is_claimant}; - {_, _, invalid} -> {error, not_member}; - {_, [Node], _} -> {error, only_member}; - _ -> true + case {IsClaimant, + riak_core_ring:all_members(Ring), + riak_core_ring:member_status(Ring, Node)} + of + {true, _, _} -> {error, is_claimant}; + {_, _, invalid} -> {error, not_member}; + {_, [Node], _} -> {error, only_member}; + _ -> true end. %% @private valid_replace_request(Node, NewNode, Changes, Ring) -> AlreadyReplacement = lists:member(NewNode, - existing_replacements(Changes)), + existing_replacements(Changes)), NewJoining = (riak_core_ring:member_status(Ring, - NewNode) - == joining) - and not orddict:is_key(NewNode, Changes), + NewNode) + == joining) + and not orddict:is_key(NewNode, Changes), case {riak_core_ring:member_status(Ring, Node), - AlreadyReplacement, NewJoining} - of - {invalid, _, _} -> {error, not_member}; - {leaving, _, _} -> {error, already_leaving}; - {_, true, _} -> {error, already_replacement}; - {_, _, false} -> {error, invalid_replacement}; - _ -> true + AlreadyReplacement, + NewJoining} + of + {invalid, _, _} -> {error, not_member}; + {leaving, _, _} -> {error, already_leaving}; + {_, true, _} -> {error, already_replacement}; + {_, _, false} -> {error, invalid_replacement}; + _ -> true end. 
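%% In short, valid_replace_request/4 stages a replacement only when
%% the departing node is a member that is not already leaving and the
%% incoming node has joined but is not yet claimed by another staged
%% replacement; e.g. (with hypothetical nodes) staging
%% replace('old@host', 'new@host') before 'new@host' has joined is
%% rejected as {error, invalid_replacement}.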
%% @private valid_force_replace_request(Node, NewNode, Changes, - Ring) -> + Ring) -> IsClaimant = Node == riak_core_ring:claimant(Ring), AlreadyReplacement = lists:member(NewNode, - existing_replacements(Changes)), + existing_replacements(Changes)), NewJoining = (riak_core_ring:member_status(Ring, - NewNode) - == joining) - and not orddict:is_key(NewNode, Changes), + NewNode) + == joining) + and not orddict:is_key(NewNode, Changes), case {IsClaimant, - riak_core_ring:member_status(Ring, Node), - AlreadyReplacement, NewJoining} - of - {true, _, _, _} -> {error, is_claimant}; - {_, invalid, _, _} -> {error, not_member}; - {_, _, true, _} -> {error, already_replacement}; - {_, _, _, false} -> {error, invalid_replacement}; - _ -> true + riak_core_ring:member_status(Ring, Node), + AlreadyReplacement, + NewJoining} + of + {true, _, _, _} -> {error, is_claimant}; + {_, invalid, _, _} -> {error, not_member}; + {_, _, true, _} -> {error, already_replacement}; + {_, _, _, false} -> {error, invalid_replacement}; + _ -> true end. %% @private %% restrictions preventing resize along with other operations are temporary valid_resize_request(NewRingSize, [], Ring) -> IsResizing = riak_core_ring:num_partitions(Ring) =/= - NewRingSize, + NewRingSize, NodeCount = length(riak_core_ring:all_members(Ring)), Changes = length(riak_core_ring:pending_changes(Ring)) > - 0, + 0, case {IsResizing, NodeCount, Changes} of - {true, N, false} when N > 1 -> true; - {false, _, _} -> {error, same_size}; - {_, 1, _} -> {error, single_node}; - {_, _, true} -> {error, pending_changes} + {true, N, false} when N > 1 -> true; + {false, _, _} -> {error, same_size}; + {_, 1, _} -> {error, single_node}; + {_, _, true} -> {error, pending_changes} end. valid_resize_abort_request(Ring) -> IsResizing = riak_core_ring:is_resizing(Ring), IsPostResize = riak_core_ring:is_post_resize(Ring), case IsResizing andalso not IsPostResize of - true -> true; - false -> {error, not_resizing} + true -> true; + false -> {error, not_resizing} end. %% @private @@ -460,56 +493,59 @@ valid_resize_abort_request(Ring) -> %% changes that bypass the staging system. filter_changes(Changes, Ring) -> orddict:filter(fun (Node, Change) -> - filter_changes_pred(Node, Change, Changes, Ring) - end, - Changes). + filter_changes_pred(Node, Change, Changes, Ring) + end, + Changes). %% @private filter_changes_pred(Node, {Change, NewNode}, Changes, - Ring) + Ring) when (Change == replace) or (Change == force_replace) -> IsMember = riak_core_ring:member_status(Ring, Node) /= - invalid, + invalid, IsJoining = riak_core_ring:member_status(Ring, NewNode) - == joining, + == joining, NotChanging = not orddict:is_key(NewNode, Changes), IsMember and IsJoining and NotChanging; filter_changes_pred(Node, _, _, Ring) -> IsMember = riak_core_ring:member_status(Ring, Node) /= - invalid, + invalid, IsMember. %% @private existing_replacements(Changes) -> [Node || {_, {Change, Node}} <- Changes, - (Change == replace) or (Change == force_replace)]. + (Change == replace) or (Change == force_replace)]. %% @private %% Determine if two rings have logically equal cluster state same_plan(RingA, RingB) -> riak_core_ring:all_member_status(RingA) == - riak_core_ring:all_member_status(RingB) - andalso - riak_core_ring:all_owners(RingA) == - riak_core_ring:all_owners(RingB) - andalso - riak_core_ring:pending_changes(RingA) == - riak_core_ring:pending_changes(RingB). 
+ riak_core_ring:all_member_status(RingB) + andalso + riak_core_ring:all_owners(RingA) == + riak_core_ring:all_owners(RingB) + andalso + riak_core_ring:pending_changes(RingA) == + riak_core_ring:pending_changes(RingB). schedule_tick() -> - Tick = application:get_env(riak_core, claimant_tick, - 10000), + Tick = application:get_env(riak_core, + claimant_tick, + 10000), erlang:send_after(Tick, ?MODULE, tick). tick(State = #state{last_ring_id = LastID}) -> case riak_core_ring_manager:get_ring_id() of - LastID -> schedule_tick(), State; - RingID -> - {ok, Ring} = riak_core_ring_manager:get_raw_ring(), - maybe_force_ring_update(Ring), - schedule_tick(), - State#state{last_ring_id = RingID} + LastID -> + schedule_tick(), + State; + RingID -> + {ok, Ring} = riak_core_ring_manager:get_raw_ring(), + maybe_force_ring_update(Ring), + schedule_tick(), + State#state{last_ring_id = RingID} end. maybe_force_ring_update(Ring) -> @@ -518,24 +554,25 @@ maybe_force_ring_update(Ring) -> %% Do not force if we have any joining nodes unless any of them are %% auto-joining nodes. Otherwise, we will force update continuously. JoinBlock = are_joining_nodes(Ring) andalso - auto_joining_nodes(Ring) == [], + auto_joining_nodes(Ring) == [], case IsClaimant and IsReady and not JoinBlock of - true -> do_maybe_force_ring_update(Ring); - false -> ok + true -> do_maybe_force_ring_update(Ring); + false -> ok end. do_maybe_force_ring_update(Ring) -> - case compute_next_ring([], riak_core_rand:rand_seed(), - Ring) - of - {ok, NextRing} -> - case same_plan(Ring, NextRing) of - false -> - logger:warning("Forcing update of stalled ring"), - riak_core_ring_manager:force_update(); - true -> ok - end; - _ -> ok + case compute_next_ring([], + riak_core_rand:rand_seed(), + Ring) + of + {ok, NextRing} -> + case same_plan(Ring, NextRing) of + false -> + logger:warning("Forcing update of stalled ring"), + riak_core_ring_manager:force_update(); + true -> ok + end; + _ -> ok end. %% ========================================================================= @@ -549,29 +586,31 @@ compute_all_next_rings(Changes, Seed, Ring) -> %% @private compute_all_next_rings(Changes, Seed, Ring, Acc) -> case compute_next_ring(Changes, Seed, Ring) of - {error, invalid_resize_claim} = Err -> Err; - {ok, NextRing} -> - Acc2 = [{Ring, NextRing} | Acc], - case not same_plan(Ring, NextRing) of - true -> - FutureRing = riak_core_ring:future_ring(NextRing), - compute_all_next_rings([], Seed, FutureRing, Acc2); - false -> {ok, lists:reverse(Acc2)} - end + {error, invalid_resize_claim} = Err -> Err; + {ok, NextRing} -> + Acc2 = [{Ring, NextRing} | Acc], + case not same_plan(Ring, NextRing) of + true -> + FutureRing = riak_core_ring:future_ring(NextRing), + compute_all_next_rings([], Seed, FutureRing, Acc2); + false -> {ok, lists:reverse(Acc2)} + end end. %% @private compute_next_ring(Changes, Seed, Ring) -> Replacing = [{Node, NewNode} - || {Node, {replace, NewNode}} <- Changes], + || {Node, {replace, NewNode}} <- Changes], Ring2 = apply_changes(Ring, Changes), {_, Ring3} = maybe_handle_joining(node(), Ring2), - {_, Ring4} = do_claimant_quiet(node(), Ring3, Replacing, - Seed), + {_, Ring4} = do_claimant_quiet(node(), + Ring3, + Replacing, + Seed), {Valid, Ring5} = maybe_compute_resize(Ring, Ring4), case Valid of - false -> {error, invalid_resize_claim}; - true -> {ok, Ring5} + false -> {error, invalid_resize_claim}; + true -> {ok, Ring5} end. 
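%% compute_next_ring/3 is thus the single funnel both plan and commit
%% go through: apply the staged changes, absorb joining members, run
%% the claimant logic quietly, and, when the partition count changed,
%% compute and validate the resize schedule. Only a resized ring whose
%% claim left no partition on the dummy resized owner comes back as
%% {ok, Ring5}; anything else surfaces as {error, invalid_resize_claim}.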
%% @private
@@ -579,9 +618,9 @@ maybe_compute_resize(Orig, MbResized) ->
     OrigSize = riak_core_ring:num_partitions(Orig),
     NewSize = riak_core_ring:num_partitions(MbResized),
     case OrigSize =/= NewSize of
-      false -> {true, MbResized};
-      true ->
-	  validate_resized_ring(compute_resize(Orig, MbResized))
+        false -> {true, MbResized};
+        true ->
+            validate_resized_ring(compute_resize(Orig, MbResized))
     end.
 
 %% @private
@@ -595,29 +634,32 @@ compute_resize(Orig, Resized) ->
     %% need to operate on balanced, future ring (apply changes determined by claim)
     CState0 = riak_core_ring:future_ring(Resized),
     Type = case riak_core_ring:num_partitions(Orig) <
-		  riak_core_ring:num_partitions(Resized)
-	     of
-	     true -> larger;
-	     false -> smaller
-	   end,
+                riak_core_ring:num_partitions(Resized)
+               of
+               true -> larger;
+               false -> smaller
+           end,
     %% Each index in the original ring must perform several transfers
     %% to properly resize the ring. The first transfer for each index
     %% is scheduled here. Subsequent transfers are scheduled by vnode
     CState1 = lists:foldl(fun ({Idx, _} = IdxOwner,
-			       CStateAcc) ->
-				  %% indexes being abandoned in a shrinking ring have
-				  %% no next owner
-				  NextOwner = try
-						riak_core_ring:index_owner(CStateAcc,
-									   Idx)
-					      catch
-						error:{badmatch, false} -> none
-					      end,
-				  schedule_first_resize_transfer(Type, IdxOwner,
-								 NextOwner,
-								 CStateAcc)
-			  end,
-			  CState0, riak_core_ring:all_owners(Orig)),
+                               CStateAcc) ->
+                                  %% indexes being abandoned in a shrinking ring have
+                                  %% no next owner
+                                  NextOwner = try
+                                                  riak_core_ring:index_owner(CStateAcc,
+                                                                             Idx)
+                                              catch
+                                                  error:{badmatch, false} ->
+                                                      none
+                                              end,
+                                  schedule_first_resize_transfer(Type,
+                                                                 IdxOwner,
+                                                                 NextOwner,
+                                                                 CStateAcc)
+                          end,
+                          CState0,
+                          riak_core_ring:all_owners(Orig)),
     riak_core_ring:set_pending_resize(CState1, Orig).
 
 %% @private
@@ -625,27 +667,30 @@ compute_resize(Orig, Resized) ->
 %% the goal of ensuring the transfer will actually have data to send to the
 %% target.
 schedule_first_resize_transfer(smaller,
-			       {Idx, _} = IdxOwner, none, Resized) ->
+                               {Idx, _} = IdxOwner, none, Resized) ->
     %% partition no longer exists in shrunk ring, first successor will be
     %% new owner of its data
     Target = hd(riak_core_ring:preflist(<<Idx:160/integer>>,
-					Resized)),
+                                        Resized)),
     riak_core_ring:schedule_resize_transfer(Resized,
-					    IdxOwner, Target);
+                                            IdxOwner,
+                                            Target);
 schedule_first_resize_transfer(_Type,
-			       {Idx, Owner} = IdxOwner, Owner, Resized) ->
+                               {Idx, Owner} = IdxOwner, Owner, Resized) ->
     %% partition is not being moved during expansion, first predecessor will
     %% own at least a portion of its data
     Target = hd(chash:predecessors(Idx - 1,
-				   riak_core_ring:chash(Resized))),
+                                   riak_core_ring:chash(Resized))),
     riak_core_ring:schedule_resize_transfer(Resized,
-					    IdxOwner, Target);
+                                            IdxOwner,
+                                            Target);
 schedule_first_resize_transfer(_,
-			       {Idx, _Owner} = IdxOwner, NextOwner, Resized) ->
+                               {Idx, _Owner} = IdxOwner, NextOwner, Resized) ->
     %% partition is being moved during expansion, schedule transfer to partition
     %% on new owner since it will still own some of its data
     riak_core_ring:schedule_resize_transfer(Resized,
-					    IdxOwner, {Idx, NextOwner}).
+                                            IdxOwner,
+                                            {Idx, NextOwner}).
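For orientation: in the smaller-ring clause above, an abandoned partition's data goes to its first successor, found by hashing the index back into the resized ring. A standalone sketch of that lookup, reusing the names Idx and Resized from the hunk (values hypothetical):

    %% Sketch: chash addresses partitions by 160-bit binary keys, so the
    %% integer index is re-encoded before the preflist lookup; hd/1 then
    %% takes the first (primary) preflist entry as the new owner.
    FirstSuccessor = fun (Idx, Resized) ->
                             hd(riak_core_ring:preflist(<<Idx:160/integer>>,
                                                        Resized))
                     end.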
%% @doc verify that resized ring was properly claimed (no owners are the dummy %% resized owner) in both the current and future ring @@ -656,23 +701,24 @@ validate_resized_ring(Ring) -> Members = riak_core_ring:all_members(Ring), FutureMembers = riak_core_ring:all_members(FutureRing), Invalid1 = [{Idx, Owner} - || {Idx, Owner} <- Owners, - not lists:member(Owner, Members)], + || {Idx, Owner} <- Owners, + not lists:member(Owner, Members)], Invalid2 = [{Idx, Owner} - || {Idx, Owner} <- FutureOwners, - not lists:member(Owner, FutureMembers)], + || {Idx, Owner} <- FutureOwners, + not lists:member(Owner, FutureMembers)], case Invalid1 ++ Invalid2 of - [] -> {true, Ring}; - _ -> {false, Ring} + [] -> {true, Ring}; + _ -> {false, Ring} end. %% @private apply_changes(Ring, Changes) -> NewRing = lists:foldl(fun ({Node, Cmd}, RingAcc2) -> - RingAcc3 = change({Cmd, Node}, RingAcc2), - RingAcc3 - end, - Ring, Changes), + RingAcc3 = change({Cmd, Node}, RingAcc2), + RingAcc3 + end, + Ring, + Changes), NewRing. %% @private @@ -682,13 +728,13 @@ change({join, Node}, Ring) -> change({leave, Node}, Ring) -> Members = riak_core_ring:all_members(Ring), lists:member(Node, Members) orelse - throw(invalid_member), + throw(invalid_member), Ring2 = riak_core_ring:leave_member(Node, Ring, Node), Ring2; change({remove, Node}, Ring) -> Members = riak_core_ring:all_members(Ring), lists:member(Node, Members) orelse - throw(invalid_member), + throw(invalid_member), Ring2 = riak_core_ring:remove_member(Node, Ring, Node), Ring2; change({{replace, _NewNode}, Node}, Ring) -> @@ -698,14 +744,15 @@ change({{replace, _NewNode}, Node}, Ring) -> change({{force_replace, NewNode}, Node}, Ring) -> Indices = riak_core_ring:indices(Ring, Node), Reassign = [{Idx, NewNode} || Idx <- Indices], - Ring2 = riak_core_ring:add_member(NewNode, Ring, - NewNode), + Ring2 = riak_core_ring:add_member(NewNode, + Ring, + NewNode), Ring3 = riak_core_ring:change_owners(Ring2, Reassign), Ring4 = riak_core_ring:remove_member(Node, Ring3, Node), case riak_core_ring:is_resizing(Ring4) of - true -> - replace_node_during_resize(Ring4, Node, NewNode); - false -> Ring4 + true -> + replace_node_during_resize(Ring4, Node, NewNode); + false -> Ring4 end; change({{resize, NewRingSize}, _Node}, Ring) -> riak_core_ring:resize(Ring, NewRingSize); @@ -714,8 +761,9 @@ change({abort_resize, _Node}, Ring) -> %%noinspection ErlangUnboundVariable internal_ring_changed(Node, CState) -> - {Changed, CState5} = do_claimant(Node, CState, - fun log/2), + {Changed, CState5} = do_claimant(Node, + CState, + fun log/2), inform_removed_nodes(Node, CState, CState5), %% Start/stop converge and rebalance delay timers %% (converge delay) @@ -727,158 +775,173 @@ internal_ring_changed(Node, CState) -> %% IsClaimant = riak_core_ring:claimant(CState5) =:= Node, WasPending = [] /= - riak_core_ring:pending_changes(CState), + riak_core_ring:pending_changes(CState), IsPending = [] /= - riak_core_ring:pending_changes(CState5), + riak_core_ring:pending_changes(CState5), %% Outer case statement already checks for ring_ready case {IsClaimant, Changed} of - {true, true} -> - %% STATS - %% riak_core_stat:update(converge_timer_end), - %% STATS - %% riak_core_stat:update(converge_timer_begin); - ok; - {true, false} -> - %% STATS - %% riak_core_stat:update(converge_timer_end); - ok; - _ -> ok + {true, true} -> + %% STATS + %% riak_core_stat:update(converge_timer_end), + %% STATS + %% riak_core_stat:update(converge_timer_begin); + ok; + {true, false} -> + %% STATS + %% 
riak_core_stat:update(converge_timer_end); + ok; + _ -> ok end, case {IsClaimant, WasPending, IsPending} of - {true, false, true} -> - %% STATS - %% riak_core_stat:update(rebalance_timer_begin); - ok; - {true, true, false} -> - %% STATS - %% riak_core_stat:update(rebalance_timer_end); - ok; - _ -> ok + {true, false, true} -> + %% STATS + %% riak_core_stat:update(rebalance_timer_begin); + ok; + {true, true, false} -> + %% STATS + %% riak_core_stat:update(rebalance_timer_end); + ok; + _ -> ok end, %% Set cluster name if it is undefined case {IsClaimant, riak_core_ring:cluster_name(CState5)} - of - {true, undefined} -> - ClusterName = {Node, riak_core_rand:rand_seed()}, - {_, _} = - riak_core_util:rpc_every_member(riak_core_ring_manager, - set_cluster_name, [ClusterName], - 1000), - ok; - _ -> - ClusterName = riak_core_ring:cluster_name(CState5), ok + of + {true, undefined} -> + ClusterName = {Node, riak_core_rand:rand_seed()}, + {_, _} = + riak_core_util:rpc_every_member(riak_core_ring_manager, + set_cluster_name, + [ClusterName], + 1000), + ok; + _ -> + ClusterName = riak_core_ring:cluster_name(CState5), + ok end, case Changed of - true -> - CState6 = riak_core_ring:set_cluster_name(CState5, - ClusterName), - riak_core_ring:increment_vclock(Node, CState6); - false -> CState5 + true -> + CState6 = riak_core_ring:set_cluster_name(CState5, + ClusterName), + riak_core_ring:increment_vclock(Node, CState6); + false -> CState5 end. inform_removed_nodes(Node, OldRing, NewRing) -> CName = riak_core_ring:cluster_name(NewRing), Exiting = riak_core_ring:members(OldRing, [exiting]) -- - [Node], + [Node], Invalid = riak_core_ring:members(NewRing, [invalid]), Changed = - ordsets:intersection(ordsets:from_list(Exiting), - ordsets:from_list(Invalid)), + ordsets:intersection(ordsets:from_list(Exiting), + ordsets:from_list(Invalid)), %% Tell exiting node to shutdown. _ = [riak_core_ring_manager:refresh_ring(ExitingNode, - CName) - || ExitingNode <- Changed], + CName) + || ExitingNode <- Changed], ok. do_claimant_quiet(Node, CState, Replacing, Seed) -> - do_claimant(Node, CState, Replacing, Seed, - fun no_log/2). + do_claimant(Node, + CState, + Replacing, + Seed, + fun no_log/2). do_claimant(Node, CState, Log) -> - do_claimant(Node, CState, [], - riak_core_rand:rand_seed(), Log). + do_claimant(Node, + CState, + [], + riak_core_rand:rand_seed(), + Log). do_claimant(Node, CState, Replacing, Seed, Log) -> AreJoining = are_joining_nodes(CState), {C1, CState2} = maybe_update_claimant(Node, CState), {C2, CState3} = maybe_handle_auto_joining(Node, - CState2), + CState2), case AreJoining of - true -> - %% Do not rebalance if there are joining nodes - Changed = C1 or C2, - CState5 = CState3; - false -> - {C3, CState4} = maybe_update_ring(Node, CState3, - Replacing, Seed, Log), - {C4, CState5} = maybe_remove_exiting(Node, CState4), - Changed = C1 or C2 or C3 or C4 + true -> + %% Do not rebalance if there are joining nodes + Changed = C1 or C2, + CState5 = CState3; + false -> + {C3, CState4} = maybe_update_ring(Node, + CState3, + Replacing, + Seed, + Log), + {C4, CState5} = maybe_remove_exiting(Node, CState4), + Changed = C1 or C2 or C3 or C4 end, {Changed, CState5}. 
%% @private maybe_update_claimant(Node, CState) -> Members = riak_core_ring:members(CState, - [valid, leaving]), + [valid, leaving]), Claimant = riak_core_ring:claimant(CState), NextClaimant = hd(Members ++ [undefined]), ClaimantMissing = not lists:member(Claimant, Members), case {ClaimantMissing, NextClaimant} of - {true, Node} -> - %% Become claimant - CState2 = riak_core_ring:set_claimant(CState, Node), - CState3 = - riak_core_ring:increment_ring_version(Claimant, - CState2), - {true, CState3}; - _ -> {false, CState} + {true, Node} -> + %% Become claimant + CState2 = riak_core_ring:set_claimant(CState, Node), + CState3 = + riak_core_ring:increment_ring_version(Claimant, + CState2), + {true, CState3}; + _ -> {false, CState} end. %% @private maybe_update_ring(Node, CState, Replacing, Seed, Log) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - case riak_core_ring:claiming_members(CState) of - [] -> - %% Consider logging an error/warning here or even - %% intentionally crashing. This state makes no logical - %% sense given that it represents a cluster without any - %% active nodes. - {false, CState}; - _ -> - Resizing = riak_core_ring:is_resizing(CState), - {Changed, CState2} = update_ring(Node, CState, - Replacing, Seed, Log, - Resizing), - {Changed, CState2} - end; - _ -> {false, CState} + Node -> + case riak_core_ring:claiming_members(CState) of + [] -> + %% Consider logging an error/warning here or even + %% intentionally crashing. This state makes no logical + %% sense given that it represents a cluster without any + %% active nodes. + {false, CState}; + _ -> + Resizing = riak_core_ring:is_resizing(CState), + {Changed, CState2} = update_ring(Node, + CState, + Replacing, + Seed, + Log, + Resizing), + {Changed, CState2} + end; + _ -> {false, CState} end. %% @private maybe_remove_exiting(Node, CState) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - %% Change exiting nodes to invalid, skipping this node. - Exiting = riak_core_ring:members(CState, [exiting]) -- - [Node], - Changed = Exiting /= [], - CState2 = lists:foldl(fun (ENode, CState0) -> - ClearedCS = - riak_core_ring:clear_member_meta(Node, - CState0, - ENode), - riak_core_ring:set_member(Node, - ClearedCS, - ENode, - invalid, - same_vclock) - end, - CState, Exiting), - {Changed, CState2}; - _ -> {false, CState} + Node -> + %% Change exiting nodes to invalid, skipping this node. + Exiting = riak_core_ring:members(CState, [exiting]) -- + [Node], + Changed = Exiting /= [], + CState2 = lists:foldl(fun (ENode, CState0) -> + ClearedCS = + riak_core_ring:clear_member_meta(Node, + CState0, + ENode), + riak_core_ring:set_member(Node, + ClearedCS, + ENode, + invalid, + same_vclock) + end, + CState, + Exiting), + {Changed, CState2}; + _ -> {false, CState} end. %% @private @@ -892,10 +955,11 @@ auto_joining_nodes(CState) -> %% case application:get_env(riak_core, staged_joins, true) of false -> Joining; true -> [Member || Member <- Joining, - riak_core_ring:get_member_meta(CState, Member, - '$autojoin') - == - true].%% end. + riak_core_ring:get_member_meta(CState, + Member, + '$autojoin') + == + true].%% end. 
%% @private maybe_handle_auto_joining(Node, CState) -> @@ -911,94 +975,102 @@ maybe_handle_joining(Node, CState) -> maybe_handle_joining(Node, Joining, CState) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - Changed = Joining /= [], - CState2 = lists:foldl(fun (JNode, CState0) -> - riak_core_ring:set_member(Node, CState0, - JNode, valid, - same_vclock) - end, - CState, Joining), - {Changed, CState2}; - _ -> {false, CState} + Node -> + Changed = Joining /= [], + CState2 = lists:foldl(fun (JNode, CState0) -> + riak_core_ring:set_member(Node, + CState0, + JNode, + valid, + same_vclock) + end, + CState, + Joining), + {Changed, CState2}; + _ -> {false, CState} end. %% @private update_ring(CNode, CState, Replacing, Seed, Log, - false) -> + false) -> Next0 = riak_core_ring:pending_changes(CState), ?ROUT("Members: ~p~n", - [riak_core_ring:members(CState, - [joining, valid, leaving, exiting, - invalid])]), + [riak_core_ring:members(CState, + [joining, + valid, + leaving, + exiting, + invalid])]), ?ROUT("Updating ring :: next0 : ~p~n", [Next0]), %% Remove tuples from next for removed nodes InvalidMembers = riak_core_ring:members(CState, - [invalid]), + [invalid]), Next2 = lists:filter(fun (NInfo) -> - {Owner, NextOwner, _} = - riak_core_ring:next_owner(NInfo), - not lists:member(Owner, InvalidMembers) and - not lists:member(NextOwner, InvalidMembers) - end, - Next0), + {Owner, NextOwner, _} = + riak_core_ring:next_owner(NInfo), + not lists:member(Owner, InvalidMembers) and + not lists:member(NextOwner, InvalidMembers) + end, + Next0), CState2 = riak_core_ring:set_pending_changes(CState, - Next2), + Next2), %% Transfer ownership after completed handoff {RingChanged1, CState3} = transfer_ownership(CState2, - Log), + Log), ?ROUT("Updating ring :: next1 : ~p~n", - [riak_core_ring:pending_changes(CState3)]), + [riak_core_ring:pending_changes(CState3)]), %% Ressign leaving/inactive indices {RingChanged2, CState4} = reassign_indices(CState3, - Replacing, Seed, Log), + Replacing, + Seed, + Log), ?ROUT("Updating ring :: next2 : ~p~n", - [riak_core_ring:pending_changes(CState4)]), + [riak_core_ring:pending_changes(CState4)]), %% Rebalance the ring as necessary. 
If pending changes exist ring %% is not rebalanced Next3 = rebalance_ring(CNode, CState4), Log(debug, - {"Pending ownership transfers: ~b~n", - [length(riak_core_ring:pending_changes(CState4))]}), + {"Pending ownership transfers: ~b~n", + [length(riak_core_ring:pending_changes(CState4))]}), %% Remove transfers to/from down nodes Next4 = handle_down_nodes(CState4, Next3), NextChanged = Next0 /= Next4, Changed = NextChanged or RingChanged1 or RingChanged2, case Changed of - true -> - OldS = ordsets:from_list([{Idx, O, NO} - || {Idx, O, NO, _, _} <- Next0]), - NewS = ordsets:from_list([{Idx, O, NO} - || {Idx, O, NO, _, _} <- Next4]), - Diff = ordsets:subtract(NewS, OldS), - _ = [Log(next, NChange) || NChange <- Diff], - ?ROUT("Updating ring :: next3 : ~p~n", [Next4]), - CState5 = riak_core_ring:set_pending_changes(CState4, - Next4), - CState6 = riak_core_ring:increment_ring_version(CNode, - CState5), - {true, CState6}; - false -> {false, CState} + true -> + OldS = ordsets:from_list([{Idx, O, NO} + || {Idx, O, NO, _, _} <- Next0]), + NewS = ordsets:from_list([{Idx, O, NO} + || {Idx, O, NO, _, _} <- Next4]), + Diff = ordsets:subtract(NewS, OldS), + _ = [Log(next, NChange) || NChange <- Diff], + ?ROUT("Updating ring :: next3 : ~p~n", [Next4]), + CState5 = riak_core_ring:set_pending_changes(CState4, + Next4), + CState6 = riak_core_ring:increment_ring_version(CNode, + CState5), + {true, CState6}; + false -> {false, CState} end; update_ring(CNode, CState, _Replacing, _Seed, _Log, - true) -> + true) -> {Installed, CState1} = - maybe_install_resized_ring(CState), + maybe_install_resized_ring(CState), {Aborted, CState2} = - riak_core_ring:maybe_abort_resize(CState1), + riak_core_ring:maybe_abort_resize(CState1), Changed = Installed orelse Aborted, case Changed of - true -> - CState3 = riak_core_ring:increment_ring_version(CNode, - CState2), - {true, CState3}; - false -> {false, CState} + true -> + CState3 = riak_core_ring:increment_ring_version(CNode, + CState2), + {true, CState3}; + false -> {false, CState} end. maybe_install_resized_ring(CState) -> case riak_core_ring:is_resize_complete(CState) of - true -> {true, riak_core_ring:future_ring(CState)}; - false -> {false, CState} + true -> {true, riak_core_ring:future_ring(CState)}; + false -> {false, CState} end. 
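update_ring above decides what to log by projecting each pending 5-tuple down to {Idx, Owner, NextOwner} and diffing ordered sets, so only genuinely new transfers are reported. An isolated sketch with fabricated entries:

    %% Sketch: ordsets are sorted lists, so subtract/2 leaves exactly the
    %% transfers present in the new plan but absent from the old one.
    OldS = ordsets:from_list([{0, a, b}]),
    NewS = ordsets:from_list([{0, a, b}, {1, a, c}]),
    [{1, a, c}] = ordsets:subtract(NewS, OldS).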
%% @private @@ -1006,31 +1078,34 @@ transfer_ownership(CState, Log) -> Next = riak_core_ring:pending_changes(CState), %% Remove already completed and transfered changes Next2 = lists:filter(fun (NInfo = {Idx, _, _, _, _}) -> - {_, NewOwner, S} = - riak_core_ring:next_owner(NInfo), - not - ((S == complete) and - (riak_core_ring:index_owner(CState, Idx) - =:= NewOwner)) - end, - Next), + {_, NewOwner, S} = + riak_core_ring:next_owner(NInfo), + not + ((S == complete) and + (riak_core_ring:index_owner(CState, + Idx) + =:= NewOwner)) + end, + Next), CState2 = lists:foldl(fun (NInfo = {Idx, _, _, _, _}, - CState0) -> - case riak_core_ring:next_owner(NInfo) of - {_, Node, complete} -> - Log(ownership, {Idx, Node, CState0}), - riak_core_ring:transfer_node(Idx, Node, - CState0); - _ -> CState0 - end - end, - CState, Next2), + CState0) -> + case riak_core_ring:next_owner(NInfo) of + {_, Node, complete} -> + Log(ownership, {Idx, Node, CState0}), + riak_core_ring:transfer_node(Idx, + Node, + CState0); + _ -> CState0 + end + end, + CState, + Next2), NextChanged = Next2 /= Next, RingChanged = riak_core_ring:all_owners(CState) /= - riak_core_ring:all_owners(CState2), + riak_core_ring:all_owners(CState2), Changed = NextChanged or RingChanged, CState3 = riak_core_ring:set_pending_changes(CState2, - Next2), + Next2), {Changed, CState3}. %% @private @@ -1038,25 +1113,35 @@ reassign_indices(CState, Replacing, Seed, Log) -> Next = riak_core_ring:pending_changes(CState), Invalid = riak_core_ring:members(CState, [invalid]), CState2 = lists:foldl(fun (Node, CState0) -> - remove_node(CState0, Node, invalid, Replacing, - Seed, Log) - end, - CState, Invalid), + remove_node(CState0, + Node, + invalid, + Replacing, + Seed, + Log) + end, + CState, + Invalid), CState3 = case Next of - [] -> - Leaving = riak_core_ring:members(CState, [leaving]), - lists:foldl(fun (Node, CState0) -> - remove_node(CState0, Node, leaving, - Replacing, Seed, Log) - end, - CState2, Leaving); - _ -> CState2 - end, + [] -> + Leaving = riak_core_ring:members(CState, [leaving]), + lists:foldl(fun (Node, CState0) -> + remove_node(CState0, + Node, + leaving, + Replacing, + Seed, + Log) + end, + CState2, + Leaving); + _ -> CState2 + end, Owners1 = riak_core_ring:all_owners(CState), Owners2 = riak_core_ring:all_owners(CState3), RingChanged = Owners1 /= Owners2, NextChanged = Next /= - riak_core_ring:pending_changes(CState3), + riak_core_ring:pending_changes(CState3), {RingChanged or NextChanged, CState3}. %% @private @@ -1070,34 +1155,34 @@ rebalance_ring(_CNode, [], CState) -> Owners2 = riak_core_ring:all_owners(CState2), Owners3 = lists:zip(Owners1, Owners2), Next = [{Idx, PrevOwner, NewOwner, [], awaiting} - || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3, - PrevOwner /= NewOwner], + || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3, + PrevOwner /= NewOwner], Next; rebalance_ring(_CNode, Next, _CState) -> Next. 
%% @private handle_down_nodes(CState, Next) -> LeavingMembers = riak_core_ring:members(CState, - [leaving, invalid]), + [leaving, invalid]), DownMembers = riak_core_ring:members(CState, [down]), Next2 = [begin - OwnerLeaving = lists:member(O, LeavingMembers), - NextDown = lists:member(NO, DownMembers), - case OwnerLeaving and NextDown of - true -> - Active = riak_core_ring:active_members(CState) -- [O], - RNode = - lists:nth(riak_core_rand:uniform(length(Active)), - Active), - {Idx, O, RNode, Mods, Status}; - _ -> T - end - end - || T = {Idx, O, NO, Mods, Status} <- Next], + OwnerLeaving = lists:member(O, LeavingMembers), + NextDown = lists:member(NO, DownMembers), + case OwnerLeaving and NextDown of + true -> + Active = riak_core_ring:active_members(CState) -- [O], + RNode = + lists:nth(riak_core_rand:uniform(length(Active)), + Active), + {Idx, O, RNode, Mods, Status}; + _ -> T + end + end + || T = {Idx, O, NO, Mods, Status} <- Next], Next3 = [T - || T = {_, O, NO, _, _} <- Next2, - not lists:member(O, DownMembers), - not lists:member(NO, DownMembers)], + || T = {_, O, NO, _, _} <- Next2, + not lists:member(O, DownMembers), + not lists:member(NO, DownMembers)], Next3. %% @private @@ -1109,86 +1194,96 @@ reassign_indices_to(Node, NewNode, Ring) -> %% @private remove_node(CState, Node, Status, Replacing, Seed, - Log) -> + Log) -> Indices = riak_core_ring:indices(CState, Node), - remove_node(CState, Node, Status, Replacing, Seed, Log, - Indices). + remove_node(CState, + Node, + Status, + Replacing, + Seed, + Log, + Indices). %% @private remove_node(CState, _Node, _Status, _Replacing, _Seed, - _Log, []) -> + _Log, []) -> CState; remove_node(CState, Node, Status, Replacing, Seed, Log, - Indices) -> + Indices) -> CStateT1 = riak_core_ring:change_owners(CState, - riak_core_ring:all_next_owners(CState)), + riak_core_ring:all_next_owners(CState)), case orddict:find(Node, Replacing) of - {ok, NewNode} -> - CStateT2 = reassign_indices_to(Node, NewNode, CStateT1); - error -> - CStateT2 = - riak_core_gossip:remove_from_cluster(CStateT1, Node, - Seed) + {ok, NewNode} -> + CStateT2 = reassign_indices_to(Node, NewNode, CStateT1); + error -> + CStateT2 = + riak_core_gossip:remove_from_cluster(CStateT1, + Node, + Seed) end, Owners1 = riak_core_ring:all_owners(CState), Owners2 = riak_core_ring:all_owners(CStateT2), Owners3 = lists:zip(Owners1, Owners2), RemovedIndices = case Status of - invalid -> Indices; - leaving -> [] - end, + invalid -> Indices; + leaving -> [] + end, Reassign = [{Idx, NewOwner} - || {Idx, NewOwner} <- Owners2, - lists:member(Idx, RemovedIndices)], + || {Idx, NewOwner} <- Owners2, + lists:member(Idx, RemovedIndices)], Next = [{Idx, PrevOwner, NewOwner, [], awaiting} - || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3, - PrevOwner /= NewOwner, - not lists:member(Idx, RemovedIndices)], + || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3, + PrevOwner /= NewOwner, + not lists:member(Idx, RemovedIndices)], _ = [Log(reassign, {Idx, NewOwner, CState}) - || {Idx, NewOwner} <- Reassign], + || {Idx, NewOwner} <- Reassign], %% Unlike rebalance_ring, remove_node can be called when Next is non-empty, %% therefore we need to merge the values. Original Next has priority. Next2 = lists:ukeysort(1, - riak_core_ring:pending_changes(CState) ++ Next), + riak_core_ring:pending_changes(CState) ++ Next), CState2 = riak_core_ring:change_owners(CState, - Reassign), + Reassign), CState3 = riak_core_ring:set_pending_changes(CState2, - Next2), + Next2), CState3. 
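The merge at the end of remove_node relies on lists:ukeysort/2 keeping only the first tuple per key, which is why prepending the existing pending list gives it priority over freshly computed entries. A sketch with made-up changes:

    %% Sketch: the pre-existing {1, a} survives; the new {1, x} is dropped.
    Pending = [{1, a}, {2, b}],
    New = [{1, x}, {3, c}],
    [{1, a}, {2, b}, {3, c}] = lists:ukeysort(1, Pending ++ New).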
replace_node_during_resize(CState0, Node, NewNode) -> PostResize = riak_core_ring:is_post_resize(CState0), - CState1 = replace_node_during_resize(CState0, Node, - NewNode, PostResize), + CState1 = replace_node_during_resize(CState0, + Node, + NewNode, + PostResize), riak_core_ring:increment_ring_version(riak_core_ring:claimant(CState1), - CState1). + CState1). replace_node_during_resize(CState0, Node, NewNode, - false) -> %% ongoing xfers + false) -> %% ongoing xfers %% for each of the indices being moved from Node to NewNode, reschedule resize %% transfers where the target is owned by Node. CState1 = - riak_core_ring:reschedule_resize_transfers(CState0, - Node, NewNode), + riak_core_ring:reschedule_resize_transfers(CState0, + Node, + NewNode), %% since the resized chash is carried directly in state vs. being rebuilt via next %% list, perform reassignment {ok, FutureCHash} = - riak_core_ring:resized_ring(CState1), + riak_core_ring:resized_ring(CState1), FutureCState = riak_core_ring:set_chash(CState1, - FutureCHash), - ReassignedFuture = reassign_indices_to(Node, NewNode, - FutureCState), + FutureCHash), + ReassignedFuture = reassign_indices_to(Node, + NewNode, + FutureCState), ReassignedCHash = - riak_core_ring:chash(ReassignedFuture), + riak_core_ring:chash(ReassignedFuture), riak_core_ring:set_resized_ring(CState1, - ReassignedCHash); + ReassignedCHash); replace_node_during_resize(CState, Node, _NewNode, - true) -> %% performing cleanup + true) -> %% performing cleanup %% we are simply deleting data at this point, no reason to do that on either node NewNext = [{I, N, O, M, S} - || {I, N, O, M, S} - <- riak_core_ring:pending_changes(CState), - N =/= Node], + || {I, N, O, M, S} + <- riak_core_ring:pending_changes(CState), + N =/= Node], riak_core_ring:set_pending_changes(CState, NewNext). no_log(_, _) -> ok. @@ -1197,14 +1292,14 @@ log(debug, {Msg, Args}) -> logger:debug(Msg, Args); log(ownership, {Idx, NewOwner, CState}) -> Owner = riak_core_ring:index_owner(CState, Idx), logger:debug("(new-owner) ~b :: ~p -> ~p~n", - [Idx, Owner, NewOwner]); + [Idx, Owner, NewOwner]); log(reassign, {Idx, NewOwner, CState}) -> Owner = riak_core_ring:index_owner(CState, Idx), logger:debug("(reassign) ~b :: ~p -> ~p~n", - [Idx, Owner, NewOwner]); + [Idx, Owner, NewOwner]); log(next, {Idx, Owner, NewOwner}) -> logger:debug("(pending) ~b :: ~p -> ~p~n", - [Idx, Owner, NewOwner]); + [Idx, Owner, NewOwner]); log(_, _) -> ok. %% =================================================================== diff --git a/src/riak_core_eventhandler_guard.erl b/src/riak_core_eventhandler_guard.erl index b3e05ec2a..015ecab2b 100644 --- a/src/riak_core_eventhandler_guard.erl +++ b/src/riak_core_eventhandler_guard.erl @@ -25,8 +25,12 @@ -export([start_link/3, start_link/4]). --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, {handlermod, handler, exitfun}). @@ -35,14 +39,16 @@ start_link(HandlerMod, Handler, Args) -> start_link(HandlerMod, Handler, Args, ExitFun) -> gen_server:start_link(?MODULE, - [HandlerMod, Handler, Args, ExitFun], []). + [HandlerMod, Handler, Args, ExitFun], + []). init([HandlerMod, Handler, Args, ExitFun]) -> - ok = gen_event:add_sup_handler(HandlerMod, Handler, - Args), + ok = gen_event:add_sup_handler(HandlerMod, + Handler, + Args), {ok, #state{handlermod = HandlerMod, handler = Handler, - exitfun = ExitFun}}. + exitfun = ExitFun}}. 
handle_call(_Request, _From, State) -> {reply, ok, State}. @@ -50,16 +56,16 @@ handle_call(_Request, _From, State) -> handle_cast(_Msg, State) -> {noreply, State}. handle_info({gen_event_EXIT, _Handler, shutdown}, - State) -> + State) -> {stop, normal, State}; handle_info({gen_event_EXIT, _Handler, normal}, - State) -> + State) -> {stop, normal, State}; handle_info({gen_event_EXIT, Handler, _Reason}, - State = #state{exitfun = undefined}) -> + State = #state{exitfun = undefined}) -> {stop, {gen_event_EXIT, Handler}, State}; handle_info({gen_event_EXIT, Handler, Reason}, - State = #state{exitfun = ExitFun}) -> + State = #state{exitfun = ExitFun}) -> ExitFun(Handler, Reason), {stop, {gen_event_EXIT, Handler}, State}; handle_info(_Info, State) -> {noreply, State}. diff --git a/src/riak_core_eventhandler_sup.erl b/src/riak_core_eventhandler_sup.erl index a9bb53f3e..bb3dfb1c4 100644 --- a/src/riak_core_eventhandler_sup.erl +++ b/src/riak_core_eventhandler_sup.erl @@ -28,44 +28,52 @@ -export([start_link/0, init/1]). -export([start_guarded_handler/3, - start_guarded_handler/4, stop_guarded_handler/3]). + start_guarded_handler/4, + stop_guarded_handler/3]). start_guarded_handler(HandlerMod, Handler, Args) -> - start_guarded_handler(HandlerMod, Handler, Args, - undefined). + start_guarded_handler(HandlerMod, + Handler, + Args, + undefined). start_guarded_handler(HandlerMod, Handler, Args, - ExitFun) -> + ExitFun) -> case supervisor:start_child(?MODULE, - handler_spec(HandlerMod, Handler, Args, - ExitFun)) - of - {ok, _Pid} -> ok; - Other -> Other + handler_spec(HandlerMod, + Handler, + Args, + ExitFun)) + of + {ok, _Pid} -> ok; + Other -> Other end. stop_guarded_handler(HandlerMod, Handler, Args) -> case lists:member(Handler, - gen_event:which_handlers(HandlerMod)) - of - true -> - case gen_event:delete_handler(HandlerMod, Handler, Args) - of - {error, module_not_found} -> {error, module_not_found}; - O -> - Id = {HandlerMod, Handler}, - ok = supervisor:terminate_child(?MODULE, Id), - ok = supervisor:delete_child(?MODULE, Id), - O - end; - false -> {error, module_not_found} + gen_event:which_handlers(HandlerMod)) + of + true -> + case gen_event:delete_handler(HandlerMod, Handler, Args) + of + {error, module_not_found} -> {error, module_not_found}; + O -> + Id = {HandlerMod, Handler}, + ok = supervisor:terminate_child(?MODULE, Id), + ok = supervisor:delete_child(?MODULE, Id), + O + end; + false -> {error, module_not_found} end. handler_spec(HandlerMod, Handler, Args, ExitFun) -> {{HandlerMod, Handler}, - {riak_core_eventhandler_guard, start_link, + {riak_core_eventhandler_guard, + start_link, [HandlerMod, Handler, Args, ExitFun]}, - transient, 5000, worker, + transient, + 5000, + worker, [riak_core_eventhandler_guard]}. start_link() -> diff --git a/src/riak_core_gossip.erl b/src/riak_core_gossip.erl index 723d699e4..865bcf793 100644 --- a/src/riak_core_gossip.erl +++ b/src/riak_core_gossip.erl @@ -35,13 +35,22 @@ -export([start_link/0, stop/0]). --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). - --export([distribute_ring/1, send_ring/1, send_ring/2, - remove_from_cluster/2, remove_from_cluster/3, - random_gossip/1, recursive_gossip/1, - random_recursive_gossip/1, rejoin/2]). +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). 
+ +-export([distribute_ring/1, + send_ring/1, + send_ring/2, + remove_from_cluster/2, + remove_from_cluster/3, + random_gossip/1, + recursive_gossip/1, + random_recursive_gossip/1, + rejoin/2]). %% Default gossip rate: allow at most 45 gossip messages every 10 seconds -define(DEFAULT_LIMIT, {45, 10000}). @@ -56,7 +65,7 @@ %% Distribute a ring to all members of that ring. distribute_ring(Ring) -> gen_server:cast({?MODULE, node()}, - {distribute_ring, Ring}). + {distribute_ring, Ring}). %% send_ring/1 - %% Send the current node's ring to some other node. @@ -68,11 +77,13 @@ send_ring(ToNode) -> send_ring(node(), ToNode). send_ring(Node, Node) -> ok; send_ring(FromNode, ToNode) -> gen_server:cast({?MODULE, FromNode}, - {send_ring_to, ToNode}). + {send_ring_to, ToNode}). start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], - []). + gen_server:start_link({local, ?MODULE}, + ?MODULE, + [], + []). stop() -> gen_server:cast(?MODULE, stop). @@ -82,9 +93,9 @@ rejoin(Node, Ring) -> %% @doc Gossip state to a random node in the ring. random_gossip(Ring) -> case riak_core_ring:random_other_active_node(Ring) of - no_node -> % must be single node cluster - ok; - RandomNode -> send_ring(node(), RandomNode) + no_node -> % must be single node cluster + ok; + RandomNode -> send_ring(node(), RandomNode) end. %% @doc Gossip state to a fixed set of nodes determined from a binary @@ -100,7 +111,7 @@ recursive_gossip(Ring, Node) -> Tree = riak_core_util:build_tree(2, Nodes, [cycles]), Children = orddict:fetch(Node, Tree), _ = [send_ring(node(), OtherNode) - || OtherNode <- Children], + || OtherNode <- Children], ok. recursive_gossip(Ring) -> @@ -108,15 +119,15 @@ recursive_gossip(Ring) -> %% and therefore we fallback to random_recursive_gossip as necessary. Active = riak_core_ring:active_members(Ring), case lists:member(node(), Active) of - true -> recursive_gossip(Ring, node()); - false -> random_recursive_gossip(Ring) + true -> recursive_gossip(Ring, node()); + false -> random_recursive_gossip(Ring) end. random_recursive_gossip(Ring) -> Active = riak_core_ring:active_members(Ring), RNode = - lists:nth(riak_core_rand:uniform(length(Active)), - Active), + lists:nth(riak_core_rand:uniform(length(Active)), + Active), recursive_gossip(Ring, RNode). %% =================================================================== @@ -127,7 +138,8 @@ random_recursive_gossip(Ring) -> init(_State) -> schedule_next_reset(), {Tokens, _} = application:get_env(riak_core, - gossip_limit, ?DEFAULT_LIMIT), + gossip_limit, + ?DEFAULT_LIMIT), State = #state{gossip_tokens = Tokens}, {ok, State}. @@ -135,25 +147,26 @@ handle_call(_, _From, State) -> {reply, ok, State}. 
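recursive_gossip above fans the ring out over a tree of members instead of broadcasting: each node forwards only to its children. A sketch of the tree lookup, assuming riak_core_util:build_tree/3 yields an orddict of member -> children as the orddict:fetch/2 call above suggests (member names hypothetical):

    %% Sketch: fanout 2 with the [cycles] option, so every member gets
    %% (at most) two children to forward the ring to.
    Nodes = [n1, n2, n3, n4, n5],
    Tree = riak_core_util:build_tree(2, Nodes, [cycles]),
    Children = orddict:fetch(n1, Tree).  %% e.g. [n2, n3]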
%% @private handle_cast({send_ring_to, _Node}, - State = #state{gossip_tokens = 0}) -> + State = #state{gossip_tokens = 0}) -> %% Out of gossip tokens, ignore the send request {noreply, State}; handle_cast({send_ring_to, Node}, State) -> {ok, RingOut} = riak_core_ring_manager:get_raw_ring(), riak_core_ring:check_tainted(RingOut, - "Error: riak_core_gossip/send_ring_to " - ":: Sending tainted ring over gossip"), + "Error: riak_core_gossip/send_ring_to " + ":: Sending tainted ring over gossip"), gen_server:cast({?MODULE, Node}, - {reconcile_ring, RingOut}), + {reconcile_ring, RingOut}), Tokens = State#state.gossip_tokens - 1, {noreply, State#state{gossip_tokens = Tokens}}; handle_cast({distribute_ring, Ring}, State) -> Nodes = riak_core_ring:active_members(Ring), riak_core_ring:check_tainted(Ring, - "Error: riak_core_gossip/distribute_ring " - ":: Sending tainted ring over gossip"), - gen_server:abcast(Nodes, ?MODULE, - {reconcile_ring, Ring}), + "Error: riak_core_gossip/distribute_ring " + ":: Sending tainted ring over gossip"), + gen_server:abcast(Nodes, + ?MODULE, + {reconcile_ring, Ring}), {noreply, State}; handle_cast({reconcile_ring, OtherRing}, State) -> %% Compare the two rings, see if there is anything that @@ -161,7 +174,7 @@ handle_cast({reconcile_ring, OtherRing}, State) -> %% STATS % riak_core_stat:update(gossip_received), riak_core_ring_manager:ring_trans(fun reconcile/2, - [OtherRing]), + [OtherRing]), {noreply, State}; handle_cast(gossip_ring, State) -> % Gossip the ring to some random other node... @@ -171,18 +184,18 @@ handle_cast(gossip_ring, State) -> handle_cast({rejoin, OtherRing}, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), SameCluster = riak_core_ring:cluster_name(Ring) =:= - riak_core_ring:cluster_name(OtherRing), + riak_core_ring:cluster_name(OtherRing), case SameCluster of - true -> - OtherNode = riak_core_ring:owner_node(OtherRing), - case riak_core:join(node(), OtherNode, true, true) of - ok -> ok; - {error, Reason} -> - logger:error("Could not rejoin cluster: ~p", [Reason]), - ok - end, - {noreply, State}; - false -> {noreply, State} + true -> + OtherNode = riak_core_ring:owner_node(OtherRing), + case riak_core:join(node(), OtherNode, true, true) of + ok -> ok; + {error, Reason} -> + logger:error("Could not rejoin cluster: ~p", [Reason]), + ok + end, + {noreply, State}; + false -> {noreply, State} end; handle_cast(_, State) -> {noreply, State}. @@ -190,7 +203,8 @@ handle_info(reset_tokens, State) -> schedule_next_reset(), gen_server:cast(?MODULE, gossip_ring), {Tokens, _} = application:get_env(riak_core, - gossip_limit, ?DEFAULT_LIMIT), + gossip_limit, + ?DEFAULT_LIMIT), {noreply, State#state{gossip_tokens = Tokens}}; handle_info(_Info, State) -> {noreply, State}. @@ -206,188 +220,211 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. schedule_next_reset() -> {_, Reset} = application:get_env(riak_core, - gossip_limit, ?DEFAULT_LIMIT), + gossip_limit, + ?DEFAULT_LIMIT), erlang:send_after(Reset, ?MODULE, reset_tokens). 
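The token counter above is a simple refill scheme: every Reset milliseconds a reset_tokens message restores the budget, each outbound gossip spends one token, and at zero the send requests are dropped until the next refill. Halving the default rate is then just a config change (illustrative numbers):

    %% Sketch: {Tokens, IntervalMs}, mirroring ?DEFAULT_LIMIT = {45, 10000}.
    application:set_env(riak_core, gossip_limit, {23, 10000}).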
%%noinspection ErlangUnboundVariable reconcile(Ring0, [OtherRing0]) -> {Ring, OtherRing} = - riak_core_ring:reconcile_names(Ring0, OtherRing0), + riak_core_ring:reconcile_names(Ring0, OtherRing0), Node = node(), OtherNode = riak_core_ring:owner_node(OtherRing), Members = riak_core_ring:reconcile_members(Ring, - OtherRing), + OtherRing), WrongCluster = riak_core_ring:cluster_name(Ring) /= - riak_core_ring:cluster_name(OtherRing), + riak_core_ring:cluster_name(OtherRing), PreStatus = riak_core_ring:member_status(Members, - OtherNode), + OtherNode), IgnoreGossip = WrongCluster or (PreStatus =:= invalid) - or (PreStatus =:= down), + or (PreStatus =:= down), case IgnoreGossip of - true -> Ring2 = Ring, Changed = false; - false -> - {Changed, Ring2} = riak_core_ring:reconcile(OtherRing, - Ring) + true -> + Ring2 = Ring, + Changed = false; + false -> + {Changed, Ring2} = riak_core_ring:reconcile(OtherRing, + Ring) end, OtherStatus = riak_core_ring:member_status(Ring2, - OtherNode), + OtherNode), case {WrongCluster, OtherStatus, Changed} of - {true, _, _} -> - %% TODO: Tell other node to stop gossiping to this node. - %% STATS - % riak_core_stat:update(ignored_gossip), - ignore; - {_, down, _} -> - %% Tell other node to rejoin the cluster. - riak_core_gossip:rejoin(OtherNode, Ring2), - ignore; - {_, invalid, _} -> - %% Exiting/Removed node never saw shutdown cast, re-send. - ClusterName = riak_core_ring:cluster_name(Ring), - riak_core_ring_manager:refresh_ring(OtherNode, - ClusterName), - ignore; - {_, _, new_ring} -> - Ring3 = riak_core_ring:ring_changed(Node, Ring2), - %% STATS - % riak_core_stat:update(rings_reconciled), - log_membership_changes(Ring, Ring3), - {reconciled_ring, Ring3}; - {_, _, _} -> ignore + {true, _, _} -> + %% TODO: Tell other node to stop gossiping to this node. + %% STATS + % riak_core_stat:update(ignored_gossip), + ignore; + {_, down, _} -> + %% Tell other node to rejoin the cluster. + riak_core_gossip:rejoin(OtherNode, Ring2), + ignore; + {_, invalid, _} -> + %% Exiting/Removed node never saw shutdown cast, re-send. + ClusterName = riak_core_ring:cluster_name(Ring), + riak_core_ring_manager:refresh_ring(OtherNode, + ClusterName), + ignore; + {_, _, new_ring} -> + Ring3 = riak_core_ring:ring_changed(Node, Ring2), + %% STATS + % riak_core_stat:update(rings_reconciled), + log_membership_changes(Ring, Ring3), + {reconciled_ring, Ring3}; + {_, _, _} -> ignore end. log_membership_changes(OldRing, NewRing) -> OldStatus = riak_core_ring:all_member_status(OldRing), NewStatus = riak_core_ring:all_member_status(NewRing), do_log_membership_changes(lists:sort(OldStatus), - lists:sort(NewStatus)). + lists:sort(NewStatus)). 
do_log_membership_changes([], []) -> ok; do_log_membership_changes([{Node, Status} | Old], - [{Node, Status} | New]) -> + [{Node, Status} | New]) -> %% No change do_log_membership_changes(Old, New); do_log_membership_changes([{Node, Status1} | Old], - [{Node, Status2} | New]) -> + [{Node, Status2} | New]) -> %% State changed, did not join or leave log_node_changed(Node, Status1, Status2), do_log_membership_changes(Old, New); do_log_membership_changes([{OldNode, _OldStatus} | _] = - Old, - [{NewNode, NewStatus} | New]) + Old, + [{NewNode, NewStatus} | New]) when NewNode < OldNode -> %% Node added log_node_added(NewNode, NewStatus), do_log_membership_changes(Old, New); do_log_membership_changes([{OldNode, OldStatus} | Old], - [{NewNode, _NewStatus} | _] = New) + [{NewNode, _NewStatus} | _] = New) when OldNode < NewNode -> %% Node removed log_node_removed(OldNode, OldStatus), do_log_membership_changes(Old, New); do_log_membership_changes([{OldNode, OldStatus} | Old], - []) -> + []) -> %% Trailing nodes were removed log_node_removed(OldNode, OldStatus), do_log_membership_changes(Old, []); do_log_membership_changes([], - [{NewNode, NewStatus} | New]) -> + [{NewNode, NewStatus} | New]) -> %% Trailing nodes were added log_node_added(NewNode, NewStatus), do_log_membership_changes([], New). log_node_changed(Node, Old, New) -> logger:info("'~s' changed from '~s' to '~s'~n", - [Node, Old, New]). + [Node, Old, New]). log_node_added(Node, New) -> logger:info("'~s' joined cluster with status '~s'~n", - [Node, New]). + [Node, New]). log_node_removed(Node, Old) -> logger:info("'~s' removed from cluster (previously: " - "'~s')~n", - [Node, Old]). + "'~s')~n", + [Node, Old]). remove_from_cluster(Ring, ExitingNode) -> - remove_from_cluster(Ring, ExitingNode, - riak_core_rand:rand_seed()). + remove_from_cluster(Ring, + ExitingNode, + riak_core_rand:rand_seed()). remove_from_cluster(Ring, ExitingNode, Seed) -> % Get a list of indices owned by the ExitingNode... AllOwners = riak_core_ring:all_owners(Ring), % Transfer indexes to other nodes... - ExitRing = case attempt_simple_transfer(Seed, Ring, - AllOwners, ExitingNode) - of - {ok, NR} -> NR; - target_n_fail -> - %% re-diagonalize - %% first hand off all claims to *any* one else, - %% just so rebalance doesn't include exiting node - Members = riak_core_ring:claiming_members(Ring), - Other = hd(lists:delete(ExitingNode, Members)), - TempRing = lists:foldl(fun ({I, N}, R) - when N == ExitingNode -> - riak_core_ring:transfer_node(I, - Other, - R); - (_, R) -> R - end, - Ring, AllOwners), - riak_core_claim:claim_rebalance_n(TempRing, Other) - end, + ExitRing = case attempt_simple_transfer(Seed, + Ring, + AllOwners, + ExitingNode) + of + {ok, NR} -> NR; + target_n_fail -> + %% re-diagonalize + %% first hand off all claims to *any* one else, + %% just so rebalance doesn't include exiting node + Members = riak_core_ring:claiming_members(Ring), + Other = hd(lists:delete(ExitingNode, Members)), + TempRing = lists:foldl(fun ({I, N}, R) + when N == ExitingNode -> + riak_core_ring:transfer_node(I, + Other, + R); + (_, R) -> R + end, + Ring, + AllOwners), + riak_core_claim:claim_rebalance_n(TempRing, Other) + end, ExitRing. attempt_simple_transfer(Seed, Ring, Owners, - ExitingNode) -> - TargetN = application:get_env(riak_core, target_n_val, - undefined), - attempt_simple_transfer(Seed, Ring, Owners, TargetN, - ExitingNode, 0, - [{O, -TargetN} - || O <- riak_core_ring:claiming_members(Ring), - O /= ExitingNode]). 
+ ExitingNode) -> + TargetN = application:get_env(riak_core, + target_n_val, + undefined), + attempt_simple_transfer(Seed, + Ring, + Owners, + TargetN, + ExitingNode, + 0, + [{O, -TargetN} + || O <- riak_core_ring:claiming_members(Ring), + O /= ExitingNode]). attempt_simple_transfer(Seed, Ring, [{P, Exit} | Rest], - TargetN, Exit, Idx, Last) -> + TargetN, Exit, Idx, Last) -> %% handoff case [N || {N, I} <- Last, Idx - I >= TargetN] of - [] -> target_n_fail; - Candidates -> - %% these nodes don't violate target_n in the reverse direction - StepsToNext = fun (Node) -> - length(lists:takewhile(fun ({_, Owner}) -> - Node /= Owner - end, - Rest)) - end, - case lists:filter(fun (N) -> - Next = StepsToNext(N), - Next + 1 >= TargetN orelse - Next == length(Rest) - end, - Candidates) - of - [] -> target_n_fail; - Qualifiers -> - %% these nodes don't violate target_n forward - {Rand, Seed2} = - riak_core_rand:uniform_s(length(Qualifiers), Seed), - Chosen = lists:nth(Rand, Qualifiers), - %% choose one, and do the rest of the ring - attempt_simple_transfer(Seed2, - riak_core_ring:transfer_node(P, Chosen, - Ring), - Rest, TargetN, Exit, Idx + 1, - lists:keyreplace(Chosen, 1, Last, - {Chosen, Idx})) - end + [] -> target_n_fail; + Candidates -> + %% these nodes don't violate target_n in the reverse direction + StepsToNext = fun (Node) -> + length(lists:takewhile(fun ({_, Owner}) -> + Node /= Owner + end, + Rest)) + end, + case lists:filter(fun (N) -> + Next = StepsToNext(N), + Next + 1 >= TargetN orelse + Next == length(Rest) + end, + Candidates) + of + [] -> target_n_fail; + Qualifiers -> + %% these nodes don't violate target_n forward + {Rand, Seed2} = + riak_core_rand:uniform_s(length(Qualifiers), Seed), + Chosen = lists:nth(Rand, Qualifiers), + %% choose one, and do the rest of the ring + attempt_simple_transfer(Seed2, + riak_core_ring:transfer_node(P, + Chosen, + Ring), + Rest, + TargetN, + Exit, + Idx + 1, + lists:keyreplace(Chosen, + 1, + Last, + {Chosen, Idx})) + end end; attempt_simple_transfer(Seed, Ring, [{_, N} | Rest], - TargetN, Exit, Idx, Last) -> + TargetN, Exit, Idx, Last) -> %% just keep track of seeing this node - attempt_simple_transfer(Seed, Ring, Rest, TargetN, Exit, - Idx + 1, lists:keyreplace(N, 1, Last, {N, Idx})); + attempt_simple_transfer(Seed, + Ring, + Rest, + TargetN, + Exit, + Idx + 1, + lists:keyreplace(N, 1, Last, {N, Idx})); attempt_simple_transfer(_, Ring, [], _, _, _, _) -> {ok, Ring}. diff --git a/src/riak_core_handoff_listener.erl b/src/riak_core_handoff_listener.erl index fb98c2eea..7f391707e 100644 --- a/src/riak_core_handoff_listener.erl +++ b/src/riak_core_handoff_listener.erl @@ -28,22 +28,31 @@ -export([start_link/0]). --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). --export([get_handoff_ip/0, sock_opts/0, - new_connection/2]). +-export([get_handoff_ip/0, + sock_opts/0, + new_connection/2]). -record(state, - {ipaddr :: string(), portnum :: integer()}). + {ipaddr :: string(), portnum :: integer()}). start_link() -> - PortNum = application:get_env(riak_core, handoff_port, - undefined), - IpAddr = application:get_env(riak_core, handoff_ip, - undefined), - gen_nb_server:start_link(?MODULE, IpAddr, PortNum, - [IpAddr, PortNum]). 
+ PortNum = application:get_env(riak_core, + handoff_port, + undefined), + IpAddr = application:get_env(riak_core, + handoff_ip, + undefined), + gen_nb_server:start_link(?MODULE, + IpAddr, + PortNum, + [IpAddr, PortNum]). get_handoff_ip() -> gen_server:call(?MODULE, handoff_ip, infinity). @@ -56,10 +65,10 @@ sock_opts() -> [binary, {packet, 4}, {reuseaddr, true}, {backlog, 64}]. handle_call(handoff_ip, _From, - State = #state{ipaddr = I}) -> + State = #state{ipaddr = I}) -> {reply, {ok, I}, State}; handle_call(handoff_port, _From, - State = #state{portnum = P}) -> + State = #state{portnum = P}) -> {reply, {ok, P}, State}. handle_cast(_Msg, State) -> {noreply, State}. @@ -72,13 +81,13 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. new_connection(Socket, State) -> case riak_core_handoff_manager:add_inbound() of - {ok, Pid} -> - ok = gen_tcp:controlling_process(Socket, Pid), - ok = riak_core_handoff_receiver:set_socket(Pid, Socket), - {ok, State}; - {error, _Reason} -> - %% STATS - %% riak_core_stat:update(rejected_handoffs), - gen_tcp:close(Socket), - {ok, State} + {ok, Pid} -> + ok = gen_tcp:controlling_process(Socket, Pid), + ok = riak_core_handoff_receiver:set_socket(Pid, Socket), + {ok, State}; + {error, _Reason} -> + %% STATS + %% riak_core_stat:update(rejected_handoffs), + gen_tcp:close(Socket), + {ok, State} end. diff --git a/src/riak_core_handoff_listener_sup.erl b/src/riak_core_handoff_listener_sup.erl index ed7f87126..f68fa4f35 100644 --- a/src/riak_core_handoff_listener_sup.erl +++ b/src/riak_core_handoff_listener_sup.erl @@ -26,8 +26,12 @@ -export([start_link/0, init/1]). -define(CHILD(I, Type), - {I, {I, start_link, []}, permanent, brutal_kill, Type, - [I]}). + {I, + {I, start_link, []}, + permanent, + brutal_kill, + Type, + [I]}). %% begins the supervisor, init/1 will be called start_link() -> diff --git a/src/riak_core_handoff_manager.erl b/src/riak_core_handoff_manager.erl index 2a4ae579a..6c0ac838b 100644 --- a/src/riak_core_handoff_manager.erl +++ b/src/riak_core_handoff_manager.erl @@ -18,21 +18,34 @@ -behaviour(gen_server). %% gen_server api --export([start_link/0, init/1, handle_call/3, - handle_cast/2, handle_info/2, terminate/2, - code_change/3]). +-export([start_link/0, + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). %% exclusion api --export([add_exclusion/2, get_exclusions/1, - remove_exclusion/2]). +-export([add_exclusion/2, + get_exclusions/1, + remove_exclusion/2]). %% handoff api --export([add_outbound/6, add_outbound/7, add_inbound/0, - xfer/3, kill_xfer/3, status/0, status/1, - status_update/2, set_concurrency/1, get_concurrency/0, - set_recv_data/2, kill_handoffs/0, - kill_handoffs_in_direction/1, - handoff_change_enabled_setting/2]). +-export([add_outbound/6, + add_outbound/7, + add_inbound/0, + xfer/3, + kill_xfer/3, + status/0, + status/1, + status_update/2, + set_concurrency/1, + get_concurrency/0, + set_recv_data/2, + kill_handoffs/0, + kill_handoffs_in_direction/1, + handoff_change_enabled_setting/2]). -include("riak_core_handoff.hrl"). @@ -45,75 +58,92 @@ -endif. -record(state, - {excl, handoffs = [] :: [handoff_status()]}). + {excl, handoffs = [] :: [handoff_status()]}). %% this can be overridden with riak_core handoff_concurrency -define(HANDOFF_CONCURRENCY, 2). -define(HO_EQ(HOA, HOB), - HOA#handoff_status.mod_src_tgt == - HOB#handoff_status.mod_src_tgt - andalso - HOA#handoff_status.timestamp == - HOB#handoff_status.timestamp). 
+ HOA#handoff_status.mod_src_tgt == + HOB#handoff_status.mod_src_tgt + andalso + HOA#handoff_status.timestamp == + HOB#handoff_status.timestamp). %%%=================================================================== %%% API %%%=================================================================== start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], - []). + gen_server:start_link({local, ?MODULE}, + ?MODULE, + [], + []). init([]) -> {ok, #state{excl = sets:new(), handoffs = []}}. add_outbound(HOType, Module, Idx, Node, VnodePid, - Opts) -> - add_outbound(HOType, Module, Idx, Idx, Node, VnodePid, - Opts). + Opts) -> + add_outbound(HOType, + Module, + Idx, + Idx, + Node, + VnodePid, + Opts). add_outbound(HOType, Module, SrcIdx, TargetIdx, Node, - VnodePid, Opts) -> + VnodePid, Opts) -> case application:get_env(riak_core, - disable_outbound_handoff) - of - {ok, true} -> {error, max_concurrency}; - _ -> - gen_server:call(?MODULE, - {add_outbound, HOType, Module, SrcIdx, TargetIdx, - Node, VnodePid, Opts}, - infinity) + disable_outbound_handoff) + of + {ok, true} -> {error, max_concurrency}; + _ -> + gen_server:call(?MODULE, + {add_outbound, + HOType, + Module, + SrcIdx, + TargetIdx, + Node, + VnodePid, + Opts}, + infinity) end. add_inbound() -> case application:get_env(riak_core, - disable_inbound_handoff) - of - {ok, true} -> {error, max_concurrency}; - _ -> gen_server:call(?MODULE, {add_inbound}, infinity) + disable_inbound_handoff) + of + {ok, true} -> {error, max_concurrency}; + _ -> gen_server:call(?MODULE, {add_inbound}, infinity) end. %% @doc Initiate a transfer from `SrcPartition' to `TargetPartition' %% for the given `Module' using the `FilterModFun' filter. -spec xfer({index(), node()}, mod_partition(), - {module(), atom()}) -> ok. + {module(), atom()}) -> ok. xfer({SrcPartition, SrcOwner}, {Module, TargetPartition}, FilterModFun) -> %% NOTE: This will not work with old nodes ReqOrigin = node(), gen_server:cast({?MODULE, SrcOwner}, - {send_handoff, repair, Module, - {SrcPartition, TargetPartition}, ReqOrigin, - FilterModFun}). + {send_handoff, + repair, + Module, + {SrcPartition, TargetPartition}, + ReqOrigin, + FilterModFun}). %% @doc Associate `Data' with the inbound handoff `Recv'. -spec set_recv_data(pid(), proplists:proplist()) -> ok. set_recv_data(Recv, Data) -> - gen_server:call(?MODULE, {set_recv_data, Recv, Data}, - infinity). + gen_server:call(?MODULE, + {set_recv_data, Recv, Data}, + infinity). status() -> status(none). @@ -126,11 +156,12 @@ status(Filter) -> status_update(ModSrcTgt, Stats) -> gen_server:cast(?MODULE, - {status_update, ModSrcTgt, Stats}). + {status_update, ModSrcTgt, Stats}). set_concurrency(Limit) -> - gen_server:call(?MODULE, {set_concurrency, Limit}, - infinity). + gen_server:call(?MODULE, + {set_concurrency, Limit}, + infinity). get_concurrency() -> gen_server:call(?MODULE, get_concurrency, infinity). @@ -140,134 +171,152 @@ get_concurrency() -> kill_xfer(SrcNode, ModSrcTarget, Reason) -> gen_server:cast({?MODULE, SrcNode}, - {kill_xfer, ModSrcTarget, Reason}). + {kill_xfer, ModSrcTarget, Reason}). kill_handoffs() -> set_concurrency(0). -spec kill_handoffs_in_direction(inbound | - outbound) -> ok. + outbound) -> ok. kill_handoffs_in_direction(Direction) -> - gen_server:call(?MODULE, {kill_in_direction, Direction}, - infinity). + gen_server:call(?MODULE, + {kill_in_direction, Direction}, + infinity). add_exclusion(Module, Index) -> gen_server:cast(?MODULE, - {add_exclusion, {Module, Index}}). + {add_exclusion, {Module, Index}}). 
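As the guards in add_outbound/add_inbound above show, either direction of handoff can be vetoed wholesale through an application flag, with callers seeing {error, max_concurrency} rather than a crash. For example, to refuse new inbound transfers on a node (sketch):

    %% Sketch: add_inbound/0 above short-circuits while this flag is true.
    application:set_env(riak_core, disable_inbound_handoff, true).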
remove_exclusion(Module, Index) -> gen_server:cast(?MODULE, - {del_exclusion, {Module, Index}}). + {del_exclusion, {Module, Index}}). get_exclusions(Module) -> - gen_server:call(?MODULE, {get_exclusions, Module}, - infinity). + gen_server:call(?MODULE, + {get_exclusions, Module}, + infinity). %%%=================================================================== %%% Callbacks %%%=================================================================== handle_call({get_exclusions, Module}, _From, - State = #state{excl = Excl}) -> + State = #state{excl = Excl}) -> Reply = [I - || {M, I} <- sets:to_list(Excl), M =:= Module], + || {M, I} <- sets:to_list(Excl), M =:= Module], {reply, {ok, Reply}, State}; -handle_call({add_outbound, Type, Mod, SrcIdx, TargetIdx, - Node, Pid, Opts}, - _From, State = #state{handoffs = HS}) -> - case send_handoff(Type, {Mod, SrcIdx, TargetIdx}, Node, - Pid, HS, Opts) - of - {ok, - Handoff = #handoff_status{transport_pid = Sender}} -> - HS2 = HS ++ [Handoff], - {reply, {ok, Sender}, State#state{handoffs = HS2}}; - {false, - _ExistingHandoff = #handoff_status{transport_pid = - Sender}} -> - {reply, {ok, Sender}, State}; - Error -> {reply, Error, State} +handle_call({add_outbound, + Type, + Mod, + SrcIdx, + TargetIdx, + Node, + Pid, + Opts}, + _From, State = #state{handoffs = HS}) -> + case send_handoff(Type, + {Mod, SrcIdx, TargetIdx}, + Node, + Pid, + HS, + Opts) + of + {ok, + Handoff = #handoff_status{transport_pid = Sender}} -> + HS2 = HS ++ [Handoff], + {reply, {ok, Sender}, State#state{handoffs = HS2}}; + {false, + _ExistingHandoff = #handoff_status{transport_pid = + Sender}} -> + {reply, {ok, Sender}, State}; + Error -> {reply, Error, State} end; handle_call({add_inbound}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> case receive_handoff() of - {ok, - Handoff = #handoff_status{transport_pid = Receiver}} -> - HS2 = HS ++ [Handoff], - {reply, {ok, Receiver}, State#state{handoffs = HS2}}; - Error -> {reply, Error, State} + {ok, + Handoff = #handoff_status{transport_pid = Receiver}} -> + HS2 = HS ++ [Handoff], + {reply, {ok, Receiver}, State#state{handoffs = HS2}}; + Error -> {reply, Error, State} end; handle_call({set_recv_data, Recv, Data}, _From, - State = #state{handoffs = HS}) -> - case lists:keyfind(Recv, #handoff_status.transport_pid, - HS) - of - false -> - throw({error, - "set_recv_data called for non-existing " - "receiver", - Recv, Data}); - #handoff_status{} = H -> - H2 = H#handoff_status{mod_src_tgt = - proplists:get_value(mod_src_tgt, Data), - vnode_pid = - proplists:get_value(vnode_pid, Data)}, - HS2 = lists:keyreplace(Recv, - #handoff_status.transport_pid, HS, H2), - {reply, ok, State#state{handoffs = HS2}} + State = #state{handoffs = HS}) -> + case lists:keyfind(Recv, + #handoff_status.transport_pid, + HS) + of + false -> + throw({error, + "set_recv_data called for non-existing " + "receiver", + Recv, + Data}); + #handoff_status{} = H -> + H2 = H#handoff_status{mod_src_tgt = + proplists:get_value(mod_src_tgt, Data), + vnode_pid = + proplists:get_value(vnode_pid, Data)}, + HS2 = lists:keyreplace(Recv, + #handoff_status.transport_pid, + HS, + H2), + {reply, ok, State#state{handoffs = HS2}} end; handle_call({xfer_status, Xfer}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> TP = Xfer#handoff_status.transport_pid, - case lists:keyfind(TP, #handoff_status.transport_pid, - HS) - of - false -> {reply, not_found, State}; - _ -> {reply, in_progress, State} + case lists:keyfind(TP, + 
#handoff_status.transport_pid, + HS) + of + false -> {reply, not_found, State}; + _ -> {reply, in_progress, State} end; handle_call({status, Filter}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> Status = lists:filter(filter(Filter), - [build_status(HO) || HO <- HS]), + [build_status(HO) || HO <- HS]), {reply, Status, State}; handle_call({set_concurrency, Limit}, _From, - State = #state{handoffs = HS}) -> - application:set_env(riak_core, handoff_concurrency, - Limit), + State = #state{handoffs = HS}) -> + application:set_env(riak_core, + handoff_concurrency, + Limit), case Limit < erlang:length(HS) of - true -> - %% Note: we don't update the state with the handoffs that we're - %% keeping because we'll still get the 'DOWN' messages with - %% a reason of 'max_concurrency' and we want to be able to do - %% something with that if necessary. - {_Keep, Discard} = lists:split(Limit, HS), - _ = [erlang:exit(Pid, max_concurrency) - || #handoff_status{transport_pid = Pid} <- Discard], - {reply, ok, State}; - false -> {reply, ok, State} + true -> + %% Note: we don't update the state with the handoffs that we're + %% keeping because we'll still get the 'DOWN' messages with + %% a reason of 'max_concurrency' and we want to be able to do + %% something with that if necessary. + {_Keep, Discard} = lists:split(Limit, HS), + _ = [erlang:exit(Pid, max_concurrency) + || #handoff_status{transport_pid = Pid} <- Discard], + {reply, ok, State}; + false -> {reply, ok, State} end; handle_call(get_concurrency, _From, State) -> Concurrency = get_concurrency_limit(), {reply, Concurrency, State}; handle_call({kill_in_direction, Direction}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> %% TODO (atb): Refactor this to comply with max_concurrency logspam PR's exit codes %% NB. As-is this handles worker termination the same way as set_concurrency; %% no state update is performed here, we let the worker DOWNs mark them %% as dead rather than trimming here. Kill = [H - || H = #handoff_status{direction = D} <- HS, - D =:= Direction], + || H = #handoff_status{direction = D} <- HS, + D =:= Direction], _ = [erlang:exit(Pid, max_concurrency) - || #handoff_status{transport_pid = Pid} <- Kill], + || #handoff_status{transport_pid = Pid} <- Kill], {reply, ok, State}. handle_cast({del_exclusion, {Mod, Idx}}, - State = #state{excl = Excl}) -> + State = #state{excl = Excl}) -> Excl2 = sets:del_element({Mod, Idx}, Excl), {noreply, State#state{excl = Excl2}}; handle_cast({add_exclusion, {Mod, Idx}}, - State = #state{excl = Excl}) -> + State = #state{excl = Excl}) -> %% Note: This function used to trigger a ring event after adding an %% exclusion to ensure that an exiting node would eventually shutdown %% after all vnodes had finished handoff. 
This behavior is now handled @@ -275,36 +324,49 @@ handle_cast({add_exclusion, {Mod, Idx}}, Excl2 = sets:add_element({Mod, Idx}, Excl), {noreply, State#state{excl = Excl2}}; handle_cast({status_update, ModSrcTgt, StatsUpdate}, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> case lists:keyfind(ModSrcTgt, - #handoff_status.mod_src_tgt, HS) - of - false -> - logger:error("status_update for non-existing handoff ~p", - [ModSrcTgt]), - {noreply, State}; - HO -> - Stats2 = update_stats(StatsUpdate, - HO#handoff_status.stats), - HO2 = HO#handoff_status{stats = Stats2}, - HS2 = lists:keyreplace(ModSrcTgt, - #handoff_status.mod_src_tgt, HS, HO2), - {noreply, State#state{handoffs = HS2}} + #handoff_status.mod_src_tgt, + HS) + of + false -> + logger:error("status_update for non-existing handoff ~p", + [ModSrcTgt]), + {noreply, State}; + HO -> + Stats2 = update_stats(StatsUpdate, + HO#handoff_status.stats), + HO2 = HO#handoff_status{stats = Stats2}, + HS2 = lists:keyreplace(ModSrcTgt, + #handoff_status.mod_src_tgt, + HS, + HO2), + {noreply, State#state{handoffs = HS2}} end; -handle_cast({send_handoff, Type, Mod, {Src, Target}, - ReqOrigin, {FilterMod, FilterFun} = FMF}, - State = #state{handoffs = HS}) -> +handle_cast({send_handoff, + Type, + Mod, + {Src, Target}, + ReqOrigin, + {FilterMod, FilterFun} = FMF}, + State = #state{handoffs = HS}) -> Filter = FilterMod:FilterFun(Target), %% TODO: make a record? {ok, VNode} = riak_core_vnode_manager:get_vnode_pid(Src, - Mod), - case send_handoff(Type, {Mod, Src, Target}, ReqOrigin, - VNode, HS, {Filter, FMF}, ReqOrigin, []) - of - {ok, Handoff} -> - HS2 = HS ++ [Handoff], - {noreply, State#state{handoffs = HS2}}; - _ -> {noreply, State} + Mod), + case send_handoff(Type, + {Mod, Src, Target}, + ReqOrigin, + VNode, + HS, + {Filter, FMF}, + ReqOrigin, + []) + of + {ok, Handoff} -> + HS2 = HS ++ [Handoff], + {noreply, State#state{handoffs = HS2}}; + _ -> {noreply, State} end; handle_cast({kill_xfer, ModSrcTarget, Reason}, State) -> HS = State#state.handoffs, @@ -312,76 +374,77 @@ handle_cast({kill_xfer, ModSrcTarget, Reason}, State) -> {noreply, State#state{handoffs = HS2}}. handle_info({'DOWN', Ref, process, _Pid, Reason}, - State = #state{handoffs = HS}) -> - case lists:keytake(Ref, #handoff_status.transport_mon, - HS) - of - {value, - #handoff_status{mod_src_tgt = {M, S, I}, - direction = Dir, vnode_pid = Vnode, vnode_mon = VnodeM, - req_origin = Origin}, - NewHS} -> - WarnVnode = case Reason of - %% if the reason the handoff process died was anything other - %% than 'normal' we should log the reason why as an error - normal -> false; - X - when X == max_concurrency orelse - element(1, X) == shutdown andalso - element(2, X) == max_concurrency -> - logger:info("An ~w handoff of partition ~w ~w was " - "terminated\n " - " for reason: ~w~n", - [Dir, M, I, Reason]), - true; - _ -> - logger:error("An ~w handoff of partition ~w ~w was " - "terminated\n " - " for reason: ~w~n", - [Dir, M, I, Reason]), - true - end, - %% if we have the vnode process pid, tell the vnode why the - %% handoff stopped so it can clean up its state - case WarnVnode andalso is_pid(Vnode) of - true -> - riak_core_vnode:handoff_error(Vnode, 'DOWN', Reason); - _ -> - case Origin of - none -> ok; - _ -> - %% Use proplist instead so it's more - %% flexible in future, or does - %% capabilities nullify that? 
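The {FilterMod, FilterFun} pair carried by the send_handoff cast above is applied as FilterMod:FilterFun(Target) to build the per-key predicate for a repair transfer. A minimal sketch of such a filter module (the module name and the erlang:phash2/2 stand-in hash are illustrative, not part of this patch):

    -module(example_repair_filter).

    -export([filter/1]).

    %% Called by the handoff manager as FilterMod:FilterFun(Target);
    %% returns a predicate that keeps only keys that map to Target.
    %% erlang:phash2/2 stands in for the application's real hash.
    filter(TargetPartition) ->
        fun (Key) ->
                erlang:phash2(Key, 1024) =:= TargetPartition rem 1024
        end.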
- Msg = {M, S, I}, - riak_core_vnode_manager:xfer_complete(Origin, Msg) - end, - ok - end, - %% No monitor on vnode for receiver - if VnodeM /= undefined -> demonitor(VnodeM); - true -> ok - end, - %% removed the handoff from the list of active handoffs - {noreply, State#state{handoffs = NewHS}}; - false -> - case lists:keytake(Ref, #handoff_status.vnode_mon, HS) - of - {value, - #handoff_status{mod_src_tgt = {M, _, I}, - direction = Dir, transport_pid = Trans, - transport_mon = TransM}, - NewHS} -> - %% In this case the vnode died and the handoff - %% sender must be killed. - logger:error("An ~w handoff of partition ~w ~w was " - "terminated because the vnode died", - [Dir, M, I]), - demonitor(TransM), - exit(Trans, vnode_died), - {noreply, State#state{handoffs = NewHS}}; - _ -> {noreply, State} - end + State = #state{handoffs = HS}) -> + case lists:keytake(Ref, + #handoff_status.transport_mon, + HS) + of + {value, + #handoff_status{mod_src_tgt = {M, S, I}, + direction = Dir, vnode_pid = Vnode, vnode_mon = VnodeM, + req_origin = Origin}, + NewHS} -> + WarnVnode = case Reason of + %% if the reason the handoff process died was anything other + %% than 'normal' we should log the reason why as an error + normal -> false; + X + when X == max_concurrency orelse + element(1, X) == shutdown andalso + element(2, X) == max_concurrency -> + logger:info("An ~w handoff of partition ~w ~w was " + "terminated\n " + " for reason: ~w~n", + [Dir, M, I, Reason]), + true; + _ -> + logger:error("An ~w handoff of partition ~w ~w was " + "terminated\n " + " for reason: ~w~n", + [Dir, M, I, Reason]), + true + end, + %% if we have the vnode process pid, tell the vnode why the + %% handoff stopped so it can clean up its state + case WarnVnode andalso is_pid(Vnode) of + true -> + riak_core_vnode:handoff_error(Vnode, 'DOWN', Reason); + _ -> + case Origin of + none -> ok; + _ -> + %% Use proplist instead so it's more + %% flexible in future, or does + %% capabilities nullify that? + Msg = {M, S, I}, + riak_core_vnode_manager:xfer_complete(Origin, Msg) + end, + ok + end, + %% No monitor on vnode for receiver + if VnodeM /= undefined -> demonitor(VnodeM); + true -> ok + end, + %% removed the handoff from the list of active handoffs + {noreply, State#state{handoffs = NewHS}}; + false -> + case lists:keytake(Ref, #handoff_status.vnode_mon, HS) + of + {value, + #handoff_status{mod_src_tgt = {M, _, I}, + direction = Dir, transport_pid = Trans, + transport_mon = TransM}, + NewHS} -> + %% In this case the vnode died and the handoff + %% sender must be killed. + logger:error("An ~w handoff of partition ~w ~w was " + "terminated because the vnode died", + [Dir, M, I]), + demonitor(TransM), + exit(Trans, vnode_died), + {noreply, State#state{handoffs = NewHS}}; + _ -> {noreply, State} + end end. terminate(_Reason, _State) -> ok. @@ -394,34 +457,42 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
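Both branches of the 'DOWN' clause above dispatch on which monitor reference fired, transport or vnode. The message shape being matched is ordinary Erlang monitor delivery, for example:

    %% toy illustration of the {'DOWN', ...} tuple the manager handles
    {Pid, Ref} = spawn_monitor(fun () -> exit(max_concurrency) end),
    receive
        {'DOWN', Ref, process, Pid, Reason} ->
            %% Reason =:= max_concurrency, the same exit reason the
            %% manager uses when trimming transfers over the limit
            Reason
    end.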
build_status(HO) -> #handoff_status{mod_src_tgt = {Mod, SrcP, TargetP}, - src_node = SrcNode, target_node = TargetNode, - direction = Dir, status = Status, timestamp = StartTS, - transport_pid = TPid, type = Type} = - HO, + src_node = SrcNode, target_node = TargetNode, + direction = Dir, status = Status, timestamp = StartTS, + transport_pid = TPid, type = Type} = + HO, {status_v2, - [{mod, Mod}, {src_partition, SrcP}, - {target_partition, TargetP}, {src_node, SrcNode}, - {target_node, TargetNode}, {direction, Dir}, - {status, Status}, {start_ts, StartTS}, - {sender_pid, TPid}, {stats, calc_stats(HO)}, + [{mod, Mod}, + {src_partition, SrcP}, + {target_partition, TargetP}, + {src_node, SrcNode}, + {target_node, TargetNode}, + {direction, Dir}, + {status, Status}, + {start_ts, StartTS}, + {sender_pid, TPid}, + {stats, calc_stats(HO)}, {type, Type}]}. calc_stats(#handoff_status{stats = Stats, - timestamp = StartTS, size = Size}) -> + timestamp = StartTS, size = Size}) -> case dict:find(last_update, Stats) of - error -> no_stats; - {ok, LastUpdate} -> - Objs = dict:fetch(objs, Stats), - Bytes = dict:fetch(bytes, Stats), - CalcSize = get_size(Size), - Done = calc_pct_done(Objs, Bytes, CalcSize), - ElapsedS = timer:now_diff(LastUpdate, StartTS) / - 1000000, - ObjsS = round(Objs / ElapsedS), - BytesS = round(Bytes / ElapsedS), - [{objs_total, Objs}, {objs_per_s, ObjsS}, - {bytes_per_s, BytesS}, {last_update, LastUpdate}, - {size, CalcSize}, {pct_done_decimal, Done}] + error -> no_stats; + {ok, LastUpdate} -> + Objs = dict:fetch(objs, Stats), + Bytes = dict:fetch(bytes, Stats), + CalcSize = get_size(Size), + Done = calc_pct_done(Objs, Bytes, CalcSize), + ElapsedS = timer:now_diff(LastUpdate, StartTS) / + 1000000, + ObjsS = round(Objs / ElapsedS), + BytesS = round(Bytes / ElapsedS), + [{objs_total, Objs}, + {objs_per_s, ObjsS}, + {bytes_per_s, BytesS}, + {last_update, LastUpdate}, + {size, CalcSize}, + {pct_done_decimal, Done}] end. get_size({F, dynamic}) -> F(); @@ -434,67 +505,84 @@ calc_pct_done(_, Bytes, {Size, bytes}) -> Bytes / Size. filter(none) -> fun (_) -> true end; filter({Key, Value} = _Filter) -> fun ({status_v2, Status}) -> - case proplists:get_value(Key, Status) of - Value -> true; - _ -> false - end + case proplists:get_value(Key, Status) of + Value -> true; + _ -> false + end end. resize_transfer_filter(Ring, Mod, Src, Target) -> fun (K) -> - {_, Hashed} = Mod:object_info(K), - riak_core_ring:is_future_index(Hashed, Src, Target, - Ring) + {_, Hashed} = Mod:object_info(K), + riak_core_ring:is_future_index(Hashed, + Src, + Target, + Ring) end. resize_transfer_notsent_fun(Ring, Mod, Src) -> Shrinking = riak_core_ring:num_partitions(Ring) > - riak_core_ring:future_num_partitions(Ring), + riak_core_ring:future_num_partitions(Ring), case Shrinking of - false -> NValMap = DefaultN = undefined; - true -> - NValMap = Mod:nval_map(Ring), - DefaultN = riak_core_bucket:default_object_nval() + false -> NValMap = DefaultN = undefined; + true -> + NValMap = Mod:nval_map(Ring), + DefaultN = riak_core_bucket:default_object_nval() end, fun (Key, Acc) -> - record_seen_index(Ring, Shrinking, NValMap, DefaultN, - Mod, Src, Key, Acc) + record_seen_index(Ring, + Shrinking, + NValMap, + DefaultN, + Mod, + Src, + Key, + Acc) end. 
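filter/1 above turns a single {Key, Value} requirement into a predicate over the {status_v2, Proplist} tuples built by build_status/1, which is how a status query is narrowed to, say, one direction. Module-internal illustration of the predicate:

    Pred = filter({direction, outbound}),
    true = Pred({status_v2, [{direction, outbound}]}),
    false = Pred({status_v2, [{direction, inbound}]}).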
record_seen_index(Ring, Shrinking, NValMap, DefaultN, - Mod, Src, Key, Seen) -> + Mod, Src, Key, Seen) -> {Bucket, Hashed} = Mod:object_info(Key), CheckNVal = case Shrinking of - false -> undefined; - true -> proplists:get_value(Bucket, NValMap, DefaultN) - end, - case riak_core_ring:future_index(Hashed, Src, CheckNVal, - Ring) - of - undefined -> Seen; - FutureIndex -> ordsets:add_element(FutureIndex, Seen) + false -> undefined; + true -> proplists:get_value(Bucket, NValMap, DefaultN) + end, + case riak_core_ring:future_index(Hashed, + Src, + CheckNVal, + Ring) + of + undefined -> Seen; + FutureIndex -> ordsets:add_element(FutureIndex, Seen) end. get_concurrency_limit() -> - application:get_env(riak_core, handoff_concurrency, - ?HANDOFF_CONCURRENCY). + application:get_env(riak_core, + handoff_concurrency, + ?HANDOFF_CONCURRENCY). %% true if handoff_concurrency (inbound + outbound) has already been reached handoff_concurrency_limit_reached() -> Receivers = - supervisor:count_children(riak_core_handoff_receiver_sup), + supervisor:count_children(riak_core_handoff_receiver_sup), Senders = - supervisor:count_children(riak_core_handoff_sender_sup), + supervisor:count_children(riak_core_handoff_sender_sup), ActiveReceivers = proplists:get_value(active, - Receivers), + Receivers), ActiveSenders = proplists:get_value(active, Senders), get_concurrency_limit() =< - ActiveReceivers + ActiveSenders. + ActiveReceivers + ActiveSenders. send_handoff(HOType, ModSrcTarget, Node, Pid, HS, - Opts) -> - send_handoff(HOType, ModSrcTarget, Node, Pid, HS, - {none, none}, none, Opts). + Opts) -> + send_handoff(HOType, + ModSrcTarget, + Node, + Pid, + HS, + {none, none}, + none, + Opts). %% @private %% @@ -504,109 +592,119 @@ send_handoff(HOType, ModSrcTarget, Node, Pid, HS, %% `Origin' is the node this request originated from so a reply %% can be sent on completion. -spec send_handoff(ho_type(), - {module(), index(), index()}, node(), pid(), list(), - {predicate() | none, {module(), atom()} | none}, node(), - [{atom(), term()}]) -> {ok, handoff_status()} | - {error, max_concurrency} | - {false, handoff_status()}.
send_handoff(HOType, {Mod, Src, Target}, Node, Vnode, - HS, {Filter, FilterModFun}, Origin, Opts) -> + HS, {Filter, FilterModFun}, Origin, Opts) -> case handoff_concurrency_limit_reached() of - true -> {error, max_concurrency}; - false -> - ShouldHandoff = case lists:keyfind({Mod, Src, Target}, - #handoff_status.mod_src_tgt, HS) - of - false -> true; - Handoff = #handoff_status{target_node = Node, - vnode_pid = Vnode} -> - {false, Handoff}; - #handoff_status{transport_pid = Sender} -> - %% found a running handoff with a different vnode - %% source or a different target node, kill the current - %% one and the new one will start up - erlang:exit(Sender, resubmit_handoff_change), - true - end, - case ShouldHandoff of - true -> - VnodeM = monitor(process, Vnode), - %% start the sender process - BaseOpts = [{src_partition, Src}, - {target_partition, Target}], - case HOType of - repair -> - HOFilter = Filter, - HOAcc0 = undefined, - HONotSentFun = undefined; - resize -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - HOFilter = resize_transfer_filter(Ring, Mod, Src, - Target), - HOAcc0 = ordsets:new(), - HONotSentFun = resize_transfer_notsent_fun(Ring, Mod, - Src); - _ -> - HOFilter = none, - HOAcc0 = undefined, - HONotSentFun = undefined - end, - HOOpts = [{filter, HOFilter}, {notsent_acc0, HOAcc0}, - {notsent_fun, HONotSentFun} - | BaseOpts], - {ok, Pid} = - riak_core_handoff_sender_sup:start_sender(HOType, Mod, - Node, Vnode, - HOOpts), - PidM = monitor(process, Pid), - Size = validate_size(proplists:get_value(size, Opts)), - %% successfully started up a new sender handoff - {ok, - #handoff_status{transport_pid = Pid, - transport_mon = PidM, direction = outbound, - timestamp = os:timestamp(), src_node = node(), - target_node = Node, - mod_src_tgt = {Mod, Src, Target}, - vnode_pid = Vnode, vnode_mon = VnodeM, - status = [], stats = dict:new(), type = HOType, - req_origin = Origin, - filter_mod_fun = FilterModFun, size = Size}}; - %% handoff already going, just return it - AlreadyExists = {false, _CurrentHandoff} -> - AlreadyExists - end + true -> {error, max_concurrency}; + false -> + ShouldHandoff = case lists:keyfind({Mod, Src, Target}, + #handoff_status.mod_src_tgt, + HS) + of + false -> true; + Handoff = #handoff_status{target_node = Node, + vnode_pid = Vnode} -> + {false, Handoff}; + #handoff_status{transport_pid = Sender} -> + %% found a running handoff with a different vnode + %% source or a different target node, kill the current + %% one and the new one will start up + erlang:exit(Sender, + resubmit_handoff_change), + true + end, + case ShouldHandoff of + true -> + VnodeM = monitor(process, Vnode), + %% start the sender process + BaseOpts = [{src_partition, Src}, + {target_partition, Target}], + case HOType of + repair -> + HOFilter = Filter, + HOAcc0 = undefined, + HONotSentFun = undefined; + resize -> + {ok, Ring} = riak_core_ring_manager:get_my_ring(), + HOFilter = resize_transfer_filter(Ring, + Mod, + Src, + Target), + HOAcc0 = ordsets:new(), + HONotSentFun = resize_transfer_notsent_fun(Ring, + Mod, + Src); + _ -> + HOFilter = none, + HOAcc0 = undefined, + HONotSentFun = undefined + end, + HOOpts = [{filter, HOFilter}, + {notsent_acc0, HOAcc0}, + {notsent_fun, HONotSentFun} + | BaseOpts], + {ok, Pid} = + riak_core_handoff_sender_sup:start_sender(HOType, + Mod, + Node, + Vnode, + HOOpts), + PidM = monitor(process, Pid), + Size = validate_size(proplists:get_value(size, Opts)), + %% successfully started up a new sender handoff + {ok, + #handoff_status{transport_pid = Pid, + 
transport_mon = PidM, direction = outbound, + timestamp = os:timestamp(), + src_node = node(), target_node = Node, + mod_src_tgt = {Mod, Src, Target}, + vnode_pid = Vnode, vnode_mon = VnodeM, + status = [], stats = dict:new(), + type = HOType, req_origin = Origin, + filter_mod_fun = FilterModFun, + size = Size}}; + %% handoff already going, just return it + AlreadyExists = {false, _CurrentHandoff} -> + AlreadyExists + end end. %% spawn a receiver process receive_handoff() -> case handoff_concurrency_limit_reached() of - true -> {error, max_concurrency}; - false -> - {ok, Pid} = - riak_core_handoff_receiver_sup:start_receiver(), - PidM = monitor(process, Pid), - %% successfully started up a new receiver - {ok, - #handoff_status{transport_pid = Pid, - transport_mon = PidM, direction = inbound, - timestamp = os:timestamp(), - mod_src_tgt = {undefined, undefined, undefined}, - src_node = undefined, target_node = undefined, - status = [], stats = dict:new(), req_origin = none}} + true -> {error, max_concurrency}; + false -> + {ok, Pid} = + riak_core_handoff_receiver_sup:start_receiver(), + PidM = monitor(process, Pid), + %% successfully started up a new receiver + {ok, + #handoff_status{transport_pid = Pid, + transport_mon = PidM, direction = inbound, + timestamp = os:timestamp(), + mod_src_tgt = {undefined, undefined, undefined}, + src_node = undefined, target_node = undefined, + status = [], stats = dict:new(), + req_origin = none}} end. update_stats(StatsUpdate, Stats) -> #ho_stats{last_update = LU, objs = Objs, - bytes = Bytes} = - StatsUpdate, + bytes = Bytes} = + StatsUpdate, Stats2 = dict:update_counter(objs, Objs, Stats), Stats3 = dict:update_counter(bytes, Bytes, Stats2), dict:store(last_update, LU, Stats3). validate_size(Size = {N, U}) when is_number(N) andalso - N > 0 andalso (U =:= bytes orelse U =:= objects) -> + N > 0 andalso (U =:= bytes orelse U =:= objects) -> Size; validate_size(Size = {F, dynamic}) when is_function(F) -> @@ -620,53 +718,65 @@ validate_size(_) -> undefined. %% can have two simultaneous inbound xfers. kill_xfer_i(ModSrcTarget, Reason, HS) -> case lists:keytake(ModSrcTarget, - #handoff_status.mod_src_tgt, HS) - of - false -> HS; - {value, Xfer, HS2} -> - #handoff_status{mod_src_tgt = - {Mod, SrcPartition, TargetPartition}, - type = Type, target_node = TargetNode, - src_node = SrcNode, transport_pid = TP} = - Xfer, - Msg = "~p transfer of ~p from ~p ~p to ~p ~p " - "killed for reason ~p", - case Type of - undefined -> ok; - _ -> - logger:info(Msg, - [Type, Mod, SrcNode, SrcPartition, TargetNode, - TargetPartition, Reason]) - end, - exit(TP, {kill_xfer, Reason}), - kill_xfer_i(ModSrcTarget, Reason, HS2) + #handoff_status.mod_src_tgt, + HS) + of + false -> HS; + {value, Xfer, HS2} -> + #handoff_status{mod_src_tgt = + {Mod, SrcPartition, TargetPartition}, + type = Type, target_node = TargetNode, + src_node = SrcNode, transport_pid = TP} = + Xfer, + Msg = "~p transfer of ~p from ~p ~p to ~p ~p " + "killed for reason ~p", + case Type of + undefined -> ok; + _ -> + logger:info(Msg, + [Type, + Mod, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition, + Reason]) + end, + exit(TP, {kill_xfer, Reason}), + kill_xfer_i(ModSrcTarget, Reason, HS2) end. 
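validate_size/1 above normalizes the optional size hint from the handoff options: a positive {N, bytes | objects} pair passes through, a {Fun, dynamic} thunk passes through (get_size/1 later calls the fun), and anything else collapses to undefined. A module-internal sketch of the accepted shapes:

    {100, objects} = validate_size({100, objects}),
    F = fun () -> 4096 end,
    {F, dynamic} = validate_size({F, dynamic}),
    undefined = validate_size({-1, bytes}),
    undefined = validate_size(not_a_size).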
handoff_change_enabled_setting(EnOrDis, Direction) -> SetFun = case EnOrDis of - enable -> fun handoff_enable/1; - disable -> fun handoff_disable/1 - end, + enable -> fun handoff_enable/1; + disable -> fun handoff_disable/1 + end, case Direction of - inbound -> SetFun(inbound); - outbound -> SetFun(outbound); - both -> SetFun(inbound), SetFun(outbound) + inbound -> SetFun(inbound); + outbound -> SetFun(outbound); + both -> + SetFun(inbound), + SetFun(outbound) end. handoff_enable(inbound) -> - application:set_env(riak_core, disable_inbound_handoff, - false); + application:set_env(riak_core, + disable_inbound_handoff, + false); handoff_enable(outbound) -> - application:set_env(riak_core, disable_outbound_handoff, - false). + application:set_env(riak_core, + disable_outbound_handoff, + false). handoff_disable(inbound) -> - application:set_env(riak_core, disable_inbound_handoff, - true), + application:set_env(riak_core, + disable_inbound_handoff, + true), kill_handoffs_in_direction(inbound); handoff_disable(outbound) -> - application:set_env(riak_core, disable_outbound_handoff, - true), + application:set_env(riak_core, + disable_outbound_handoff, + true), kill_handoffs_in_direction(outbound). %%%=================================================================== @@ -680,15 +790,15 @@ handoff_test_() -> {setup, %% called when the tests start and complete... fun () -> - {ok, ManPid} = start_link(), - {ok, RSupPid} = - riak_core_handoff_receiver_sup:start_link(), - {ok, SSupPid} = - riak_core_handoff_sender_sup:start_link(), - [ManPid, RSupPid, SSupPid] + {ok, ManPid} = start_link(), + {ok, RSupPid} = + riak_core_handoff_receiver_sup:start_link(), + {ok, SSupPid} = + riak_core_handoff_sender_sup:start_link(), + [ManPid, RSupPid, SSupPid] end, fun (PidList) -> - lists:foreach(fun (Pid) -> exit(Pid, kill) end, PidList) + lists:foreach(fun (Pid) -> exit(Pid, kill) end, PidList) end, %% actual list of test [?_test((simple_handoff())), @@ -700,8 +810,12 @@ simple_handoff() -> ?assertEqual(ok, (set_concurrency(0))), ?assertEqual({error, max_concurrency}, (add_inbound())), ?assertEqual({error, max_concurrency}, - (add_outbound(ownership, riak_kv_vnode, 0, node(), - self(), []))), + (add_outbound(ownership, + riak_kv_vnode, + 0, + node(), + self(), + []))), %% allow for a single handoff ?assertEqual(ok, (set_concurrency(1))), %% done @@ -720,12 +834,14 @@ config_disable() -> ?assertEqual(1, (length(status()))), Ref = monitor(process, Pid), CatchDownFun = fun () -> - receive - {'DOWN', Ref, process, Pid, max_concurrency} -> ok; - Other -> {error, unexpected_message, Other} - after 1000 -> {error, timeout_waiting_for_down_msg} - end - end, + receive + {'DOWN', Ref, process, Pid, max_concurrency} -> + ok; + Other -> {error, unexpected_message, Other} + after 1000 -> + {error, timeout_waiting_for_down_msg} + end + end, ?assertEqual(ok, (handoff_disable(inbound))), ?assertEqual(ok, (CatchDownFun())), %% We use wait_until because it's possible that the handoff manager process @@ -746,10 +862,11 @@ config_disable() -> wait_until(Fun, Retry, Delay) when Retry > 0 -> Res = Fun(), case Res of - true -> ok; - _ when Retry == 1 -> {fail, Res}; - _ -> - timer:sleep(Delay), wait_until(Fun, Retry - 1, Delay) + true -> ok; + _ when Retry == 1 -> {fail, Res}; + _ -> + timer:sleep(Delay), + wait_until(Fun, Retry - 1, Delay) end. -endif. 
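handoff_enable/1 and handoff_disable/1 above are thin riak_core application-env toggles, with disable additionally reaping live transfers in that direction, which is exactly what the config_disable test waits for. Usage sketch:

    ok = handoff_disable(inbound),
    {ok, true} = application:get_env(riak_core, disable_inbound_handoff),
    %% re-enabling only clears the flag; nothing is killed on this path
    ok = handoff_enable(inbound),
    {ok, false} = application:get_env(riak_core, disable_inbound_handoff).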
diff --git a/src/riak_core_handoff_receiver.erl b/src/riak_core_handoff_receiver.erl index 80f7f9428..d25f6857b 100644 --- a/src/riak_core_handoff_receiver.erl +++ b/src/riak_core_handoff_receiver.erl @@ -26,20 +26,26 @@ -behaviour(gen_server). --export([start_link/0, set_socket/2, - supports_batching/0]). +-export([start_link/0, + set_socket/2, + supports_batching/0]). --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, - {sock :: port() | undefined, peer :: term(), - recv_timeout_len :: non_neg_integer(), - vnode_timeout_len :: non_neg_integer(), - partition :: non_neg_integer() | undefined, - vnode_mod = riak_kv_vnode :: module(), - vnode :: pid() | undefined, - count = 0 :: non_neg_integer()}). + {sock :: port() | undefined, + peer :: term(), + recv_timeout_len :: non_neg_integer(), + vnode_timeout_len :: non_neg_integer(), + partition :: non_neg_integer() | undefined, + vnode_mod = riak_kv_vnode :: module(), + vnode :: pid() | undefined, + count = 0 :: non_neg_integer()}). %% set the TCP receive timeout to five minutes to be conservative. -define(RECV_TIMEOUT, 300000). @@ -57,12 +63,13 @@ supports_batching() -> true. init([]) -> {ok, #state{recv_timeout_len = - application:get_env(riak_core, handoff_receive_timeout, - ?RECV_TIMEOUT), - vnode_timeout_len = - application:get_env(riak_core, - handoff_receive_vnode_timeout, - ?VNODE_TIMEOUT)}}. + application:get_env(riak_core, + handoff_receive_timeout, + ?RECV_TIMEOUT), + vnode_timeout_len = + application:get_env(riak_core, + handoff_receive_vnode_timeout, + ?VNODE_TIMEOUT)}}. handle_call({set_socket, Socket0}, _From, State) -> SockOpts = [{active, once}, {packet, 4}, {header, 1}], @@ -72,105 +79,113 @@ handle_call({set_socket, Socket0}, _From, State) -> {reply, ok, State#state{sock = Socket, peer = Peer}}. 
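A receiver does nothing until it is given a socket; the accepting listener is expected to transfer socket ownership first and then call the exported set_socket/2, which lands in the handle_call clause above. A sketch of that hand-over, assuming Socket is a connected gen_tcp socket owned by the caller:

    {ok, Receiver} = riak_core_handoff_receiver_sup:start_receiver(),
    ok = gen_tcp:controlling_process(Socket, Receiver),
    ok = riak_core_handoff_receiver:set_socket(Receiver, Socket).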
handle_info({tcp_closed, _Socket}, - State = #state{partition = Partition, count = Count, - peer = Peer}) -> + State = #state{partition = Partition, count = Count, + peer = Peer}) -> logger:info("Handoff receiver for partition ~p exited " - "after processing ~p objects from ~p", - [Partition, Count, Peer]), + "after processing ~p objects from ~p", + [Partition, Count, Peer]), {stop, normal, State}; handle_info({tcp_error, _Socket, Reason}, - State = #state{partition = Partition, count = Count, - peer = Peer}) -> + State = #state{partition = Partition, count = Count, + peer = Peer}) -> logger:info("Handoff receiver for partition ~p exited " - "after processing ~p objects from ~p: " - "TCP error ~p", - [Partition, Count, Peer, Reason]), + "after processing ~p objects from ~p: " + "TCP error ~p", + [Partition, Count, Peer, Reason]), {stop, normal, State}; handle_info({tcp, Socket, Data}, State) -> [MsgType | MsgData] = Data, case catch process_message(MsgType, MsgData, State) of - {'EXIT', Reason} -> - logger:error("Handoff receiver for partition ~p exited " - "abnormally after processing ~p objects " - "from ~p: ~p", - [State#state.partition, State#state.count, - State#state.peer, Reason]), - {stop, normal, State}; - NewState when is_record(NewState, state) -> - InetMod = inet, - InetMod:setopts(Socket, [{active, once}]), - {noreply, NewState, State#state.recv_timeout_len} + {'EXIT', Reason} -> + logger:error("Handoff receiver for partition ~p exited " + "abnormally after processing ~p objects " + "from ~p: ~p", + [State#state.partition, + State#state.count, + State#state.peer, + Reason]), + {stop, normal, State}; + NewState when is_record(NewState, state) -> + InetMod = inet, + InetMod:setopts(Socket, [{active, once}]), + {noreply, NewState, State#state.recv_timeout_len} end; handle_info(timeout, State) -> logger:error("Handoff receiver for partition ~p timed " - "out after processing ~p objects from " - "~p.", - [State#state.partition, State#state.count, - State#state.peer]), + "out after processing ~p objects from " + "~p.", + [State#state.partition, + State#state.count, + State#state.peer]), {stop, normal, State}. 
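The [MsgType | MsgData] = Data match above relies on the socket options set in set_socket: with {packet, 4} the 4-byte length frame is stripped, and with {header, 1} the first payload byte arrives as a list head in front of the remaining binary. Illustrative receive for a sender's sync frame:

    %% socket opened with [{active, once}, {packet, 4}, {header, 1}]
    receive
        {tcp, Socket, [MsgType | MsgData]} ->
            %% a sender's <<(?PT_MSG_SYNC):8, "sync">> arrives here as
            %% MsgType = ?PT_MSG_SYNC and MsgData = <<"sync">>
            {MsgType, MsgData}
    end.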
process_message(?PT_MSG_INIT, MsgData, - State = #state{vnode_mod = VNodeMod, peer = Peer}) -> + State = #state{vnode_mod = VNodeMod, peer = Peer}) -> <<Partition:160/integer>> = MsgData, logger:info("Receiving handoff data for partition " - "~p:~p from ~p", - [VNodeMod, Partition, Peer]), + "~p:~p from ~p", + [VNodeMod, Partition, Peer]), {ok, VNode} = - riak_core_vnode_master:get_vnode_pid(Partition, - VNodeMod), + riak_core_vnode_master:get_vnode_pid(Partition, + VNodeMod), Data = [{mod_src_tgt, {VNodeMod, undefined, Partition}}, - {vnode_pid, VNode}], + {vnode_pid, VNode}], riak_core_handoff_manager:set_recv_data(self(), Data), State#state{partition = Partition, vnode = VNode}; process_message(?PT_MSG_BATCH, MsgData, State) -> lists:foldl(fun (Obj, StateAcc) -> - process_message(?PT_MSG_OBJ, Obj, StateAcc) - end, - State, binary_to_term(MsgData)); + process_message(?PT_MSG_OBJ, Obj, StateAcc) + end, + State, + binary_to_term(MsgData)); process_message(?PT_MSG_OBJ, MsgData, - State = #state{vnode = VNode, count = Count, - vnode_timeout_len = VNodeTimeout}) -> - try riak_core_vnode:handoff_data(VNode, MsgData, VNodeTimeout) + State = #state{vnode = VNode, count = Count, + vnode_timeout_len = VNodeTimeout}) -> + try riak_core_vnode:handoff_data(VNode, + MsgData, + VNodeTimeout) of - ok -> State#state{count = Count + 1}; - E = {error, _} -> exit(E) + ok -> State#state{count = Count + 1}; + E = {error, _} -> exit(E) catch - exit:{timeout, _} -> - exit({error, - {vnode_timeout, VNodeTimeout, size(MsgData), - binary:part(MsgData, {0, min(size(MsgData), 128)})}}) + exit:{timeout, _} -> + exit({error, + {vnode_timeout, + VNodeTimeout, + size(MsgData), + binary:part(MsgData, {0, min(size(MsgData), 128)})}}) end; process_message(?PT_MSG_OLDSYNC, MsgData, - State = #state{sock = Socket}) -> + State = #state{sock = Socket}) -> gen_tcp:send(Socket, <<(?PT_MSG_OLDSYNC):8, "sync">>), <<VNodeModBin/binary>> = MsgData, VNodeMod = binary_to_atom(VNodeModBin, utf8), State#state{vnode_mod = VNodeMod}; process_message(?PT_MSG_SYNC, _MsgData, - State = #state{sock = Socket}) -> + State = #state{sock = Socket}) -> gen_tcp:send(Socket, <<(?PT_MSG_SYNC):8, "sync">>), State; process_message(?PT_MSG_VERIFY_NODE, ExpectedName, - State = #state{sock = Socket, peer = Peer}) -> + State = #state{sock = Socket, peer = Peer}) -> case binary_to_term(ExpectedName) of - _Node when _Node =:= node() -> - gen_tcp:send(Socket, <<(?PT_MSG_VERIFY_NODE):8>>), - State; - Node -> - logger:error("Handoff from ~p expects us to be ~s " - "but we are ~s.", - [Peer, Node, node()]), - exit({error, {wrong_node, Node}}) + _Node when _Node =:= node() -> + gen_tcp:send(Socket, <<(?PT_MSG_VERIFY_NODE):8>>), + State; + Node -> + logger:error("Handoff from ~p expects us to be ~s " + "but we are ~s.", + [Peer, Node, node()]), + exit({error, {wrong_node, Node}}) end; process_message(?PT_MSG_CONFIGURE, MsgData, State) -> ConfProps = binary_to_term(MsgData), State#state{vnode_mod = - proplists:get_value(vnode_mod, ConfProps), - partition = proplists:get_value(partition, ConfProps)}; + proplists:get_value(vnode_mod, ConfProps), + partition = proplists:get_value(partition, ConfProps)}; process_message(_, _MsgData, - State = #state{sock = Socket}) -> + State = #state{sock = Socket}) -> gen_tcp:send(Socket, - <<(?PT_MSG_UNKNOWN):8, "unknown_msg">>), + <<(?PT_MSG_UNKNOWN):8, "unknown_msg">>), State. handle_cast(_Msg, State) -> {noreply, State}. @@ -181,8 +196,8 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}.
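The ?PT_MSG_BATCH clause above fixes the wire contract for batched handoff: the payload is term_to_binary/1 of a list of already-encoded objects, each replayed through the ?PT_MSG_OBJ clause on arrival. Sender-side sketch, where BinObj1 and BinObj2 stand for Module:encode_handoff_item/2 results:

    Batch = term_to_binary([BinObj1, BinObj2]),
    ok = gen_tcp:send(Socket, <<(?PT_MSG_BATCH):8, Batch/binary>>).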
safe_peername(Skt, Mod) -> case Mod:peername(Skt) of - {ok, {Host, Port}} -> {inet_parse:ntoa(Host), Port}; - _ -> - {unknown, - unknown} % Real info is {Addr, Port} + {ok, {Host, Port}} -> {inet_parse:ntoa(Host), Port}; + _ -> + {unknown, + unknown} % Real info is {Addr, Port} end. diff --git a/src/riak_core_handoff_receiver_sup.erl b/src/riak_core_handoff_receiver_sup.erl index 0a9402f10..1f682c6ef 100644 --- a/src/riak_core_handoff_receiver_sup.erl +++ b/src/riak_core_handoff_receiver_sup.erl @@ -29,8 +29,12 @@ -export([start_receiver/0]). -define(CHILD(I, Type), - {I, {I, start_link, []}, temporary, brutal_kill, Type, - [I]}). + {I, + {I, start_link, []}, + temporary, + brutal_kill, + Type, + [I]}). %% begins the supervisor, init/1 will be called start_link() -> diff --git a/src/riak_core_handoff_sender.erl b/src/riak_core_handoff_sender.erl index aaef2a259..df67035b9 100644 --- a/src/riak_core_handoff_sender.erl +++ b/src/riak_core_handoff_sender.erl @@ -38,37 +38,50 @@ -define(STATUS_INTERVAL, 2). -define(LOG_INFO(Str, Args), - logger:info("~p transfer of ~p from ~p ~p to ~p ~p " - "failed " - ++ Str, - [Type, Module, SrcNode, SrcPartition, TargetNode, - TargetPartition] - ++ Args)). + logger:info("~p transfer of ~p from ~p ~p to ~p ~p " + "failed " + ++ Str, + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition] + ++ Args)). -define(LOG_FAIL(Str, Args), - logger:error("~p transfer of ~p from ~p ~p to ~p ~p " - "failed " - ++ Str, - [Type, Module, SrcNode, SrcPartition, TargetNode, - TargetPartition] - ++ Args)). + logger:error("~p transfer of ~p from ~p ~p to ~p ~p " + "failed " + ++ Str, + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition] + ++ Args)). %% Accumulator for the visit item HOF -record(ho_acc, - {ack :: non_neg_integer(), - error :: ok | {error, any()}, filter :: function(), - module :: module(), parent :: pid(), socket :: any(), - src_target :: {non_neg_integer(), non_neg_integer()}, - stats :: #ho_stats{}, - total_objects :: non_neg_integer(), - total_bytes :: non_neg_integer(), - use_batching :: boolean(), item_queue :: [binary()], - item_queue_length :: non_neg_integer(), - item_queue_byte_size :: non_neg_integer(), - acksync_threshold :: non_neg_integer(), - acksync_timer :: timer:tref() | undefined, - type :: ho_type(), notsent_acc :: term(), - notsent_fun :: function() | undefined}). + {ack :: non_neg_integer(), + error :: ok | {error, any()}, + filter :: function(), + module :: module(), + parent :: pid(), + socket :: any(), + src_target :: {non_neg_integer(), non_neg_integer()}, + stats :: #ho_stats{}, + total_objects :: non_neg_integer(), + total_bytes :: non_neg_integer(), + use_batching :: boolean(), + item_queue :: [binary()], + item_queue_length :: non_neg_integer(), + item_queue_byte_size :: non_neg_integer(), + acksync_threshold :: non_neg_integer(), + acksync_timer :: timer:tref() | undefined, + type :: ho_type(), + notsent_acc :: term(), + notsent_fun :: function() | undefined}). %%%=================================================================== %%% API @@ -76,8 +89,8 @@ start_link(TargetNode, Module, {Type, Opts}, Vnode) -> Pid = spawn_link(fun () -> - start_fold(TargetNode, Module, {Type, Opts}, Vnode) - end), + start_fold(TargetNode, Module, {Type, Opts}, Vnode) + end), {ok, Pid}. 
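The reformatted ?CHILD macro above still expands to a classic supervisor child-spec tuple; for the receiver supervisor, ?CHILD(riak_core_handoff_receiver, worker) yields:

    {riak_core_handoff_receiver,
     {riak_core_handoff_receiver, start_link, []},
     temporary, brutal_kill, worker,
     [riak_core_handoff_receiver]}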
%%%=================================================================== @@ -85,23 +98,27 @@ start_link(TargetNode, Module, {Type, Opts}, Vnode) -> %%%=================================================================== start_fold_(TargetNode, Module, Type, Opts, ParentPid, - SrcNode, SrcPartition, TargetPartition) -> + SrcNode, SrcPartition, TargetPartition) -> %% Give workers one more chance to abort or get a lock or whatever. FoldOpts = maybe_call_handoff_started(Module, - SrcPartition), + SrcPartition), Filter = get_filter(Opts), [_Name, Host] = string:tokens(atom_to_list(TargetNode), - "@"), + "@"), {ok, Port} = get_handoff_port(TargetNode), TNHandoffIP = case get_handoff_ip(TargetNode) of - error -> Host; - {ok, "0.0.0.0"} -> Host; - {ok, Other} -> Other - end, - SockOpts = [binary, {packet, 4}, {header, 1}, - {active, false}], - {ok, Socket} = gen_tcp:connect(TNHandoffIP, Port, - SockOpts, 15000), + error -> Host; + {ok, "0.0.0.0"} -> Host; + {ok, Other} -> Other + end, + SockOpts = [binary, + {packet, 4}, + {header, 1}, + {active, false}], + {ok, Socket} = gen_tcp:connect(TNHandoffIP, + Port, + SockOpts, + 15000), RecvTimeout = get_handoff_receive_timeout(), %% We want to ensure that the node we think we are talking to %% really is the node we expect. @@ -111,16 +128,16 @@ start_fold_(TargetNode, Module, Type, Opts, ParentPid, %% print an error and keep going with our fingers crossed. TargetBin = term_to_binary(TargetNode), VerifyNodeMsg = <<(?PT_MSG_VERIFY_NODE):8, - TargetBin/binary>>, + TargetBin/binary>>, ok = gen_tcp:send(Socket, VerifyNodeMsg), case gen_tcp:recv(Socket, 0, RecvTimeout) of - {ok, [?PT_MSG_VERIFY_NODE | _]} -> ok; - {ok, [?PT_MSG_UNKNOWN | _]} -> - logger:warning("Could not verify identity of peer ~s.", - [TargetNode]), - ok; - {error, timeout} -> exit({shutdown, timeout}); - {error, closed} -> exit({shutdown, wrong_node}) + {ok, [?PT_MSG_VERIFY_NODE | _]} -> ok; + {ok, [?PT_MSG_UNKNOWN | _]} -> + logger:warning("Could not verify identity of peer ~s.", + [TargetNode]), + ok; + {error, timeout} -> exit({shutdown, timeout}); + {error, closed} -> exit({shutdown, wrong_node}) end, %% Piggyback the sync command from previous releases to send %% the vnode type across. If talking to older nodes they'll @@ -128,12 +145,13 @@ start_fold_(TargetNode, Module, Type, Opts, ParentPid, %% After 0.12.0 the calls can be switched to use PT_MSG_SYNC %% and PT_MSG_CONFIGURE VMaster = list_to_atom(atom_to_list(Module) ++ - "_master"), + "_master"), ModBin = atom_to_binary(Module, utf8), Msg = <<(?PT_MSG_OLDSYNC):8, ModBin/binary>>, ok = gen_tcp:send(Socket, Msg), AckSyncThreshold = application:get_env(riak_core, - handoff_acksync_threshold, 25), + handoff_acksync_threshold, + 25), %% Now that handoff_concurrency applies to both outbound and %% inbound conns there is a chance that the receiver may %% decide to reject the senders attempt to start a handoff. @@ -142,266 +160,301 @@ start_fold_(TargetNode, Module, Type, Opts, ParentPid, %% socket at this point is a rejection by the receiver to %% enforce handoff_concurrency. 
case gen_tcp:recv(Socket, 0, RecvTimeout) of - {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> ok; - {error, timeout} -> exit({shutdown, timeout}); - {error, closed} -> exit({shutdown, max_concurrency}) + {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> ok; + {error, timeout} -> exit({shutdown, timeout}); + {error, closed} -> exit({shutdown, max_concurrency}) end, RemoteSupportsBatching = - remote_supports_batching(TargetNode), + remote_supports_batching(TargetNode), logger:info("Starting ~p transfer of ~p from ~p ~p " - "to ~p ~p", - [Type, Module, SrcNode, SrcPartition, TargetNode, - TargetPartition]), + "to ~p ~p", + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition]), M = <<(?PT_MSG_INIT):8, TargetPartition:160/integer>>, ok = gen_tcp:send(Socket, M), StartFoldTime = os:timestamp(), Stats = #ho_stats{interval_end = - future_now(get_status_interval())}, + future_now(get_status_interval())}, UnsentAcc0 = get_notsent_acc0(Opts), UnsentFun = get_notsent_fun(Opts), Req = riak_core_util:make_fold_req(fun visit_item/3, - #ho_acc{ack = 0, error = ok, - filter = Filter, module = Module, - parent = ParentPid, - socket = Socket, - src_target = - {SrcPartition, - TargetPartition}, - stats = Stats, total_bytes = 0, - total_objects = 0, - use_batching = - RemoteSupportsBatching, - item_queue = [], - item_queue_length = 0, - item_queue_byte_size = 0, - acksync_threshold = - AckSyncThreshold, - type = Type, - notsent_acc = UnsentAcc0, - notsent_fun = UnsentFun}, - false, FoldOpts), + #ho_acc{ack = 0, error = ok, + filter = Filter, module = Module, + parent = ParentPid, + socket = Socket, + src_target = + {SrcPartition, + TargetPartition}, + stats = Stats, total_bytes = 0, + total_objects = 0, + use_batching = + RemoteSupportsBatching, + item_queue = [], + item_queue_length = 0, + item_queue_byte_size = 0, + acksync_threshold = + AckSyncThreshold, + type = Type, + notsent_acc = UnsentAcc0, + notsent_fun = UnsentFun}, + false, + FoldOpts), %% IFF the vnode is using an async worker to perform the fold %% then sync_command will return error on vnode crash, %% otherwise it will wait forever but vnode crash will be %% caught by handoff manager. I know, this is confusing, a %% new handoff system will be written soon enough. 
AccRecord0 = case - riak_core_vnode_master:sync_command({SrcPartition, - SrcNode}, - Req, VMaster, infinity) - of - #ho_acc{} = Ret -> Ret; - Ret -> - logger:error("[handoff] Bad handoff record: ~p", [Ret]), - Ret - end, + riak_core_vnode_master:sync_command({SrcPartition, + SrcNode}, + Req, + VMaster, + infinity) + of + #ho_acc{} = Ret -> Ret; + Ret -> + logger:error("[handoff] Bad handoff record: ~p", + [Ret]), + Ret + end, %% Send any straggler entries remaining in the buffer: AccRecord = send_objects(AccRecord0#ho_acc.item_queue, - AccRecord0), + AccRecord0), if AccRecord == {error, vnode_shutdown} -> - ?LOG_INFO("because the local vnode was shutdown", []), - throw({be_quiet, error, - local_vnode_shutdown_requested}); + ?LOG_INFO("because the local vnode was shutdown", []), + throw({be_quiet, + error, + local_vnode_shutdown_requested}); true -> - ok % If not #ho_acc, get badmatch below + ok % If not #ho_acc, get badmatch below end, #ho_acc{error = ErrStatus, module = Module, - parent = ParentPid, total_objects = TotalObjects, - total_bytes = TotalBytes, stats = FinalStats, - acksync_timer = TRef, notsent_acc = NotSentAcc} = - AccRecord, + parent = ParentPid, total_objects = TotalObjects, + total_bytes = TotalBytes, stats = FinalStats, + acksync_timer = TRef, notsent_acc = NotSentAcc} = + AccRecord, _ = timer:cancel(TRef), case ErrStatus of - ok -> - %% One last sync to make sure the message has been received. - %% post-0.14 vnodes switch to handoff to forwarding immediately - %% so handoff_complete can only be sent once all of the data is - %% written. handle_handoff_data is a sync call, so once - %% we receive the sync the remote side will be up to date. - logger:debug("~p ~p Sending final sync", - [SrcPartition, Module]), - ok = gen_tcp:send(Socket, <<(?PT_MSG_SYNC):8>>), - case gen_tcp:recv(Socket, 0, RecvTimeout) of - {ok, [?PT_MSG_SYNC | <<"sync">>]} -> - logger:debug("~p ~p Final sync received", - [SrcPartition, Module]); - {error, timeout} -> exit({shutdown, timeout}) - end, - FoldTimeDiff = end_fold_time(StartFoldTime), - ThroughputBytes = TotalBytes / FoldTimeDiff, - ok = - logger:info("~p transfer of ~p from ~p ~p to ~p ~p " - "completed: sent ~p bytes in ~p of ~p " - "objects in ~p seconds (~p/second)", - [Type, Module, SrcNode, SrcPartition, TargetNode, - TargetPartition, TotalBytes, - FinalStats#ho_stats.objs, TotalObjects, FoldTimeDiff, - ThroughputBytes]), - case Type of - repair -> ok; - resize -> - riak_core_vnode:resize_transfer_complete(ParentPid, NotSentAcc); - _ -> - riak_core_vnode:handoff_complete(ParentPid) - end; - {error, ErrReason} -> - if ErrReason == timeout -> exit({shutdown, timeout}); - true -> exit({shutdown, {error, ErrReason}}) - end + ok -> + %% One last sync to make sure the message has been received. + %% post-0.14 vnodes switch to handoff to forwarding immediately + %% so handoff_complete can only be sent once all of the data is + %% written. handle_handoff_data is a sync call, so once + %% we receive the sync the remote side will be up to date. 
+ logger:debug("~p ~p Sending final sync", + [SrcPartition, Module]), + ok = gen_tcp:send(Socket, <<(?PT_MSG_SYNC):8>>), + case gen_tcp:recv(Socket, 0, RecvTimeout) of + {ok, [?PT_MSG_SYNC | <<"sync">>]} -> + logger:debug("~p ~p Final sync received", + [SrcPartition, Module]); + {error, timeout} -> exit({shutdown, timeout}) + end, + FoldTimeDiff = end_fold_time(StartFoldTime), + ThroughputBytes = TotalBytes / FoldTimeDiff, + ok = + logger:info("~p transfer of ~p from ~p ~p to ~p ~p " + "completed: sent ~p bytes in ~p of ~p " + "objects in ~p seconds (~p/second)", + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition, + TotalBytes, + FinalStats#ho_stats.objs, + TotalObjects, + FoldTimeDiff, + ThroughputBytes]), + case Type of + repair -> ok; + resize -> + riak_core_vnode:resize_transfer_complete(ParentPid, + NotSentAcc); + _ -> riak_core_vnode:handoff_complete(ParentPid) + end; + {error, ErrReason} -> + if ErrReason == timeout -> exit({shutdown, timeout}); + true -> exit({shutdown, {error, ErrReason}}) + end end. start_fold(TargetNode, Module, {Type, Opts}, - ParentPid) -> + ParentPid) -> SrcNode = node(), SrcPartition = get_src_partition(Opts), TargetPartition = get_target_partition(Opts), - try start_fold_(TargetNode, Module, Type, Opts, - ParentPid, SrcNode, SrcPartition, TargetPartition) + try start_fold_(TargetNode, + Module, + Type, + Opts, + ParentPid, + SrcNode, + SrcPartition, + TargetPartition) catch - exit:{shutdown, max_concurrency} -> - %% Need to fwd the error so the handoff mgr knows - exit({shutdown, max_concurrency}); - exit:{shutdown, timeout} -> - %% A receive timeout during handoff - %% STATS - %% riak_core_stat:update(handoff_timeouts), - ?LOG_FAIL("because of TCP recv timeout", []), - exit({shutdown, timeout}); - exit:{shutdown, {error, Reason}} -> - ?LOG_FAIL("because of ~p", [Reason]), - riak_core_vnode:handoff_error(ParentPid, fold_error, Reason), - exit({shutdown, {error, Reason}}); - {be_quiet, Err, Reason} -> - riak_core_vnode:handoff_error(ParentPid, Err, Reason); - Err:Reason:Stacktrace -> - ?LOG_FAIL("because of ~p:~p ~p", [Err, Reason, Stacktrace]), - riak_core_vnode:handoff_error(ParentPid, Err, Reason) + exit:{shutdown, max_concurrency} -> + %% Need to fwd the error so the handoff mgr knows + exit({shutdown, max_concurrency}); + exit:{shutdown, timeout} -> + %% A receive timeout during handoff + %% STATS + %% riak_core_stat:update(handoff_timeouts), + ?LOG_FAIL("because of TCP recv timeout", []), + exit({shutdown, timeout}); + exit:{shutdown, {error, Reason}} -> + ?LOG_FAIL("because of ~p", [Reason]), + riak_core_vnode:handoff_error(ParentPid, + fold_error, + Reason), + exit({shutdown, {error, Reason}}); + {be_quiet, Err, Reason} -> + riak_core_vnode:handoff_error(ParentPid, Err, Reason); + Err:Reason:Stacktrace -> + ?LOG_FAIL("because of ~p:~p ~p", + [Err, Reason, Stacktrace]), + riak_core_vnode:handoff_error(ParentPid, Err, Reason) end. start_visit_item_timer() -> Ival = case application:get_env(riak_core, - handoff_receive_timeout, undefined) - of - TO when is_integer(TO) -> erlang:max(1000, TO div 3); - _ -> 60 * 1000 - end, + handoff_receive_timeout, + undefined) + of + TO when is_integer(TO) -> erlang:max(1000, TO div 3); + _ -> 60 * 1000 + end, timer:send_interval(Ival, tick_send_sync). 
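start_visit_item_timer/0 posts tick_send_sync to the folding process at a third of the receive timeout, floored at one second, and visit_item/3 below drains it with a zero-timeout receive so a long fold forces an ack/sync round without ever blocking. The non-blocking poll idiom in isolation:

    self() ! tick_send_sync,
    true = receive tick_send_sync -> true after 0 -> false end,
    %% mailbox now empty: the same expression returns false immediately
    false = receive tick_send_sync -> true after 0 -> false end.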
visit_item(K, V, - Acc0 = #ho_acc{acksync_threshold = AccSyncThreshold}) -> + Acc0 = #ho_acc{acksync_threshold = AccSyncThreshold}) -> %% Eventually, a vnode worker proc will be doing this fold, but we don't %% know the pid of that proc ahead of time. So we have to start the %% timer some time after the fold has started execution on that proc %% ... like now, perhaps. Acc = case get(is_visit_item_timer_set) of - undefined -> - put(is_visit_item_timer_set, true), - {ok, TRef} = start_visit_item_timer(), - Acc0#ho_acc{acksync_timer = TRef}; - _ -> Acc0 - end, + undefined -> + put(is_visit_item_timer_set, true), + {ok, TRef} = start_visit_item_timer(), + Acc0#ho_acc{acksync_timer = TRef}; + _ -> Acc0 + end, receive - tick_send_sync -> - visit_item2(K, V, Acc#ho_acc{ack = AccSyncThreshold}) - after 0 -> visit_item2(K, V, Acc) + tick_send_sync -> + visit_item2(K, V, Acc#ho_acc{ack = AccSyncThreshold}) + after 0 -> visit_item2(K, V, Acc) end. %% When a tcp error occurs, the ErrStatus argument is set to {error, Reason}. %% Since we can't abort the fold, this clause is just a no-op. visit_item2(_K, _V, - Acc = #ho_acc{error = {error, _Reason}}) -> + Acc = #ho_acc{error = {error, _Reason}}) -> %% When a TCP error occurs, #ho_acc.error is set to {error, Reason}. throw(Acc); visit_item2(K, V, - Acc = #ho_acc{ack = _AccSyncThreshold, - acksync_threshold = _AccSyncThreshold}) -> + Acc = #ho_acc{ack = _AccSyncThreshold, + acksync_threshold = _AccSyncThreshold}) -> #ho_acc{module = Module, socket = Sock, - src_target = {SrcPartition, TargetPartition}, - stats = Stats} = - Acc, + src_target = {SrcPartition, TargetPartition}, + stats = Stats} = + Acc, RecvTimeout = get_handoff_receive_timeout(), M = <<(?PT_MSG_OLDSYNC):8, "sync">>, NumBytes = byte_size(M), Stats2 = incr_bytes(Stats, NumBytes), - Stats3 = maybe_send_status({Module, SrcPartition, - TargetPartition}, - Stats2), + Stats3 = maybe_send_status({Module, + SrcPartition, + TargetPartition}, + Stats2), case gen_tcp:send(Sock, M) of - ok -> - case gen_tcp:recv(Sock, 0, RecvTimeout) of - {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> - Acc2 = Acc#ho_acc{ack = 0, error = ok, stats = Stats3}, - visit_item2(K, V, Acc2); - {error, Reason} -> - Acc#ho_acc{ack = 0, error = {error, Reason}, - stats = Stats3} - end; - {error, Reason} -> - Acc#ho_acc{ack = 0, error = {error, Reason}, - stats = Stats3} + ok -> + case gen_tcp:recv(Sock, 0, RecvTimeout) of + {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> + Acc2 = Acc#ho_acc{ack = 0, error = ok, stats = Stats3}, + visit_item2(K, V, Acc2); + {error, Reason} -> + Acc#ho_acc{ack = 0, error = {error, Reason}, + stats = Stats3} + end; + {error, Reason} -> + Acc#ho_acc{ack = 0, error = {error, Reason}, + stats = Stats3} end; visit_item2(K, V, Acc) -> #ho_acc{filter = Filter, module = Module, - total_objects = TotalObjects, - use_batching = UseBatching, item_queue = ItemQueue, - item_queue_length = ItemQueueLength, - item_queue_byte_size = ItemQueueByteSize, - notsent_fun = NotSentFun, notsent_acc = NotSentAcc} = - Acc, + total_objects = TotalObjects, + use_batching = UseBatching, item_queue = ItemQueue, + item_queue_length = ItemQueueLength, + item_queue_byte_size = ItemQueueByteSize, + notsent_fun = NotSentFun, notsent_acc = NotSentAcc} = + Acc, case Filter(K) of - true -> - case Module:encode_handoff_item(K, V) of - corrupted -> - {Bucket, Key} = K, - logger:warning("Unreadable object ~p/~p discarded", - [Bucket, Key]), - Acc; - BinObj -> - case UseBatching of - true -> - ItemQueue2 = [BinObj | ItemQueue], - ItemQueueLength2 = 
ItemQueueLength + 1, - ItemQueueByteSize2 = ItemQueueByteSize + - byte_size(BinObj), - Acc2 = Acc#ho_acc{item_queue_length = ItemQueueLength2, - item_queue_byte_size = - ItemQueueByteSize2}, - %% Unit size is bytes: - HandoffBatchThreshold = application:get_env(riak_core, - handoff_batch_threshold, - 1024 * 1024), - case ItemQueueByteSize2 =< HandoffBatchThreshold of - true -> Acc2#ho_acc{item_queue = ItemQueue2}; - false -> send_objects(ItemQueue2, Acc2) - end; - _ -> - #ho_acc{ack = Ack, socket = Sock, - src_target = {SrcPartition, TargetPartition}, - stats = Stats, total_objects = TotalObjects, - total_bytes = TotalBytes} = - Acc, - M = <<(?PT_MSG_OBJ):8, BinObj/binary>>, - NumBytes = byte_size(M), - Stats2 = incr_bytes(incr_objs(Stats), NumBytes), - Stats3 = maybe_send_status({Module, SrcPartition, - TargetPartition}, - Stats2), - case gen_tcp:send(Sock, M) of - ok -> - Acc#ho_acc{ack = Ack + 1, error = ok, - stats = Stats3, - total_bytes = TotalBytes + NumBytes, - total_objects = TotalObjects + 1}; - {error, Reason} -> - Acc#ho_acc{error = {error, Reason}, stats = Stats3} - end - end - end; - false -> - NewNotSentAcc = handle_not_sent_item(NotSentFun, - NotSentAcc, K), - Acc#ho_acc{error = ok, total_objects = TotalObjects + 1, - notsent_acc = NewNotSentAcc} + true -> + case Module:encode_handoff_item(K, V) of + corrupted -> + {Bucket, Key} = K, + logger:warning("Unreadable object ~p/~p discarded", + [Bucket, Key]), + Acc; + BinObj -> + case UseBatching of + true -> + ItemQueue2 = [BinObj | ItemQueue], + ItemQueueLength2 = ItemQueueLength + 1, + ItemQueueByteSize2 = ItemQueueByteSize + + byte_size(BinObj), + Acc2 = Acc#ho_acc{item_queue_length = + ItemQueueLength2, + item_queue_byte_size = + ItemQueueByteSize2}, + %% Unit size is bytes: + HandoffBatchThreshold = + application:get_env(riak_core, + handoff_batch_threshold, + 1024 * 1024), + case ItemQueueByteSize2 =< HandoffBatchThreshold of + true -> Acc2#ho_acc{item_queue = ItemQueue2}; + false -> send_objects(ItemQueue2, Acc2) + end; + _ -> + #ho_acc{ack = Ack, socket = Sock, + src_target = + {SrcPartition, TargetPartition}, + stats = Stats, total_objects = TotalObjects, + total_bytes = TotalBytes} = + Acc, + M = <<(?PT_MSG_OBJ):8, BinObj/binary>>, + NumBytes = byte_size(M), + Stats2 = incr_bytes(incr_objs(Stats), NumBytes), + Stats3 = maybe_send_status({Module, + SrcPartition, + TargetPartition}, + Stats2), + case gen_tcp:send(Sock, M) of + ok -> + Acc#ho_acc{ack = Ack + 1, error = ok, + stats = Stats3, + total_bytes = + TotalBytes + NumBytes, + total_objects = + TotalObjects + 1}; + {error, Reason} -> + Acc#ho_acc{error = {error, Reason}, + stats = Stats3} + end + end + end; + false -> + NewNotSentAcc = handle_not_sent_item(NotSentFun, + NotSentAcc, + K), + Acc#ho_acc{error = ok, total_objects = TotalObjects + 1, + notsent_acc = NewNotSentAcc} end. 
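A batch is flushed as soon as its accumulated byte size exceeds handoff_batch_threshold, which defaults to 1 MiB of encoded objects as read above. Shrinking it for small-object workloads is a one-line env change (the value is illustrative):

    %% flush handoff batches at 256 KiB instead of the 1 MiB default
    ok = application:set_env(riak_core, handoff_batch_threshold, 256 * 1024).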
handle_not_sent_item(undefined, _, _) -> undefined; @@ -413,45 +466,50 @@ send_objects([], Acc) -> Acc; send_objects(ItemsReverseList, Acc) -> Items = lists:reverse(ItemsReverseList), #ho_acc{ack = Ack, module = Module, socket = Sock, - src_target = {SrcPartition, TargetPartition}, - stats = Stats, total_objects = TotalObjects, - total_bytes = TotalBytes, - item_queue_length = NObjects} = - Acc, + src_target = {SrcPartition, TargetPartition}, + stats = Stats, total_objects = TotalObjects, + total_bytes = TotalBytes, + item_queue_length = NObjects} = + Acc, ObjectList = term_to_binary(Items), M = <<(?PT_MSG_BATCH):8, ObjectList/binary>>, NumBytes = byte_size(M), Stats2 = incr_bytes(incr_objs(Stats, NObjects), - NumBytes), - Stats3 = maybe_send_status({Module, SrcPartition, - TargetPartition}, - Stats2), + NumBytes), + Stats3 = maybe_send_status({Module, + SrcPartition, + TargetPartition}, + Stats2), case gen_tcp:send(Sock, M) of - ok -> - Acc#ho_acc{ack = Ack + 1, error = ok, stats = Stats3, - total_objects = TotalObjects + NObjects, - total_bytes = TotalBytes + NumBytes, item_queue = [], - item_queue_length = 0, item_queue_byte_size = 0}; - {error, Reason} -> - Acc#ho_acc{error = {error, Reason}, stats = Stats3} + ok -> + Acc#ho_acc{ack = Ack + 1, error = ok, stats = Stats3, + total_objects = TotalObjects + NObjects, + total_bytes = TotalBytes + NumBytes, item_queue = [], + item_queue_length = 0, item_queue_byte_size = 0}; + {error, Reason} -> + Acc#ho_acc{error = {error, Reason}, stats = Stats3} end. get_handoff_ip(Node) when is_atom(Node) -> case riak_core_util:safe_rpc(Node, - riak_core_handoff_listener, get_handoff_ip, [], - infinity) - of - {badrpc, _} -> error; - Res -> Res + riak_core_handoff_listener, + get_handoff_ip, + [], + infinity) + of + {badrpc, _} -> error; + Res -> Res end. get_handoff_port(Node) when is_atom(Node) -> gen_server:call({riak_core_handoff_listener, Node}, - handoff_port, infinity). + handoff_port, + infinity). get_handoff_receive_timeout() -> - application:get_env(riak_core, handoff_timeout, - ?TCP_TIMEOUT). + application:get_env(riak_core, + handoff_timeout, + ?TCP_TIMEOUT). end_fold_time(StartFoldTime) -> EndFoldTime = os:timestamp(), @@ -478,10 +536,10 @@ is_elapsed(TS) -> os:timestamp() >= TS. %% %% @doc Increment `Stats' byte count by `NumBytes'. -spec incr_bytes(ho_stats(), - non_neg_integer()) -> NewStats :: ho_stats(). + non_neg_integer()) -> NewStats :: ho_stats(). incr_bytes(Stats = #ho_stats{bytes = Bytes}, - NumBytes) -> + NumBytes) -> Stats#ho_stats{bytes = Bytes + NumBytes}. incr_objs(Stats) -> incr_objs(Stats, 1). @@ -490,7 +548,7 @@ incr_objs(Stats) -> incr_objs(Stats, 1). %% %% @doc Increment `Stats' object count by NObjs: -spec incr_objs(ho_stats(), - non_neg_integer()) -> NewStats :: ho_stats(). + non_neg_integer()) -> NewStats :: ho_stats(). incr_objs(Stats = #ho_stats{objs = Objs}, NObjs) -> Stats#ho_stats{objs = Objs + NObjs}. @@ -501,24 +559,25 @@ incr_objs(Stats = #ho_stats{objs = Objs}, NObjs) -> %% for `ModSrcTgt' to the manager and return a new stats record %% `NetStats'. -spec maybe_send_status({module(), non_neg_integer(), - non_neg_integer()}, - ho_stats()) -> NewStats :: ho_stats(). + non_neg_integer()}, + ho_stats()) -> NewStats :: ho_stats(). 
maybe_send_status(ModSrcTgt, - Stats = #ho_stats{interval_end = IntervalEnd}) -> + Stats = #ho_stats{interval_end = IntervalEnd}) -> case is_elapsed(IntervalEnd) of - true -> - Stats2 = Stats#ho_stats{last_update = os:timestamp()}, - riak_core_handoff_manager:status_update(ModSrcTgt, - Stats2), - #ho_stats{interval_end = - future_now(get_status_interval())}; - false -> Stats + true -> + Stats2 = Stats#ho_stats{last_update = os:timestamp()}, + riak_core_handoff_manager:status_update(ModSrcTgt, + Stats2), + #ho_stats{interval_end = + future_now(get_status_interval())}; + false -> Stats end. get_status_interval() -> - application:get_env(riak_core, handoff_status_interval, - ?STATUS_INTERVAL). + application:get_env(riak_core, + handoff_status_interval, + ?STATUS_INTERVAL). get_src_partition(Opts) -> proplists:get_value(src_partition, Opts). @@ -531,16 +590,16 @@ get_notsent_acc0(Opts) -> get_notsent_fun(Opts) -> case proplists:get_value(notsent_fun, Opts) of - none -> fun (_, _) -> undefined end; - Fun -> Fun + none -> fun (_, _) -> undefined end; + Fun -> Fun end. -spec get_filter(proplists:proplist()) -> predicate(). get_filter(Opts) -> case proplists:get_value(filter, Opts) of - none -> fun (_) -> true end; - Filter -> Filter + none -> fun (_) -> true end; + Filter -> Filter end. %% @private @@ -549,17 +608,19 @@ get_filter(Opts) -> %% otherwise fall back to the slower, object-at-a-time path remote_supports_batching(Node) -> - case catch rpc:call(Node, riak_core_handoff_receiver, - supports_batching, []) - of - true -> - logger:debug("remote node supports batching, enabling"), - true; - _ -> - %% whatever the problem here, just revert to the old behavior - %% which shouldn't matter too much for any single handoff - logger:debug("remote node doesn't support batching"), - false + case catch rpc:call(Node, + riak_core_handoff_receiver, + supports_batching, + []) + of + true -> + logger:debug("remote node supports batching, enabling"), + true; + _ -> + %% whatever the problem here, just revert to the old behavior + %% which shouldn't matter too much for any single handoff + logger:debug("remote node doesn't support batching"), + false end. %% @private @@ -573,19 +634,19 @@ remote_supports_batching(Node) -> %% the process. maybe_call_handoff_started(Module, SrcPartition) -> case lists:member({handoff_started, 2}, - Module:module_info(exports)) - of - true -> - WorkerPid = self(), - case Module:handoff_started(SrcPartition, WorkerPid) of - {ok, FoldOpts} -> FoldOpts; - {error, max_concurrency} -> - %% Handoff of that partition is busy or can't proceed. Stopping with - %% max_concurrency will cause this partition to be retried again later. - exit({shutdown, max_concurrency}); - {error, Error} -> exit({shutdown, Error}) - end; - false -> - %% optional callback not implemented, so we carry on, w/ no addition fold options - [] + Module:module_info(exports)) + of + true -> + WorkerPid = self(), + case Module:handoff_started(SrcPartition, WorkerPid) of + {ok, FoldOpts} -> FoldOpts; + {error, max_concurrency} -> + %% Handoff of that partition is busy or can't proceed. Stopping with + %% max_concurrency will cause this partition to be retried again later. + exit({shutdown, max_concurrency}); + {error, Error} -> exit({shutdown, Error}) + end; + false -> + %% optional callback not implemented, so we carry on, w/ no addition fold options + [] end. 
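maybe_call_handoff_started/2 above treats handoff_started/2 as an optional vnode callback, probed via module_info(exports). A vnode module opting in could look like this sketch (the module name and fold option are illustrative; returning {error, max_concurrency} defers the partition for a later retry):

    -module(example_vnode).

    -export([handoff_started/2]).

    %% extra FoldOpts are threaded into riak_core_util:make_fold_req/4
    handoff_started(_SrcPartition, _HandoffWorkerPid) ->
        {ok, [{iterator_refresh, true}]}.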
diff --git a/src/riak_core_handoff_sender_sup.erl b/src/riak_core_handoff_sender_sup.erl index cbb503c0d..2def80331 100644 --- a/src/riak_core_handoff_sender_sup.erl +++ b/src/riak_core_handoff_sender_sup.erl @@ -31,8 +31,12 @@ -include("riak_core_handoff.hrl"). -define(CHILD(I, Type), - {I, {I, start_link, []}, temporary, brutal_kill, Type, - [I]}). + {I, + {I, start_link, []}, + temporary, + brutal_kill, + Type, + [I]}). %%%=================================================================== %%% API @@ -56,11 +60,11 @@ start_link() -> %% * unsent_acc0 - optional. The intial accumulator value passed to unsent_fun %% for the first unsent key -spec start_sender(ho_type(), atom(), term(), pid(), - [{atom(), term()}]) -> {ok, pid()}. + [{atom(), term()}]) -> {ok, pid()}. start_sender(Type, Module, TargetNode, VNode, Opts) -> supervisor:start_child(?MODULE, - [TargetNode, Module, {Type, Opts}, VNode]). + [TargetNode, Module, {Type, Opts}, VNode]). %%%=================================================================== %%% Callbacks diff --git a/src/riak_core_handoff_sup.erl b/src/riak_core_handoff_sup.erl index 518f89eaf..4493f220e 100644 --- a/src/riak_core_handoff_sup.erl +++ b/src/riak_core_handoff_sup.erl @@ -26,8 +26,12 @@ -export([start_link/0, init/1]). -define(CHILD(I, Type), - {I, {I, start_link, []}, permanent, brutal_kill, Type, - [I]}). + {I, + {I, start_link, []}, + permanent, + brutal_kill, + Type, + [I]}). %% begins the supervisor, init/1 will be called start_link() -> diff --git a/src/riak_core_node_watcher.erl b/src/riak_core_node_watcher.erl index 19d090bd6..46b999453 100644 --- a/src/riak_core_node_watcher.erl +++ b/src/riak_core_node_watcher.erl @@ -26,11 +26,20 @@ -define(DEFAULT_HEALTH_CHECK_INTERVAL, 60000). %% API --export([start_link/0, service_up/2, service_up/3, - service_up/4, check_health/1, suspend_health_checks/0, - resume_health_checks/0, service_down/1, service_down/2, - node_up/0, node_down/0, services/0, services/1, - nodes/1]). +-export([start_link/0, + service_up/2, + service_up/3, + service_up/4, + check_health/1, + suspend_health_checks/0, + resume_health_checks/0, + service_down/1, + service_down/2, + node_up/0, + node_down/0, + services/0, + services/1, + nodes/1]). %% TEST API -ifdef(TEST). @@ -46,60 +55,74 @@ -endif. %% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, - {status = up, services = [], health_checks = [], - healths_enabled = true, peers = [], avsn = 0, - bcast_tref, bcast_mod = {gen_server, abcast}}). + {status = up, + services = [], + health_checks = [], + healths_enabled = true, + peers = [], + avsn = 0, + bcast_tref, + bcast_mod = {gen_server, abcast}}). -record(health_check, - {state = waiting :: waiting | checking | suspend, - callback :: {atom(), atom(), [any()]}, - service_pid :: pid(), - checking_pid :: pid() | undefined, - health_failures = 0 :: non_neg_integer(), - callback_failures = 0 :: non_neg_integer(), - interval_tref, - %% how many milliseconds to wait after a check has - %% finished before starting a new one - check_interval = ?DEFAULT_HEALTH_CHECK_INTERVAL :: - timeout(), - max_callback_failures = 3, max_health_failures = 1}). 
+        {state = waiting :: waiting | checking | suspend,
+         callback :: {atom(), atom(), [any()]},
+         service_pid :: pid(),
+         checking_pid :: pid() | undefined,
+         health_failures = 0 :: non_neg_integer(),
+         callback_failures = 0 :: non_neg_integer(),
+         interval_tref,
+         %% how many milliseconds to wait after a check has
+         %% finished before starting a new one
+         check_interval = ?DEFAULT_HEALTH_CHECK_INTERVAL ::
+             timeout(),
+         max_callback_failures = 3,
+         max_health_failures = 1}).

 %% ===================================================================
 %% Public API
 %% ===================================================================

 start_link() ->
-    gen_server:start_link({local, ?MODULE}, ?MODULE, [],
-                          []).
+    gen_server:start_link({local, ?MODULE},
+                          ?MODULE,
+                          [],
+                          []).

 service_up(Id, Pid) ->
-    gen_server:call(?MODULE, {service_up, Id, Pid},
-                    infinity).
+    gen_server:call(?MODULE,
+                    {service_up, Id, Pid},
+                    infinity).

 %% @doc {@link service_up/4} with default options.
 %% @see service_up/4
 -spec service_up(Id :: atom(), Pid :: pid(),
-                 MFA :: mfa()) -> ok.
+                 MFA :: mfa()) -> ok.

 service_up(Id, Pid, MFA) -> service_up(Id, Pid, MFA, []).

 -type hc_check_interval_opt() :: {check_interval,
-                                  timeout()}.
+                                  timeout()}.

 -type hc_max_callback_fails_opt() :: {max_callback_failures,
-                                      non_neg_integer()}.
+                                      non_neg_integer()}.

 -type hc_max_health_fails_opt() :: {max_health_failures,
-                                    non_neg_integer()}.
+                                    non_neg_integer()}.

 -type health_opt() :: hc_check_interval_opt() |
-                      hc_max_callback_fails_opt() | hc_max_health_fails_opt().
+                      hc_max_callback_fails_opt() |
+                      hc_max_health_fails_opt().

 -type health_opts() :: [health_opt()].

@@ -121,14 +144,17 @@ service_up(Id, Pid, MFA) ->
 %% any other, using {@link service_down/1}.
 %% @see service_up/2
 -spec service_up(Id :: atom(), Pid :: pid(),
-                 Callback :: mfa(), Options :: health_opts()) -> ok.
+                 Callback :: mfa(), Options :: health_opts()) -> ok.

 service_up(Id, Pid, {Module, Function, Args},
-           Options) ->
+           Options) ->
     gen_server:call(?MODULE,
-                    {service_up, Id, Pid, {Module, Function, Args},
-                     Options},
-                    infinity).
+                    {service_up,
+                     Id,
+                     Pid,
+                     {Module, Function, Args},
+                     Options},
+                    infinity).

 %% @doc Force a health check for the given service. If the service does
 %% not have a health check associated with it, this is ignored. Resets the
@@ -137,7 +163,8 @@ service_up(Id, Pid, {Module, Function, Args},
 -spec check_health(Service :: atom()) -> ok.

 check_health(Service) ->
-    (?MODULE) ! {check_health, Service}, ok.
+    (?MODULE) ! {check_health, Service},
+    ok.

 suspend_health_checks() ->
     gen_server:call(?MODULE, suspend_healths, infinity).
@@ -150,7 +177,8 @@ service_down(Id) ->

 service_down(Id, true) ->
     gen_server:call(?MODULE,
-                    {service_down, Id, health_check}, infinitiy);
+                    {service_down, Id, health_check},
+                    infinity);
 service_down(Id, false) -> service_down(Id).

 node_up() ->
@@ -164,8 +192,8 @@ services() ->

 services(Node) ->
     case check_node_valid(Node) of
-      true -> internal_get_services(Node);
-      _ -> invalid_node
+        true -> internal_get_services(Node);
+        _ -> invalid_node
     end.

 nodes(Service) -> internal_get_nodes(Service).
@@ -179,8 +207,9 @@ nodes(Service) -> internal_get_nodes(Service).

 avsn() -> gen_server:call(?MODULE, get_avsn, infinity).

 set_broadcast_module(Module, Fn) ->
-    gen_server:call(?MODULE, {set_bcast_mod, Module, Fn},
-                    infinity).
+    gen_server:call(?MODULE,
+                    {set_bcast_mod, Module, Fn},
+                    infinity).

 -endif.
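To make the options plumbing above concrete, this is how a caller might register a service with a periodic health check; my_service and my_service:ping/1 are hypothetical names. Note from start_health_check/2 further down that the callback is applied as erlang:apply(Mod, Func, [Pid | Args]), so the service pid is prepended to the argument list, and the callback must return true or false.

    %% Illustrative registration only; the service and callback are assumptions.
    ok = riak_core_node_watcher:service_up(
             my_service,
             self(),
             {my_service, ping, []},        %% applied as ping(ServicePid)
             [{check_interval, 30000},      %% ms between finished checks
              {max_health_failures, 2},     %% false results tolerated
              {max_callback_failures, 3}]). %% callback crashes tolerated

The check itself runs in a spawned, linked process (see start_health_check/2), so a crashing callback is accounted for by the {'EXIT', Pid, Cause} clause of the health FSM rather than crashing the node watcher.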
@@ -199,11 +228,11 @@ init([]) -> ok = net_kernel:monitor_nodes(true), %% Setup ETS table to track node status (?MODULE) = ets:new(?MODULE, - [protected, {read_concurrency, true}, named_table]), + [protected, {read_concurrency, true}, named_table]), {ok, schedule_broadcast(#state{})}. handle_call({set_bcast_mod, Module, Fn}, _From, - State) -> + State) -> %% Call available for swapping out how broadcasts are generated {reply, ok, State#state{bcast_mod = {Module, Fn}}}; handle_call(get_avsn, _From, State) -> @@ -214,36 +243,44 @@ handle_call({service_up, Id, Pid}, _From, State) -> S3 = add_service(Id, Pid, S2), {reply, ok, S3}; handle_call({service_up, Id, Pid, MFA, Options}, From, - State) -> + State) -> %% update the active set of services if needed. {reply, _, State1} = handle_call({service_up, Id, Pid}, - From, State), + From, + State), State2 = remove_health_check(Id, State1), case application:get_env(riak_core, - enable_health_checks, true) - of - true -> - %% install the health check - CheckInterval = proplists:get_value(check_interval, - Options, - ?DEFAULT_HEALTH_CHECK_INTERVAL), - IntervalTref = case CheckInterval of - infinity -> undefined; - N -> erlang:send_after(N, self(), {check_health, Id}) - end, - CheckRec = #health_check{callback = MFA, - check_interval = CheckInterval, - service_pid = Pid, - max_health_failures = - proplists:get_value(max_health_failures, - Options, 1), - max_callback_failures = - proplists:get_value(max_callback_failures, - Options, 3), - interval_tref = IntervalTref}, - Healths = orddict:store(Id, CheckRec, - State2#state.health_checks); - false -> Healths = State2#state.health_checks + enable_health_checks, + true) + of + true -> + %% install the health check + CheckInterval = proplists:get_value(check_interval, + Options, + ?DEFAULT_HEALTH_CHECK_INTERVAL), + IntervalTref = case CheckInterval of + infinity -> undefined; + N -> + erlang:send_after(N, + self(), + {check_health, Id}) + end, + CheckRec = #health_check{callback = MFA, + check_interval = CheckInterval, + service_pid = Pid, + max_health_failures = + proplists:get_value(max_health_failures, + Options, + 1), + max_callback_failures = + proplists:get_value(max_callback_failures, + Options, + 3), + interval_tref = IntervalTref}, + Healths = orddict:store(Id, + CheckRec, + State2#state.health_checks); + false -> Healths = State2#state.health_checks end, {reply, ok, State2#state{health_checks = Healths}}; handle_call({service_down, Id}, _From, State) -> @@ -254,62 +291,64 @@ handle_call({service_down, Id}, _From, State) -> handle_call({node_status, Status}, _From, State) -> Transition = {State#state.status, Status}, S2 = case Transition of - {up, down} -> %% up -> down - case State#state.healths_enabled of - true -> - Healths = all_health_fsms(suspend, - State#state.health_checks); - false -> Healths = State#state.health_checks - end, - local_delete(State#state{status = down, - health_checks = Healths}); - {down, up} -> %% down -> up - case State#state.healths_enabled of - true -> - Healths = all_health_fsms(resume, - State#state.health_checks); - false -> Healths = State#state.health_checks - end, - local_update(State#state{status = up, - health_checks = Healths}); - {Status, Status} -> %% noop - State - end, + {up, down} -> %% up -> down + case State#state.healths_enabled of + true -> + Healths = all_health_fsms(suspend, + State#state.health_checks); + false -> Healths = State#state.health_checks + end, + local_delete(State#state{status = down, + health_checks = Healths}); + {down, up} -> %% 
down -> up + case State#state.healths_enabled of + true -> + Healths = all_health_fsms(resume, + State#state.health_checks); + false -> Healths = State#state.health_checks + end, + local_update(State#state{status = up, + health_checks = Healths}); + {Status, Status} -> %% noop + State + end, {reply, ok, update_avsn(S2)}; handle_call(services, _From, State) -> Res = [Service - || {{by_service, Service}, Nds} - <- ets:tab2list(?MODULE), - Nds /= []], + || {{by_service, Service}, Nds} + <- ets:tab2list(?MODULE), + Nds /= []], {reply, lists:sort(Res), State}; handle_call(suspend_healths, _From, - State = #state{healths_enabled = false}) -> + State = #state{healths_enabled = false}) -> {reply, already_disabled, State}; handle_call(suspend_healths, _From, - State = #state{healths_enabled = true}) -> + State = #state{healths_enabled = true}) -> logger:info("suspending all health checks"), Healths = all_health_fsms(suspend, - State#state.health_checks), - {reply, ok, + State#state.health_checks), + {reply, + ok, update_avsn(State#state{health_checks = Healths, - healths_enabled = false})}; + healths_enabled = false})}; handle_call(resume_healths, _From, - State = #state{healths_enabled = true}) -> + State = #state{healths_enabled = true}) -> {reply, already_enabled, State}; handle_call(resume_healths, _From, - State = #state{healths_enabled = false}) -> + State = #state{healths_enabled = false}) -> logger:info("resuming all health checks"), Healths = all_health_fsms(resume, - State#state.health_checks), - {reply, ok, + State#state.health_checks), + {reply, + ok, update_avsn(State#state{health_checks = Healths, - healths_enabled = true})}. + healths_enabled = true})}. handle_cast({ring_update, R}, State) -> %% Ring has changed; determine what peers are new to us %% and broadcast out current status to those peers. Peers0 = - ordsets:from_list(riak_core_ring:all_members(R)), + ordsets:from_list(riak_core_ring:all_members(R)), Peers = ordsets:del_element(node(), Peers0), S2 = peers_update(Peers, State), {noreply, update_avsn(S2)}; @@ -317,35 +356,38 @@ handle_cast({up, Node, Services}, State) -> S2 = node_up(Node, Services, State), {noreply, update_avsn(S2)}; handle_cast({down, Node}, State) -> - node_down(Node, State), {noreply, update_avsn(State)}; + node_down(Node, State), + {noreply, update_avsn(State)}; handle_cast({health_check_result, Pid, R}, State) -> Service = erlang:erase(Pid), - State2 = handle_check_msg({result, Pid, R}, Service, - State), + State2 = handle_check_msg({result, Pid, R}, + Service, + State), {noreply, State2}. handle_info({nodeup, _Node}, State) -> %% Ignore node up events; nothing to do here... {noreply, State}; handle_info({nodedown, Node}, State) -> - node_down(Node, State), {noreply, update_avsn(State)}; + node_down(Node, State), + {noreply, update_avsn(State)}; handle_info({'DOWN', Mref, _, _Pid, _Info}, State) -> %% A sub-system monitored process has terminated. Identify %% the sub-system in question and notify our peers. 
case erlang:get(Mref) of - undefined -> - %% No entry found for this monitor; ignore the message - {noreply, update_avsn(State)}; - Id -> - %% Remove the id<->mref entries in the pdict - delete_service_mref(Id), - %% remove any health checks in place - S2 = remove_health_check(Id, State), - %% Update our list of active services and ETS table - Services = ordsets:del_element(Id, - State#state.services), - S3 = local_update(S2#state{services = Services}), - {noreply, update_avsn(S3)} + undefined -> + %% No entry found for this monitor; ignore the message + {noreply, update_avsn(State)}; + Id -> + %% Remove the id<->mref entries in the pdict + delete_service_mref(Id), + %% remove any health checks in place + S2 = remove_health_check(Id, State), + %% Update our list of active services and ETS table + Services = ordsets:del_element(Id, + State#state.services), + S3 = local_update(S2#state{services = Services}), + {noreply, update_avsn(S3)} end; handle_info({'EXIT', Pid, _Cause} = Msg, State) -> Service = erlang:erase(Pid), @@ -359,12 +401,13 @@ handle_info({gen_event_EXIT, _, _}, State) -> watch_for_ring_events(), {noreply, update_avsn(State)}; handle_info(broadcast, State) -> - S2 = broadcast(State#state.peers, State), {noreply, S2}. + S2 = broadcast(State#state.peers, State), + {noreply, S2}. terminate(_Reason, State) -> %% Let our peers know that we are shutting down broadcast(State#state.peers, - State#state{status = down}). + State#state{status = down}). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -383,23 +426,23 @@ update_avsn(State) -> watch_for_ring_events() -> Self = self(), Fn = fun (R) -> gen_server:cast(Self, {ring_update, R}) - end, + end, riak_core_ring_events:add_sup_callback(Fn). delete_service_mref(Id) -> %% Cleanup the monitor if one exists case erlang:get(Id) of - undefined -> ok; - Mref -> - erlang:erase(Mref), - erlang:erase(Id), - erlang:demonitor(Mref) + undefined -> ok; + Mref -> + erlang:erase(Mref), + erlang:erase(Id), + erlang:demonitor(Mref) end. broadcast(Nodes, State) -> case State#state.status of - up -> Msg = {up, node(), State#state.services}; - down -> Msg = {down, node()} + up -> Msg = {up, node(), State#state.services}; + down -> Msg = {down, node()} end, {Mod, Fn} = State#state.bcast_mod, Mod:Fn(Nodes, ?MODULE, Msg), @@ -407,11 +450,13 @@ broadcast(Nodes, State) -> schedule_broadcast(State) -> case State#state.bcast_tref of - undefined -> ok; - OldTref -> _ = erlang:cancel_timer(OldTref), ok + undefined -> ok; + OldTref -> + _ = erlang:cancel_timer(OldTref), + ok end, {ok, Interval} = application:get_env(riak_core, - gossip_interval), + gossip_interval), Tref = erlang:send_after(Interval, self(), broadcast), State#state{bcast_tref = Tref}. @@ -422,37 +467,37 @@ is_node_up(Node) -> ets:member(?MODULE, Node). node_up(Node, Services, State) -> case is_peer(Node, State) of - true -> - %% Before we alter the ETS table, see if this node was previously - %% down. In that situation, we'll go ahead and broadcast out. - S2 = case is_node_up(Node) of - false -> broadcast([Node], State); - true -> State - end, - case node_update(Node, Services) of - [] -> ok; - AffectedServices -> - riak_core_node_watcher_events:service_update(AffectedServices) - end, - S2; - false -> State + true -> + %% Before we alter the ETS table, see if this node was previously + %% down. In that situation, we'll go ahead and broadcast out. 
+ S2 = case is_node_up(Node) of + false -> broadcast([Node], State); + true -> State + end, + case node_update(Node, Services) of + [] -> ok; + AffectedServices -> + riak_core_node_watcher_events:service_update(AffectedServices) + end, + S2; + false -> State end. node_down(Node, State) -> case is_peer(Node, State) of - true -> - case node_delete(Node) of - [] -> ok; - AffectedServices -> - riak_core_node_watcher_events:service_update(AffectedServices) - end; - false -> ok + true -> + case node_delete(Node) of + [] -> ok; + AffectedServices -> + riak_core_node_watcher_events:service_update(AffectedServices) + end; + false -> ok end. node_delete(Node) -> Services = internal_get_services(Node), _ = [internal_delete(Node, Service) - || Service <- Services], + || Service <- Services], ets:delete(?MODULE, Node), Services. @@ -462,7 +507,7 @@ node_update(Node, Services) -> Now = riak_core_util:moment(), NewStatus = ordsets:from_list(Services), OldStatus = - ordsets:from_list(internal_get_services(Node)), + ordsets:from_list(internal_get_services(Node)), Added = ordsets:subtract(NewStatus, OldStatus), Deleted = ordsets:subtract(OldStatus, NewStatus), %% Update ets table with changes; make sure to touch unchanged @@ -480,23 +525,23 @@ local_update(#state{status = down} = State) -> local_update(State) -> %% Update our local ETS table case node_update(node(), State#state.services) of - [] -> - %% No material changes; no local notification necessary - ok; - AffectedServices -> - %% Generate a local notification about the affected services and - %% also broadcast our status - riak_core_node_watcher_events:service_update(AffectedServices) + [] -> + %% No material changes; no local notification necessary + ok; + AffectedServices -> + %% Generate a local notification about the affected services and + %% also broadcast our status + riak_core_node_watcher_events:service_update(AffectedServices) end, broadcast(State#state.peers, State). local_delete(State) -> case node_delete(node()) of - [] -> - %% No services changed; no local notification required - ok; - AffectedServices -> - riak_core_node_watcher_events:service_update(AffectedServices) + [] -> + %% No services changed; no local notification required + ok; + AffectedServices -> + riak_core_node_watcher_events:service_update(AffectedServices) end, broadcast(State#state.peers, State). @@ -507,15 +552,17 @@ peers_update(NewPeers, State) -> %% For peers that have been deleted, remove their entries from %% the ETS table; we no longer care about their status Services0 = lists:foldl(fun (Node, Acc) -> - S = node_delete(Node), S ++ Acc - end, - [], Deleted), + S = node_delete(Node), + S ++ Acc + end, + [], + Deleted), Services = ordsets:from_list(Services0), %% Notify local parties if any services are affected by this change case Services of - [] -> ok; - _ -> - riak_core_node_watcher_events:service_update(Services) + [] -> ok; + _ -> + riak_core_node_watcher_events:service_update(Services) end, %% Broadcast our current status to new peers broadcast(Added, State#state{peers = NewPeers}). @@ -523,36 +570,36 @@ peers_update(NewPeers, State) -> internal_delete(Node, Service) -> Svcs = internal_get_services(Node), ets:insert(?MODULE, - {{by_node, Node}, Svcs -- [Service]}), + {{by_node, Node}, Svcs -- [Service]}), Nds = internal_get_nodes(Service), ets:insert(?MODULE, - {{by_service, Service}, Nds -- [Node]}). + {{by_service, Service}, Nds -- [Node]}). 
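internal_delete/2 above and internal_insert/2 below maintain two mirrored indexes in the single named ETS table, so both "which services does this node run" and "which nodes run this service" are one lookup each. A read-side sketch follows; direct table access is shown for illustration only, callers normally go through services/1 and nodes/1.

    %% The riak_core_node_watcher table holds two kinds of keys:
    %%   {by_node, Node}       -> [Service]  services advertised by Node
    %%   {by_service, Service} -> [Node]     nodes advertising Service
    nodes_running(Service) ->
        case ets:lookup(riak_core_node_watcher, {by_service, Service}) of
            [{{by_service, Service}, Nodes}] -> Nodes;
            [] -> []
        end.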
internal_insert(Node, Service) -> %% Remove Service & node before adding: avoid accidental duplicates Svcs = internal_get_services(Node) -- [Service], ets:insert(?MODULE, - {{by_node, Node}, [Service | Svcs]}), + {{by_node, Node}, [Service | Svcs]}), Nds = internal_get_nodes(Service) -- [Node], ets:insert(?MODULE, - {{by_service, Service}, [Node | Nds]}). + {{by_service, Service}, [Node | Nds]}). internal_get_services(Node) -> case ets:lookup(?MODULE, {by_node, Node}) of - [{{by_node, Node}, Ss}] -> Ss; - [] -> [] + [{{by_node, Node}, Ss}] -> Ss; + [] -> [] end. internal_get_nodes(Service) -> case ets:lookup(?MODULE, {by_service, Service}) of - [{{by_service, Service}, Ns}] -> Ns; - [] -> [] + [{{by_service, Service}, Ns}] -> Ns; + [] -> [] end. add_service(ServiceId, Pid, State) -> %% Update the set of active services locally Services = ordsets:add_element(ServiceId, - State#state.services), + State#state.services), S2 = State#state{services = Services}, %% Remove any existing mrefs for this service delete_service_mref(ServiceId), @@ -567,7 +614,7 @@ add_service(ServiceId, Pid, State) -> drop_service(ServiceId, State) -> %% Update the set of active services locally Services = ordsets:del_element(ServiceId, - State#state.services), + State#state.services), S2 = State#state{services = Services}, %% Remove any existing mrefs for this service delete_service_mref(ServiceId), @@ -577,47 +624,50 @@ drop_service(ServiceId, State) -> handle_check_msg(_Msg, undefined, State) -> State; handle_check_msg(_Msg, _ServiceId, - #state{status = down} = State) -> + #state{status = down} = State) -> %% most likely a late message State; handle_check_msg(Msg, ServiceId, State) -> case orddict:find(ServiceId, State#state.health_checks) - of - error -> State; - {ok, Check} -> - CheckReturn = health_fsm(Msg, ServiceId, Check), - handle_check_return(CheckReturn, ServiceId, State) + of + error -> State; + {ok, Check} -> + CheckReturn = health_fsm(Msg, ServiceId, Check), + handle_check_return(CheckReturn, ServiceId, State) end. handle_check_return({remove, _Check}, ServiceId, - State) -> + State) -> Healths = orddict:erase(ServiceId, - State#state.health_checks), + State#state.health_checks), State#state{health_checks = Healths}; handle_check_return({ok, Check}, ServiceId, State) -> - Healths = orddict:store(ServiceId, Check, - State#state.health_checks), + Healths = orddict:store(ServiceId, + Check, + State#state.health_checks), State#state{health_checks = Healths}; handle_check_return({up, Check}, ServiceId, State) -> #health_check{service_pid = Pid} = Check, - Healths = orddict:store(ServiceId, Check, - State#state.health_checks), + Healths = orddict:store(ServiceId, + Check, + State#state.health_checks), S2 = State#state{health_checks = Healths}, add_service(ServiceId, Pid, S2); handle_check_return({down, Check}, ServiceId, State) -> - Healths = orddict:store(ServiceId, Check, - State#state.health_checks), + Healths = orddict:store(ServiceId, + Check, + State#state.health_checks), S2 = State#state{health_checks = Healths}, drop_service(ServiceId, S2). remove_health_check(ServiceId, State) -> #state{health_checks = Healths} = State, Healths2 = case orddict:find(ServiceId, Healths) of - error -> Healths; - {ok, Check} -> - {_, _} = health_fsm(remove, ServiceId, Check), - orddict:erase(ServiceId, Healths) - end, + error -> Healths; + {ok, Check} -> + {_, _} = health_fsm(remove, ServiceId, Check), + orddict:erase(ServiceId, Healths) + end, State#state{health_checks = Healths2}. 
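As a compact restatement of the contract between health_fsm/3 and handle_check_return/3 above: {remove, _} erases the check, every other reply stores the updated record, and up/down additionally re-add or drop the service. A toy reducer capturing the bookkeeping half (a sketch, not an API of this module):

    apply_check_return({remove, _Check}, Id, Healths) ->
        orddict:erase(Id, Healths);
    apply_check_return({Reply, Check}, Id, Healths)
        when Reply =:= ok; Reply =:= up; Reply =:= down ->
        orddict:store(Id, Check, Healths).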
%% health checks are an fsm to make mental modeling easier. @@ -632,16 +682,18 @@ remove_health_check(ServiceId, State) -> %% health check finished health_fsm(Msg, Service, - #health_check{state = StateName} = Check) -> - {Reply, NextState, Check2} = health_fsm(StateName, Msg, - Service, Check), + #health_check{state = StateName} = Check) -> + {Reply, NextState, Check2} = health_fsm(StateName, + Msg, + Service, + Check), Check3 = Check2#health_check{state = NextState}, {Reply, Check3}. %% suspend state health_fsm(suspend, resume, Service, InCheck) -> #health_check{health_failures = N, check_interval = V} = - InCheck, + InCheck, Tref = next_health_tref(N, V, Service), OutCheck = InCheck#health_check{interval_tref = Tref}, {ok, waiting, OutCheck}; @@ -651,80 +703,88 @@ health_fsm(suspend, remove, _Service, InCheck) -> health_fsm(checking, suspend, _Service, InCheck) -> #health_check{checking_pid = Pid} = InCheck, erlang:erase(Pid), - {ok, suspend, + {ok, + suspend, InCheck#health_check{checking_pid = undefined}}; health_fsm(checking, check_health, _Service, InCheck) -> {ok, checking, InCheck}; health_fsm(checking, remove, _Service, InCheck) -> {remove, checking, InCheck}; health_fsm(checking, {result, Pid, Cause}, Service, - #health_check{checking_pid = Pid} = InCheck) -> + #health_check{checking_pid = Pid} = InCheck) -> %% handle result from checking pid #health_check{health_failures = HPFails, - max_health_failures = HPMaxFails} = - InCheck, - {Reply, HPFails1} = handle_fsm_exit(Cause, HPFails, - HPMaxFails), + max_health_failures = HPMaxFails} = + InCheck, + {Reply, HPFails1} = handle_fsm_exit(Cause, + HPFails, + HPMaxFails), Tref = next_health_tref(HPFails1, - InCheck#health_check.check_interval, Service), + InCheck#health_check.check_interval, + Service), OutCheck = InCheck#health_check{checking_pid = - undefined, - health_failures = HPFails1, - callback_failures = 0, - interval_tref = Tref}, + undefined, + health_failures = HPFails1, + callback_failures = 0, + interval_tref = Tref}, {Reply, waiting, OutCheck}; health_fsm(checking, {'EXIT', Pid, Cause}, Service, - #health_check{checking_pid = Pid} = InCheck) + #health_check{checking_pid = Pid} = InCheck) when Cause =/= normal -> logger:error("health check process for ~p error'ed: " - " ~p", - [Service, Cause]), + " ~p", + [Service, Cause]), Fails = InCheck#health_check.callback_failures + 1, if Fails == - InCheck#health_check.max_callback_failures -> - logger:error("health check callback for ~p failed " - "too many times, disabling.", - [Service]), - {down, suspend, - InCheck#health_check{checking_pid = undefined, - callback_failures = Fails}}; + InCheck#health_check.max_callback_failures -> + logger:error("health check callback for ~p failed " + "too many times, disabling.", + [Service]), + {down, + suspend, + InCheck#health_check{checking_pid = undefined, + callback_failures = Fails}}; Fails < InCheck#health_check.max_callback_failures -> - #health_check{health_failures = N, - check_interval = Inter} = - InCheck, - Tref = next_health_tref(N, Inter, Service), - OutCheck = InCheck#health_check{checking_pid = - undefined, - callback_failures = Fails, - interval_tref = Tref}, - {ok, waiting, OutCheck}; + #health_check{health_failures = N, + check_interval = Inter} = + InCheck, + Tref = next_health_tref(N, Inter, Service), + OutCheck = InCheck#health_check{checking_pid = + undefined, + callback_failures = Fails, + interval_tref = Tref}, + {ok, waiting, OutCheck}; true -> - %% likely a late message, or a faker - {ok, suspend, - 
InCheck#health_check{checking_pid = undefined, - callback_failures = Fails}} + %% likely a late message, or a faker + {ok, + suspend, + InCheck#health_check{checking_pid = undefined, + callback_failures = Fails}} end; %% message handling when in a waiting state health_fsm(waiting, suspend, _Service, InCheck) -> case InCheck#health_check.interval_tref of - undefined -> ok; - _ -> - _ = - erlang:cancel_timer(InCheck#health_check.interval_tref), - ok + undefined -> ok; + _ -> + _ = + erlang:cancel_timer(InCheck#health_check.interval_tref), + ok end, - {ok, suspend, + {ok, + suspend, InCheck#health_check{interval_tref = undefined}}; health_fsm(waiting, check_health, Service, InCheck) -> InCheck1 = start_health_check(Service, InCheck), {ok, checking, InCheck1}; health_fsm(waiting, remove, _Service, InCheck) -> case InCheck#health_check.interval_tref of - undefined -> ok; - Tref -> _ = erlang:cancel_timer(Tref), ok + undefined -> ok; + Tref -> + _ = erlang:cancel_timer(Tref), + ok end, OutCheck = InCheck#health_check{interval_tref = - undefined}, + undefined}, {remove, waiting, OutCheck}; %% fallthrough handling health_fsm(StateName, _Msg, _Service, Health) -> @@ -747,40 +807,44 @@ handle_fsm_exit(false, HPFails, __) -> {ok, HPFails + 1}. start_health_check(Service, - #health_check{checking_pid = undefined} = CheckRec) -> + #health_check{checking_pid = undefined} = CheckRec) -> {Mod, Func, Args} = CheckRec#health_check.callback, Pid = CheckRec#health_check.service_pid, case CheckRec#health_check.interval_tref of - undefined -> ok; - Tref -> _ = erlang:cancel_timer(Tref), ok + undefined -> ok; + Tref -> + _ = erlang:cancel_timer(Tref), + ok end, CheckingPid = proc_lib:spawn_link(fun () -> - case erlang:apply(Mod, Func, - [Pid | Args]) - of - R - when R =:= true orelse - R =:= false -> - health_check_result(self(), - R); - Else -> exit(Else) - end - end), + case erlang:apply(Mod, + Func, + [Pid | Args]) + of + R + when R =:= true orelse + R =:= false -> + health_check_result(self(), + R); + Else -> exit(Else) + end + end), erlang:put(CheckingPid, Service), CheckRec#health_check{state = checking, - checking_pid = CheckingPid, - interval_tref = undefined}; + checking_pid = CheckingPid, + interval_tref = undefined}; start_health_check(_Service, Check) -> Check. health_check_result(CheckPid, Result) -> gen_server:cast(?MODULE, - {health_check_result, CheckPid, Result}). + {health_check_result, CheckPid, Result}). next_health_tref(_, infinity, _) -> undefined; next_health_tref(N, V, Service) -> Time = determine_time(N, V), - erlang:send_after(Time, self(), - {check_health, Service}). + erlang:send_after(Time, + self(), + {check_health, Service}). all_health_fsms(Msg, Healths) -> [begin {ok, C1} = health_fsm(Msg, S, C), {S, C1} end diff --git a/src/riak_core_node_watcher_events.erl b/src/riak_core_node_watcher_events.erl index a141c9bc2..70f2c5f14 100644 --- a/src/riak_core_node_watcher_events.erl +++ b/src/riak_core_node_watcher_events.erl @@ -24,14 +24,22 @@ -behaviour(gen_event). %% API --export([start_link/0, add_handler/2, add_sup_handler/2, - add_guarded_handler/2, add_callback/1, - add_sup_callback/1, add_guarded_callback/1, - service_update/1]). +-export([start_link/0, + add_handler/2, + add_sup_handler/2, + add_guarded_handler/2, + add_callback/1, + add_sup_callback/1, + add_guarded_callback/1, + service_update/1]). %% gen_event callbacks --export([init/1, handle_event/2, handle_call/2, - handle_info/2, terminate/2, code_change/3]). 
+-export([init/1, + handle_event/2, + handle_call/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, {callback}). @@ -48,20 +56,24 @@ add_sup_handler(Handler, Args) -> gen_event:add_sup_handler(?MODULE, Handler, Args). add_guarded_handler(Handler, Args) -> - riak_core:add_guarded_event_handler(?MODULE, Handler, - Args). + riak_core:add_guarded_event_handler(?MODULE, + Handler, + Args). add_callback(Fn) when is_function(Fn) -> - gen_event:add_handler(?MODULE, {?MODULE, make_ref()}, - [Fn]). + gen_event:add_handler(?MODULE, + {?MODULE, make_ref()}, + [Fn]). add_sup_callback(Fn) when is_function(Fn) -> gen_event:add_sup_handler(?MODULE, - {?MODULE, make_ref()}, [Fn]). + {?MODULE, make_ref()}, + [Fn]). add_guarded_callback(Fn) when is_function(Fn) -> riak_core:add_guarded_event_handler(?MODULE, - {?MODULE, make_ref()}, [Fn]). + {?MODULE, make_ref()}, + [Fn]). service_update(Services) -> gen_event:notify(?MODULE, {service_update, Services}). @@ -76,7 +88,8 @@ init([Fn]) -> {ok, #state{callback = Fn}}. handle_event({service_update, Services}, State) -> - (State#state.callback)(Services), {ok, State}. + (State#state.callback)(Services), + {ok, State}. handle_call(_Request, State) -> {ok, ok, State}. diff --git a/src/riak_core_priority_queue.erl b/src/riak_core_priority_queue.erl index b3cfad917..8be373fda 100644 --- a/src/riak_core_priority_queue.erl +++ b/src/riak_core_priority_queue.erl @@ -54,8 +54,17 @@ -module(riak_core_priority_queue). --export([new/0, is_queue/1, is_empty/1, len/1, - to_list/1, in/2, in/3, out/1, out/2, pout/1, join/2]). +-export([new/0, + is_queue/1, + is_empty/1, + len/1, + to_list/1, + in/2, + in/3, + out/1, + out/2, + pout/1, + join/2]). %%---------------------------------------------------------------------------- @@ -64,7 +73,7 @@ -type squeue() :: {queue, [any()], [any()]}. -type pqueue() :: squeue() | - {pqueue, [{priority(), squeue()}]}. + {pqueue, [{priority(), squeue()}]}. %%---------------------------------------------------------------------------- @@ -78,9 +87,9 @@ is_queue({queue, R, F}) when is_list(R), is_list(F) -> true; is_queue({pqueue, Queues}) when is_list(Queues) -> lists:all(fun ({P, Q}) -> - is_integer(P) andalso is_queue(Q) - end, - Queues); + is_integer(P) andalso is_queue(Q) + end, + Queues); is_queue(_) -> false. -spec is_empty(pqueue()) -> boolean(). @@ -121,14 +130,14 @@ in(X, Priority, {pqueue, Queues}) -> P = -Priority, {pqueue, case lists:keysearch(P, 1, Queues) of - {value, {_, Q}} -> - lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); - false -> - lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + {value, {_, Q}} -> + lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); + false -> + lists:keysort(1, [{P, {queue, [X], []}} | Queues]) end}. -spec out(pqueue()) -> {empty | {value, any()}, - pqueue()}. + pqueue()}. out({queue, [], []} = Q) -> {empty, Q}; out({queue, [V], []}) -> {{value, V}, {queue, [], []}}; @@ -142,19 +151,19 @@ out({queue, In, [V | Out]}) when is_list(In) -> out({pqueue, [{P, Q} | Queues]}) -> {R, Q1} = out(Q), NewQ = case is_empty(Q1) of - true -> - case Queues of - [] -> {queue, [], []}; - [{0, OnlyQ}] -> OnlyQ; - [_ | _] -> {pqueue, Queues} - end; - false -> {pqueue, [{P, Q1} | Queues]} - end, + true -> + case Queues of + [] -> {queue, [], []}; + [{0, OnlyQ}] -> OnlyQ; + [_ | _] -> {pqueue, Queues} + end; + false -> {pqueue, [{P, Q1} | Queues]} + end, {R, NewQ}. -spec out(priority(), pqueue()) -> {empty | - {value, any()}, - pqueue()}. + {value, any()}, + pqueue()}. 
out(_Priority, {queue, [], []} = Q) -> {empty, Q}; out(Priority, {queue, _, _} = Q) when Priority =< 0 -> @@ -166,23 +175,24 @@ out(Priority, {pqueue, [{P, _Q} | _Queues]} = Q) out(_Priority, {pqueue, [_ | _]} = Q) -> {empty, Q}. -spec pout(pqueue()) -> {empty | - {value, any(), priority()}, - pqueue()}. + {value, any(), priority()}, + pqueue()}. pout({queue, [], []} = Q) -> {empty, Q}; pout({queue, _, _} = Q) -> - {{value, V}, Q1} = out(Q), {{value, V, 0}, Q1}; + {{value, V}, Q1} = out(Q), + {{value, V, 0}, Q1}; pout({pqueue, [{P, Q} | Queues]}) -> {{value, V}, Q1} = out(Q), NewQ = case is_empty(Q1) of - true -> - case Queues of - [] -> {queue, [], []}; - [{0, OnlyQ}] -> OnlyQ; - [_ | _] -> {pqueue, Queues} - end; - false -> {pqueue, [{P, Q1} | Queues]} - end, + true -> + case Queues of + [] -> {queue, [], []}; + [{0, OnlyQ}] -> OnlyQ; + [_ | _] -> {pqueue, Queues} + end; + false -> {pqueue, [{P, Q1} | Queues]} + end, {{value, V, -P}, NewQ}. -spec join(pqueue(), pqueue()) -> pqueue(). @@ -193,23 +203,23 @@ join({queue, AIn, AOut}, {queue, BIn, BOut}) -> {queue, BIn, AOut ++ lists:reverse(AIn, BOut)}; join(A = {queue, _, _}, {pqueue, BPQ}) -> {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, - BPQ), + BPQ), Post1 = case Post of - [] -> [{0, A}]; - [{0, ZeroQueue} | Rest] -> - [{0, join(A, ZeroQueue)} | Rest]; - _ -> [{0, A} | Post] - end, + [] -> [{0, A}]; + [{0, ZeroQueue} | Rest] -> + [{0, join(A, ZeroQueue)} | Rest]; + _ -> [{0, A} | Post] + end, {pqueue, Pre ++ Post1}; join({pqueue, APQ}, B = {queue, _, _}) -> {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, - APQ), + APQ), Post1 = case Post of - [] -> [{0, B}]; - [{0, ZeroQueue} | Rest] -> - [{0, join(ZeroQueue, B)} | Rest]; - _ -> [{0, B} | Post] - end, + [] -> [{0, B}]; + [{0, ZeroQueue} | Rest] -> + [{0, join(ZeroQueue, B)} | Rest]; + _ -> [{0, B} | Post] + end, {pqueue, Pre ++ Post1}; join({pqueue, APQ}, {pqueue, BPQ}) -> {pqueue, merge(APQ, BPQ, [])}. 
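The join clauses above lean on the module's internal encoding: priorities are negated at insert time (P = -Priority in in/3) so that lists:keysort/1 keeps the highest user-visible priority at the head of the pqueue list, while priority 0 stays a plain squeue. A usage sketch, consistent with simple_case/1 below:

    Q0 = riak_core_priority_queue:new(),
    Q1 = riak_core_priority_queue:in(low, Q0),           %% default priority 0
    Q2 = riak_core_priority_queue:in(urgent, 1000, Q1),  %% higher dequeues first
    {{value, urgent}, Q3} = riak_core_priority_queue:out(Q2),
    {{value, low, 0}, _Q4} = riak_core_priority_queue:pout(Q3).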
@@ -241,29 +251,29 @@ simple_case(Order) -> ?assertEqual(0, ((?MODULE):len(Queue))), ?assertEqual([], ((?MODULE):to_list(Queue))), case Order of - forward -> - Queue2 = (?MODULE):in(low, Queue), - Queue3 = (?MODULE):in(mid, 500, Queue2), - Queue4 = (?MODULE):in(high, 1000, Queue3); - reverse -> - Queue2 = (?MODULE):in(high, 1000, Queue), - Queue3 = (?MODULE):in(mid, 500, Queue2), - Queue4 = (?MODULE):in(low, Queue3); - mixed -> - Queue2 = (?MODULE):in(high, 1000, Queue), - Queue3 = (?MODULE):in(low, Queue2), - Queue4 = (?MODULE):in(mid, 500, Queue3) + forward -> + Queue2 = (?MODULE):in(low, Queue), + Queue3 = (?MODULE):in(mid, 500, Queue2), + Queue4 = (?MODULE):in(high, 1000, Queue3); + reverse -> + Queue2 = (?MODULE):in(high, 1000, Queue), + Queue3 = (?MODULE):in(mid, 500, Queue2), + Queue4 = (?MODULE):in(low, Queue3); + mixed -> + Queue2 = (?MODULE):in(high, 1000, Queue), + Queue3 = (?MODULE):in(low, Queue2), + Queue4 = (?MODULE):in(mid, 500, Queue3) end, ?assertEqual(false, ((?MODULE):is_empty(Queue4))), ?assertEqual(3, ((?MODULE):len(Queue4))), ?assertMatch({{value, high}, _}, - ((?MODULE):out(Queue4))), + ((?MODULE):out(Queue4))), {{value, high}, Queue5} = (?MODULE):out(Queue4), ?assertMatch({{value, mid}, _}, - ((?MODULE):out(Queue5))), + ((?MODULE):out(Queue5))), {{value, mid}, Queue6} = (?MODULE):out(Queue5), ?assertMatch({{value, low}, _}, - ((?MODULE):out(Queue6))), + ((?MODULE):out(Queue6))), {{value, low}, Queue7} = (?MODULE):out(Queue6), ?assertEqual(0, ((?MODULE):len(Queue7))), ?assertEqual(true, ((?MODULE):is_queue(Queue2))), @@ -282,9 +292,13 @@ merge_case() -> QueueB3 = (?MODULE):in(4, QueueB2), QueueB4 = (?MODULE):in(6, QueueB3), Merged1 = (?MODULE):join(QueueA4, QueueB4), - ?assertEqual([{0, 1}, {0, 3}, {0, 5}, {0, 2}, {0, 4}, - {0, 6}], - ((?MODULE):to_list(Merged1))), + ?assertEqual([{0, 1}, + {0, 3}, + {0, 5}, + {0, 2}, + {0, 4}, + {0, 6}], + ((?MODULE):to_list(Merged1))), QueueC1 = (?MODULE):new(), QueueC2 = (?MODULE):in(1, 10, QueueC1), QueueC3 = (?MODULE):in(3, 30, QueueC2), @@ -294,9 +308,13 @@ merge_case() -> QueueD3 = (?MODULE):in(4, 40, QueueD2), QueueD4 = (?MODULE):in(6, 60, QueueD3), Merged2 = (?MODULE):join(QueueC4, QueueD4), - ?assertEqual([{60, 6}, {50, 5}, {40, 4}, {30, 3}, - {20, 2}, {10, 1}], - ((?MODULE):to_list(Merged2))), + ?assertEqual([{60, 6}, + {50, 5}, + {40, 4}, + {30, 3}, + {20, 2}, + {10, 1}], + ((?MODULE):to_list(Merged2))), ok. basic_test() -> diff --git a/src/riak_core_rand.erl b/src/riak_core_rand.erl index 8d22d671e..f54c8e7b1 100644 --- a/src/riak_core_rand.erl +++ b/src/riak_core_rand.erl @@ -4,8 +4,13 @@ -module(riak_core_rand). %% API --export([uniform/0, uniform/1, uniform_s/2, seed/0, - seed/1, rand_seed/0, rand_bytes/1]). +-export([uniform/0, + uniform/1, + uniform_s/2, + seed/0, + seed/1, + rand_seed/0, + rand_bytes/1]). %% As the algorithm is not changed in any place we can use the default %% algorithm for all call here. @@ -33,14 +38,16 @@ seed() -> rand:seed(?ALGO). %% rand:seed will return the **new** seed. We can work around this by first %% getting the exported seed then using this instead. -spec seed({integer(), integer(), integer()} | - rand:export_state()) -> rand:export_state() | undefined. + rand:export_state()) -> rand:export_state() | undefined. seed({_, _, _} = Seed) -> Old = rand:export_seed(), _New = rand:seed(?ALGO, Seed), Old; seed(Seed) -> - Old = rand:export_seed(), _New = rand:seed(Seed), Old. + Old = rand:export_seed(), + _New = rand:seed(Seed), + Old. rand_bytes(Size) -> crypto:strong_rand_bytes(Size). 
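As the comment in riak_core_rand notes, rand:seed/1,2 returns the new state where the legacy random module returned the previous one, so seed/1 captures the exported seed before reseeding. A usage sketch for deterministic tests; it assumes the calling process was already seeded, so OldSeed is not undefined:

    OldSeed = riak_core_rand:seed({1, 2, 3}), %% returns the *previous* state
    A = riak_core_rand:uniform(100),          %% reproducible given {1, 2, 3}
    _ = riak_core_rand:seed({1, 2, 3}),
    A = riak_core_rand:uniform(100),          %% same draw, the match succeeds
    _ = riak_core_rand:seed(OldSeed).         %% restore the saved state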
diff --git a/src/riak_core_ring.erl b/src/riak_core_ring.erl index 4593cd382..774ef8636 100644 --- a/src/riak_core_ring.erl +++ b/src/riak_core_ring.erl @@ -28,51 +28,115 @@ -module(riak_core_ring). --export([all_members/1, all_owners/1, all_preflists/2, - diff_nodes/2, equal_rings/2, fresh/0, fresh/1, fresh/2, - get_meta/2, get_buckets/1, index_owner/2, my_indices/1, - num_partitions/1, owner_node/1, preflist/2, - random_node/1, random_other_index/1, - random_other_index/2, random_other_node/1, reconcile/2, - rename_node/3, responsible_index/2, transfer_node/3, - update_meta/3, remove_meta/2]). - --export([cluster_name/1, set_tainted/1, check_tainted/2, - nearly_equal/2, claimant/1, member_status/2, - pretty_print/2, all_member_status/1, - update_member_meta/5, clear_member_meta/3, - get_member_meta/3, add_member/3, remove_member/3, - leave_member/3, exit_member/3, down_member/3, - set_member/4, set_member/5, members/2, set_claimant/2, - increment_vclock/2, ring_version/1, - increment_ring_version/2, set_pending_changes/2, - active_members/1, claiming_members/1, ready_members/1, - random_other_active_node/1, down_members/1, set_owner/2, - indices/2, future_indices/2, future_ring/1, - disowning_indices/2, cancel_transfers/1, - pending_changes/1, next_owner/1, next_owner/2, - next_owner/3, completed_next_owners/2, - all_next_owners/1, change_owners/2, handoff_complete/3, - ring_ready/0, ring_ready/1, ring_ready_info/1, - ring_changed/2, set_cluster_name/2, reconcile_names/2, - reconcile_members/2, is_primary/2, chash/1, set_chash/2, - resize/2, set_pending_resize/2, - set_pending_resize_abort/1, maybe_abort_resize/1, - schedule_resize_transfer/3, awaiting_resize_transfer/3, - resize_transfer_status/4, resize_transfer_complete/4, - complete_resize_transfers/3, - reschedule_resize_transfers/3, is_resizing/1, - is_post_resize/1, is_resize_complete/1, resized_ring/1, - set_resized_ring/2, future_index/3, future_index/4, - future_index/5, is_future_index/4, future_owner/2, - future_num_partitions/1, vnode_type/2, - deletion_complete/3]). +-export([all_members/1, + all_owners/1, + all_preflists/2, + diff_nodes/2, + equal_rings/2, + fresh/0, + fresh/1, + fresh/2, + get_meta/2, + get_buckets/1, + index_owner/2, + my_indices/1, + num_partitions/1, + owner_node/1, + preflist/2, + random_node/1, + random_other_index/1, + random_other_index/2, + random_other_node/1, + reconcile/2, + rename_node/3, + responsible_index/2, + transfer_node/3, + update_meta/3, + remove_meta/2]). 
+ +-export([cluster_name/1, + set_tainted/1, + check_tainted/2, + nearly_equal/2, + claimant/1, + member_status/2, + pretty_print/2, + all_member_status/1, + update_member_meta/5, + clear_member_meta/3, + get_member_meta/3, + add_member/3, + remove_member/3, + leave_member/3, + exit_member/3, + down_member/3, + set_member/4, + set_member/5, + members/2, + set_claimant/2, + increment_vclock/2, + ring_version/1, + increment_ring_version/2, + set_pending_changes/2, + active_members/1, + claiming_members/1, + ready_members/1, + random_other_active_node/1, + down_members/1, + set_owner/2, + indices/2, + future_indices/2, + future_ring/1, + disowning_indices/2, + cancel_transfers/1, + pending_changes/1, + next_owner/1, + next_owner/2, + next_owner/3, + completed_next_owners/2, + all_next_owners/1, + change_owners/2, + handoff_complete/3, + ring_ready/0, + ring_ready/1, + ring_ready_info/1, + ring_changed/2, + set_cluster_name/2, + reconcile_names/2, + reconcile_members/2, + is_primary/2, + chash/1, + set_chash/2, + resize/2, + set_pending_resize/2, + set_pending_resize_abort/1, + maybe_abort_resize/1, + schedule_resize_transfer/3, + awaiting_resize_transfer/3, + resize_transfer_status/4, + resize_transfer_complete/4, + complete_resize_transfers/3, + reschedule_resize_transfers/3, + is_resizing/1, + is_post_resize/1, + is_resize_complete/1, + resized_ring/1, + set_resized_ring/2, + future_index/3, + future_index/4, + future_index/5, + is_future_index/4, + future_owner/2, + future_num_partitions/1, + vnode_type/2, + deletion_complete/3]). %% upgrade/1, - %% downgrade/2, + %% downgrade/2, --export_type([riak_core_ring/0, ring_size/0, - partition_id/0]). +-export_type([riak_core_ring/0, + ring_size/0, + partition_id/0]). -ifdef(TEST). @@ -81,40 +145,44 @@ -endif. -record(chstate, - {nodename :: - term(), % the Node responsible for this chstate - vclock :: - vclock:vclock() | - undefined, % for this chstate object, entries are - % {Node, Ctr} - chring :: - chash:chash() | - undefined, % chash ring of {IndexAsInt, Node} mappings - meta :: dict:dict() | undefined, - % dict of cluster-wide other data (primarily - % bucket N-value, etc) - clustername :: {term(), term()} | undefined, - next :: - [{integer(), term(), term(), [module()], - awaiting | complete}], - members :: - [{node(), - {member_status(), vclock:vclock(), - [{atom(), term()}]}}] | - undefined, - claimant :: term(), - seen :: [{term(), vclock:vclock()}] | undefined, - rvsn :: vclock:vclock() | undefined}). - --type member_status() :: joining | valid | invalid | - leaving | exiting | down. + {nodename :: + term(), % the Node responsible for this chstate + vclock :: + vclock:vclock() | + undefined, % for this chstate object, entries are + % {Node, Ctr} + chring :: + chash:chash() | + undefined, % chash ring of {IndexAsInt, Node} mappings + meta :: dict:dict() | undefined, + % dict of cluster-wide other data (primarily + % bucket N-value, etc) + clustername :: {term(), term()} | undefined, + next :: + [{integer(), term(), term(), [module()], + awaiting | complete}], + members :: + [{node(), + {member_status(), vclock:vclock(), + [{atom(), term()}]}}] | + undefined, + claimant :: term(), + seen :: [{term(), vclock:vclock()}] | undefined, + rvsn :: vclock:vclock() | undefined}). + +-type member_status() :: joining | + valid | + invalid | + leaving | + exiting | + down. %% type meta_entry(). Record for each entry in #chstate.meta -record(meta_entry, - {value, % The value stored under this entry - lastmod}). 
% The last modified time of this entry, - % from calendar:datetime_to_gregorian_seconds( - % calendar:universal_time()), + {value, % The value stored under this entry + lastmod}). % The last modified time of this entry, + % from calendar:datetime_to_gregorian_seconds( + % calendar:universal_time()), %% @type riak_core_ring(). Opaque data type used for partition ownership -type riak_core_ring() :: #chstate{}. @@ -122,11 +190,11 @@ -type chstate() :: riak_core_ring(). -type pending_change() :: {Owner :: node(), - NextOwner :: node(), awaiting | complete} | - {undefined, undefined, undefined}. + NextOwner :: node(), awaiting | complete} | + {undefined, undefined, undefined}. -type resize_transfer() :: {{integer(), term()}, - ordsets:ordset(node()), awaiting | complete}. + ordsets:ordset(node()), awaiting | complete}. -type ring_size() :: non_neg_integer(). @@ -141,12 +209,17 @@ set_tainted(Ring) -> update_meta(riak_core_ring_tainted, true, Ring). check_tainted(Ring = #chstate{}, Msg) -> - Exit = application:get_env(riak_core, exit_when_tainted, - false), + Exit = application:get_env(riak_core, + exit_when_tainted, + false), case {get_meta(riak_core_ring_tainted, Ring), Exit} of - {{ok, true}, true} -> riak_core:stop(Msg), ok; - {{ok, true}, false} -> logger:error(Msg), ok; - _ -> ok + {{ok, true}, true} -> + riak_core:stop(Msg), + ok; + {{ok, true}, false} -> + logger:error(Msg), + ok; + _ -> ok end. %% @doc Verify that the two rings are identical expect that metadata can @@ -157,18 +230,18 @@ check_tainted(Ring = #chstate{}, Msg) -> nearly_equal(RingA, RingB) -> TestVC = vclock:descends(RingB#chstate.vclock, - RingA#chstate.vclock), + RingA#chstate.vclock), RingA2 = RingA#chstate{vclock = undefined, - meta = undefined}, + meta = undefined}, RingB2 = RingB#chstate{vclock = undefined, - meta = undefined}, + meta = undefined}, TestRing = RingA2 =:= RingB2, TestVC and TestRing. %% @doc Determine if a given Index/Node `IdxNode' combination is a %% primary. -spec is_primary(chstate(), - {chash:index_as_int(), node()}) -> boolean(). + {chash:index_as_int(), node()}) -> boolean(). is_primary(Ring, IdxNode) -> Owners = all_owners(Ring), @@ -184,7 +257,7 @@ set_chash(State, CHash) -> %% @doc Produce a list of all nodes that are members of the cluster -spec all_members(State :: chstate()) -> [Node :: - term()]. + term()]. all_members(#chstate{members = Members}) -> get_members(Members). @@ -195,7 +268,7 @@ members(#chstate{members = Members}, Types) -> %% @doc Produce a list of all active (not marked as down) cluster members active_members(#chstate{members = Members}) -> get_members(Members, - [joining, valid, leaving, exiting]). + [joining, valid, leaving, exiting]). %% @doc Returns a list of members guaranteed safe for requests ready_members(#chstate{members = Members}) -> @@ -203,21 +276,21 @@ ready_members(#chstate{members = Members}) -> %% @doc Provide all ownership information in the form of {Index,Node} pairs. -spec all_owners(State :: chstate()) -> [{Index :: - integer(), - Node :: term()}]. + integer(), + Node :: term()}]. all_owners(State) -> chash:nodes(State#chstate.chring). %% @doc Provide every preflist in the ring, truncated at N. -spec all_preflists(State :: chstate(), - N :: integer()) -> [[{Index :: integer(), - Node :: term()}]]. + N :: integer()) -> [[{Index :: integer(), + Node :: term()}]]. all_preflists(State, N) -> [lists:sublist(preflist(Key, State), N) || Key - <- [<<(I + 1):160/integer>> - || {I, _Owner} <- (?MODULE):all_owners(State)]]. 
+ <- [<<(I + 1):160/integer>> + || {I, _Owner} <- (?MODULE):all_owners(State)]]. %% @doc For two rings, return the list of owners that have differing ownership. -spec diff_nodes(chstate(), chstate()) -> [node()]. @@ -225,18 +298,18 @@ all_preflists(State, N) -> diff_nodes(State1, State2) -> AO = lists:zip(all_owners(State1), all_owners(State2)), AllDiff = [[N1, N2] - || {{I, N1}, {I, N2}} <- AO, N1 =/= N2], + || {{I, N1}, {I, N2}} <- AO, N1 =/= N2], lists:usort(lists:flatten(AllDiff)). -spec equal_rings(chstate(), chstate()) -> boolean(). equal_rings(_A = #chstate{chring = RA, meta = MA}, - _B = #chstate{chring = RB, meta = MB}) -> + _B = #chstate{chring = RB, meta = MB}) -> MDA = lists:sort(dict:to_list(MA)), MDB = lists:sort(dict:to_list(MB)), case MDA =:= MDB of - false -> false; - true -> RA =:= RB + false -> false; + true -> RA =:= RB end. %% @doc This is used only when this node is creating a brand new cluster. @@ -251,24 +324,25 @@ fresh() -> -spec fresh(NodeName :: term()) -> chstate(). fresh(NodeName) -> - fresh(application:get_env(riak_core, ring_creation_size, - undefined), - NodeName). + fresh(application:get_env(riak_core, + ring_creation_size, + undefined), + NodeName). %% @doc Equivalent to fresh/1 but allows specification of the ring size. %% Called by fresh/1, and otherwise only intended for testing purposes. -spec fresh(ring_size(), - NodeName :: term()) -> chstate(). + NodeName :: term()) -> chstate(). fresh(RingSize, NodeName) -> VClock = vclock:increment(NodeName, vclock:fresh()), #chstate{nodename = NodeName, - clustername = {NodeName, erlang:timestamp()}, - members = - [{NodeName, {valid, VClock, [{gossip_vsn, 2}]}}], - chring = chash:fresh(RingSize, NodeName), next = [], - claimant = NodeName, seen = [{NodeName, VClock}], - rvsn = VClock, vclock = VClock, meta = dict:new()}. + clustername = {NodeName, erlang:timestamp()}, + members = + [{NodeName, {valid, VClock, [{gossip_vsn, 2}]}}], + chring = chash:fresh(RingSize, NodeName), next = [], + claimant = NodeName, seen = [{NodeName, VClock}], + rvsn = VClock, vclock = VClock, meta = dict:new()}. %% @doc change the size of the ring to `NewRingSize'. If the ring %% is larger than the current ring any new indexes will be owned @@ -277,32 +351,32 @@ fresh(RingSize, NodeName) -> resize(State, NewRingSize) -> NewRing = lists:foldl(fun ({Idx, Owner}, RingAcc) -> - chash:update(Idx, Owner, RingAcc) - end, - chash:fresh(NewRingSize, '$dummyhost@resized'), - all_owners(State)), + chash:update(Idx, Owner, RingAcc) + end, + chash:fresh(NewRingSize, '$dummyhost@resized'), + all_owners(State)), set_chash(State, NewRing). % @doc Return a value from the cluster metadata dict -spec get_meta(Key :: term(), - State :: chstate()) -> {ok, term()} | undefined. + State :: chstate()) -> {ok, term()} | undefined. get_meta(Key, State) -> case dict:find(Key, State#chstate.meta) of - error -> undefined; - {ok, '$removed'} -> undefined; - {ok, M} when M#meta_entry.value =:= '$removed' -> - undefined; - {ok, M} -> {ok, M#meta_entry.value} + error -> undefined; + {ok, '$removed'} -> undefined; + {ok, M} when M#meta_entry.value =:= '$removed' -> + undefined; + {ok, M} -> {ok, M#meta_entry.value} end. -spec get_meta(term(), term(), chstate()) -> {ok, - term()}. + term()}. get_meta(Key, Default, State) -> case get_meta(Key, State) of - undefined -> {ok, Default}; - Res -> Res + undefined -> {ok, Default}; + Res -> Res end. %% @doc return the names of all the custom buckets stored in the ring. 
@@ -311,14 +385,15 @@ get_meta(Key, Default, State) -> get_buckets(State) -> Keys = dict:fetch_keys(State#chstate.meta), lists:foldl(fun ({bucket, Bucket}, Acc) -> - [Bucket | Acc]; - (_, Acc) -> Acc - end, - [], Keys). + [Bucket | Acc]; + (_, Acc) -> Acc + end, + [], + Keys). %% @doc Return the node that owns the given index. -spec index_owner(State :: chstate(), - Idx :: chash:index_as_int()) -> Node :: term(). + Idx :: chash:index_as_int()) -> Node :: term(). index_owner(State, Idx) -> {Idx, Owner} = lists:keyfind(Idx, 1, all_owners(State)), @@ -328,23 +403,23 @@ index_owner(State, Idx) -> %% this function will error if the ring is shrinking and Idx no longer exists %% in it -spec future_owner(chstate(), - chash:index_as_int()) -> term(). + chash:index_as_int()) -> term(). future_owner(State, Idx) -> index_owner(future_ring(State), Idx). %% @doc Return all partition indices owned by the node executing this function. -spec my_indices(State :: - chstate()) -> [chash:index_as_int()]. + chstate()) -> [chash:index_as_int()]. my_indices(State) -> [I || {I, Owner} <- (?MODULE):all_owners(State), - Owner =:= node()]. + Owner =:= node()]. %% @doc Return the number of partitions in this Riak ring. -spec num_partitions(State :: - chstate()) -> pos_integer(). + chstate()) -> pos_integer(). num_partitions(State) -> chash:size(State#chstate.chring). @@ -352,10 +427,10 @@ num_partitions(State) -> -spec future_num_partitions(chstate()) -> pos_integer(). future_num_partitions(State = #chstate{chring = - CHRing}) -> + CHRing}) -> case resized_ring(State) of - {ok, C} -> chash:size(C); - undefined -> chash:size(CHRing) + {ok, C} -> chash:size(C); + undefined -> chash:size(CHRing) end. %% @doc Return the node that is responsible for a given chstate. @@ -366,8 +441,8 @@ owner_node(State) -> State#chstate.nodename. %% @doc For a given object key, produce the ordered list of %% {partition,node} pairs that could be responsible for that object. -spec preflist(Key :: binary(), - State :: chstate()) -> [{Index :: chash:index_as_int(), - Node :: term()}]. + State :: chstate()) -> [{Index :: chash:index_as_int(), + Node :: term()}]. preflist(Key, State) -> chash:successors(Key, State#chstate.chring). @@ -382,112 +457,114 @@ random_node(State) -> %% @doc Return a partition index not owned by the node executing this function. %% If this node owns all partitions, return any index. -spec random_other_index(State :: - chstate()) -> chash:index_as_int(). + chstate()) -> chash:index_as_int(). random_other_index(State) -> L = [I - || {I, Owner} <- (?MODULE):all_owners(State), - Owner =/= node()], + || {I, Owner} <- (?MODULE):all_owners(State), + Owner =/= node()], case L of - [] -> hd(my_indices(State)); - _ -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> hd(my_indices(State)); + _ -> lists:nth(riak_core_rand:uniform(length(L)), L) end. -spec random_other_index(State :: chstate(), - Exclude :: [term()]) -> chash:index_as_int() | - no_indices. + Exclude :: [term()]) -> chash:index_as_int() | + no_indices. random_other_index(State, Exclude) when is_list(Exclude) -> L = [I - || {I, Owner} <- (?MODULE):all_owners(State), - Owner =/= node(), not lists:member(I, Exclude)], + || {I, Owner} <- (?MODULE):all_owners(State), + Owner =/= node(), not lists:member(I, Exclude)], case L of - [] -> no_indices; - _ -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> no_indices; + _ -> lists:nth(riak_core_rand:uniform(length(L)), L) end. %% @doc Return a randomly-chosen node from amongst the owners other than this one. 
-spec random_other_node(State :: chstate()) -> Node :: - term() | no_node. + term() | no_node. random_other_node(State) -> case lists:delete(node(), all_members(State)) of - [] -> no_node; - L -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> no_node; + L -> lists:nth(riak_core_rand:uniform(length(L)), L) end. %% @doc Return a randomly-chosen active node other than this one. -spec random_other_active_node(State :: - chstate()) -> Node :: term() | no_node. + chstate()) -> Node :: term() | no_node. random_other_active_node(State) -> case lists:delete(node(), active_members(State)) of - [] -> no_node; - L -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> no_node; + L -> lists:nth(riak_core_rand:uniform(length(L)), L) end. %% @doc Incorporate another node's state into our view of the Riak world. -spec reconcile(ExternState :: chstate(), - MyState :: chstate()) -> {no_change | new_ring, - chstate()}. + MyState :: chstate()) -> {no_change | new_ring, + chstate()}. reconcile(ExternState, MyState) -> check_tainted(ExternState, - "Error: riak_core_ring/reconcile :: reconcilin" - "g tainted external ring"), + "Error: riak_core_ring/reconcile :: reconcilin" + "g tainted external ring"), check_tainted(MyState, - "Error: riak_core_ring/reconcile :: reconcilin" - "g tainted internal ring"), + "Error: riak_core_ring/reconcile :: reconcilin" + "g tainted internal ring"), case internal_reconcile(MyState, ExternState) of - {false, State} -> {no_change, State}; - {true, State} -> {new_ring, State} + {false, State} -> {no_change, State}; + {true, State} -> {new_ring, State} end. %% @doc Rename OldNode to NewNode in a Riak ring. -spec rename_node(State :: chstate(), OldNode :: atom(), - NewNode :: atom()) -> chstate(). + NewNode :: atom()) -> chstate(). rename_node(State = #chstate{chring = Ring, - nodename = ThisNode, members = Members, - claimant = Claimant, seen = Seen}, - OldNode, NewNode) + nodename = ThisNode, members = Members, + claimant = Claimant, seen = Seen}, + OldNode, NewNode) when is_atom(OldNode), is_atom(NewNode) -> State#chstate{chring = - lists:foldl(fun ({Idx, Owner}, AccIn) -> - case Owner of - OldNode -> - chash:update(Idx, NewNode, - AccIn); - _ -> AccIn - end - end, - Ring, riak_core_ring:all_owners(State)), - members = - orddict:from_list(proplists:substitute_aliases([{OldNode, - NewNode}], - Members)), - seen = - orddict:from_list(proplists:substitute_aliases([{OldNode, - NewNode}], - Seen)), - nodename = - case ThisNode of - OldNode -> NewNode; - _ -> ThisNode - end, - claimant = - case Claimant of - OldNode -> NewNode; - _ -> Claimant - end, - vclock = - vclock:increment(NewNode, State#chstate.vclock)}. + lists:foldl(fun ({Idx, Owner}, AccIn) -> + case Owner of + OldNode -> + chash:update(Idx, + NewNode, + AccIn); + _ -> AccIn + end + end, + Ring, + riak_core_ring:all_owners(State)), + members = + orddict:from_list(proplists:substitute_aliases([{OldNode, + NewNode}], + Members)), + seen = + orddict:from_list(proplists:substitute_aliases([{OldNode, + NewNode}], + Seen)), + nodename = + case ThisNode of + OldNode -> NewNode; + _ -> ThisNode + end, + claimant = + case Claimant of + OldNode -> NewNode; + _ -> Claimant + end, + vclock = + vclock:increment(NewNode, State#chstate.vclock)}. %% @doc Determine the integer ring index responsible %% for a chash key. -spec responsible_index(binary(), - chstate()) -> integer(). + chstate()) -> integer(). 
responsible_index(ChashKey, #chstate{chring = Ring}) ->
    <<IndexAsInt:160/integer>> = ChashKey,
@@ -502,23 +579,26 @@ responsible_index(ChashKey, #chstate{chring = Ring}) ->
 %% the returned index will always be `OrigIdx'. If the ring is
 %% resizing the index may be different
 -spec future_index(chash:index(), integer(),
-                   chstate()) -> integer() | undefined.
+                   chstate()) -> integer() | undefined.

 future_index(CHashKey, OrigIdx, State) ->
     future_index(CHashKey, OrigIdx, undefined, State).

 -spec future_index(chash:index(), integer(),
-                   undefined | integer(), chstate()) -> integer() |
-                                                        undefined.
+                   undefined | integer(), chstate()) -> integer() |
+                                                        undefined.

 future_index(CHashKey, OrigIdx, NValCheck, State) ->
     OrigCount = num_partitions(State),
     NextCount = future_num_partitions(State),
-    future_index(CHashKey, OrigIdx, NValCheck, OrigCount,
-                 NextCount).
+    future_index(CHashKey,
+                 OrigIdx,
+                 NValCheck,
+                 OrigCount,
+                 NextCount).

 future_index(CHashKey, OrigIdx, NValCheck, OrigCount,
-             NextCount) ->
+             NextCount) ->
    <<CHashInt:160/integer>> = CHashKey,
    OrigInc = chash:ring_increment(OrigCount),
    NextInc = chash:ring_increment(NextCount),
@@ -529,42 +609,43 @@ future_index(CHashKey, OrigIdx, NValCheck, OrigCount,
    %% Determine position of the source partition in the ring
    %% if OrigIdx is 0 we know the position is OrigCount (number of partitions)
    OrigPos = case OrigIdx of
-               0 -> OrigCount;
-               _ -> OrigIdx div OrigInc
-             end,
+               0 -> OrigCount;
+               _ -> OrigIdx div OrigInc
+             end,
    %% The distance between the key's owner (head of preflist) and the source partition
    %% is the position of the source in the preflist, the distance may be negative
    %% in which case we have wrapped around the ring. distance of zero means the source
    %% is the head of the preflist.
    OrigDist = case OrigPos - OwnerPos of
-                P when P < 0 -> OrigCount + P;
-                P -> P
-              end,
+                P when P < 0 -> OrigCount + P;
+                P -> P
+              end,
    %% In the case that the ring is shrinking the future index for a key whose position
    %% in the preflist is >= ring size may be calculated, any transfer is invalid in
    %% this case, return undefined. The position may also be >= an optional N value for
    %% the key, if this is true undefined is also returned
-    case check_invalid_future_index(OrigDist, NextCount,
-                                    NValCheck)
-    of
-      true -> undefined;
-      false ->
-          %% Determine the partition (head of preflist) that will own the key in the future ring
-          FuturePos = CHashInt div NextInc + 1,
-          NextOwner = FuturePos * NextInc,
-          %% Determine the partition that the key should be transferred to (has same position
-          %% in future preflist as source partition does in current preflist)
-          RingTop = trunc(math:pow(2, 160) - 1),
-          (NextOwner + NextInc * OrigDist) rem RingTop
+    case check_invalid_future_index(OrigDist,
+                                    NextCount,
+                                    NValCheck)
+    of
+      true -> undefined;
+      false ->
+          %% Determine the partition (head of preflist) that will own the key in the future ring
+          FuturePos = CHashInt div NextInc + 1,
+          NextOwner = FuturePos * NextInc,
+          %% Determine the partition that the key should be transferred to (has same position
+          %% in future preflist as source partition does in current preflist)
+          RingTop = trunc(math:pow(2, 160) - 1),
+          (NextOwner + NextInc * OrigDist) rem RingTop
    end.

 check_invalid_future_index(OrigDist, NextCount,
-                           NValCheck) ->
+                           NValCheck) ->
    OverRingSize = OrigDist >= NextCount,
    OverNVal = case NValCheck of
-                undefined -> false;
-                _ -> OrigDist >= NValCheck
-              end,
+                undefined -> false;
+                _ -> OrigDist >= NValCheck
+              end,
    OverRingSize orelse OverNVal.
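%% A worked example of the arithmetic above (sizes and hash values are
%% hypothetical, not part of the original source): doubling a ring from
%% OrigCount = 8 to NextCount = 16 gives OrigInc = 2^160 div 8 and
%% NextInc = 2^160 div 16. For a key hashing just above zero the
%% preflist head works out to OwnerPos = 1 (the partition at
%% 1 * OrigInc); with OrigIdx = 3 * OrigInc we get OrigPos = 3 and
%% OrigDist = 2. Since FuturePos = 1, the returned index is
%%
%%   (1 * NextInc) + (NextInc * 2) = 3 * NextInc
%%
%% the partition two preflist positions after the future head, so the
%% source keeps its preflist position across the resize.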
%% Takes the hashed value for a key and any partition, `OrigIdx', @@ -572,55 +653,58 @@ check_invalid_future_index(OrigDist, NextCount, %% is in the same position in the future preflist for that key. %% @see future_index/4 -spec is_future_index(chash:index(), integer(), - integer(), chstate()) -> boolean(). + integer(), chstate()) -> boolean(). is_future_index(CHashKey, OrigIdx, TargetIdx, State) -> - FutureIndex = future_index(CHashKey, OrigIdx, undefined, - State), + FutureIndex = future_index(CHashKey, + OrigIdx, + undefined, + State), FutureIndex =:= TargetIdx. -spec transfer_node(Idx :: integer(), Node :: term(), - MyState :: chstate()) -> chstate(). + MyState :: chstate()) -> chstate(). transfer_node(Idx, Node, MyState) -> case chash:lookup(Idx, MyState#chstate.chring) of - Node -> MyState; - _ -> - Me = MyState#chstate.nodename, - VClock = vclock:increment(Me, MyState#chstate.vclock), - CHRing = chash:update(Idx, Node, - MyState#chstate.chring), - MyState#chstate{vclock = VClock, chring = CHRing} + Node -> MyState; + _ -> + Me = MyState#chstate.nodename, + VClock = vclock:increment(Me, MyState#chstate.vclock), + CHRing = chash:update(Idx, + Node, + MyState#chstate.chring), + MyState#chstate{vclock = VClock, chring = CHRing} end. % @doc Set a key in the cluster metadata dict -spec update_meta(Key :: term(), Val :: term(), - State :: chstate()) -> chstate(). + State :: chstate()) -> chstate(). update_meta(Key, Val, State) -> Change = case dict:find(Key, State#chstate.meta) of - {ok, OldM} -> Val /= OldM#meta_entry.value; - error -> true - end, + {ok, OldM} -> Val /= OldM#meta_entry.value; + error -> true + end, if Change -> - M = #meta_entry{lastmod = - calendar:datetime_to_gregorian_seconds(calendar:universal_time()), - value = Val}, - VClock = vclock:increment(State#chstate.nodename, - State#chstate.vclock), - State#chstate{vclock = VClock, - meta = dict:store(Key, M, State#chstate.meta)}; + M = #meta_entry{lastmod = + calendar:datetime_to_gregorian_seconds(calendar:universal_time()), + value = Val}, + VClock = vclock:increment(State#chstate.nodename, + State#chstate.vclock), + State#chstate{vclock = VClock, + meta = dict:store(Key, M, State#chstate.meta)}; true -> State end. %% @doc Logical delete of a key in the cluster metadata dict -spec remove_meta(Key :: term(), - State :: chstate()) -> chstate(). + State :: chstate()) -> chstate(). remove_meta(Key, State) -> case dict:find(Key, State#chstate.meta) of - {ok, _} -> update_meta(Key, '$removed', State); - error -> State + {ok, _} -> update_meta(Key, '$removed', State); + error -> State end. %% @doc Return the current claimant. @@ -641,12 +725,12 @@ set_cluster_name(State, Name) -> State#chstate{clustername = Name}. reconcile_names(RingA = #chstate{clustername = NameA}, - RingB = #chstate{clustername = NameB}) -> + RingB = #chstate{clustername = NameB}) -> case (NameA =:= undefined) or (NameB =:= undefined) of - true -> - {RingA#chstate{clustername = undefined}, - RingB#chstate{clustername = undefined}}; - false -> {RingA, RingB} + true -> + {RingA#chstate{clustername = undefined}, + RingB#chstate{clustername = undefined}}; + false -> {RingA, RingB} end. increment_vclock(Node, State) -> @@ -661,69 +745,75 @@ increment_ring_version(Node, State) -> %% @doc Returns the current membership status for a node in the cluster. -spec member_status(chstate() | [node()], - Node :: node()) -> member_status(). + Node :: node()) -> member_status(). 
member_status(#chstate{members = Members}, Node) -> member_status(Members, Node); member_status(Members, Node) -> case orddict:find(Node, Members) of - {ok, {Status, _, _}} -> Status; - _ -> invalid + {ok, {Status, _, _}} -> Status; + _ -> invalid end. %% @doc Returns the current membership status for all nodes in the cluster. -spec all_member_status(State :: chstate()) -> [{node(), - member_status()}]. + member_status()}]. all_member_status(#chstate{members = Members}) -> [{Node, Status} || {Node, {Status, _VC, _}} <- Members, - Status /= invalid]. + Status /= invalid]. get_member_meta(State, Member, Key) -> case orddict:find(Member, State#chstate.members) of - error -> undefined; - {ok, {_, _, Meta}} -> - case orddict:find(Key, Meta) of - error -> undefined; - {ok, Value} -> Value - end + error -> undefined; + {ok, {_, _, Meta}} -> + case orddict:find(Key, Meta) of + error -> undefined; + {ok, Value} -> Value + end end. %% @doc Set a key in the member metadata orddict update_member_meta(Node, State, Member, Key, Val) -> VClock = vclock:increment(Node, State#chstate.vclock), - State2 = update_member_meta(Node, State, Member, Key, - Val, same_vclock), + State2 = update_member_meta(Node, + State, + Member, + Key, + Val, + same_vclock), State2#chstate{vclock = VClock}. update_member_meta(Node, State, Member, Key, Val, - same_vclock) -> + same_vclock) -> Members = State#chstate.members, case orddict:is_key(Member, Members) of - true -> - Members2 = orddict:update(Member, - fun ({Status, VC, MD}) -> - {Status, vclock:increment(Node, VC), - orddict:store(Key, Val, MD)} - end, - Members), - State#chstate{members = Members2}; - false -> State + true -> + Members2 = orddict:update(Member, + fun ({Status, VC, MD}) -> + {Status, + vclock:increment(Node, VC), + orddict:store(Key, Val, MD)} + end, + Members), + State#chstate{members = Members2}; + false -> State end. clear_member_meta(Node, State, Member) -> Members = State#chstate.members, case orddict:is_key(Member, Members) of - true -> - Members2 = orddict:update(Member, - fun ({Status, VC, _MD}) -> - {Status, vclock:increment(Node, VC), - orddict:new()} - end, - Members), - State#chstate{members = Members2}; - false -> State + true -> + Members2 = orddict:update(Member, + fun ({Status, VC, _MD}) -> + {Status, + vclock:increment(Node, VC), + orddict:new()} + end, + Members), + State#chstate{members = Members2}; + false -> State end. add_member(PNode, State, Node) -> @@ -744,45 +834,49 @@ down_member(PNode, State, Node) -> set_member(Node, CState, Member, Status) -> VClock = vclock:increment(Node, CState#chstate.vclock), - CState2 = set_member(Node, CState, Member, Status, - same_vclock), + CState2 = set_member(Node, + CState, + Member, + Status, + same_vclock), CState2#chstate{vclock = VClock}. set_member(Node, CState, Member, Status, same_vclock) -> Members2 = orddict:update(Member, - fun ({_, VC, MD}) -> - {Status, vclock:increment(Node, VC), MD} - end, - {Status, vclock:increment(Node, vclock:fresh()), - []}, - CState#chstate.members), + fun ({_, VC, MD}) -> + {Status, vclock:increment(Node, VC), MD} + end, + {Status, + vclock:increment(Node, vclock:fresh()), + []}, + CState#chstate.members), CState#chstate{members = Members2}. %% @doc Return a list of all members of the cluster that are eligible to %% claim partitions. -spec claiming_members(State :: chstate()) -> [Node :: - node()]. + node()]. claiming_members(#chstate{members = Members}) -> get_members(Members, [joining, valid, down]). 
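%% Usage sketch for the membership accessors above (node names and the
%% metadata key are hypothetical, not from the original source):
%%
%%   Ring0 = riak_core_ring:fresh(8, nodeA),
%%   Ring1 = riak_core_ring:add_member(nodeA, Ring0, nodeB),
%%   joining = riak_core_ring:member_status(Ring1, nodeB),
%%   Ring2 = riak_core_ring:update_member_meta(nodeA, Ring1, nodeB,
%%                                             some_key, 42),
%%   42 = riak_core_ring:get_member_meta(Ring2, nodeB, some_key),
%%   undefined = riak_core_ring:get_member_meta(Ring2, nodeB, other_key).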
%% @doc Return a list of all members of the cluster that are marked as down.
 -spec down_members(State :: chstate()) -> [Node ::
-                                               node()].
+                                               node()].

 down_members(#chstate{members = Members}) ->
     get_members(Members, [down]).

 %% @doc Set the node that is responsible for a given chstate.
 -spec set_owner(State :: chstate(),
-                Node :: node()) -> chstate().
+                Node :: node()) -> chstate().

 set_owner(State, Node) -> State#chstate{nodename = Node}.

 %% @doc Return all partition indices owned by a node.
 -spec indices(State :: chstate(),
-              Node :: node()) -> [integer()].
+              Node :: node()) -> [integer()].

 indices(State, Node) ->
     AllOwners = all_owners(State),
@@ -791,13 +885,13 @@ indices(State, Node) ->
 %% @doc Return all partition indices that will be owned by a node after all
 %% pending ownership transfers have completed.
 -spec future_indices(State :: chstate(),
-                     Node :: node()) -> [integer()].
+                     Node :: node()) -> [integer()].

 future_indices(State, Node) ->
     indices(future_ring(State), Node).

 -spec all_next_owners(chstate()) -> [{integer(),
-                                      term()}].
+                                      term()}].

 all_next_owners(CState) ->
     Next = riak_core_ring:pending_changes(CState),
@@ -806,37 +900,38 @@ all_next_owners(CState) ->
 %% @private
 change_owners(CState, Reassign) ->
     lists:foldl(fun ({Idx, NewOwner}, CState0) ->
-                    %% if called for indexes not in the current ring (during resizing)
-                    %% ignore the error
-                    try riak_core_ring:transfer_node(Idx, NewOwner, CState0)
-                    catch
-                      error:{badmatch, _} -> CState0
-                    end
-                end,
-                CState, Reassign).
+                    %% if called for indexes not in the current ring (during resizing)
+                    %% ignore the error
+                    try riak_core_ring:transfer_node(Idx, NewOwner, CState0)
+                    catch
+                      error:{badmatch, _} -> CState0
+                    end
+                end,
+                CState,
+                Reassign).

 %% @doc Return all indices that a node is scheduled to give to another.
 disowning_indices(State, Node) ->
     case is_resizing(State) of
-      false ->
-          [Idx
-           || {Idx, Owner, _NextOwner, _Mods, _Status}
-                  <- State#chstate.next,
-              Owner =:= Node];
-      true ->
-          [Idx
-           || {Idx, Owner} <- all_owners(State), Owner =:= Node,
-              disowned_during_resize(State, Idx, Owner)]
+      false ->
+          [Idx
+           || {Idx, Owner, _NextOwner, _Mods, _Status}
+                  <- State#chstate.next,
+              Owner =:= Node];
+      true ->
+          [Idx
+           || {Idx, Owner} <- all_owners(State), Owner =:= Node,
+              disowned_during_resize(State, Idx, Owner)]
     end.

 disowned_during_resize(CState, Idx, Owner) ->
     %% catch error when index doesn't exist; we are disowning it if it's going away
     NextOwner = try future_owner(CState, Idx) catch
-                  _:_ -> undefined
-                end,
+                  _:_ -> undefined
+                end,
     case NextOwner of
-      Owner -> false;
-      _ -> true
+      Owner -> false;
+      _ -> true
     end.

 %% @doc Returns a list of all pending ownership transfers.
@@ -850,12 +945,12 @@ set_pending_changes(State, Transfers) ->
 %% @doc Given a ring, `Resizing', that has been resized (and presumably rebalanced)
 %% schedule a resize transition for `Orig'.
 -spec set_pending_resize(chstate(),
-                         chstate()) -> chstate().
+                         chstate()) -> chstate().

 set_pending_resize(Resizing, Orig) ->
     %% all existing indexes must transfer data when the ring is being resized
     Next = [{Idx, Owner, '$resize', [], awaiting}
-            || {Idx, Owner} <- riak_core_ring:all_owners(Orig)],
+            || {Idx, Owner} <- riak_core_ring:all_owners(Orig)],
     %% Whether or not the ring is shrinking or expanding, some
     %% ownership may be shared between the old and new ring. To prevent
     %% degenerate cases where partitions whose ownership does not
     %% change are transferred a bunch of data which they in turn
     %% need to transfer back, we move any such partitions to the front
     %% of the next list which is treated as ordered.
FutureOwners = riak_core_ring:all_owners(Resizing), SortedNext = lists:sort(fun ({Idx, Owner, _, _, _}, - _) -> - %% we only need to check one element because the end result - %% is the same as if we checked both: - %% - %% true, false -> true - %% true, true -> true - %% false, false -> false - %% false, true -> false - lists:member({Idx, Owner}, FutureOwners) - end, - Next), + _) -> + %% we only need to check one element because the end result + %% is the same as if we checked both: + %% + %% true, false -> true + %% true, true -> true + %% false, false -> false + %% false, true -> false + lists:member({Idx, Owner}, FutureOwners) + end, + Next), %% Resizing is assumed to have a modified chring, we need to put back %% the original chring to not install the resized one pre-emptively. The %% resized ring is stored in ring metadata for later use FutureCHash = chash(Resizing), ResetRing = set_chash(Resizing, chash(Orig)), set_resized_ring(set_pending_changes(ResetRing, - SortedNext), - FutureCHash). + SortedNext), + FutureCHash). -spec maybe_abort_resize(chstate()) -> {boolean(), - chstate()}. + chstate()}. maybe_abort_resize(State) -> Resizing = is_resizing(State), PostResize = is_post_resize(State), PendingAbort = is_resize_aborted(State), case PendingAbort andalso - Resizing andalso not PostResize - of - true -> - State1 = State#chstate{next = []}, - State2 = clear_all_resize_transfers(State1), - State3 = remove_meta('$resized_ring_abort', State2), - {true, remove_meta('$resized_ring', State3)}; - false -> {false, State} + Resizing andalso not PostResize + of + true -> + State1 = State#chstate{next = []}, + State2 = clear_all_resize_transfers(State1), + State3 = remove_meta('$resized_ring_abort', State2), + {true, remove_meta('$resized_ring', State3)}; + false -> {false, State} end. -spec set_pending_resize_abort(chstate()) -> chstate(). @@ -908,115 +1003,133 @@ set_pending_resize_abort(State) -> update_meta('$resized_ring_abort', true, State). -spec schedule_resize_transfer(chstate(), - {integer(), term()}, - integer() | {integer(), term()}) -> chstate(). + {integer(), term()}, + integer() | {integer(), term()}) -> chstate(). schedule_resize_transfer(State, Source, TargetIdx) when is_integer(TargetIdx) -> TargetNode = index_owner(future_ring(State), TargetIdx), - schedule_resize_transfer(State, Source, - {TargetIdx, TargetNode}); + schedule_resize_transfer(State, + Source, + {TargetIdx, TargetNode}); schedule_resize_transfer(State, Source, Source) -> State; schedule_resize_transfer(State, Source, Target) -> Transfers = resize_transfers(State, Source), %% ignore if we have already scheduled a transfer from source -> target case lists:keymember(Target, 1, Transfers) of - true -> State; - false -> - Transfers1 = lists:keystore(Target, 1, Transfers, - {Target, ordsets:new(), awaiting}), - set_resize_transfers(State, Source, Transfers1) + true -> State; + false -> + Transfers1 = lists:keystore(Target, + 1, + Transfers, + {Target, ordsets:new(), awaiting}), + set_resize_transfers(State, Source, Transfers1) end. %% @doc reassign all outbound and inbound resize transfers from `Node' to `NewNode' -spec reschedule_resize_transfers(chstate(), term(), - term()) -> chstate(). + term()) -> chstate(). 
reschedule_resize_transfers(State = #chstate{next = - Next}, - Node, NewNode) -> + Next}, + Node, NewNode) -> {NewNext, NewState} = lists:mapfoldl(fun (Entry, - StateAcc) -> - reschedule_resize_operation(Node, - NewNode, - Entry, - StateAcc) - end, - State, Next), + StateAcc) -> + reschedule_resize_operation(Node, + NewNode, + Entry, + StateAcc) + end, + State, + Next), NewState#chstate{next = NewNext}. reschedule_resize_operation(N, NewNode, - {Idx, N, '$resize', _Mods, _Status}, State) -> - NewEntry = {Idx, NewNode, '$resize', ordsets:new(), - awaiting}, + {Idx, N, '$resize', _Mods, _Status}, State) -> + NewEntry = {Idx, + NewNode, + '$resize', + ordsets:new(), + awaiting}, NewState = reschedule_outbound_resize_transfers(State, - Idx, N, NewNode), + Idx, + N, + NewNode), {NewEntry, NewState}; reschedule_resize_operation(Node, NewNode, - {Idx, OtherNode, '$resize', _Mods, _Status} = Entry, - State) -> + {Idx, OtherNode, '$resize', _Mods, _Status} = Entry, + State) -> {Changed, NewState} = - reschedule_inbound_resize_transfers({Idx, OtherNode}, - Node, NewNode, State), + reschedule_inbound_resize_transfers({Idx, OtherNode}, + Node, + NewNode, + State), case Changed of - true -> - NewEntry = {Idx, OtherNode, '$resize', ordsets:new(), - awaiting}, - {NewEntry, NewState}; - false -> {Entry, State} + true -> + NewEntry = {Idx, + OtherNode, + '$resize', + ordsets:new(), + awaiting}, + {NewEntry, NewState}; + false -> {Entry, State} end. reschedule_inbound_resize_transfers(Source, Node, - NewNode, State) -> + NewNode, State) -> F = fun (Transfer, Acc) -> - {NewXfer, NewAcc} = - reschedule_inbound_resize_transfer(Transfer, Node, - NewNode), - {NewXfer, NewAcc orelse Acc} - end, - {ResizeTransfers, Changed} = lists:mapfoldl(F, false, - resize_transfers(State, - Source)), + {NewXfer, NewAcc} = + reschedule_inbound_resize_transfer(Transfer, + Node, + NewNode), + {NewXfer, NewAcc orelse Acc} + end, + {ResizeTransfers, Changed} = lists:mapfoldl(F, + false, + resize_transfers(State, + Source)), {Changed, set_resize_transfers(State, Source, ResizeTransfers)}. -reschedule_inbound_resize_transfer({{Idx, Target}, _, - _}, - Target, NewNode) -> +reschedule_inbound_resize_transfer({{Idx, Target}, + _, + _}, + Target, NewNode) -> {{{Idx, NewNode}, ordsets:new(), awaiting}, true}; reschedule_inbound_resize_transfer(Transfer, _, _) -> {Transfer, false}. reschedule_outbound_resize_transfers(State, Idx, Node, - NewNode) -> + NewNode) -> OldSource = {Idx, Node}, NewSource = {Idx, NewNode}, Transfers = resize_transfers(State, OldSource), F = fun ({I, N}) when N =:= Node -> {I, NewNode}; - (T) -> T - end, + (T) -> T + end, NewTransfers = [{F(Target), ordsets:new(), awaiting} - || {Target, _, _} <- Transfers], + || {Target, _, _} <- Transfers], set_resize_transfers(clear_resize_transfers(OldSource, - State), - NewSource, NewTransfers). + State), + NewSource, + NewTransfers). %% @doc returns the first awaiting resize_transfer for a {SourceIdx, SourceNode} %% pair. If all transfers for the pair are complete, undefined is returned -spec awaiting_resize_transfer(chstate(), - {integer(), term()}, atom()) -> {integer(), - term()} | - undefined. + {integer(), term()}, atom()) -> {integer(), + term()} | + undefined. 
awaiting_resize_transfer(State, Source, Mod) ->
    ResizeTransfers = resize_transfers(State, Source),
    Awaiting = [{Target, Mods, Status}
-               || {Target, Mods, Status} <- ResizeTransfers,
-                  Status =/= complete, not ordsets:is_element(Mod, Mods)],
+               || {Target, Mods, Status} <- ResizeTransfers,
+                  Status =/= complete, not ordsets:is_element(Mod, Mods)],
    case Awaiting of
-      [] -> undefined;
-      [{Target, _, _} | _] -> Target
+      [] -> undefined;
+      [{Target, _, _} | _] -> Target
    end.

%% @doc return the status of a resize_transfer for `Source' (an index-node pair). undefined
@@ -1024,23 +1137,24 @@ awaiting_resize_transfer(State, Source, Mod) ->
 %% is marked as such or `Mod' is contained in the completed modules set. awaiting is
 %% returned otherwise
 -spec resize_transfer_status(chstate(),
-                             {integer(), term()}, {integer(), term()},
-                             atom()) -> awaiting | complete | undefined.
+                             {integer(), term()}, {integer(), term()},
+                             atom()) -> awaiting | complete | undefined.

 resize_transfer_status(State, Source, Target, Mod) ->
    ResizeTransfers = resize_transfers(State, Source),
-    IsComplete = case lists:keyfind(Target, 1,
-                                    ResizeTransfers)
-                 of
-                   false -> undefined;
-                   {Target, _, complete} -> true;
-                   {Target, Mods, awaiting} ->
-                       ordsets:is_element(Mod, Mods)
-                 end,
+    IsComplete = case lists:keyfind(Target,
+                                    1,
+                                    ResizeTransfers)
+                 of
+                   false -> undefined;
+                   {Target, _, complete} -> true;
+                   {Target, Mods, awaiting} ->
+                       ordsets:is_element(Mod, Mods)
+                 end,
    case IsComplete of
-      true -> complete;
-      false -> awaiting;
-      undefined -> undefined
+      true -> complete;
+      false -> awaiting;
+      undefined -> undefined
    end.

%% @doc mark a resize_transfer from `Source' to `Target' for `Mod' complete.
@@ -1049,124 +1163,127 @@ resize_transfer_status(State, Source, Target, Mod) ->
 %% for `Source' that need to be started to be scheduled before calling
 %% this function
 -spec resize_transfer_complete(chstate(),
-                               {integer(), term()}, {integer(), term()},
-                               atom()) -> chstate().
+                               {integer(), term()}, {integer(), term()},
+                               atom()) -> chstate().
resize_transfer_complete(State, {SrcIdx, _} = Source, - Target, Mod) -> + Target, Mod) -> ResizeTransfers = resize_transfers(State, Source), Transfer = lists:keyfind(Target, 1, ResizeTransfers), case Transfer of - {Target, Mods, Status} -> - VNodeMods = ordsets:from_list([VMod - || {_, VMod} - <- riak_core:vnode_modules()]), - Mods2 = ordsets:add_element(Mod, Mods), - Status2 = case {Status, Mods2} of - {complete, _} -> complete; - {awaiting, VNodeMods} -> complete; - _ -> awaiting - end, - ResizeTransfers2 = lists:keyreplace(Target, 1, - ResizeTransfers, - {Target, Mods2, Status2}), - State1 = set_resize_transfers(State, Source, - ResizeTransfers2), - AllComplete = lists:all(fun ({_, _, complete}) -> true; - ({_, Ms, awaiting}) -> - ordsets:is_element(Mod, Ms) - end, - ResizeTransfers2), - case AllComplete of - true -> transfer_complete(State1, SrcIdx, Mod); - false -> State1 - end; - _ -> State + {Target, Mods, Status} -> + VNodeMods = ordsets:from_list([VMod + || {_, VMod} + <- riak_core:vnode_modules()]), + Mods2 = ordsets:add_element(Mod, Mods), + Status2 = case {Status, Mods2} of + {complete, _} -> complete; + {awaiting, VNodeMods} -> complete; + _ -> awaiting + end, + ResizeTransfers2 = lists:keyreplace(Target, + 1, + ResizeTransfers, + {Target, Mods2, Status2}), + State1 = set_resize_transfers(State, + Source, + ResizeTransfers2), + AllComplete = lists:all(fun ({_, _, complete}) -> true; + ({_, Ms, awaiting}) -> + ordsets:is_element(Mod, Ms) + end, + ResizeTransfers2), + case AllComplete of + true -> transfer_complete(State1, SrcIdx, Mod); + false -> State1 + end; + _ -> State end. -spec is_resizing(chstate()) -> boolean(). is_resizing(State) -> case resized_ring(State) of - undefined -> false; - {ok, _} -> true + undefined -> false; + {ok, _} -> true end. -spec is_post_resize(chstate()) -> boolean(). is_post_resize(State) -> case get_meta('$resized_ring', State) of - {ok, '$cleanup'} -> true; - _ -> false + {ok, '$cleanup'} -> true; + _ -> false end. -spec is_resize_aborted(chstate()) -> boolean(). is_resize_aborted(State) -> case get_meta('$resized_ring_abort', State) of - {ok, true} -> true; - _ -> false + {ok, true} -> true; + _ -> false end. -spec is_resize_complete(chstate()) -> boolean(). is_resize_complete(#chstate{next = Next}) -> not - lists:any(fun ({_, _, _, _, awaiting}) -> true; - ({_, _, _, _, complete}) -> false - end, - Next). + lists:any(fun ({_, _, _, _, awaiting}) -> true; + ({_, _, _, _, complete}) -> false + end, + Next). -spec complete_resize_transfers(chstate(), - {integer(), term()}, atom()) -> [{integer(), - term()}]. + {integer(), term()}, atom()) -> [{integer(), + term()}]. complete_resize_transfers(State, Source, Mod) -> [Target || {Target, Mods, Status} - <- resize_transfers(State, Source), - Status =:= complete orelse - ordsets:is_element(Mod, Mods)]. + <- resize_transfers(State, Source), + Status =:= complete orelse + ordsets:is_element(Mod, Mods)]. -spec deletion_complete(chstate(), integer(), - atom()) -> chstate(). + atom()) -> chstate(). deletion_complete(State, Idx, Mod) -> transfer_complete(State, Idx, Mod). -spec resize_transfers(chstate(), - {integer(), term()}) -> [resize_transfer()]. + {integer(), term()}) -> [resize_transfer()]. resize_transfers(State, Source) -> {ok, Transfers} = get_meta({resize, Source}, [], State), Transfers. -spec set_resize_transfers(chstate(), - {integer(), term()}, - [resize_transfer()]) -> chstate(). + {integer(), term()}, + [resize_transfer()]) -> chstate(). 
set_resize_transfers(State, Source, Transfers) -> update_meta({resize, Source}, Transfers, State). clear_all_resize_transfers(State) -> - lists:foldl(fun clear_resize_transfers/2, State, - all_owners(State)). + lists:foldl(fun clear_resize_transfers/2, + State, + all_owners(State)). clear_resize_transfers(Source, State) -> remove_meta({resize, Source}, State). -spec resized_ring(chstate()) -> {ok, chash:chash()} | - undefined. + undefined. resized_ring(State) -> case get_meta('$resized_ring', State) of - {ok, '$cleanup'} -> {ok, State#chstate.chring}; - {ok, CHRing} -> {ok, CHRing}; - _ -> undefined + {ok, '$cleanup'} -> {ok, State#chstate.chring}; + {ok, CHRing} -> {ok, CHRing}; + _ -> undefined end. -spec set_resized_ring(chstate(), - chash:chash()) -> chstate(). + chash:chash()) -> chstate(). set_resized_ring(State, FutureCHash) -> update_meta('$resized_ring', FutureCHash, State). @@ -1175,39 +1292,40 @@ cleanup_after_resize(State) -> update_meta('$resized_ring', '$cleanup', State). -spec vnode_type(chstate(), integer()) -> primary | - {fallback, term()} | future_primary | - resized_primary. + {fallback, term()} | + future_primary | + resized_primary. vnode_type(State, Idx) -> vnode_type(State, Idx, node()). vnode_type(State, Idx, Node) -> try index_owner(State, Idx) of - Node -> primary; - Owner -> - case next_owner(State, Idx) of - {_, Node, _} -> future_primary; - _ -> {fallback, Owner} - end + Node -> primary; + Owner -> + case next_owner(State, Idx) of + {_, Node, _} -> future_primary; + _ -> {fallback, Owner} + end catch - error:{badmatch, _} -> - %% idx doesn't exist so must be an index in a resized ring - resized_primary + error:{badmatch, _} -> + %% idx doesn't exist so must be an index in a resized ring + resized_primary end. %% @doc Return details for a pending partition ownership change. -spec next_owner(State :: chstate(), - Idx :: integer()) -> pending_change(). + Idx :: integer()) -> pending_change(). next_owner(State, Idx) -> case lists:keyfind(Idx, 1, State#chstate.next) of - false -> {undefined, undefined, undefined}; - NInfo -> next_owner(NInfo) + false -> {undefined, undefined, undefined}; + NInfo -> next_owner(NInfo) end. %% @doc Return details for a pending partition ownership change. -spec next_owner(State :: chstate(), Idx :: integer(), - Mod :: module()) -> pending_change(). + Mod :: module()) -> pending_change(). next_owner(State, Idx, Mod) -> NInfo = lists:keyfind(Idx, 1, State#chstate.next), @@ -1215,14 +1333,14 @@ next_owner(State, Idx, Mod) -> next_owner_status(NInfo, Mod) -> case NInfo of - false -> {undefined, undefined, undefined}; - {_, Owner, NextOwner, _Transfers, complete} -> - {Owner, NextOwner, complete}; - {_, Owner, NextOwner, Transfers, _Status} -> - case ordsets:is_element(Mod, Transfers) of - true -> {Owner, NextOwner, complete}; - false -> {Owner, NextOwner, awaiting} - end + false -> {undefined, undefined, undefined}; + {_, Owner, NextOwner, _Transfers, complete} -> + {Owner, NextOwner, complete}; + {_, Owner, NextOwner, Transfers, _Status} -> + case ordsets:is_element(Mod, Transfers) of + true -> {Owner, NextOwner, complete}; + false -> {Owner, NextOwner, awaiting} + end end. %% @private @@ -1232,28 +1350,28 @@ next_owner({_, Owner, NextOwner, _Transfers, Status}) -> completed_next_owners(Mod, #chstate{next = Next}) -> [{Idx, O, NO} || NInfo = {Idx, _, _, _, _} <- Next, - {O, NO, complete} <- [next_owner_status(NInfo, Mod)]]. + {O, NO, complete} <- [next_owner_status(NInfo, Mod)]]. 
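%% An illustration of the pending-change tuples consumed above (the
%% entry is hypothetical, not from the original source). Completion is
%% tracked per vnode module, so one entry can be complete for one
%% module and awaiting for another:
%%
%%   Entry = {0, nodeA, nodeB, [riak_kv_vnode], awaiting},
%%   {nodeA, nodeB, complete} = next_owner_status(Entry, riak_kv_vnode),
%%   {nodeA, nodeB, awaiting} = next_owner_status(Entry, riak_pipe_vnode).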
%% @doc Returns true if all cluster members have seen the current ring. -spec ring_ready(State :: chstate()) -> boolean(). ring_ready(State0) -> check_tainted(State0, - "Error: riak_core_ring/ring_ready called " - "on tainted ring"), + "Error: riak_core_ring/ring_ready called " + "on tainted ring"), Owner = owner_node(State0), State = update_seen(Owner, State0), Seen = State#chstate.seen, Members = get_members(State#chstate.members, - [valid, leaving, exiting]), + [valid, leaving, exiting]), VClock = State#chstate.vclock, R = [begin - case orddict:find(Node, Seen) of - error -> false; - {ok, VC} -> vclock:equal(VClock, VC) - end - end - || Node <- Members], + case orddict:find(Node, Seen) of + error -> false; + {ok, VC} -> vclock:equal(VClock, VC) + end + end + || Node <- Members], Ready = lists:all(fun (X) -> X =:= true end, R), Ready. @@ -1266,32 +1384,33 @@ ring_ready_info(State0) -> State = update_seen(Owner, State0), Seen = State#chstate.seen, Members = get_members(State#chstate.members, - [valid, leaving, exiting]), + [valid, leaving, exiting]), RecentVC = orddict:fold(fun (_, VC, Recent) -> - case vclock:descends(VC, Recent) of - true -> VC; - false -> Recent - end - end, - State#chstate.vclock, Seen), + case vclock:descends(VC, Recent) of + true -> VC; + false -> Recent + end + end, + State#chstate.vclock, + Seen), Outdated = orddict:filter(fun (Node, VC) -> - not vclock:equal(VC, RecentVC) and - lists:member(Node, Members) - end, - Seen), + not vclock:equal(VC, RecentVC) and + lists:member(Node, Members) + end, + Seen), Outdated. %% @doc Marks a pending transfer as completed. -spec handoff_complete(State :: chstate(), - Idx :: integer(), Mod :: module()) -> chstate(). + Idx :: integer(), Mod :: module()) -> chstate(). handoff_complete(State, Idx, Mod) -> transfer_complete(State, Idx, Mod). ring_changed(Node, State) -> check_tainted(State, - "Error: riak_core_ring/ring_changed called " - "on tainted ring"), + "Error: riak_core_ring/ring_changed called " + "on tainted ring"), internal_ring_changed(Node, State). %% @doc Return the ring that will exist after all pending ownership transfers @@ -1303,100 +1422,109 @@ future_ring(State) -> future_ring(State, false) -> FutureState = change_owners(State, - all_next_owners(State)), + all_next_owners(State)), %% Individual nodes will move themselves from leaving to exiting if they %% have no ring ownership, this is implemented in riak_core_ring_handler. %% Emulate it here to return similar ring. 
Leaving = get_members(FutureState#chstate.members, - [leaving]), + [leaving]), FutureState2 = lists:foldl(fun (Node, StateAcc) -> - case indices(StateAcc, Node) of - [] -> - riak_core_ring:exit_member(Node, - StateAcc, - Node); - _ -> StateAcc - end - end, - FutureState, Leaving), + case indices(StateAcc, Node) of + [] -> + riak_core_ring:exit_member(Node, + StateAcc, + Node); + _ -> StateAcc + end + end, + FutureState, + Leaving), FutureState2#chstate{next = []}; future_ring(State0 = #chstate{next = OldNext}, true) -> case is_post_resize(State0) of - false -> - {ok, FutureCHash} = resized_ring(State0), - State1 = cleanup_after_resize(State0), - State2 = clear_all_resize_transfers(State1), - Resized = State2#chstate{chring = FutureCHash}, - Next = lists:foldl(fun ({Idx, Owner, '$resize', _, _}, - Acc) -> - DeleteEntry = {Idx, Owner, '$delete', [], - awaiting}, - %% catch error when index doesn't exist in new ring - try index_owner(Resized, Idx) of - Owner -> Acc; - _ -> [DeleteEntry | Acc] - catch - error:{badmatch, _} -> - [DeleteEntry | Acc] - end - end, - [], OldNext), - Resized#chstate{next = Next}; - true -> - State1 = remove_meta('$resized_ring', State0), - State1#chstate{next = []} + false -> + {ok, FutureCHash} = resized_ring(State0), + State1 = cleanup_after_resize(State0), + State2 = clear_all_resize_transfers(State1), + Resized = State2#chstate{chring = FutureCHash}, + Next = lists:foldl(fun ({Idx, Owner, '$resize', _, _}, + Acc) -> + DeleteEntry = {Idx, + Owner, + '$delete', + [], + awaiting}, + %% catch error when index doesn't exist in new ring + try index_owner(Resized, Idx) of + Owner -> Acc; + _ -> [DeleteEntry | Acc] + catch + error:{badmatch, _} -> + [DeleteEntry | Acc] + end + end, + [], + OldNext), + Resized#chstate{next = Next}; + true -> + State1 = remove_meta('$resized_ring', State0), + State1#chstate{next = []} end. 
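%% A small equivalence implied by the definitions above (bindings are
%% hypothetical): future_owner/2 is exactly an index_owner/2 lookup on
%% the future ring, so both of the following yield the same node:
%%
%%   Owner1 = future_owner(Ring, Idx),
%%   Owner2 = index_owner(future_ring(Ring), Idx),
%%   true = Owner1 =:= Owner2.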
pretty_print(Ring, Opts) -> OptNumeric = lists:member(numeric, Opts), OptLegend = lists:member(legend, Opts), Out = proplists:get_value(out, Opts, standard_io), - TargetN = proplists:get_value(target_n, Opts, - application:get_env(riak_core, target_n_val, - undefined)), + TargetN = proplists:get_value(target_n, + Opts, + application:get_env(riak_core, + target_n_val, + undefined)), Owners = riak_core_ring:all_members(Ring), Indices = riak_core_ring:all_owners(Ring), RingSize = length(Indices), Numeric = OptNumeric orelse length(Owners) > 26, case Numeric of - true -> - Ids = [integer_to_list(N) - || N <- lists:seq(1, length(Owners))]; - false -> - Ids = [[Letter] - || Letter <- lists:seq(97, 96 + length(Owners))] + true -> + Ids = [integer_to_list(N) + || N <- lists:seq(1, length(Owners))]; + false -> + Ids = [[Letter] + || Letter <- lists:seq(97, 96 + length(Owners))] end, Names = lists:zip(Owners, Ids), case OptLegend of - true -> - io:format(Out, "~36..=s Nodes ~36..=s~n", ["", ""]), - _ = [begin - NodeIndices = [Idx - || {Idx, Owner} <- Indices, Owner =:= Node], - RingPercent = length(NodeIndices) * 100 / RingSize, - io:format(Out, "Node ~s: ~w (~5.1f%) ~s~n", - [Name, length(NodeIndices), RingPercent, Node]) - end - || {Node, Name} <- Names], - io:format(Out, "~36..=s Ring ~37..=s~n", ["", ""]); - false -> ok + true -> + io:format(Out, "~36..=s Nodes ~36..=s~n", ["", ""]), + _ = [begin + NodeIndices = [Idx + || {Idx, Owner} <- Indices, Owner =:= Node], + RingPercent = length(NodeIndices) * 100 / RingSize, + io:format(Out, + "Node ~s: ~w (~5.1f%) ~s~n", + [Name, length(NodeIndices), RingPercent, Node]) + end + || {Node, Name} <- Names], + io:format(Out, "~36..=s Ring ~37..=s~n", ["", ""]); + false -> ok end, case Numeric of - true -> - Ownership = [orddict:fetch(Owner, Names) - || {_Idx, Owner} <- Indices], - io:format(Out, "~p~n", [Ownership]); - false -> - lists:foldl(fun ({_, Owner}, N) -> - Name = orddict:fetch(Owner, Names), - case N rem TargetN of - 0 -> io:format(Out, "~s|", [[Name]]); - _ -> io:format(Out, "~s", [[Name]]) - end, - N + 1 - end, - 1, Indices), - io:format(Out, "~n", []) + true -> + Ownership = [orddict:fetch(Owner, Names) + || {_Idx, Owner} <- Indices], + io:format(Out, "~p~n", [Ownership]); + false -> + lists:foldl(fun ({_, Owner}, N) -> + Name = orddict:fetch(Owner, Names), + case N rem TargetN of + 0 -> io:format(Out, "~s|", [[Name]]); + _ -> io:format(Out, "~s", [[Name]]) + end, + N + 1 + end, + 1, + Indices), + io:format(Out, "~n", []) end. %% @doc Return a ring with all transfers cancelled - for claim sim @@ -1410,26 +1538,27 @@ cancel_transfers(Ring) -> Ring#chstate{next = []}. internal_ring_changed(Node, CState0) -> CState = update_seen(Node, CState0), case ring_ready(CState) of - false -> CState; - true -> riak_core_claimant:ring_changed(Node, CState) + false -> CState; + true -> riak_core_claimant:ring_changed(Node, CState) end. %% @private merge_meta({N1, M1}, {N2, M2}) -> Meta = dict:merge(fun (_, D1, D2) -> - pick_val({N1, D1}, {N2, D2}) - end, - M1, M2), + pick_val({N1, D1}, {N2, D2}) + end, + M1, + M2), log_meta_merge(M1, M2, Meta), Meta. %% @private pick_val({N1, M1}, {N2, M2}) -> case {M1#meta_entry.lastmod, N1} > - {M2#meta_entry.lastmod, N2} - of - true -> M1; - false -> M2 + {M2#meta_entry.lastmod, N2} + of + true -> M1; + false -> M2 end. %% @private @@ -1444,10 +1573,10 @@ log_meta_merge(M1, M2, Meta) -> %% subsequent log messages will allow us to track ring versions. %% Handle legacy rings as well. 
log_ring_result(#chstate{vclock = V, members = Members, - next = Next}) -> + next = Next}) -> logger:debug("Updated ring vclock: ~p, Members: ~p, " - "Next: ~p", - [V, Members, Next]). + "Next: ~p", + [V, Members, Next]). %% @private internal_reconcile(State, OtherState) -> @@ -1467,49 +1596,52 @@ internal_reconcile(State, OtherState) -> VMerge1 = vclock:merge([VC1, VC2]), VMerge2 = vclock:merge([VC2, VC1]), case {vclock:equal(VMerge1, VMerge2), VMerge1 < VMerge2} - of - {true, _} -> VC3 = VMerge1; - {_, true} -> VC3 = VMerge1; - {_, false} -> VC3 = VMerge2 + of + {true, _} -> VC3 = VMerge1; + {_, true} -> VC3 = VMerge1; + {_, false} -> VC3 = VMerge2 end, Newer = vclock:descends(VC1, VC2), Older = vclock:descends(VC2, VC1), Equal = equal_cstate(State3, OtherState3), case {Equal, Newer, Older} of - {_, true, false} -> - {SeenChanged, State3#chstate{vclock = VC3}}; - {_, false, true} -> - {true, - OtherState3#chstate{nodename = VNode, vclock = VC3}}; - {true, _, _} -> - {SeenChanged, State3#chstate{vclock = VC3}}; - {_, true, true} -> - %% Exceptional condition that should only occur during - %% rolling upgrades and manual setting of the ring. - %% Merge as a divergent case. - State4 = reconcile_divergent(VNode, State3, - OtherState3), - {true, State4#chstate{nodename = VNode}}; - {_, false, false} -> - %% Unable to reconcile based on vector clock, merge rings. - State4 = reconcile_divergent(VNode, State3, - OtherState3), - {true, State4#chstate{nodename = VNode}} + {_, true, false} -> + {SeenChanged, State3#chstate{vclock = VC3}}; + {_, false, true} -> + {true, + OtherState3#chstate{nodename = VNode, vclock = VC3}}; + {true, _, _} -> + {SeenChanged, State3#chstate{vclock = VC3}}; + {_, true, true} -> + %% Exceptional condition that should only occur during + %% rolling upgrades and manual setting of the ring. + %% Merge as a divergent case. + State4 = reconcile_divergent(VNode, + State3, + OtherState3), + {true, State4#chstate{nodename = VNode}}; + {_, false, false} -> + %% Unable to reconcile based on vector clock, merge rings. + State4 = reconcile_divergent(VNode, + State3, + OtherState3), + {true, State4#chstate{nodename = VNode}} end. %% @private reconcile_divergent(VNode, StateA, StateB) -> VClock = vclock:increment(VNode, - vclock:merge([StateA#chstate.vclock, - StateB#chstate.vclock])), + vclock:merge([StateA#chstate.vclock, + StateB#chstate.vclock])), Members = reconcile_members(StateA, StateB), Meta = merge_meta({StateA#chstate.nodename, - StateA#chstate.meta}, - {StateB#chstate.nodename, StateB#chstate.meta}), - NewState = reconcile_ring(StateA, StateB, - get_members(Members)), + StateA#chstate.meta}, + {StateB#chstate.nodename, StateB#chstate.meta}), + NewState = reconcile_ring(StateA, + StateB, + get_members(Members)), NewState1 = NewState#chstate{vclock = VClock, - members = Members, meta = Meta}, + members = Members, meta = Meta}, log_ring_result(NewState1), NewState1. @@ -1518,31 +1650,34 @@ reconcile_divergent(VNode, StateA, StateB) -> %% and falling back to manual merge for divergent cases. 
reconcile_members(StateA, StateB) -> orddict:merge(fun (_K, {Valid1, VC1, Meta1}, - {Valid2, VC2, Meta2}) -> - New1 = vclock:descends(VC1, VC2), - New2 = vclock:descends(VC2, VC1), - MergeVC = vclock:merge([VC1, VC2]), - case {New1, New2} of - {true, false} -> - MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), - {Valid1, MergeVC, MergeMeta}; - {false, true} -> - MergeMeta = lists:ukeysort(1, Meta2 ++ Meta1), - {Valid2, MergeVC, MergeMeta}; - {_, _} -> - MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), - {merge_status(Valid1, Valid2), MergeVC, - MergeMeta} - end - end, - StateA#chstate.members, StateB#chstate.members). + {Valid2, VC2, Meta2}) -> + New1 = vclock:descends(VC1, VC2), + New2 = vclock:descends(VC2, VC1), + MergeVC = vclock:merge([VC1, VC2]), + case {New1, New2} of + {true, false} -> + MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), + {Valid1, MergeVC, MergeMeta}; + {false, true} -> + MergeMeta = lists:ukeysort(1, Meta2 ++ Meta1), + {Valid2, MergeVC, MergeMeta}; + {_, _} -> + MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), + {merge_status(Valid1, Valid2), + MergeVC, + MergeMeta} + end + end, + StateA#chstate.members, + StateB#chstate.members). %% @private reconcile_seen(StateA, StateB) -> orddict:merge(fun (_, VC1, VC2) -> - vclock:merge([VC1, VC2]) - end, - StateA#chstate.seen, StateB#chstate.seen). + vclock:merge([VC1, VC2]) + end, + StateA#chstate.seen, + StateB#chstate.seen). %% @private merge_next_status(complete, _) -> complete; @@ -1553,14 +1688,20 @@ merge_next_status(awaiting, awaiting) -> awaiting. %% @doc Merge two next lists that must be of the same size and have %% the same Idx/Owner pair. reconcile_next(Next1, Next2) -> - lists:zipwith(fun ({Idx, Owner, Node, Transfers1, - Status1}, - {Idx, Owner, Node, Transfers2, Status2}) -> - {Idx, Owner, Node, - ordsets:union(Transfers1, Transfers2), - merge_next_status(Status1, Status2)} - end, - Next1, Next2). + lists:zipwith(fun ({Idx, + Owner, + Node, + Transfers1, + Status1}, + {Idx, Owner, Node, Transfers2, Status2}) -> + {Idx, + Owner, + Node, + ordsets:union(Transfers1, Transfers2), + merge_next_status(Status1, Status2)} + end, + Next1, + Next2). %% @private %% @doc Merge two next lists that may be of different sizes and @@ -1569,96 +1710,102 @@ reconcile_next(Next1, Next2) -> %% the merge is the same as in reconcile_next/2. reconcile_divergent_next(BaseNext, OtherNext) -> MergedNext = substitute(1, BaseNext, OtherNext), - lists:zipwith(fun ({Idx, Owner1, Node1, Transfers1, - Status1}, - {Idx, Owner2, Node2, Transfers2, Status2}) -> - Same = {Owner1, Node1} =:= {Owner2, Node2}, - case {Same, Status1, Status2} of - {false, _, _} -> - {Idx, Owner1, Node1, Transfers1, Status1}; - _ -> - {Idx, Owner1, Node1, - ordsets:union(Transfers1, Transfers2), - merge_next_status(Status1, Status2)} - end - end, - BaseNext, MergedNext). + lists:zipwith(fun ({Idx, + Owner1, + Node1, + Transfers1, + Status1}, + {Idx, Owner2, Node2, Transfers2, Status2}) -> + Same = {Owner1, Node1} =:= {Owner2, Node2}, + case {Same, Status1, Status2} of + {false, _, _} -> + {Idx, Owner1, Node1, Transfers1, Status1}; + _ -> + {Idx, + Owner1, + Node1, + ordsets:union(Transfers1, Transfers2), + merge_next_status(Status1, Status2)} + end + end, + BaseNext, + MergedNext). %% @private substitute(Idx, TL1, TL2) -> lists:map(fun (T) -> - Key = element(Idx, T), - case lists:keyfind(Key, Idx, TL2) of - false -> T; - T2 -> T2 - end - end, - TL1). + Key = element(Idx, T), + case lists:keyfind(Key, Idx, TL2) of + false -> T; + T2 -> T2 + end + end, + TL1). 
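%% substitute/3 above overlays matching entries of TL2 onto TL1, keyed
%% by tuple element Idx; a hypothetical example (entries of TL2 with no
%% match in TL1 are ignored):
%%
%%   [{0, a}, {1, x}] = substitute(1, [{0, a}, {1, b}], [{1, x}, {9, z}]).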
%% @private
reconcile_ring(StateA = #chstate{claimant = Claimant1,
-                                rvsn = VC1, next = Next1},
-               StateB = #chstate{claimant = Claimant2, rvsn = VC2,
-                                 next = Next2},
-               Members) ->
+               rvsn = VC1, next = Next1},
+               StateB = #chstate{claimant = Claimant2, rvsn = VC2,
+               next = Next2},
+               Members) ->
    %% Try to reconcile based on the ring version (rvsn) vector clock.
    V1Newer = vclock:descends(VC1, VC2),
    V2Newer = vclock:descends(VC2, VC1),
    EqualVC = vclock:equal(VC1, VC2) and
-               (Claimant1 =:= Claimant2),
+               (Claimant1 =:= Claimant2),
    case {EqualVC, V1Newer, V2Newer} of
-      {true, _, _} ->
-          Next = reconcile_next(Next1, Next2),
-          StateA#chstate{next = Next};
-      {_, true, false} ->
-          Next = reconcile_divergent_next(Next1, Next2),
-          StateA#chstate{next = Next};
-      {_, false, true} ->
-          Next = reconcile_divergent_next(Next2, Next1),
-          StateB#chstate{next = Next};
-      {_, _, _} ->
-          %% Ring versions were divergent, so fall back to reconciling based
-          %% on claimant. Under normal operation, divergent ring versions
-          %% should only occur if there are two different claimants, and one
-          %% claimant is invalid. For example, when a claimant is removed and
-          %% a new claimant has just taken over. We therefore chose the ring
-          %% with the valid claimant.
-          CValid1 = lists:member(Claimant1, Members),
-          CValid2 = lists:member(Claimant2, Members),
-          case {CValid1, CValid2} of
-            {true, false} ->
-                Next = reconcile_divergent_next(Next1, Next2),
-                StateA#chstate{next = Next};
-            {false, true} ->
-                Next = reconcile_divergent_next(Next2, Next1),
-                StateB#chstate{next = Next};
-            {false, false} ->
-                %% This can occur when removed/down nodes are still
-                %% up and gossip to each other. We need to pick a
-                %% claimant to handle this case, although the choice
-                %% is irrelevant as a correct valid claimant will
-                %% eventually emerge when the ring converges.
-                %TODO False-false and true-true are the same. _-_ maybe better not repitition
-                case Claimant1 < Claimant2 of
-                  true ->
-                      Next = reconcile_divergent_next(Next1, Next2),
-                      StateA#chstate{next = Next};
-                  false ->
-                      Next = reconcile_divergent_next(Next2, Next1),
-                      StateB#chstate{next = Next}
-                end;
-            {true, true} ->
-                %% This should never happen in normal practice.
-                %% But, we need to handle it for exceptional cases.
-                case Claimant1 < Claimant2 of
-                  true ->
-                      Next = reconcile_divergent_next(Next1, Next2),
-                      StateA#chstate{next = Next};
-                  false ->
-                      Next = reconcile_divergent_next(Next2, Next1),
-                      StateB#chstate{next = Next}
-                end
-          end
+      {true, _, _} ->
+          Next = reconcile_next(Next1, Next2),
+          StateA#chstate{next = Next};
+      {_, true, false} ->
+          Next = reconcile_divergent_next(Next1, Next2),
+          StateA#chstate{next = Next};
+      {_, false, true} ->
+          Next = reconcile_divergent_next(Next2, Next1),
+          StateB#chstate{next = Next};
+      {_, _, _} ->
+          %% Ring versions were divergent, so fall back to reconciling based
+          %% on claimant. Under normal operation, divergent ring versions
+          %% should only occur if there are two different claimants, and one
+          %% claimant is invalid. For example, when a claimant is removed and
+          %% a new claimant has just taken over. We therefore choose the ring
+          %% with the valid claimant.
+ CValid1 = lists:member(Claimant1, Members),
+ CValid2 = lists:member(Claimant2, Members),
+ case {CValid1, CValid2} of
+ {true, false} ->
+ Next = reconcile_divergent_next(Next1, Next2),
+ StateA#chstate{next = Next};
+ {false, true} ->
+ Next = reconcile_divergent_next(Next2, Next1),
+ StateB#chstate{next = Next};
+ {false, false} ->
+ %% This can occur when removed/down nodes are still
+ %% up and gossip to each other. We need to pick a
+ %% claimant to handle this case, although the choice
+ %% is irrelevant as a correct valid claimant will
+ %% eventually emerge when the ring converges.
+ %TODO False-false and true-true take the same branch; maybe better to avoid the repetition
+ case Claimant1 < Claimant2 of
+ true ->
+ Next = reconcile_divergent_next(Next1, Next2),
+ StateA#chstate{next = Next};
+ false ->
+ Next = reconcile_divergent_next(Next2, Next1),
+ StateB#chstate{next = Next}
+ end;
+ {true, true} ->
+ %% This should never happen in normal practice.
+ %% But, we need to handle it for exceptional cases.
+ case Claimant1 < Claimant2 of
+ true ->
+ Next = reconcile_divergent_next(Next1, Next2),
+ StateA#chstate{next = Next};
+ false ->
+ Next = reconcile_divergent_next(Next2, Next1),
+ StateB#chstate{next = Next}
+ end
+ end
 end.

%% @private
@@ -1678,40 +1825,43 @@ merge_status(_, _) -> invalid.

%% @private
transfer_complete(CState = #chstate{next = Next,
-                                    vclock = VClock},
-                  Idx, Mod) ->
+                                    vclock = VClock},
+                  Idx, Mod) ->
    {Idx, Owner, NextOwner, Transfers, Status} =
-        lists:keyfind(Idx, 1, Next),
+        lists:keyfind(Idx, 1, Next),
    Transfers2 = ordsets:add_element(Mod, Transfers),
    VNodeMods = ordsets:from_list([VMod
-                                  || {_, VMod} <- riak_core:vnode_modules()]),
+                                  || {_, VMod} <- riak_core:vnode_modules()]),
    Status2 = case {Status, Transfers2} of
-                {complete, _} -> complete;
-                {awaiting, VNodeMods} -> complete;
-                _ -> awaiting
-              end,
-    Next2 = lists:keyreplace(Idx, 1, Next,
-                             {Idx, Owner, NextOwner, Transfers2, Status2}),
+                {complete, _} -> complete;
+                {awaiting, VNodeMods} -> complete;
+                _ -> awaiting
+              end,
+    Next2 = lists:keyreplace(Idx,
+                             1,
+                             Next,
+                             {Idx, Owner, NextOwner, Transfers2, Status2}),
    VClock2 = vclock:increment(Owner, VClock),
    CState#chstate{next = Next2, vclock = VClock2}.

%% @private
get_members(Members) ->
    get_members(Members,
-                [joining, valid, leaving, exiting, down]).
+                [joining, valid, leaving, exiting, down]).

%% @private
get_members(Members, Types) ->
    [Node
     || {Node, {V, _, _}} <- Members,
-        lists:member(V, Types)].
+        lists:member(V, Types)].

%% @private
update_seen(Node,
-            CState = #chstate{vclock = VClock, seen = Seen}) ->
+            CState = #chstate{vclock = VClock, seen = Seen}) ->
    Seen2 = orddict:update(Node,
-                           fun (SeenVC) -> vclock:merge([SeenVC, VClock]) end,
-                           VClock, Seen),
+                           fun (SeenVC) -> vclock:merge([SeenVC, VClock]) end,
+                           VClock,
+                           Seen),
    CState#chstate{seen = Seen2}.

%% @private
equal_cstate(StateA, StateB) ->
    equal_cstate(StateA, StateB, false).

equal_cstate(StateA, StateB, false) ->
    T1 = equal_members(StateA#chstate.members,
-                       StateB#chstate.members),
+                       StateB#chstate.members),
    T2 = vclock:equal(StateA#chstate.rvsn,
-                      StateB#chstate.rvsn),
+                      StateB#chstate.rvsn),
    T3 = equal_seen(StateA, StateB),
    T4 = equal_rings(StateA, StateB),
    %% Clear fields checked manually and test remaining through equality.
    %% Note: We do not consider cluster name in equality.
StateA2 = StateA#chstate{nodename = undefined, - members = undefined, vclock = undefined, - rvsn = undefined, seen = undefined, - chring = undefined, meta = undefined, - clustername = undefined}, + members = undefined, vclock = undefined, + rvsn = undefined, seen = undefined, + chring = undefined, meta = undefined, + clustername = undefined}, StateB2 = StateB#chstate{nodename = undefined, - members = undefined, vclock = undefined, - rvsn = undefined, seen = undefined, - chring = undefined, meta = undefined, - clustername = undefined}, + members = undefined, vclock = undefined, + rvsn = undefined, seen = undefined, + chring = undefined, meta = undefined, + clustername = undefined}, T5 = StateA2 =:= StateB2, T1 andalso T2 andalso T3 andalso T4 andalso T5. %% @private equal_members(M1, M2) -> L = orddict:merge(fun (_, {Status1, VC1, Meta1}, - {Status2, VC2, Meta2}) -> - Status1 =:= Status2 andalso - vclock:equal(VC1, VC2) andalso Meta1 =:= Meta2 - end, - M1, M2), + {Status2, VC2, Meta2}) -> + Status1 =:= Status2 andalso + vclock:equal(VC1, VC2) andalso Meta1 =:= Meta2 + end, + M1, + M2), {_, R} = lists:unzip(L), lists:all(fun (X) -> X =:= true end, R). @@ -1756,20 +1907,21 @@ equal_seen(StateA, StateB) -> Seen1 = filtered_seen(StateA), Seen2 = filtered_seen(StateB), L = orddict:merge(fun (_, VC1, VC2) -> - vclock:equal(VC1, VC2) - end, - Seen1, Seen2), + vclock:equal(VC1, VC2) + end, + Seen1, + Seen2), {_, R} = lists:unzip(L), lists:all(fun (X) -> X =:= true end, R). %% @private filtered_seen(State = #chstate{seen = Seen}) -> case get_members(State#chstate.members) of - [] -> Seen; - Members -> - orddict:filter(fun (N, _) -> lists:member(N, Members) - end, - Seen) + [] -> Seen; + Members -> + orddict:filter(fun (N, _) -> lists:member(N, Members) + end, + Seen) end. %% =================================================================== @@ -1809,20 +1961,20 @@ index_test() -> ?assertEqual((node()), (index_owner(Ring0, 0))), ?assertEqual(x, (index_owner(Ring1, 0))), ?assertEqual((lists:sort([x, node()])), - (lists:sort(diff_nodes(Ring0, Ring1)))). + (lists:sort(diff_nodes(Ring0, Ring1)))). reconcile_test() -> Ring0 = fresh(2, node()), Ring1 = transfer_node(0, x, Ring0), %% Only members and seen should have changed {new_ring, Ring2} = reconcile(fresh(2, someone_else), - Ring1), + Ring1), ?assertNot((equal_cstate(Ring1, Ring2, false))), RingB0 = fresh(2, node()), RingB1 = transfer_node(0, x, RingB0), RingB2 = RingB1#chstate{nodename = b}, ?assertMatch({no_change, _}, - (reconcile(Ring1, RingB2))), + (reconcile(Ring1, RingB2))), {no_change, RingB3} = reconcile(Ring1, RingB2), ?assert((equal_cstate(RingB2, RingB3))). @@ -1831,22 +1983,22 @@ metadata_inequality_test() -> Ring1 = update_meta(key, val, Ring0), ?assertNot((equal_rings(Ring0, Ring1))), ?assertEqual((Ring1#chstate.meta), - (merge_meta({node0, Ring0#chstate.meta}, - {node1, Ring1#chstate.meta}))), + (merge_meta({node0, Ring0#chstate.meta}, + {node1, Ring1#chstate.meta}))), timer:sleep(1001), % ensure that lastmod is at least a second later Ring2 = update_meta(key, val2, Ring1), ?assertEqual((get_meta(key, Ring2)), - (get_meta(key, - #chstate{meta = - merge_meta({node1, Ring1#chstate.meta}, - {node2, - Ring2#chstate.meta})}))), + (get_meta(key, + #chstate{meta = + merge_meta({node1, Ring1#chstate.meta}, + {node2, + Ring2#chstate.meta})}))), ?assertEqual((get_meta(key, Ring2)), - (get_meta(key, - #chstate{meta = - merge_meta({node2, Ring2#chstate.meta}, - {node1, - Ring1#chstate.meta})}))). 
+ (get_meta(key, + #chstate{meta = + merge_meta({node2, Ring2#chstate.meta}, + {node1, + Ring1#chstate.meta})}))). metadata_remove_test() -> Ring0 = fresh(2, node()), @@ -1856,17 +2008,17 @@ metadata_remove_test() -> Ring2 = remove_meta(key, Ring1), ?assertEqual(undefined, (get_meta(key, Ring2))), ?assertEqual(undefined, - (get_meta(key, - #chstate{meta = - merge_meta({node1, Ring1#chstate.meta}, - {node2, - Ring2#chstate.meta})}))), + (get_meta(key, + #chstate{meta = + merge_meta({node1, Ring1#chstate.meta}, + {node2, + Ring2#chstate.meta})}))), ?assertEqual(undefined, - (get_meta(key, - #chstate{meta = - merge_meta({node2, Ring2#chstate.meta}, - {node1, - Ring1#chstate.meta})}))). + (get_meta(key, + #chstate{meta = + merge_meta({node2, Ring2#chstate.meta}, + {node1, + Ring1#chstate.meta})}))). rename_test() -> Ring0 = fresh(2, node()), @@ -1878,14 +2030,14 @@ exclusion_test() -> Ring0 = fresh(2, node()), Ring1 = transfer_node(0, x, Ring0), ?assertEqual(0, - (random_other_index(Ring1, - [730750818665451459101842416358141509827966271488]))), + (random_other_index(Ring1, + [730750818665451459101842416358141509827966271488]))), ?assertEqual(no_indices, - (random_other_index(Ring1, [0]))), + (random_other_index(Ring1, [0]))), ?assertEqual([{730750818665451459101842416358141509827966271488, - node()}, - {0, x}], - (preflist(<<1:160/integer>>, Ring1))). + node()}, + {0, x}], + (preflist(<<1:160/integer>>, Ring1))). random_other_node_test() -> Ring0 = fresh(2, node()), @@ -1900,7 +2052,7 @@ membership_test() -> RingA2 = add_member(nodeA, RingA1, nodeB), RingA3 = add_member(nodeA, RingA2, nodeC), ?assertEqual([nodeA, nodeB, nodeC], - (all_members(RingA3))), + (all_members(RingA3))), RingA4 = remove_member(nodeA, RingA3, nodeC), ?assertEqual([nodeA, nodeB], (all_members(RingA4))), %% Node should stay removed @@ -1914,37 +2066,41 @@ membership_test() -> RingB2 = add_member(nodeB, RingA6, nodeC), {_, RingA7} = reconcile(RingB2, RingA6), ?assertEqual([nodeA, nodeB, nodeC], - (all_members(RingA7))), - Priority = [{invalid, 1}, {down, 2}, {joining, 3}, - {valid, 4}, {exiting, 5}, {leaving, 6}], + (all_members(RingA7))), + Priority = [{invalid, 1}, + {down, 2}, + {joining, 3}, + {valid, 4}, + {exiting, 5}, + {leaving, 6}], RingX1 = fresh(nodeA), RingX2 = add_member(nodeA, RingX1, nodeB), RingX3 = add_member(nodeA, RingX2, nodeC), ?assertEqual(joining, (member_status(RingX3, nodeC))), %% Parallel/sibling status changes merge based on priority [begin - RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), - ?assertEqual(StatusA, (member_status(RingT1, nodeC))), - RingT2 = set_member(nodeB, RingX3, nodeC, StatusB), - ?assertEqual(StatusB, (member_status(RingT2, nodeC))), - StatusC = case PriorityA < PriorityB of - true -> StatusA; - false -> StatusB - end, - {_, RingT3} = reconcile(RingT2, RingT1), - ?assertEqual(StatusC, (member_status(RingT3, nodeC))) + RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), + ?assertEqual(StatusA, (member_status(RingT1, nodeC))), + RingT2 = set_member(nodeB, RingX3, nodeC, StatusB), + ?assertEqual(StatusB, (member_status(RingT2, nodeC))), + StatusC = case PriorityA < PriorityB of + true -> StatusA; + false -> StatusB + end, + {_, RingT3} = reconcile(RingT2, RingT1), + ?assertEqual(StatusC, (member_status(RingT3, nodeC))) end || {StatusA, PriorityA} <- Priority, - {StatusB, PriorityB} <- Priority], + {StatusB, PriorityB} <- Priority], %% Related status changes merge to descendant [begin - RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), - ?assertEqual(StatusA, 
(member_status(RingT1, nodeC))), - RingT2 = set_member(nodeB, RingT1, nodeC, StatusB), - ?assertEqual(StatusB, (member_status(RingT2, nodeC))), - RingT3 = set_member(nodeA, RingT1, nodeA, valid), - {_, RingT4} = reconcile(RingT2, RingT3), - ?assertEqual(StatusB, (member_status(RingT4, nodeC))) + RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), + ?assertEqual(StatusA, (member_status(RingT1, nodeC))), + RingT2 = set_member(nodeB, RingT1, nodeC, StatusB), + ?assertEqual(StatusB, (member_status(RingT2, nodeC))), + RingT3 = set_member(nodeA, RingT1, nodeA, valid), + {_, RingT4} = reconcile(RingT2, RingT3), + ?assertEqual(StatusB, (member_status(RingT4, nodeC))) end || {StatusA, _} <- Priority, {StatusB, _} <- Priority], ok. @@ -1957,25 +2113,25 @@ ring_version_test() -> #chstate{rvsn = RVsn, vclock = VClock} = Ring3, RingA1 = transfer_node(0, nodeA, Ring3), RingA2 = RingA1#chstate{vclock = - vclock:increment(nodeA, VClock)}, + vclock:increment(nodeA, VClock)}, RingB1 = transfer_node(0, nodeB, Ring3), RingB2 = RingB1#chstate{vclock = - vclock:increment(nodeB, VClock)}, + vclock:increment(nodeB, VClock)}, %% RingA1 has most recent ring version {_, RingT1} = reconcile(RingA2#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB2), + vclock:increment(nodeA, RVsn)}, + RingB2), ?assertEqual(nodeA, (index_owner(RingT1, 0))), %% RingB1 has most recent ring version {_, RingT2} = reconcile(RingA2, - RingB2#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + RingB2#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeB, (index_owner(RingT2, 0))), %% Divergent ring versions, merge based on claimant {_, RingT3} = reconcile(RingA2#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB2#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + vclock:increment(nodeA, RVsn)}, + RingB2#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeA, (index_owner(RingT3, 0))), %% Divergent ring versions, one valid claimant. Merge on claimant. RingA3 = RingA2#chstate{claimant = nodeA}, @@ -1983,39 +2139,45 @@ ring_version_test() -> RingB3 = RingB2#chstate{claimant = nodeB}, RingB4 = remove_member(nodeB, RingB3, nodeA), {_, RingT4} = reconcile(RingA4#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB3#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + vclock:increment(nodeA, RVsn)}, + RingB3#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeA, (index_owner(RingT4, 0))), {_, RingT5} = reconcile(RingA3#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB4#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + vclock:increment(nodeA, RVsn)}, + RingB4#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeB, (index_owner(RingT5, 0))). 
reconcile_next_test() -> Next1 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {2, nodeA, nodeB, [riak_pipe_vnode], complete}], + {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, + {2, nodeA, nodeB, [riak_pipe_vnode], complete}], Next2 = [{0, nodeA, nodeB, [riak_kv_vnode], complete}, - {1, nodeA, nodeB, [], awaiting}, - {2, nodeA, nodeB, [], awaiting}], - Next3 = [{0, nodeA, nodeB, - [riak_kv_vnode, riak_pipe_vnode], complete}, - {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {2, nodeA, nodeB, [riak_pipe_vnode], complete}], + {1, nodeA, nodeB, [], awaiting}, + {2, nodeA, nodeB, [], awaiting}], + Next3 = [{0, + nodeA, + nodeB, + [riak_kv_vnode, riak_pipe_vnode], + complete}, + {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, + {2, nodeA, nodeB, [riak_pipe_vnode], complete}], ?assertEqual(Next3, (reconcile_next(Next1, Next2))), Next4 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {1, nodeA, nodeB, [], awaiting}, - {2, nodeA, nodeB, [riak_pipe_vnode], awaiting}], + {1, nodeA, nodeB, [], awaiting}, + {2, nodeA, nodeB, [riak_pipe_vnode], awaiting}], Next5 = [{0, nodeA, nodeC, [riak_kv_vnode], complete}, - {2, nodeA, nodeB, [riak_kv_vnode], complete}], + {2, nodeA, nodeB, [riak_kv_vnode], complete}], Next6 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {1, nodeA, nodeB, [], awaiting}, - {2, nodeA, nodeB, [riak_kv_vnode, riak_pipe_vnode], - complete}], + {1, nodeA, nodeB, [], awaiting}, + {2, + nodeA, + nodeB, + [riak_kv_vnode, riak_pipe_vnode], + complete}], ?assertEqual(Next6, - (reconcile_divergent_next(Next4, Next5))). + (reconcile_divergent_next(Next4, Next5))). resize_test() -> Ring0 = fresh(4, a), @@ -2027,89 +2189,113 @@ resize_test() -> valid_resize(Ring0, Ring1), Ring3 = set_pending_resize(Ring2, Ring0), ?assertEqual((num_partitions(Ring0)), - (num_partitions(Ring3))), + (num_partitions(Ring3))), ?assertEqual((num_partitions(Ring2)), - (future_num_partitions(Ring3))), + (future_num_partitions(Ring3))), ?assertEqual((num_partitions(Ring2)), - (num_partitions(future_ring(Ring3)))), + (num_partitions(future_ring(Ring3)))), Key = <<0:160/integer>>, OrigIdx = element(1, hd(preflist(Key, Ring0))), %% for non-resize transitions index should be the same ?assertEqual(OrigIdx, - (future_index(Key, OrigIdx, undefined, Ring0))), + (future_index(Key, OrigIdx, undefined, Ring0))), ?assertEqual((element(1, hd(preflist(Key, Ring2)))), - (future_index(Key, OrigIdx, undefined, Ring3))). + (future_index(Key, OrigIdx, undefined, Ring3))). resize_xfer_test_() -> {setup, fun () -> - meck:unload(), - meck:new(riak_core, [passthrough]), - meck:expect(riak_core, vnode_modules, - fun () -> - [{some_app, fake_vnode}, - {other_app, other_vnode}] - end) + meck:unload(), + meck:new(riak_core, [passthrough]), + meck:expect(riak_core, + vnode_modules, + fun () -> + [{some_app, fake_vnode}, + {other_app, other_vnode}] + end) end, - fun (_) -> meck:unload() end, fun test_resize_xfers/0}. + fun (_) -> meck:unload() end, + fun test_resize_xfers/0}. 
test_resize_xfers() -> Ring0 = riak_core_ring:fresh(4, a), Ring1 = set_pending_resize(resize(Ring0, 8), Ring0), Source1 = {0, a}, Target1 = - {730750818665451459101842416358141509827966271488, a}, + {730750818665451459101842416358141509827966271488, a}, TargetIdx2 = - 365375409332725729550921208179070754913983135744, - Ring2 = schedule_resize_transfer(Ring1, Source1, - Target1), + 365375409332725729550921208179070754913983135744, + Ring2 = schedule_resize_transfer(Ring1, + Source1, + Target1), ?assertEqual(Target1, - (awaiting_resize_transfer(Ring2, Source1, fake_vnode))), + (awaiting_resize_transfer(Ring2, Source1, fake_vnode))), ?assertEqual(awaiting, - (resize_transfer_status(Ring2, Source1, Target1, - fake_vnode))), + (resize_transfer_status(Ring2, + Source1, + Target1, + fake_vnode))), %% use Target1 since we haven't used it as a source index ?assertEqual(undefined, - (awaiting_resize_transfer(Ring2, Target1, fake_vnode))), + (awaiting_resize_transfer(Ring2, Target1, fake_vnode))), ?assertEqual(undefined, - (resize_transfer_status(Ring2, Target1, Source1, - fake_vnode))), - Ring3 = schedule_resize_transfer(Ring2, Source1, - TargetIdx2), - Ring4 = resize_transfer_complete(Ring3, Source1, - Target1, fake_vnode), + (resize_transfer_status(Ring2, + Target1, + Source1, + fake_vnode))), + Ring3 = schedule_resize_transfer(Ring2, + Source1, + TargetIdx2), + Ring4 = resize_transfer_complete(Ring3, + Source1, + Target1, + fake_vnode), ?assertEqual({TargetIdx2, a}, - (awaiting_resize_transfer(Ring4, Source1, fake_vnode))), + (awaiting_resize_transfer(Ring4, Source1, fake_vnode))), ?assertEqual(awaiting, - (resize_transfer_status(Ring4, Source1, {TargetIdx2, a}, - fake_vnode))), + (resize_transfer_status(Ring4, + Source1, + {TargetIdx2, a}, + fake_vnode))), ?assertEqual(complete, - (resize_transfer_status(Ring4, Source1, Target1, - fake_vnode))), - Ring5 = resize_transfer_complete(Ring4, Source1, - {TargetIdx2, a}, fake_vnode), - {_, '$resize', Status1} = next_owner(Ring5, 0, - fake_vnode), + (resize_transfer_status(Ring4, + Source1, + Target1, + fake_vnode))), + Ring5 = resize_transfer_complete(Ring4, + Source1, + {TargetIdx2, a}, + fake_vnode), + {_, '$resize', Status1} = next_owner(Ring5, + 0, + fake_vnode), ?assertEqual(complete, Status1), - Ring6 = resize_transfer_complete(Ring5, Source1, - {TargetIdx2, a}, other_vnode), - Ring7 = resize_transfer_complete(Ring6, Source1, - Target1, other_vnode), - {_, '$resize', Status2} = next_owner(Ring7, 0, - fake_vnode), + Ring6 = resize_transfer_complete(Ring5, + Source1, + {TargetIdx2, a}, + other_vnode), + Ring7 = resize_transfer_complete(Ring6, + Source1, + Target1, + other_vnode), + {_, '$resize', Status2} = next_owner(Ring7, + 0, + fake_vnode), ?assertEqual(complete, Status2), - {_, '$resize', Status3} = next_owner(Ring7, 0, - other_vnode), + {_, '$resize', Status3} = next_owner(Ring7, + 0, + other_vnode), ?assertEqual(complete, Status3), {_, '$resize', complete} = next_owner(Ring7, 0). valid_resize(Ring0, Ring1) -> lists:foreach(fun ({Idx, Owner}) -> - case lists:keyfind(Idx, 1, all_owners(Ring0)) of - false -> ?assertEqual('$dummyhost@resized', Owner); - {Idx, OrigOwner} -> ?assertEqual(OrigOwner, Owner) - end - end, - all_owners(Ring1)). + case lists:keyfind(Idx, 1, all_owners(Ring0)) of + false -> + ?assertEqual('$dummyhost@resized', Owner); + {Idx, OrigOwner} -> ?assertEqual(OrigOwner, Owner) + end + end, + all_owners(Ring1)). -endif. 
diff --git a/src/riak_core_ring_events.erl b/src/riak_core_ring_events.erl index 4b7b44d4f..d5257418e 100644 --- a/src/riak_core_ring_events.erl +++ b/src/riak_core_ring_events.erl @@ -24,15 +24,25 @@ -behaviour(gen_event). %% API --export([start_link/0, add_handler/2, add_sup_handler/2, - add_guarded_handler/2, add_callback/1, - add_sup_callback/1, add_guarded_callback/1, - ring_update/1, force_update/0, ring_sync_update/1, - force_sync_update/0]). +-export([start_link/0, + add_handler/2, + add_sup_handler/2, + add_guarded_handler/2, + add_callback/1, + add_sup_callback/1, + add_guarded_callback/1, + ring_update/1, + force_update/0, + ring_sync_update/1, + force_sync_update/0]). %% gen_event callbacks --export([init/1, handle_event/2, handle_call/2, - handle_info/2, terminate/2, code_change/3]). +-export([init/1, + handle_event/2, + handle_call/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, {callback}). @@ -49,20 +59,24 @@ add_sup_handler(Handler, Args) -> gen_event:add_sup_handler(?MODULE, Handler, Args). add_guarded_handler(Handler, Args) -> - riak_core:add_guarded_event_handler(?MODULE, Handler, - Args). + riak_core:add_guarded_event_handler(?MODULE, + Handler, + Args). add_callback(Fn) when is_function(Fn) -> - gen_event:add_handler(?MODULE, {?MODULE, make_ref()}, - [Fn]). + gen_event:add_handler(?MODULE, + {?MODULE, make_ref()}, + [Fn]). add_sup_callback(Fn) when is_function(Fn) -> gen_event:add_sup_handler(?MODULE, - {?MODULE, make_ref()}, [Fn]). + {?MODULE, make_ref()}, + [Fn]). add_guarded_callback(Fn) when is_function(Fn) -> riak_core:add_guarded_event_handler(?MODULE, - {?MODULE, make_ref()}, [Fn]). + {?MODULE, make_ref()}, + [Fn]). force_update() -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), @@ -88,7 +102,8 @@ init([Fn]) -> {ok, #state{callback = Fn}}. handle_event({ring_update, Ring}, State) -> - (State#state.callback)(Ring), {ok, State}. + (State#state.callback)(Ring), + {ok, State}. handle_call(_Request, State) -> {ok, ok, State}. diff --git a/src/riak_core_ring_handler.erl b/src/riak_core_ring_handler.erl index f8ea3aa27..645920ff3 100644 --- a/src/riak_core_ring_handler.erl +++ b/src/riak_core_ring_handler.erl @@ -19,8 +19,12 @@ -behaviour(gen_event). %% gen_event callbacks --export([init/1, handle_event/2, handle_call/2, - handle_info/2, terminate/2, code_change/3]). +-export([init/1, + handle_event/2, + handle_call/2, + handle_info/2, + terminate/2, + code_change/3]). -export([ensure_vnodes_started/1]). @@ -55,62 +59,65 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. ensure_vnodes_started(Ring) -> case riak_core:vnode_modules() of - [] -> ok; - AppMods -> - case ensure_vnodes_started(AppMods, Ring, []) of - [] -> - Ready = riak_core_ring:ring_ready(Ring), - FutureIndices = riak_core_ring:future_indices(Ring, - node()), - Status = riak_core_ring:member_status(Ring, node()), - case {Ready, FutureIndices, Status} of - {true, [], leaving} -> - case ready_to_exit(AppMods) of - true -> exit_ring_trans(), maybe_shutdown(Ring); - false -> ok - end; - {_, _, invalid} -> - riak_core_ring_manager:refresh_my_ring(); - {_, _, exiting} -> - %% Deliberately do nothing. 
- ok; - {_, _, _} -> ok - end; - _ -> ok - end + [] -> ok; + AppMods -> + case ensure_vnodes_started(AppMods, Ring, []) of + [] -> + Ready = riak_core_ring:ring_ready(Ring), + FutureIndices = riak_core_ring:future_indices(Ring, + node()), + Status = riak_core_ring:member_status(Ring, node()), + case {Ready, FutureIndices, Status} of + {true, [], leaving} -> + case ready_to_exit(AppMods) of + true -> + exit_ring_trans(), + maybe_shutdown(Ring); + false -> ok + end; + {_, _, invalid} -> + riak_core_ring_manager:refresh_my_ring(); + {_, _, exiting} -> + %% Deliberately do nothing. + ok; + {_, _, _} -> ok + end; + _ -> ok + end end. %% Shutdown if we are the only node in the cluster maybe_shutdown(Ring) -> case riak_core_ring:random_other_node(Ring) of - no_node -> riak_core_ring_manager:refresh_my_ring(); - _ -> ok + no_node -> riak_core_ring_manager:refresh_my_ring(); + _ -> ok end. exit_ring_trans() -> riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:exit_member(node(), - Ring2, - node()), - {new_ring, Ring3} - end, - []). + Ring3 = + riak_core_ring:exit_member(node(), + Ring2, + node()), + {new_ring, Ring3} + end, + []). ready_to_exit([]) -> true; ready_to_exit([{_App, Mod} | AppMods]) -> case erlang:function_exported(Mod, ready_to_exit, 0) - andalso not Mod:ready_to_exit() - of - true -> false; - false -> ready_to_exit(AppMods) + andalso not Mod:ready_to_exit() + of + true -> false; + false -> ready_to_exit(AppMods) end. ensure_vnodes_started([], _Ring, Acc) -> lists:flatten(Acc); ensure_vnodes_started([{App, Mod} | T], Ring, Acc) -> - ensure_vnodes_started(T, Ring, - [ensure_vnodes_started({App, Mod}, Ring) | Acc]). + ensure_vnodes_started(T, + Ring, + [ensure_vnodes_started({App, Mod}, Ring) | Acc]). ensure_vnodes_started({App, Mod}, Ring) -> Startable = startable_vnodes(Mod, Ring), @@ -120,55 +127,56 @@ ensure_vnodes_started({App, Mod}, Ring) -> %% (needed to support those vnodes). The hack does not fix %% that dependency: internal techdebt todo list #A7 does. spawn_link(fun () -> - %% Use a registered name as a lock to prevent the same - %% vnode module from being started twice. - ModList = atom_to_list(Mod), - RegName = "riak_core_ring_handler_ensure_" ++ ModList, - try erlang:register(list_to_atom(RegName), self()) catch - error:badarg -> exit(normal) - end, - %% Let the app finish starting... - ok = riak_core:wait_for_application(App), - %% Start the vnodes. - HasStartVnodes = lists:member({start_vnodes, 1}, - Mod:module_info(exports)), - case HasStartVnodes of - true -> Mod:start_vnodes(Startable); - false -> [Mod:start_vnode(I) || I <- Startable] - end, - %% Mark the service as up. - SupName = list_to_atom(atom_to_list(App) ++ "_sup"), - SupPid = erlang:whereis(SupName), - case riak_core:health_check(App) of - undefined -> - riak_core_node_watcher:service_up(App, SupPid); - HealthMFA -> - riak_core_node_watcher:service_up(App, SupPid, - HealthMFA) - end, - exit(normal) - end), + %% Use a registered name as a lock to prevent the same + %% vnode module from being started twice. + ModList = atom_to_list(Mod), + RegName = "riak_core_ring_handler_ensure_" ++ ModList, + try erlang:register(list_to_atom(RegName), self()) catch + error:badarg -> exit(normal) + end, + %% Let the app finish starting... + ok = riak_core:wait_for_application(App), + %% Start the vnodes. 
+ HasStartVnodes = lists:member({start_vnodes, 1}, + Mod:module_info(exports)), + case HasStartVnodes of + true -> Mod:start_vnodes(Startable); + false -> [Mod:start_vnode(I) || I <- Startable] + end, + %% Mark the service as up. + SupName = list_to_atom(atom_to_list(App) ++ "_sup"), + SupPid = erlang:whereis(SupName), + case riak_core:health_check(App) of + undefined -> + riak_core_node_watcher:service_up(App, SupPid); + HealthMFA -> + riak_core_node_watcher:service_up(App, + SupPid, + HealthMFA) + end, + exit(normal) + end), Startable. startable_vnodes(Mod, Ring) -> AllMembers = riak_core_ring:all_members(Ring), case {length(AllMembers), hd(AllMembers) =:= node()} of - {1, true} -> riak_core_ring:my_indices(Ring); - _ -> - {ok, ModExcl} = - riak_core_handoff_manager:get_exclusions(Mod), - Excl = ModExcl -- - riak_core_ring:disowning_indices(Ring, node()), - case riak_core_ring:random_other_index(Ring, Excl) of - no_indices -> - case length(Excl) =:= - riak_core_ring:num_partitions(Ring) - of - true -> []; - false -> riak_core_ring:my_indices(Ring) - end; - RO -> [RO | riak_core_ring:my_indices(Ring)] - end + {1, true} -> riak_core_ring:my_indices(Ring); + _ -> + {ok, ModExcl} = + riak_core_handoff_manager:get_exclusions(Mod), + Excl = ModExcl -- + riak_core_ring:disowning_indices(Ring, node()), + case riak_core_ring:random_other_index(Ring, Excl) of + no_indices -> + case length(Excl) =:= + riak_core_ring:num_partitions(Ring) + of + true -> []; + false -> riak_core_ring:my_indices(Ring) + end; + RO -> [RO | riak_core_ring:my_indices(Ring)] + end end. maybe_start_vnode_proxies(Ring) -> @@ -177,30 +185,30 @@ maybe_start_vnode_proxies(Ring) -> FutureSize = riak_core_ring:future_num_partitions(Ring), Larger = Size < FutureSize, case Larger of - true -> - FutureIdxs = - riak_core_ring:all_owners(riak_core_ring:future_ring(Ring)), - _ = [riak_core_vnode_proxy_sup:start_proxy(Mod, Idx) - || {Idx, _} <- FutureIdxs, Mod <- Mods], - ok; - false -> ok + true -> + FutureIdxs = + riak_core_ring:all_owners(riak_core_ring:future_ring(Ring)), + _ = [riak_core_vnode_proxy_sup:start_proxy(Mod, Idx) + || {Idx, _} <- FutureIdxs, Mod <- Mods], + ok; + false -> ok end. maybe_stop_vnode_proxies(Ring) -> Mods = [M || {_, M} <- riak_core:vnode_modules()], case riak_core_ring:pending_changes(Ring) of - [] -> - Idxs = [{I, M} - || {I, _} <- riak_core_ring:all_owners(Ring), - M <- Mods], - ProxySpecs = - supervisor:which_children(riak_core_vnode_proxy_sup), - Running = [{I, M} - || {{M, I}, _, _, _} <- ProxySpecs, - lists:member(M, Mods)], - ToShutdown = Running -- Idxs, - _ = [riak_core_vnode_proxy_sup:stop_proxy(M, I) - || {I, M} <- ToShutdown], - ok; - _ -> ok + [] -> + Idxs = [{I, M} + || {I, _} <- riak_core_ring:all_owners(Ring), + M <- Mods], + ProxySpecs = + supervisor:which_children(riak_core_vnode_proxy_sup), + Running = [{I, M} + || {{M, I}, _, _, _} <- ProxySpecs, + lists:member(M, Mods)], + ToShutdown = Running -- Idxs, + _ = [riak_core_vnode_proxy_sup:stop_proxy(M, I) + || {I, M} <- ToShutdown], + ok; + _ -> ok end. diff --git a/src/riak_core_ring_manager.erl b/src/riak_core_ring_manager.erl index 4ab1fbcbf..05cf4134b 100644 --- a/src/riak_core_ring_manager.erl +++ b/src/riak_core_ring_manager.erl @@ -61,17 +61,34 @@ -behaviour(gen_server). 
--export([start_link/0, start_link/1, get_my_ring/0, - get_raw_ring/0, get_raw_ring_chashbin/0, - get_chash_bin/0, get_ring_id/0, get_bucket_meta/1, - refresh_my_ring/0, refresh_ring/2, set_my_ring/1, - write_ringfile/0, prune_ringfiles/0, read_ringfile/1, - find_latest_ringfile/0, force_update/0, - do_write_ringfile/1, ring_trans/2, run_fixups/3, - set_cluster_name/1, is_stable_ring/0]). - --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). +-export([start_link/0, + start_link/1, + get_my_ring/0, + get_raw_ring/0, + get_raw_ring_chashbin/0, + get_chash_bin/0, + get_ring_id/0, + get_bucket_meta/1, + refresh_my_ring/0, + refresh_ring/2, + set_my_ring/1, + write_ringfile/0, + prune_ringfiles/0, + read_ringfile/1, + find_latest_ringfile/0, + force_update/0, + do_write_ringfile/1, + ring_trans/2, + run_fixups/3, + set_cluster_name/1, + is_stable_ring/0]). + +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -ifdef(TEST). @@ -80,10 +97,12 @@ -endif. -record(state, - {mode, raw_ring, ring_changed_time, inactivity_timer}). + {mode, raw_ring, ring_changed_time, inactivity_timer}). --export([setup_ets/1, cleanup_ets/1, set_ring_global/1, - promote_ring/0]). +-export([setup_ets/1, + cleanup_ets/1, + set_ring_global/1, + promote_ring/0]). %% For EUnit testing @@ -102,47 +121,52 @@ %% =================================================================== start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [live], - []). + gen_server:start_link({local, ?MODULE}, + ?MODULE, + [live], + []). %% Testing entry point start_link(test) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [test], - []). + gen_server:start_link({local, ?MODULE}, + ?MODULE, + [test], + []). -spec get_my_ring() -> {ok, - riak_core_ring:riak_core_ring()} | - {error, any()}. + riak_core_ring:riak_core_ring()} | + {error, any()}. get_my_ring() -> Ring = case persistent_term:get(?RING_KEY, undefined) of - ets -> - case ets:lookup(?ETS, ring) of - [{_, RingETS}] -> RingETS; - _ -> undefined - end; - RingMochi -> RingMochi - end, + ets -> + case ets:lookup(?ETS, ring) of + [{_, RingETS}] -> RingETS; + _ -> undefined + end; + RingMochi -> RingMochi + end, case Ring of - Ring when is_tuple(Ring) -> {ok, Ring}; - undefined -> {error, no_ring} + Ring when is_tuple(Ring) -> {ok, Ring}; + undefined -> {error, no_ring} end. get_raw_ring() -> try Ring = ets:lookup_element(?ETS, raw_ring, 2), - {ok, Ring} + {ok, Ring} catch - _:_ -> gen_server:call(?MODULE, get_raw_ring, infinity) + _:_ -> gen_server:call(?MODULE, get_raw_ring, infinity) end. get_raw_ring_chashbin() -> try Ring = ets:lookup_element(?ETS, raw_ring, 2), - {ok, CHBin} = get_chash_bin(), - {ok, Ring, CHBin} + {ok, CHBin} = get_chash_bin(), + {ok, Ring, CHBin} catch - _:_ -> - gen_server:call(?MODULE, get_raw_ring_chashbin, - infinity) + _:_ -> + gen_server:call(?MODULE, + get_raw_ring_chashbin, + infinity) end. %% @spec refresh_my_ring() -> ok @@ -151,7 +175,7 @@ refresh_my_ring() -> refresh_ring(Node, ClusterName) -> gen_server:cast({?MODULE, Node}, - {refresh_my_ring, ClusterName}). + {refresh_my_ring, ClusterName}). %% @spec set_my_ring(riak_core_ring:riak_core_ring()) -> ok set_my_ring(Ring) -> @@ -159,8 +183,8 @@ set_my_ring(Ring) -> get_ring_id() -> case ets:lookup(?ETS, id) of - [{_, Id}] -> Id; - _ -> {0, 0} + [{_, Id}] -> Id; + _ -> {0, 0} end. %% @doc Return metadata for the given bucket. 
If a bucket @@ -174,16 +198,16 @@ get_bucket_meta({_Type, _Name} = Bucket) -> riak_core_bucket:get_bucket(Bucket); get_bucket_meta(Bucket) -> case ets:lookup(?ETS, {bucket, Bucket}) of - [] -> undefined; - [{_, undefined}] -> undefined; - [{_, Meta}] -> {ok, Meta} + [] -> undefined; + [{_, undefined}] -> undefined; + [{_, Meta}] -> {ok, Meta} end. %% @doc Return the {@link chashbin} generated from the current ring get_chash_bin() -> case ets:lookup(?ETS, chashbin) of - [{chashbin, CHBin}] -> {ok, CHBin}; - _ -> {error, no_ring} + [{chashbin, CHBin}] -> {ok, CHBin}; + _ -> {error, no_ring} end. %% @spec write_ringfile() -> ok @@ -191,12 +215,14 @@ write_ringfile() -> gen_server:cast(?MODULE, write_ringfile). ring_trans(Fun, Args) -> - gen_server:call(?MODULE, {ring_trans, Fun, Args}, - infinity). + gen_server:call(?MODULE, + {ring_trans, Fun, Args}, + infinity). set_cluster_name(Name) -> - gen_server:call(?MODULE, {set_cluster_name, Name}, - infinity). + gen_server:call(?MODULE, + {set_cluster_name, Name}, + infinity). is_stable_ring() -> gen_server:call(?MODULE, is_stable_ring, infinity). @@ -205,116 +231,122 @@ is_stable_ring() -> %% ring in a manner that will trigger reconciliation on gossip. force_update() -> ring_trans(fun (Ring, _) -> - NewRing = riak_core_ring:update_member_meta(node(), - Ring, node(), - unused, - erlang:timestamp()), - {new_ring, NewRing} - end, - []), + NewRing = riak_core_ring:update_member_meta(node(), + Ring, + node(), + unused, + erlang:timestamp()), + {new_ring, NewRing} + end, + []), ok. do_write_ringfile(Ring) -> case ring_dir() of - "" -> nop; - Dir -> - {{Year, Month, Day}, {Hour, Minute, Second}} = - calendar:universal_time(), - TS = - io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - [Year, Month, Day, Hour, Minute, Second]), - Cluster = application:get_env(riak_core, cluster_name, - undefined), - FN = Dir ++ "/riak_core_ring." ++ Cluster ++ TS, - do_write_ringfile(Ring, FN) + "" -> nop; + Dir -> + {{Year, Month, Day}, {Hour, Minute, Second}} = + calendar:universal_time(), + TS = + io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + [Year, Month, Day, Hour, Minute, Second]), + Cluster = application:get_env(riak_core, + cluster_name, + undefined), + FN = Dir ++ "/riak_core_ring." ++ Cluster ++ TS, + do_write_ringfile(Ring, FN) end. do_write_ringfile(Ring, FN) -> ok = filelib:ensure_dir(FN), try ok = riak_core_util:replace_file(FN, - term_to_binary(Ring)) + term_to_binary(Ring)) catch - _:Err -> - logger:error("Unable to write ring to \"~s\" - ~p\n", - [FN, Err]), - {error, Err} + _:Err -> + logger:error("Unable to write ring to \"~s\" - ~p\n", + [FN, Err]), + {error, Err} end. %% @spec find_latest_ringfile() -> string() find_latest_ringfile() -> Dir = ring_dir(), case file:list_dir(Dir) of - {ok, Filenames} -> - Cluster = application:get_env(riak_core, cluster_name, - undefined), - Timestamps = [list_to_integer(TS) - || {"riak_core_ring", C1, TS} - <- [list_to_tuple(string:tokens(FN, ".")) - || FN <- Filenames], - C1 =:= Cluster], - SortedTimestamps = - lists:reverse(lists:sort(Timestamps)), - case SortedTimestamps of - [Latest | _] -> - {ok, - Dir ++ - "/riak_core_ring." ++ - Cluster ++ "." 
++ integer_to_list(Latest)}; - _ -> {error, not_found} - end; - {error, Reason} -> {error, Reason} + {ok, Filenames} -> + Cluster = application:get_env(riak_core, + cluster_name, + undefined), + Timestamps = [list_to_integer(TS) + || {"riak_core_ring", C1, TS} + <- [list_to_tuple(string:tokens(FN, ".")) + || FN <- Filenames], + C1 =:= Cluster], + SortedTimestamps = + lists:reverse(lists:sort(Timestamps)), + case SortedTimestamps of + [Latest | _] -> + {ok, + Dir ++ + "/riak_core_ring." ++ + Cluster ++ "." ++ integer_to_list(Latest)}; + _ -> {error, not_found} + end; + {error, Reason} -> {error, Reason} end. %% @spec read_ringfile(string()) -> riak_core_ring:riak_core_ring() | {error, any()} read_ringfile(RingFile) -> case file:read_file(RingFile) of - {ok, Binary} -> binary_to_term(Binary); - {error, Reason} -> {error, Reason} + {ok, Binary} -> binary_to_term(Binary); + {error, Reason} -> {error, Reason} end. %% @spec prune_ringfiles() -> ok | {error, Reason} prune_ringfiles() -> case ring_dir() of - "" -> ok; - Dir -> - Cluster = application:get_env(riak_core, cluster_name, - undefined), - case file:list_dir(Dir) of - {error, enoent} -> ok; - {error, Reason} -> {error, Reason}; - {ok, []} -> ok; - {ok, Filenames} -> - Timestamps = [TS - || {"riak_core_ring", C1, TS} - <- [list_to_tuple(string:tokens(FN, ".")) - || FN <- Filenames], - C1 =:= Cluster], - if Timestamps /= [] -> - %% there are existing ring files - TSPat = [io_lib:fread("~4d~2d~2d~2d~2d~2d", TS) - || TS <- Timestamps], - TSL = lists:reverse(lists:sort([TS - || {ok, TS, []} - <- TSPat])), - Keep = prune_list(TSL), - KeepTSs = - [lists:flatten(io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - K)) - || K <- Keep], - DelFNs = [Dir ++ "/" ++ FN - || FN <- Filenames, - lists:all(fun (TS) -> - string:str(FN, TS) =:= 0 - end, - KeepTSs)], - _ = [file:delete(DelFN) || DelFN <- DelFNs], - ok; - true -> - %% directory wasn't empty, but there are no ring - %% files in it - ok - end - end + "" -> ok; + Dir -> + Cluster = application:get_env(riak_core, + cluster_name, + undefined), + case file:list_dir(Dir) of + {error, enoent} -> ok; + {error, Reason} -> {error, Reason}; + {ok, []} -> ok; + {ok, Filenames} -> + Timestamps = [TS + || {"riak_core_ring", C1, TS} + <- [list_to_tuple(string:tokens(FN, + ".")) + || FN <- Filenames], + C1 =:= Cluster], + if Timestamps /= [] -> + %% there are existing ring files + TSPat = [io_lib:fread("~4d~2d~2d~2d~2d~2d", TS) + || TS <- Timestamps], + TSL = lists:reverse(lists:sort([TS + || {ok, TS, []} + <- TSPat])), + Keep = prune_list(TSL), + KeepTSs = + [lists:flatten(io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + K)) + || K <- Keep], + DelFNs = [Dir ++ "/" ++ FN + || FN <- Filenames, + lists:all(fun (TS) -> + string:str(FN, TS) =:= + 0 + end, + KeepTSs)], + _ = [file:delete(DelFN) || DelFN <- DelFNs], + ok; + true -> + %% directory wasn't empty, but there are no ring + %% files in it + ok + end + end end. -ifdef(TEST). @@ -322,7 +354,7 @@ prune_ringfiles() -> %% @private (only used for test instances) stop() -> try gen_server:call(?MODULE, stop) catch - exit:{noproc, _} -> ok + exit:{noproc, _} -> ok end. -endif. 
@@ -341,28 +373,28 @@ init([Mode]) -> reload_ring(test) -> riak_core_ring:fresh(16, node()); reload_ring(live) -> case riak_core_ring_manager:find_latest_ringfile() of - {ok, RingFile} -> - case riak_core_ring_manager:read_ringfile(RingFile) of - {error, Reason} -> - logger:critical("Failed to read ring file: ~p", - [riak_core_util:posix_error(Reason)]), - throw({error, Reason}); - Ring -> Ring - end; - {error, not_found} -> - logger:warning("No ring file available."), - riak_core_ring:fresh(); - {error, Reason} -> - logger:critical("Failed to load ring file: ~p", - [riak_core_util:posix_error(Reason)]), - throw({error, Reason}) + {ok, RingFile} -> + case riak_core_ring_manager:read_ringfile(RingFile) of + {error, Reason} -> + logger:critical("Failed to read ring file: ~p", + [riak_core_util:posix_error(Reason)]), + throw({error, Reason}); + Ring -> Ring + end; + {error, not_found} -> + logger:warning("No ring file available."), + riak_core_ring:fresh(); + {error, Reason} -> + logger:critical("Failed to load ring file: ~p", + [riak_core_util:posix_error(Reason)]), + throw({error, Reason}) end. handle_call(get_raw_ring, _From, - #state{raw_ring = Ring} = State) -> + #state{raw_ring = Ring} = State) -> {reply, {ok, Ring}, State}; handle_call(get_raw_ring_chashbin, _From, - #state{raw_ring = Ring} = State) -> + #state{raw_ring = Ring} = State) -> {ok, CHBin} = get_chash_bin(), {reply, {ok, Ring, CHBin}, State}; handle_call({set_my_ring, Ring}, _From, State) -> @@ -379,29 +411,29 @@ handle_call(refresh_my_ring, _From, State) -> riak_core:stop("node removal completed, exiting."), {reply, ok, State2}; handle_call({ring_trans, Fun, Args}, _From, - State = #state{raw_ring = Ring}) -> + State = #state{raw_ring = Ring}) -> case catch Fun(Ring, Args) of - {new_ring, NewRing} -> - State2 = prune_write_notify_ring(NewRing, State), - riak_core_gossip:random_recursive_gossip(NewRing), - {reply, {ok, NewRing}, State2}; - {set_only, NewRing} -> - State2 = prune_write_ring(NewRing, State), - {reply, {ok, NewRing}, State2}; - {reconciled_ring, NewRing} -> - State2 = prune_write_notify_ring(NewRing, State), - riak_core_gossip:recursive_gossip(NewRing), - {reply, {ok, NewRing}, State2}; - ignore -> {reply, not_changed, State}; - {ignore, Reason} -> - {reply, {not_changed, Reason}, State}; - Other -> - logger:error("ring_trans: invalid return value: ~p", - [Other]), - {reply, not_changed, State} + {new_ring, NewRing} -> + State2 = prune_write_notify_ring(NewRing, State), + riak_core_gossip:random_recursive_gossip(NewRing), + {reply, {ok, NewRing}, State2}; + {set_only, NewRing} -> + State2 = prune_write_ring(NewRing, State), + {reply, {ok, NewRing}, State2}; + {reconciled_ring, NewRing} -> + State2 = prune_write_notify_ring(NewRing, State), + riak_core_gossip:recursive_gossip(NewRing), + {reply, {ok, NewRing}, State2}; + ignore -> {reply, not_changed, State}; + {ignore, Reason} -> + {reply, {not_changed, Reason}, State}; + Other -> + logger:error("ring_trans: invalid return value: ~p", + [Other]), + {reply, not_changed, State} end; handle_call({set_cluster_name, Name}, _From, - State = #state{raw_ring = Ring}) -> + State = #state{raw_ring = Ring}) -> NewRing = riak_core_ring:set_cluster_name(Ring, Name), State2 = prune_write_notify_ring(NewRing, State), {reply, ok, State2}; @@ -414,29 +446,31 @@ handle_call(stop, _From, State) -> handle_cast({refresh_my_ring, ClusterName}, State) -> {ok, Ring} = get_my_ring(), case riak_core_ring:cluster_name(Ring) of - ClusterName -> handle_cast(refresh_my_ring, State); - _ -> 
{noreply, State} + ClusterName -> handle_cast(refresh_my_ring, State); + _ -> {noreply, State} end; handle_cast(refresh_my_ring, State) -> - {_, _, State2} = handle_call(refresh_my_ring, undefined, - State), + {_, _, State2} = handle_call(refresh_my_ring, + undefined, + State), {noreply, State2}; handle_cast(write_ringfile, test) -> {noreply, test}; handle_cast(write_ringfile, - State = #state{raw_ring = Ring}) -> - ok = do_write_ringfile(Ring), {noreply, State}. + State = #state{raw_ring = Ring}) -> + ok = do_write_ringfile(Ring), + {noreply, State}. handle_info(inactivity_timeout, State) -> case is_stable_ring(State) of - {true, DeltaMS} -> - logger:debug("Promoting ring after ~p", [DeltaMS]), - promote_ring(), - State2 = State#state{inactivity_timer = undefined}, - {noreply, State2}; - {false, DeltaMS} -> - Remaining = (?PROMOTE_TIMEOUT) - DeltaMS, - State2 = set_timer(Remaining, State), - {noreply, State2} + {true, DeltaMS} -> + logger:debug("Promoting ring after ~p", [DeltaMS]), + promote_ring(), + State2 = State#state{inactivity_timer = undefined}, + {noreply, State2}; + {false, DeltaMS} -> + Remaining = (?PROMOTE_TIMEOUT) - DeltaMS, + State2 = set_timer(Remaining, State), + {noreply, State2} end; handle_info(_Info, State) -> {noreply, State}. @@ -451,66 +485,72 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% =================================================================== ring_dir() -> - case application:get_env(riak_core, ring_state_dir, - undefined) - of - undefined -> - filename:join(application:get_env(riak_core, - platform_data_dir, "data"), - "ring"); - D -> D + case application:get_env(riak_core, + ring_state_dir, + undefined) + of + undefined -> + filename:join(application:get_env(riak_core, + platform_data_dir, + "data"), + "ring"); + D -> D end. prune_list([X | Rest]) -> - lists:usort(lists:append([[X], back(1, X, Rest), - back(2, X, Rest), back(3, X, Rest), - back(4, X, Rest), back(5, X, Rest)])). + lists:usort(lists:append([[X], + back(1, X, Rest), + back(2, X, Rest), + back(3, X, Rest), + back(4, X, Rest), + back(5, X, Rest)])). back(_N, _X, []) -> []; back(N, X, [H | T]) -> case lists:nth(N, X) =:= lists:nth(N, H) of - true -> back(N, X, T); - false -> [H] + true -> back(N, X, T); + false -> [H] end. %% @private run_fixups([], _Bucket, BucketProps) -> BucketProps; run_fixups([{App, Fixup} | T], BucketName, - BucketProps) -> + BucketProps) -> BP = try Fixup:fixup(BucketName, BucketProps) of - {ok, NewBucketProps} -> NewBucketProps; - {error, Reason} -> - logger:error("Error while running bucket fixup module " - "~p from application ~p on bucket ~p: " - "~p", - [Fixup, App, BucketName, Reason]), - BucketProps - catch - What:Why -> - logger:error("Crash while running bucket fixup module " - "~p from application ~p on bucket ~p " - ": ~p:~p", - [Fixup, App, BucketName, What, Why]), - BucketProps - end, + {ok, NewBucketProps} -> NewBucketProps; + {error, Reason} -> + logger:error("Error while running bucket fixup module " + "~p from application ~p on bucket ~p: " + "~p", + [Fixup, App, BucketName, Reason]), + BucketProps + catch + What:Why -> + logger:error("Crash while running bucket fixup module " + "~p from application ~p on bucket ~p " + ": ~p:~p", + [Fixup, App, BucketName, What, Why]), + BucketProps + end, run_fixups(T, BucketName, BP). 
set_ring(Ring, State) -> set_ring_global(Ring), Now = os:timestamp(), State2 = State#state{raw_ring = Ring, - ring_changed_time = Now}, + ring_changed_time = Now}, State3 = maybe_set_timer(?PROMOTE_TIMEOUT, State2), State3. maybe_set_timer(Duration, - State = #state{inactivity_timer = undefined}) -> + State = #state{inactivity_timer = undefined}) -> set_timer(Duration, State); maybe_set_timer(_Duration, State) -> State. set_timer(Duration, State) -> - Timer = erlang:send_after(Duration, self(), - inactivity_timeout), + Timer = erlang:send_after(Duration, + self(), + inactivity_timeout), State#state{inactivity_timer = Timer}. setup_ets(Mode) -> @@ -518,14 +558,14 @@ setup_ets(Mode) -> %% eunit tests, but is unneeded for normal Riak operation. catch ets:delete(?ETS), Access = case Mode of - live -> protected; - test -> public - end, + live -> protected; + test -> public + end, (?ETS) = ets:new(?ETS, - [named_table, Access, {read_concurrency, true}]), + [named_table, Access, {read_concurrency, true}]), Id = reset_ring_id(), ets:insert(?ETS, - [{changes, 0}, {promoted, 0}, {id, Id}]), + [{changes, 0}, {promoted, 0}, {id, Id}]), ok. cleanup_ets(test) -> ets:delete(?ETS). @@ -534,11 +574,11 @@ reset_ring_id() -> %% Maintain ring id epoch using persistent_term to ensure ring id remains %% monotonic even if the riak_core_ring_manager crashes and restarts Epoch = case persistent_term:get(riak_ring_id_epoch, - undefined) - of - undefined -> 0; - Value -> Value - end, + undefined) + of + undefined -> 0; + Value -> Value + end, persistent_term:put(riak_ring_id_epoch, Epoch + 1), {Epoch + 1, 0}. @@ -547,39 +587,40 @@ reset_ring_id() -> %% process. set_ring_global(Ring) -> DefaultProps = case application:get_env(riak_core, - default_bucket_props) - of - {ok, Val} -> Val; - _ -> [] - end, + default_bucket_props) + of + {ok, Val} -> Val; + _ -> [] + end, %% run fixups on the ring before storing it in persistent_term FixedRing = case riak_core:bucket_fixups() of - [] -> Ring; - Fixups -> - Buckets = riak_core_ring:get_buckets(Ring), - lists:foldl(fun (Bucket, AccRing) -> - BucketProps = - riak_core_bucket:get_bucket(Bucket, - Ring), - %% Merge anything in the default properties but not in - %% the bucket's properties. This is to ensure default - %% properties added after the bucket is created are - %% inherited to the bucket. - MergedProps = - riak_core_bucket:merge_props(BucketProps, - DefaultProps), - %% fixup the ring - NewBucketProps = run_fixups(Fixups, - Bucket, - MergedProps), - %% update the bucket in the ring - riak_core_ring:update_meta({bucket, - Bucket}, - NewBucketProps, - AccRing) - end, - Ring, Buckets) - end, + [] -> Ring; + Fixups -> + Buckets = riak_core_ring:get_buckets(Ring), + lists:foldl(fun (Bucket, AccRing) -> + BucketProps = + riak_core_bucket:get_bucket(Bucket, + Ring), + %% Merge anything in the default properties but not in + %% the bucket's properties. This is to ensure default + %% properties added after the bucket is created are + %% inherited to the bucket. + MergedProps = + riak_core_bucket:merge_props(BucketProps, + DefaultProps), + %% fixup the ring + NewBucketProps = run_fixups(Fixups, + Bucket, + MergedProps), + %% update the bucket in the ring + riak_core_ring:update_meta({bucket, + Bucket}, + NewBucketProps, + AccRing) + end, + Ring, + Buckets) + end, %% Mark ring as tainted to check if it is ever leaked over gossip or %% relied upon for any non-local ring operations. 
TaintedRing = riak_core_ring:set_tainted(FixedRing), @@ -591,27 +632,29 @@ set_ring_global(Ring) -> %% special meaning in `riak_core_bucket:get_bucket_props/2`. We then %% cleanup these values in a subsequent `ets:match_delete`. OldBuckets = ets:select(?ETS, - [{{{bucket, '$1'}, '_'}, [], ['$1']}]), + [{{{bucket, '$1'}, '_'}, [], ['$1']}]), BucketDefaults = [{{bucket, Bucket}, undefined} - || Bucket <- OldBuckets], + || Bucket <- OldBuckets], BucketMeta = [{{bucket, Bucket}, Meta} - || Bucket <- riak_core_ring:get_buckets(TaintedRing), - {ok, Meta} - <- [riak_core_ring:get_meta({bucket, Bucket}, - TaintedRing)]], + || Bucket <- riak_core_ring:get_buckets(TaintedRing), + {ok, Meta} + <- [riak_core_ring:get_meta({bucket, Bucket}, + TaintedRing)]], BucketMeta2 = lists:ukeysort(1, - BucketMeta ++ BucketDefaults), + BucketMeta ++ BucketDefaults), CHBin = - chashbin:create(riak_core_ring:chash(TaintedRing)), + chashbin:create(riak_core_ring:chash(TaintedRing)), {Epoch, Id} = ets:lookup_element(?ETS, id, 2), - Actions = [{ring, TaintedRing}, {raw_ring, Ring}, - {id, {Epoch, Id + 1}}, {chashbin, CHBin} - | BucketMeta2], + Actions = [{ring, TaintedRing}, + {raw_ring, Ring}, + {id, {Epoch, Id + 1}}, + {chashbin, CHBin} + | BucketMeta2], ets:insert(?ETS, Actions), ets:match_delete(?ETS, {{bucket, '_'}, undefined}), case persistent_term:get(?RING_KEY, undefined) of - ets -> ok; - _ -> persistent_term:put(?RING_KEY, ets) + ets -> ok; + _ -> persistent_term:put(?RING_KEY, ets) end, ok. @@ -627,7 +670,7 @@ prune_write_notify_ring(Ring, State) -> prune_write_ring(Ring, State) -> riak_core_ring:check_tainted(Ring, - "Error: Persisting tainted ring"), + "Error: Persisting tainted ring"), ok = riak_core_ring_manager:prune_ringfiles(), _ = do_write_ringfile(Ring), State2 = set_ring(Ring, State), @@ -635,7 +678,7 @@ prune_write_ring(Ring, State) -> is_stable_ring(#state{ring_changed_time = Then}) -> DeltaUS = erlang:max(0, - timer:now_diff(os:timestamp(), Then)), + timer:now_diff(os:timestamp(), Then)), DeltaMS = DeltaUS div 1000, IsStable = DeltaMS >= (?PROMOTE_TIMEOUT), {IsStable, DeltaMS}. @@ -647,8 +690,11 @@ is_stable_ring(#state{ring_changed_time = Then}) -> back_test() -> X = [1, 2, 3], - List1 = [[1, 2, 3], [4, 2, 3], [7, 8, 3], [11, 12, 13], - [1, 2, 3]], + List1 = [[1, 2, 3], + [4, 2, 3], + [7, 8, 3], + [11, 12, 13], + [1, 2, 3]], List2 = [[7, 8, 9], [1, 2, 3]], List3 = [[1, 2, 3]], ?assertEqual([[4, 2, 3]], (back(1, X, List1))), @@ -659,15 +705,19 @@ back_test() -> prune_list_test() -> TSList1 = [[2011, 2, 28, 16, 32, 16], - [2011, 2, 28, 16, 32, 36], [2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16], [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 32, 36], + [2011, 2, 28, 16, 30, 27], + [2011, 2, 28, 16, 32, 16], + [2011, 2, 28, 16, 32, 36]], TSList2 = [[2011, 2, 28, 16, 32, 36], - [2011, 2, 28, 16, 31, 16], [2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16], [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 31, 16], + [2011, 2, 28, 16, 30, 27], + [2011, 2, 28, 16, 32, 16], + [2011, 2, 28, 16, 32, 36]], PrunedList1 = [[2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16]], + [2011, 2, 28, 16, 32, 16]], PrunedList2 = [[2011, 2, 28, 16, 31, 16], - [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 32, 36]], ?assertEqual(PrunedList1, (prune_list(TSList1))), ?assertEqual(PrunedList2, (prune_list(TSList2))). 
@@ -678,8 +728,8 @@ set_ring_global_test() -> set_ring_global(Ring), promote_ring(), ?assert((riak_core_ring:nearly_equal(Ring, - persistent_term:get(?RING_KEY, - undefined)))), + persistent_term:get(?RING_KEY, + undefined)))), cleanup_ets(test). set_my_ring_test() -> @@ -694,32 +744,33 @@ set_my_ring_test() -> refresh_my_ring_test() -> {spawn, fun () -> - setup_ets(test), - Core_Settings = [{ring_creation_size, 4}, - {ring_state_dir, "_build/test/tmp"}, - {cluster_name, "test"}], - [begin - put({?MODULE, AppKey}, - application:get_env(riak_core, AppKey, undefined)), - ok = application:set_env(riak_core, AppKey, Val) - end - || {AppKey, Val} <- Core_Settings], - stop_core_processes(), - riak_core_ring_events:start_link(), - riak_core_ring_manager:start_link(test), - riak_core_vnode_sup:start_link(), - riak_core_vnode_master:start_link(riak_core_vnode), - riak_core_test_util:setup_mockring1(), - ?assertEqual(ok, - (riak_core_ring_manager:refresh_my_ring())), - stop_core_processes(), - %% Cleanup the ring file created for this test - {ok, RingFile} = find_latest_ringfile(), - file:delete(RingFile), - [ok = application:set_env(riak_core, AppKey, - get({?MODULE, AppKey})) - || {AppKey, _Val} <- Core_Settings], - ok + setup_ets(test), + Core_Settings = [{ring_creation_size, 4}, + {ring_state_dir, "_build/test/tmp"}, + {cluster_name, "test"}], + [begin + put({?MODULE, AppKey}, + application:get_env(riak_core, AppKey, undefined)), + ok = application:set_env(riak_core, AppKey, Val) + end + || {AppKey, Val} <- Core_Settings], + stop_core_processes(), + riak_core_ring_events:start_link(), + riak_core_ring_manager:start_link(test), + riak_core_vnode_sup:start_link(), + riak_core_vnode_master:start_link(riak_core_vnode), + riak_core_test_util:setup_mockring1(), + ?assertEqual(ok, + (riak_core_ring_manager:refresh_my_ring())), + stop_core_processes(), + %% Cleanup the ring file created for this test + {ok, RingFile} = find_latest_ringfile(), + file:delete(RingFile), + [ok = application:set_env(riak_core, + AppKey, + get({?MODULE, AppKey})) + || {AppKey, _Val} <- Core_Settings], + ok end}. 
stop_core_processes() -> @@ -741,21 +792,21 @@ do_write_ringfile_test() -> %% Check happy path GenR = fun (Name) -> riak_core_ring:fresh(64, Name) end, ?assertEqual(ok, - (do_write_ringfile(GenR(happy), ?TEST_RINGFILE))), + (do_write_ringfile(GenR(happy), ?TEST_RINGFILE))), %% errors expected error_logger:tty(false), %% Check write fails (create .tmp file with no write perms) ok = file:write_file(?TMP_RINGFILE, - <<"no write for you">>), + <<"no write for you">>), ok = file:change_mode(?TMP_RINGFILE, 8#00444), ?assertMatch({error, _}, - (do_write_ringfile(GenR(tmp_perms), ?TEST_RINGFILE))), + (do_write_ringfile(GenR(tmp_perms), ?TEST_RINGFILE))), ok = file:change_mode(?TMP_RINGFILE, 8#00644), ok = file:delete(?TMP_RINGFILE), %% Check rename fails ok = file:change_mode(?TEST_RINGDIR, 8#00444), ?assertMatch({error, _}, - (do_write_ringfile(GenR(ring_perms), ?TEST_RINGFILE))), + (do_write_ringfile(GenR(ring_perms), ?TEST_RINGFILE))), ok = file:change_mode(?TEST_RINGDIR, 8#00755), error_logger:tty(true), %% Cleanup the ring file created for this test @@ -768,13 +819,13 @@ is_stable_ring_test() -> Within = {A, B - TimeoutSecs div 2, C}, Outside = {A, B - (TimeoutSecs + 1), C}, ?assertMatch({true, _}, - (is_stable_ring(#state{ring_changed_time = - {0, 0, 0}}))), + (is_stable_ring(#state{ring_changed_time = + {0, 0, 0}}))), ?assertMatch({true, _}, - (is_stable_ring(#state{ring_changed_time = Outside}))), + (is_stable_ring(#state{ring_changed_time = Outside}))), ?assertMatch({false, _}, - (is_stable_ring(#state{ring_changed_time = Within}))), + (is_stable_ring(#state{ring_changed_time = Within}))), ?assertMatch({false, _}, - (is_stable_ring(#state{ring_changed_time = Now}))). + (is_stable_ring(#state{ring_changed_time = Now}))). -endif. diff --git a/src/riak_core_ring_util.erl b/src/riak_core_ring_util.erl index 936bd1048..96c68d55b 100644 --- a/src/riak_core_ring_util.erl +++ b/src/riak_core_ring_util.erl @@ -21,9 +21,13 @@ %% ------------------------------------------------------------------- -module(riak_core_ring_util). --export([assign/2, check_ring/0, check_ring/1, - check_ring/2, hash_to_partition_id/2, - partition_id_to_hash/2, hash_is_partition_boundary/2]). +-export([assign/2, + check_ring/0, + check_ring/1, + check_ring/2, + hash_to_partition_id/2, + partition_id_to_hash/2, + hash_is_partition_boundary/2]). -ifdef(TEST). @@ -34,11 +38,11 @@ %% @doc Forcibly assign a partition to a specific node assign(Partition, ToNode) -> F = fun (Ring, _) -> - {new_ring, - riak_core_ring:transfer_node(Partition, ToNode, Ring)} - end, + {new_ring, + riak_core_ring:transfer_node(Partition, ToNode, Ring)} + end, {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), + undefined), ok. %% @doc Check the local ring for any preflists that do not satisfy n_val @@ -48,7 +52,7 @@ check_ring() -> check_ring(Ring) -> {ok, Props} = application:get_env(riak_core, - default_bucket_props), + default_bucket_props), {n_val, Nval} = lists:keyfind(n_val, 1, Props), check_ring(Ring, Nval). @@ -56,17 +60,18 @@ check_ring(Ring) -> check_ring(Ring, Nval) -> Preflists = riak_core_ring:all_preflists(Ring, Nval), lists:foldl(fun (PL, Acc) -> - PLNodes = lists:usort([Node || {_, Node} <- PL]), - case length(PLNodes) of - Nval -> Acc; - _ -> ordsets:add_element(PL, Acc) - end - end, - [], Preflists). + PLNodes = lists:usort([Node || {_, Node} <- PL]), + case length(PLNodes) of + Nval -> Acc; + _ -> ordsets:add_element(PL, Acc) + end + end, + [], + Preflists). 
 -spec hash_to_partition_id(chash:index() |
-                           chash:index_as_int(),
-                           riak_core_ring:ring_size()) -> riak_core_ring:partition_id().
+                               chash:index_as_int(),
+                               riak_core_ring:ring_size()) -> riak_core_ring:partition_id().

 %% @doc Map a key hash (as binary or integer) to a partition ID [0, ring_size)
 hash_to_partition_id(CHashKey, RingSize)
@@ -78,15 +83,15 @@ hash_to_partition_id(CHashInt, RingSize) ->

 -spec partition_id_to_hash(riak_core_ring:partition_id(),
-                           pos_integer()) -> chash:index_as_int().
+                               pos_integer()) -> chash:index_as_int().

 %% @doc Identify the first key hash (integer form) in a partition ID [0, ring_size)
 partition_id_to_hash(Id, RingSize) ->
     Id * chash:ring_increment(RingSize).

 -spec hash_is_partition_boundary(chash:index() |
-                                 chash:index_as_int(),
-                                 pos_integer()) -> boolean().
+                                     chash:index_as_int(),
+                                     pos_integer()) -> boolean().

 %% @doc For user-facing tools, indicate whether a specified hash value
 %% is a valid "boundary" value (first hash in some partition)
@@ -108,49 +113,55 @@ hash_is_partition_boundary(CHashInt, RingSize) ->
 %% Partition boundaries are reversable.
 reverse_test() ->
     IntIndex = riak_core_ring_util:partition_id_to_hash(31,
-                                                       32),
+                                                        32),
     HashIndex = <<IntIndex:160>>,
     ?assertEqual(31,
-                 (riak_core_ring_util:hash_to_partition_id(HashIndex,
-                                                           32))),
+                 (riak_core_ring_util:hash_to_partition_id(HashIndex,
+                                                           32))),
     ?assertEqual(0,
-                 (riak_core_ring_util:hash_to_partition_id(<<0:160>>,
-                                                           32))).
+                 (riak_core_ring_util:hash_to_partition_id(<<0:160>>,
+                                                           32))).

 %% Index values somewhere in the middle of a partition can be mapped
 %% to partition IDs.
 partition_test() ->
     IntIndex = riak_core_ring_util:partition_id_to_hash(20,
-                                                       32)
-                + chash:ring_increment(32) div 3,
+                                                        32)
+                    + chash:ring_increment(32) div 3,
     HashIndex = <<IntIndex:160>>,
     ?assertEqual(20,
-                 (riak_core_ring_util:hash_to_partition_id(HashIndex,
-                                                           32))).
+                 (riak_core_ring_util:hash_to_partition_id(HashIndex,
+                                                           32))).

 %% Index values divisible by partition size are boundary values, others are not
 boundary_test() ->
     BoundaryIndex =
-       riak_core_ring_util:partition_id_to_hash(15, 32),
+        riak_core_ring_util:partition_id_to_hash(15, 32),
     ?assert((riak_core_ring_util:hash_is_partition_boundary(<<BoundaryIndex:160>>,
-                                                            32))),
+                                                             32))),
     ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex
-                                                                  + 32):160>>,
-                                                                32))),
+                                                                  +
+                                                                      32):160>>,
+                                                                  32))),
     ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex
-                                                                  - 32):160>>,
-                                                                32))),
+                                                                  -
+                                                                      32):160>>,
+                                                                  32))),
     ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex
-                                                                  + 1):160>>,
-                                                                32))),
+                                                                  +
+                                                                      1):160>>,
+                                                                  32))),
     ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex
-                                                                  - 1):160>>,
-                                                                32))),
+                                                                  -
+                                                                      1):160>>,
+                                                                  32))),
     ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex
-                                                                  + 2):160>>,
-                                                                32))),
+                                                                  +
+                                                                      2):160>>,
+                                                                  32))),
     ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex
-                                                                  + 10):160>>,
-                                                                32))).
+                                                                  +
+                                                                      10):160>>,
+                                                                  32))).

 -endif. % TEST
diff --git a/src/riak_core_send_msg.erl b/src/riak_core_send_msg.erl
index 427cf00bf..d8d904b80 100644
--- a/src/riak_core_send_msg.erl
+++ b/src/riak_core_send_msg.erl
@@ -22,8 +22,10 @@

 -module(riak_core_send_msg).

--export([reply_unreliable/2, cast_unreliable/2,
-         send_event_unreliable/2, bang_unreliable/2]).
+-export([reply_unreliable/2,
+         cast_unreliable/2,
+         send_event_unreliable/2,
+         bang_unreliable/2]).

 -ifdef(TEST).

@@ -34,9 +36,9 @@

 -compile({parse_transform, pulse_instrument}).
-compile({pulse_replace_module, - [{gen_fsm, pulse_gen_fsm}, - {gen_fsm_compat, pulse_gen_fsm}, - {gen_server, pulse_gen_server}]}). + [{gen_fsm, pulse_gen_fsm}, + {gen_fsm_compat, pulse_gen_fsm}, + {gen_server, pulse_gen_server}]}). -endif. @@ -51,13 +53,14 @@ cast_unreliable(Dest, Request) -> %% NOTE: We'ed peeked inside gen_fsm.erl's guts to see its internals. send_event_unreliable({global, _Name} = GlobalTo, - Event) -> + Event) -> erlang:error({unimplemented_send, GlobalTo, Event}); send_event_unreliable({via, _Mod, _Name} = ViaTo, - Event) -> + Event) -> erlang:error({unimplemented_send, ViaTo, Event}); send_event_unreliable(Name, Event) -> - bang_unreliable(Name, {'$gen_event', Event}), ok. + bang_unreliable(Name, {'$gen_event', Event}), + ok. bang_unreliable(Dest, Msg) -> catch erlang:send(Dest, Msg, [noconnect, nosuspend]), diff --git a/src/riak_core_status.erl b/src/riak_core_status.erl index bf8d8e3a2..25d59da85 100644 --- a/src/riak_core_status.erl +++ b/src/riak_core_status.erl @@ -21,50 +21,57 @@ %% ------------------------------------------------------------------- -module(riak_core_status). --export([ringready/0, all_active_transfers/0, - transfers/0, partitions/2, ring_status/0]). +-export([ringready/0, + all_active_transfers/0, + transfers/0, + partitions/2, + ring_status/0]). -spec ringready() -> {ok, [atom()]} | {error, any()}. ringready() -> case get_rings() of - {[], Rings} -> - {N1, R1} = hd(Rings), - case rings_match(hash_ring(R1), tl(Rings)) of - true -> Nodes = [N || {N, _} <- Rings], {ok, Nodes}; - {false, N2} -> {error, {different_owners, N1, N2}} - end; - {Down, _Rings} -> {error, {nodes_down, Down}} + {[], Rings} -> + {N1, R1} = hd(Rings), + case rings_match(hash_ring(R1), tl(Rings)) of + true -> + Nodes = [N || {N, _} <- Rings], + {ok, Nodes}; + {false, N2} -> {error, {different_owners, N1, N2}} + end; + {Down, _Rings} -> {error, {nodes_down, Down}} end. -spec transfers() -> {[atom()], - [{waiting_to_handoff, atom(), integer()} | - {stopped, atom(), integer()}]}. + [{waiting_to_handoff, atom(), integer()} | + {stopped, atom(), integer()}]}. transfers() -> {Down, Rings} = get_rings(), %% Work out which vnodes are running and which partitions they claim F = fun ({N, R}, Acc) -> - {_Pri, Sec, Stopped} = partitions(N, R), - Acc1 = case Sec of - [] -> []; - _ -> [{waiting_to_handoff, N, length(Sec)}] - end, - case Stopped of - [] -> Acc1 ++ Acc; - _ -> Acc1 ++ [{stopped, N, length(Stopped)} | Acc] - end - end, + {_Pri, Sec, Stopped} = partitions(N, R), + Acc1 = case Sec of + [] -> []; + _ -> [{waiting_to_handoff, N, length(Sec)}] + end, + case Stopped of + [] -> Acc1 ++ Acc; + _ -> Acc1 ++ [{stopped, N, length(Stopped)} | Acc] + end + end, {Down, lists:foldl(F, [], Rings)}. %% @doc Produce status for all active transfers in the cluster. -spec all_active_transfers() -> {Xfers :: list(), - Down :: list()}. + Down :: list()}. all_active_transfers() -> {Xfers, Down} = - riak_core_util:rpc_every_member(riak_core_handoff_manager, - status, [{direction, outbound}], 5000), + riak_core_util:rpc_every_member(riak_core_handoff_manager, + status, + [{direction, outbound}], + 5000), {Xfers, Down}. ring_status() -> @@ -72,52 +79,69 @@ ring_status() -> %% are running on each node. 
{ok, Ring} = riak_core_ring_manager:get_raw_ring(), {AllMods, Down} = - riak_core_util:rpc_every_member_ann(riak_core, - vnode_modules, [], 1000), + riak_core_util:rpc_every_member_ann(riak_core, + vnode_modules, + [], + 1000), %% Check if the claimant is running and if it believes the ring is ready Claimant = riak_core_ring:claimant(Ring), - case riak_core_util:safe_rpc(Claimant, riak_core_ring, - ring_ready, [], 5000) - of - {badrpc, _} -> - Down2 = lists:usort([Claimant | Down]), - RingReady = undefined; - RingReady -> Down2 = Down, RingReady = RingReady + case riak_core_util:safe_rpc(Claimant, + riak_core_ring, + ring_ready, + [], + 5000) + of + {badrpc, _} -> + Down2 = lists:usort([Claimant | Down]), + RingReady = undefined; + RingReady -> + Down2 = Down, + RingReady = RingReady end, %% Get the list of pending ownership changes Changes = riak_core_ring:pending_changes(Ring), %% Group pending changes by (Owner, NextOwner) - Merged = lists:foldl(fun ({Idx, Owner, NextOwner, Mods, - Status}, - Acc) -> - orddict:append({Owner, NextOwner}, - {Idx, Mods, Status}, Acc) - end, - [], Changes), + Merged = lists:foldl(fun ({Idx, + Owner, + NextOwner, + Mods, + Status}, + Acc) -> + orddict:append({Owner, NextOwner}, + {Idx, Mods, Status}, + Acc) + end, + [], + Changes), %% For each pending transfer, determine which vnode modules have completed %% handoff and which we are still waiting on. %% Final result is of the form: %% [{Owner, NextOwner}, [{Index, WaitingMods, CompletedMods, Status}]] TransferStatus = orddict:map(fun ({Owner, _}, - Transfers) -> - case orddict:find(Owner, AllMods) of - error -> - [{Idx, down, Mods, Status} - || {Idx, Mods, Status} - <- Transfers]; - {ok, OwnerMods} -> - NodeMods = [Mod - || {_App, Mod} - <- OwnerMods], - [{Idx, NodeMods -- Mods, Mods, - Status} - || {Idx, Mods, Status} - <- Transfers] - end - end, - Merged), + Transfers) -> + case orddict:find(Owner, AllMods) of + error -> + [{Idx, down, Mods, Status} + || {Idx, Mods, Status} + <- Transfers]; + {ok, OwnerMods} -> + NodeMods = [Mod + || {_App, Mod} + <- OwnerMods], + [{Idx, + NodeMods -- Mods, + Mods, + Status} + || {Idx, Mods, Status} + <- Transfers] + end + end, + Merged), MarkedDown = riak_core_ring:down_members(Ring), - {Claimant, RingReady, Down2, MarkedDown, + {Claimant, + RingReady, + Down2, + MarkedDown, TransferStatus}. %% =================================================================== @@ -127,11 +151,13 @@ ring_status() -> %% Retrieve the rings for all other nodes by RPC get_rings() -> {RawRings, Down} = - riak_core_util:rpc_every_member(riak_core_ring_manager, - get_my_ring, [], 30000), + riak_core_util:rpc_every_member(riak_core_ring_manager, + get_my_ring, + [], + 30000), Rings = - orddict:from_list([{riak_core_ring:owner_node(R), R} - || {ok, R} <- RawRings]), + orddict:from_list([{riak_core_ring:owner_node(R), R} + || {ok, R} <- RawRings]), {lists:sort(Down), Rings}. %% Produce a hash of the 'chash' portion of the ring @@ -142,8 +168,8 @@ hash_ring(R) -> rings_match(_, []) -> true; rings_match(R1hash, [{N2, R2} | Rest]) -> case hash_ring(R2) of - R1hash -> rings_match(R1hash, Rest); - _ -> {false, N2} + R1hash -> rings_match(R1hash, Rest); + _ -> {false, N2} end. 
%% Get a list of active partition numbers - regardless of vnode type @@ -152,14 +178,18 @@ rings_match(R1hash, [{N2, R2} | Rest]) -> active_partitions(Node) -> case riak_core_util:safe_rpc(Node, - riak_core_vnode_manager, all_vnodes, [], 30000) - of - {badrpc, _} -> ordsets:new(); - VNodes -> - lists:foldl(fun ({_, P, _}, Ps) -> - ordsets:add_element(P, Ps) - end, - ordsets:new(), VNodes) + riak_core_vnode_manager, + all_vnodes, + [], + 30000) + of + {badrpc, _} -> ordsets:new(); + VNodes -> + lists:foldl(fun ({_, P, _}, Ps) -> + ordsets:add_element(P, Ps) + end, + ordsets:new(), + VNodes) end. %% Return a list of active primary partitions, active secondary partitions (to be handed off) @@ -167,7 +197,7 @@ active_partitions(Node) -> partitions(Node, Ring) -> Owners = riak_core_ring:all_owners(Ring), Owned = ordsets:from_list(owned_partitions(Owners, - Node)), + Node)), Active = active_partitions(Node), Stopped = ordsets:subtract(Owned, Active), Secondary = ordsets:subtract(Active, Owned), diff --git a/src/riak_core_sup.erl b/src/riak_core_sup.erl index 5b875eb0f..880f9819e 100644 --- a/src/riak_core_sup.erl +++ b/src/riak_core_sup.erl @@ -32,11 +32,15 @@ %% Helper macro for declaring children of supervisor -define(CHILD(I, Type, Timeout, Args), - {I, {I, start_link, Args}, permanent, Timeout, Type, - [I]}). + {I, + {I, start_link, Args}, + permanent, + Timeout, + Type, + [I]}). -define(CHILD(I, Type, Timeout), - ?CHILD(I, Type, Timeout, [])). + ?CHILD(I, Type, Timeout, [])). -define(CHILD(I, Type), ?CHILD(I, Type, 5000)). @@ -53,15 +57,15 @@ start_link() -> init([]) -> Children = lists:flatten([?CHILD(riak_core_vnode_sup, - supervisor, 305000), - ?CHILD(riak_core_eventhandler_sup, supervisor), - ?CHILD(riak_core_handoff_sup, supervisor), - ?CHILD(riak_core_ring_events, worker), - ?CHILD(riak_core_ring_manager, worker), - ?CHILD(riak_core_vnode_proxy_sup, supervisor), - ?CHILD(riak_core_node_watcher_events, worker), - ?CHILD(riak_core_node_watcher, worker), - ?CHILD(riak_core_vnode_manager, worker), - ?CHILD(riak_core_gossip, worker), - ?CHILD(riak_core_claimant, worker)]), + supervisor, 305000), + ?CHILD(riak_core_eventhandler_sup, supervisor), + ?CHILD(riak_core_handoff_sup, supervisor), + ?CHILD(riak_core_ring_events, worker), + ?CHILD(riak_core_ring_manager, worker), + ?CHILD(riak_core_vnode_proxy_sup, supervisor), + ?CHILD(riak_core_node_watcher_events, worker), + ?CHILD(riak_core_node_watcher, worker), + ?CHILD(riak_core_vnode_manager, worker), + ?CHILD(riak_core_gossip, worker), + ?CHILD(riak_core_claimant, worker)]), {ok, {{one_for_one, 10, 10}, Children}}. diff --git a/src/riak_core_test_util.erl b/src/riak_core_test_util.erl index fad058bf9..776b7d234 100644 --- a/src/riak_core_test_util.erl +++ b/src/riak_core_test_util.erl @@ -26,8 +26,12 @@ -ifdef(TEST). --export([setup_mockring1/0, fake_ring/2, stop_pid/1, - wait_for_pid/1, stop_pid/2, unlink_named_process/1]). +-export([setup_mockring1/0, + fake_ring/2, + stop_pid/1, + wait_for_pid/1, + stop_pid/2, + unlink_named_process/1]). -include_lib("eunit/include/eunit.hrl"). @@ -46,8 +50,8 @@ stop_pid(Pid, ExitType) -> wait_for_pid(Pid) -> Mref = erlang:monitor(process, Pid), receive - {'DOWN', Mref, process, _, _} -> ok - after 5000 -> {error, didnotexit} + {'DOWN', Mref, process, _, _} -> ok + after 5000 -> {error, didnotexit} end. 
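
%% For reference, the ?CHILD macro in riak_core_sup above builds plain
%% supervisor child-spec tuples; e.g. ?CHILD(riak_core_gossip, worker)
%% expands, with the 5000 ms default shutdown, to:
%%
%%   {riak_core_gossip,
%%    {riak_core_gossip, start_link, []},
%%    permanent, 5000, worker,
%%    [riak_core_gossip]}
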
unlink_named_process(Name) when is_atom(Name) -> @@ -56,29 +60,33 @@ unlink_named_process(Name) when is_atom(Name) -> setup_mockring1() -> % requires a running riak_core_ring_manager, in test-mode is ok Ring0 = riak_core_ring:fresh(16, node()), - Ring1 = riak_core_ring:add_member(node(), Ring0, - othernode@otherhost), - Ring2 = riak_core_ring:add_member(node(), Ring1, - othernode2@otherhost2), + Ring1 = riak_core_ring:add_member(node(), + Ring0, + othernode@otherhost), + Ring2 = riak_core_ring:add_member(node(), + Ring1, + othernode2@otherhost2), Ring3 = lists:foldl(fun (_, R) -> - riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), - othernode@otherhost, - R) - end, - Ring2, [1, 2, 3, 4, 5, 6]), + riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), + othernode@otherhost, + R) + end, + Ring2, + [1, 2, 3, 4, 5, 6]), Ring = lists:foldl(fun (_, R) -> - riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), - othernode2@otherhost2, - R) - end, - Ring3, [1, 2, 3, 4, 5, 6]), + riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), + othernode2@otherhost2, + R) + end, + Ring3, + [1, 2, 3, 4, 5, 6]), riak_core_ring_manager:set_ring_global(Ring). fake_ring(Size, NumNodes) -> ManyNodes = [list_to_atom("dev" ++ - integer_to_list(X) ++ "@127.0.0.1") - || _ <- lists:seq(0, Size div NumNodes), - X <- lists:seq(1, NumNodes)], + integer_to_list(X) ++ "@127.0.0.1") + || _ <- lists:seq(0, Size div NumNodes), + X <- lists:seq(1, NumNodes)], Nodes = lists:sublist(ManyNodes, Size), Inc = chash:ring_increment(Size), Indices = lists:seq(0, (Size - 1) * Inc, Inc), @@ -86,19 +94,24 @@ fake_ring(Size, NumNodes) -> [Node | OtherNodes] = Nodes, Ring = riak_core_ring:fresh(Size, Node), Ring2 = lists:foldl(fun (OtherNode, RingAcc) -> - RingAcc2 = riak_core_ring:add_member(Node, - RingAcc, - OtherNode), - riak_core_ring:set_member(Node, RingAcc2, - OtherNode, valid, - same_vclock) - end, - Ring, OtherNodes), + RingAcc2 = riak_core_ring:add_member(Node, + RingAcc, + OtherNode), + riak_core_ring:set_member(Node, + RingAcc2, + OtherNode, + valid, + same_vclock) + end, + Ring, + OtherNodes), Ring3 = lists:foldl(fun ({Idx, Owner}, RingAcc) -> - riak_core_ring:transfer_node(Idx, Owner, - RingAcc) - end, - Ring2, Owners), + riak_core_ring:transfer_node(Idx, + Owner, + RingAcc) + end, + Ring2, + Owners), Ring3. -endif. %TEST. diff --git a/src/riak_core_util.erl b/src/riak_core_util.erl index 033a14dea..81112b4b8 100644 --- a/src/riak_core_util.erl +++ b/src/riak_core_util.erl @@ -21,27 +21,66 @@ %% @doc Various functions that are useful throughout Riak. -module(riak_core_util). 
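
%% A small sketch of exercising fake_ring/2 from riak_core_test_util above
%% (the test name and assertion are ours; all_owners/1 returns one
%% {Index, Owner} pair per partition, so a 16-partition ring yields 16):

fake_ring_smoke_test() ->
    Ring = riak_core_test_util:fake_ring(16, 4),
    ?assertEqual(16, length(riak_core_ring:all_owners(Ring))).
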
--export([moment/0, make_tmp_dir/0, replace_file/2, - compare_dates/2, reload_all/1, integer_to_list/2, - unique_id_62/0, str_to_node/1, chash_key/1, chash_key/2, - chash_std_keyfun/1, chash_bucketonly_keyfun/1, - mkclientid/1, start_app_deps/1, build_tree/3, - orddict_delta/2, safe_rpc/4, safe_rpc/5, - rpc_every_member/4, rpc_every_member_ann/4, count/2, - keydelete/2, multi_keydelete/2, multi_keydelete/3, - compose/1, compose/2, pmap/2, pmap/3, multi_rpc/4, - multi_rpc/5, multi_rpc_ann/4, multi_rpc_ann/5, - multicall_ann/4, multicall_ann/5, shuffle/1, is_arch/1, - format_ip_and_port/2, peername/2, sockname/2, sha/1, - md5/1, make_fold_req/1, make_fold_req/2, - make_fold_req/4, make_newest_fold_req/1, proxy_spawn/1, - proxy/2, enable_job_class/1, enable_job_class/2, - disable_job_class/1, disable_job_class/2, - job_class_enabled/1, job_class_enabled/2, - job_class_disabled_message/2, - report_job_request_disposition/6, - responsible_preflists/1, responsible_preflists/2, - get_index_n/1, preflist_siblings/1, posix_error/1]). +-export([moment/0, + make_tmp_dir/0, + replace_file/2, + compare_dates/2, + reload_all/1, + integer_to_list/2, + unique_id_62/0, + str_to_node/1, + chash_key/1, + chash_key/2, + chash_std_keyfun/1, + chash_bucketonly_keyfun/1, + mkclientid/1, + start_app_deps/1, + build_tree/3, + orddict_delta/2, + safe_rpc/4, + safe_rpc/5, + rpc_every_member/4, + rpc_every_member_ann/4, + count/2, + keydelete/2, + multi_keydelete/2, + multi_keydelete/3, + compose/1, + compose/2, + pmap/2, + pmap/3, + multi_rpc/4, + multi_rpc/5, + multi_rpc_ann/4, + multi_rpc_ann/5, + multicall_ann/4, + multicall_ann/5, + shuffle/1, + is_arch/1, + format_ip_and_port/2, + peername/2, + sockname/2, + sha/1, + md5/1, + make_fold_req/1, + make_fold_req/2, + make_fold_req/4, + make_newest_fold_req/1, + proxy_spawn/1, + proxy/2, + enable_job_class/1, + enable_job_class/2, + disable_job_class/1, + disable_job_class/2, + job_class_enabled/1, + job_class_enabled/2, + job_class_disabled_message/2, + report_job_request_disposition/6, + responsible_preflists/1, + responsible_preflists/2, + get_index_n/1, + preflist_siblings/1, + posix_error/1]). -include("riak_core_vnode.hrl"). @@ -56,8 +95,9 @@ -include_lib("eunit/include/eunit.hrl"). --export([counter_loop/1, incr_counter/1, - decr_counter/1]). +-export([counter_loop/1, + incr_counter/1, + decr_counter/1]). -endif. @@ -81,9 +121,9 @@ posix_error(Error) -> case erl_posix_msg:message(Error) of - "unknown POSIX error" -> - lists:flatten(io_lib:format("~p", [Error])); - Message -> Message + "unknown POSIX error" -> + lists:flatten(io_lib:format("~p", [Error])); + Message -> Message end. %% @spec moment() -> integer() @@ -109,7 +149,7 @@ compare_dates(A, B) when is_list(B) -> rfc1123_to_now(String) when is_list(String) -> GSec = - calendar:datetime_to_gregorian_seconds(httpd_util:convert_request_date(String)), + calendar:datetime_to_gregorian_seconds(httpd_util:convert_request_date(String)), ESec = GSec - (?SEC_TO_EPOCH), Sec = ESec rem 1000000, MSec = ESec div 1000000, @@ -120,11 +160,13 @@ rfc1123_to_now(String) when is_list(String) -> %% to the new directory. make_tmp_dir() -> TmpId = io_lib:format("riptemp.~p", - [erlang:phash2({riak_core_rand:uniform(), self()})]), + [erlang:phash2({riak_core_rand:uniform(), self()})]), TempDir = filename:join("/tmp", TmpId), case filelib:is_dir(TempDir) of - true -> make_tmp_dir(); - false -> ok = file:make_dir(TempDir), TempDir + true -> make_tmp_dir(); + false -> + ok = file:make_dir(TempDir), + TempDir end. 
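
%% A usage sketch for posix_error/1 above (the function name and file
%% path are illustrative only):

describe_open_error(Path) ->
    case file:open(Path, [read]) of
        {ok, FD} -> file:close(FD);
        {error, Reason} ->
            io:format("open ~s failed: ~s~n",
                      [Path, posix_error(Reason)])
    end.
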
%% @doc Atomically/safely (to some reasonable level of durability)
@@ -132,23 +174,23 @@ make_tmp_dir() ->
 %% slightly: If `FN' cannot be opened, will not error with a
 %% `badmatch', as before, but will instead return `{error, Reason}'
 -spec replace_file(string(), iodata()) -> ok |
-                                          {error, term()}.
+                       {error, term()}.
 
 replace_file(FN, Data) ->
     TmpFN = FN ++ ".tmp",
     case file:open(TmpFN, [write, raw]) of
-      {ok, FH} ->
-          try ok = file:write(FH, Data),
-              ok = file:sync(FH),
-              ok = file:close(FH),
-              ok = file:rename(TmpFN, FN),
-              {ok, Contents} = read_file(FN),
-              true = Contents == iolist_to_binary(Data),
-              ok
-          catch
-            _:Err -> {error, Err}
-          end;
-      Err -> Err
+        {ok, FH} ->
+            try ok = file:write(FH, Data),
+                ok = file:sync(FH),
+                ok = file:close(FH),
+                ok = file:rename(TmpFN, FN),
+                {ok, Contents} = read_file(FN),
+                true = Contents == iolist_to_binary(Data),
+                ok
+            catch
+                _:Err -> {error, Err}
+            end;
+        Err -> Err
     end.
 
 %% @doc Similar to {@link file:read_file/1} but uses raw file `I/O'
@@ -160,8 +202,8 @@ read_file(FName) ->
 
 read_file(FD, Acc) ->
     case file:read(FD, 4096) of
-      {ok, Data} -> read_file(FD, [Data | Acc]);
-      eof -> lists:reverse(Acc)
+        {ok, Data} -> read_file(FD, [Data | Acc]);
+        eof -> lists:reverse(Acc)
     end.
 
 %% @spec integer_to_list(Integer :: integer(), Base :: integer()) ->
@@ -171,7 +213,7 @@ read_file(FD, Acc) ->
 integer_to_list(I, 10) -> erlang:integer_to_list(I);
 integer_to_list(I, Base)
     when is_integer(I), is_integer(Base), Base >= 2,
-         Base =< 1 + $Z - $A + 10 + 1 + $z - $a ->
+         Base =< 1 + $Z - $A + 10 + 1 + $z - $a ->
     if I < 0 -> [$- | integer_to_list(-I, Base, [])];
        true -> integer_to_list(I, Base, [])
    end;
@@ -183,9 +225,9 @@ integer_to_list(I0, Base, R0) ->
     D = I0 rem Base,
     I1 = I0 div Base,
     R1 = if D >= 36 -> [D - 36 + $a | R0];
-            D >= 10 -> [D - 10 + $A | R0];
-            true -> [D + $0 | R0]
-         end,
+            D >= 10 -> [D - 10 + $A | R0];
+            true -> [D + $0 | R0]
+         end,
     if I1 =:= 0 -> R1;
        true -> integer_to_list(I1, Base, R1)
    end.
@@ -200,7 +242,7 @@ md5(Bin) -> crypto:hash(md5, Bin).
 
 unique_id_62() ->
     Rand = sha(term_to_binary({make_ref(),
-                               os:timestamp()})),
+                               os:timestamp()})),
     <<I:160/integer>> = Rand,
     integer_to_list(I, 62).
 
@@ -211,8 +253,8 @@ unique_id_62() ->
 %% Module. Return is a list of the results of code:purge/1
 %% and code:load_file/1 on each node.
 -spec reload_all(Module :: atom()) -> [{boolean(),
-                                        {module, Module :: atom()} |
-                                        {error, term()}}].
+                                        {module, Module :: atom()} |
+                                        {error, term()}}].
 
 reload_all(Module) ->
     {ok, Ring} = riak_core_ring_manager:get_my_ring(),
@@ -225,8 +267,16 @@ reload_all(Module) ->
 mkclientid(RemoteNode) ->
     {{Y, Mo, D}, {H, Mi, S}} = erlang:universaltime(),
     {_, _, NowPart} = os:timestamp(),
-    Id = erlang:phash2([Y, Mo, D, H, Mi, S, node(),
-                        RemoteNode, NowPart, self()]),
+    Id = erlang:phash2([Y,
+                        Mo,
+                        D,
+                        H,
+                        Mi,
+                        S,
+                        node(),
+                        RemoteNode,
+                        NowPart,
+                        self()]),
     <<Id:32>>.
 
 %% @spec chash_key(BKey :: riak_object:bkey()) -> chash:index()
@@ -258,21 +308,21 @@ str_to_node(Node) when is_atom(Node) ->
     str_to_node(atom_to_list(Node));
 str_to_node(NodeStr) ->
     case string:tokens(NodeStr, "@") of
-      [NodeName] ->
-          %% Node name only; no host name.
If the local node has a hostname, + %% append it + case node_hostname() of + [] -> list_to_atom(NodeName); + Hostname -> list_to_atom(NodeName ++ "@" ++ Hostname) + end; + _ -> list_to_atom(NodeStr) end. node_hostname() -> NodeStr = atom_to_list(node()), case string:tokens(NodeStr, "@") of - [_NodeName, Hostname] -> Hostname; - _ -> [] + [_NodeName, Hostname] -> Hostname; + _ -> [] end. %% @spec start_app_deps(App :: atom()) -> ok @@ -286,22 +336,22 @@ start_app_deps(App) -> %% @doc Start the named application if not already started. ensure_started(App) -> case application:start(App) of - ok -> ok; - {error, {already_started, App}} -> ok + ok -> ok; + {error, {already_started, App}} -> ok end. %% @doc Applies `Pred' to each element in `List', and returns a count of how many %% applications returned `true'. -spec count(fun((term()) -> boolean()), - [term()]) -> non_neg_integer(). + [term()]) -> non_neg_integer(). count(Pred, List) -> FoldFun = fun (E, A) -> - case Pred(E) of - false -> A; - true -> A + 1 - end - end, + case Pred(E) of + false -> A; + true -> A + 1 + end + end, lists:foldl(FoldFun, 0, List). %% @doc Returns a copy of `TupleList' where the first occurrence of a tuple whose @@ -324,18 +374,19 @@ multi_keydelete(KeysToDelete, TupleList) -> %% first element compares equal to any key in `KeysToDelete' is deleted, if %% there is such a tuple. -spec multi_keydelete([atom()], non_neg_integer(), - [tuple()]) -> [tuple()]. + [tuple()]) -> [tuple()]. multi_keydelete(KeysToDelete, N, TupleList) -> lists:foldl(fun (Key, Acc) -> - lists:keydelete(Key, N, Acc) - end, - TupleList, KeysToDelete). + lists:keydelete(Key, N, Acc) + end, + TupleList, + KeysToDelete). %% @doc Function composition: returns a function that is the composition of %% `F' and `G'. -spec compose(F :: fun((B) -> C), - G :: fun((A) -> B)) -> fun((A) -> C). + G :: fun((A) -> B)) -> fun((A) -> C). compose(F, G) when is_function(F, 1), is_function(G, 1) -> @@ -356,39 +407,45 @@ compose(Funs) when is_list(Funs) -> %% @doc Invoke function `F' over each element of list `L' in parallel, %% returning the results in the same order as the input list. -spec pmap(F, L1) -> L2 when F :: fun((A) -> B), - L1 :: [A], L2 :: [B]. + L1 :: [A], L2 :: [B]. pmap(F, L) -> Parent = self(), lists:foldl(fun (X, N) -> - spawn_link(fun () -> Parent ! {pmap, N, F(X)} end), - N + 1 - end, - 0, L), + spawn_link(fun () -> Parent ! {pmap, N, F(X)} end), + N + 1 + end, + 0, + L), L2 = [receive {pmap, N, R} -> {N, R} end || _ <- L], L3 = lists:keysort(1, L2), [R || {_, R} <- L3]. -record(pmap_acc, - {mapper, fn, n_pending = 0, pending = sets:new(), - n_done = 0, done = [], max_concurrent = 1}). + {mapper, + fn, + n_pending = 0, + pending = sets:new(), + n_done = 0, + done = [], + max_concurrent = 1}). %% @doc Parallel map with a cap on the number of concurrent worker processes. %% Note: Worker processes are linked to the parent, so a crash propagates. -spec pmap(Fun :: function(), List :: list(), - MaxP :: integer()) -> list(). + MaxP :: integer()) -> list(). 
pmap(Fun, List, MaxP) when MaxP < 1 -> pmap(Fun, List, 1); pmap(Fun, List, MaxP) when is_function(Fun), is_list(List), - is_integer(MaxP) -> + is_integer(MaxP) -> Mapper = self(), #pmap_acc{pending = Pending, done = Done} = - lists:foldl(fun pmap_worker/2, - #pmap_acc{mapper = Mapper, fn = Fun, - max_concurrent = MaxP}, - List), + lists:foldl(fun pmap_worker/2, + #pmap_acc{mapper = Mapper, fn = Fun, + max_concurrent = MaxP}, + List), All = pmap_collect_rest(Pending, Done), % Restore input order Sorted = lists:keysort(1, All), @@ -397,43 +454,43 @@ pmap(Fun, List, MaxP) %% @doc Fold function for {@link pmap/3} that spawns up to a max number of %% workers to execute the mapping function over the input list. pmap_worker(X, - Acc = #pmap_acc{n_pending = NP, pending = Pending, - n_done = ND, max_concurrent = MaxP, mapper = Mapper, - fn = Fn}) + Acc = #pmap_acc{n_pending = NP, pending = Pending, + n_done = ND, max_concurrent = MaxP, mapper = Mapper, + fn = Fn}) when NP < MaxP -> Worker = spawn_link(fun () -> - R = Fn(X), - Mapper ! {pmap_result, self(), {NP + ND, R}} - end), + R = Fn(X), + Mapper ! {pmap_result, self(), {NP + ND, R}} + end), Acc#pmap_acc{n_pending = NP + 1, - pending = sets:add_element(Worker, Pending)}; + pending = sets:add_element(Worker, Pending)}; pmap_worker(X, - Acc = #pmap_acc{n_pending = NP, pending = Pending, - n_done = ND, done = Done, max_concurrent = MaxP}) + Acc = #pmap_acc{n_pending = NP, pending = Pending, + n_done = ND, done = Done, max_concurrent = MaxP}) when NP == MaxP -> {Result, NewPending} = pmap_collect_one(Pending), pmap_worker(X, - Acc#pmap_acc{n_pending = NP - 1, pending = NewPending, - n_done = ND + 1, done = [Result | Done]}). + Acc#pmap_acc{n_pending = NP - 1, pending = NewPending, + n_done = ND + 1, done = [Result | Done]}). %% @doc Waits for one pending pmap task to finish pmap_collect_one(Pending) -> receive - {pmap_result, Pid, Result} -> - Size = sets:size(Pending), - NewPending = sets:del_element(Pid, Pending), - case sets:size(NewPending) of - Size -> pmap_collect_one(Pending); - _ -> {Result, NewPending} - end + {pmap_result, Pid, Result} -> + Size = sets:size(Pending), + NewPending = sets:del_element(Pid, Pending), + case sets:size(NewPending) of + Size -> pmap_collect_one(Pending); + _ -> {Result, NewPending} + end end. pmap_collect_rest(Pending, Done) -> case sets:size(Pending) of - 0 -> Done; - _ -> - {Result, NewPending} = pmap_collect_one(Pending), - pmap_collect_rest(NewPending, [Result | Done]) + 0 -> Done; + _ -> + {Result, NewPending} = pmap_collect_one(Pending), + pmap_collect_rest(NewPending, [Result | Done]) end. %% @doc Wraps an rpc:call/4 in a try/catch to handle the case where the @@ -441,16 +498,16 @@ pmap_collect_rest(Pending, Done) -> %% the sense that it won't crash the calling process if the rex %% process is down. -spec safe_rpc(Node :: node(), Module :: atom(), - Function :: atom(), Args :: [any()]) -> {badrpc, - any()} | - any(). + Function :: atom(), Args :: [any()]) -> {badrpc, + any()} | + any(). safe_rpc(Node, Module, Function, Args) -> try rpc:call(Node, Module, Function, Args) of - Result -> Result + Result -> Result catch - exit:{noproc, _NoProcDetails} -> - {badrpc, rpc_process_down} + exit:{noproc, _NoProcDetails} -> + {badrpc, rpc_process_down} end. %% @doc Wraps an rpc:call/5 in a try/catch to handle the case where the @@ -458,15 +515,15 @@ safe_rpc(Node, Module, Function, Args) -> %% the sense that it won't crash the calling process if the rex %% process is down. 
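
%% A usage sketch for pmap/3 above: at most MaxP workers run at once, and
%% the keysort on worker indices restores input order (values are
%% illustrative):
%%
%%   > riak_core_util:pmap(fun (X) -> 2 * X end, [1, 2, 3, 4], 2).
%%   [2,4,6,8]
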
-spec safe_rpc(Node :: node(), Module :: atom(), - Function :: atom(), Args :: [any()], - Timeout :: timeout()) -> {badrpc, any()} | any(). + Function :: atom(), Args :: [any()], + Timeout :: timeout()) -> {badrpc, any()} | any(). safe_rpc(Node, Module, Function, Args, Timeout) -> try rpc:call(Node, Module, Function, Args, Timeout) of - Result -> Result + Result -> Result catch - 'EXIT':{noproc, _NoProcDetails} -> - {badrpc, rpc_process_down} + 'EXIT':{noproc, _NoProcDetails} -> + {badrpc, rpc_process_down} end. %% @spec rpc_every_member(atom(), atom(), [term()], integer()|infinity) @@ -484,14 +541,17 @@ rpc_every_member(Module, Function, Args, Timeout) -> rpc_every_member_ann(Module, Function, Args, Timeout) -> {ok, MyRing} = riak_core_ring_manager:get_my_ring(), Nodes = riak_core_ring:all_members(MyRing), - {Results, Down} = multicall_ann(Nodes, Module, Function, - Args, Timeout), + {Results, Down} = multicall_ann(Nodes, + Module, + Function, + Args, + Timeout), {Results, Down}. %% @doc Perform an RPC call to a list of nodes in parallel, returning the %% results in the same order as the input list. -spec multi_rpc([node()], module(), atom(), - [any()]) -> [any()]. + [any()]) -> [any()]. multi_rpc(Nodes, Mod, Fun, Args) -> multi_rpc(Nodes, Mod, Fun, Args, infinity). @@ -499,19 +559,19 @@ multi_rpc(Nodes, Mod, Fun, Args) -> %% @doc Perform an RPC call to a list of nodes in parallel, returning the %% results in the same order as the input list. -spec multi_rpc([node()], module(), atom(), [any()], - timeout()) -> [any()]. + timeout()) -> [any()]. multi_rpc(Nodes, Mod, Fun, Args, Timeout) -> pmap(fun (Node) -> - safe_rpc(Node, Mod, Fun, Args, Timeout) - end, - Nodes). + safe_rpc(Node, Mod, Fun, Args, Timeout) + end, + Nodes). %% @doc Perform an RPC call to a list of nodes in parallel, returning the %% results in the same order as the input list. Each result is tagged %% with the corresponding node name. -spec multi_rpc_ann([node()], module(), atom(), - [any()]) -> [{node(), any()}]. + [any()]) -> [{node(), any()}]. multi_rpc_ann(Nodes, Mod, Fun, Args) -> multi_rpc_ann(Nodes, Mod, Fun, Args, infinity). @@ -520,7 +580,7 @@ multi_rpc_ann(Nodes, Mod, Fun, Args) -> %% results in the same order as the input list. Each result is tagged %% with the corresponding node name. -spec multi_rpc_ann([node()], module(), atom(), [any()], - timeout()) -> [{node(), any()}]. + timeout()) -> [{node(), any()}]. multi_rpc_ann(Nodes, Mod, Fun, Args, Timeout) -> Results = multi_rpc(Nodes, Mod, Fun, Args, Timeout), @@ -532,8 +592,8 @@ multi_rpc_ann(Nodes, Mod, Fun, Args, Timeout) -> %% the same order as the input list, and each result is tagged with the %% corresponding node name. -spec multicall_ann([node()], module(), atom(), - [any()]) -> {Results :: [{node(), any()}], - Down :: [node()]}. + [any()]) -> {Results :: [{node(), any()}], + Down :: [node()]}. multicall_ann(Nodes, Mod, Fun, Args) -> multicall_ann(Nodes, Mod, Fun, Args, infinity). @@ -544,16 +604,16 @@ multicall_ann(Nodes, Mod, Fun, Args) -> %% the same order as the input list, and each result is tagged with the %% corresponding node name. -spec multicall_ann([node()], module(), atom(), [any()], - timeout()) -> {Results :: [{node(), any()}], - Down :: [node()]}. + timeout()) -> {Results :: [{node(), any()}], + Down :: [node()]}. 
multicall_ann(Nodes, Mod, Fun, Args, Timeout) -> L = multi_rpc_ann(Nodes, Mod, Fun, Args, Timeout), {Results, DownAnn} = lists:partition(fun ({_, - Result}) -> - Result /= {badrpc, nodedown} - end, - L), + Result}) -> + Result /= {badrpc, nodedown} + end, + L), {Down, _} = lists:unzip(DownAnn), {Results, Down}. @@ -565,41 +625,45 @@ multicall_ann(Nodes, Mod, Fun, Args, Timeout) -> %% have children by giving them backedges to other elements. -spec build_tree(N :: integer(), Nodes :: [term()], - Opts :: [term()]) -> orddict:orddict(). + Opts :: [term()]) -> orddict:orddict(). build_tree(N, Nodes, Opts) -> case lists:member(cycles, Opts) of - true -> - Expand = lists:flatten(lists:duplicate(N + 1, Nodes)); - false -> Expand = Nodes + true -> + Expand = lists:flatten(lists:duplicate(N + 1, Nodes)); + false -> Expand = Nodes end, {Tree, _} = lists:foldl(fun (Elm, {Result, Worklist}) -> - Len = erlang:min(N, length(Worklist)), - {Children, Rest} = lists:split(Len, - Worklist), - NewResult = [{Elm, Children} | Result], - {NewResult, Rest} - end, - {[], tl(Expand)}, Nodes), + Len = erlang:min(N, length(Worklist)), + {Children, Rest} = lists:split(Len, + Worklist), + NewResult = [{Elm, Children} | Result], + {NewResult, Rest} + end, + {[], tl(Expand)}, + Nodes), orddict:from_list(Tree). orddict_delta(A, B) -> %% Pad both A and B to the same length DummyA = [{Key, '$none'} || {Key, _} <- B], - A2 = orddict:merge(fun (_, Value, _) -> Value end, A, - DummyA), + A2 = orddict:merge(fun (_, Value, _) -> Value end, + A, + DummyA), DummyB = [{Key, '$none'} || {Key, _} <- A], - B2 = orddict:merge(fun (_, Value, _) -> Value end, B, - DummyB), + B2 = orddict:merge(fun (_, Value, _) -> Value end, + B, + DummyB), %% Merge and filter out equal values Merged = orddict:merge(fun (_, AVal, BVal) -> - {AVal, BVal} - end, - A2, B2), + {AVal, BVal} + end, + A2, + B2), Diff = orddict:filter(fun (_, {Same, Same}) -> false; - (_, _) -> true - end, - Merged), + (_, _) -> true + end, + Merged), Diff. shuffle(L) -> @@ -629,33 +693,33 @@ format_ip_and_port(Ip, Port) when is_list(Ip) -> lists:flatten(io_lib:format("~s:~p", [Ip, Port])); format_ip_and_port(Ip, Port) when is_tuple(Ip) -> lists:flatten(io_lib:format("~s:~p", - [inet_parse:ntoa(Ip), Port])). + [inet_parse:ntoa(Ip), Port])). peername(Socket, Transport) -> case Transport:peername(Socket) of - {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); - {error, Reason} -> - %% just return a string so JSON doesn't blow up - lists:flatten(io_lib:format("error:~p", [Reason])) + {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); + {error, Reason} -> + %% just return a string so JSON doesn't blow up + lists:flatten(io_lib:format("error:~p", [Reason])) end. sockname(Socket, Transport) -> case Transport:sockname(Socket) of - {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); - {error, Reason} -> - %% just return a string so JSON doesn't blow up - lists:flatten(io_lib:format("error:~p", [Reason])) + {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); + {error, Reason} -> + %% just return a string so JSON doesn't blow up + lists:flatten(io_lib:format("error:~p", [Reason])) end. %% @doc Convert a #riak_core_fold_req_v? record to the cluster's maximum %% supported record version. 
make_fold_req(#riak_core_fold_req_v1{foldfun = FoldFun, - acc0 = Acc0}) -> + acc0 = Acc0}) -> make_fold_req(FoldFun, Acc0, false, []); make_fold_req(#riak_core_fold_req_v2{foldfun = FoldFun, - acc0 = Acc0, forwardable = Forwardable, - opts = Opts}) -> + acc0 = Acc0, forwardable = Forwardable, + opts = Opts}) -> make_fold_req(FoldFun, Acc0, Forwardable, Opts). make_fold_req(FoldFun, Acc0) -> @@ -668,8 +732,8 @@ make_fold_req(FoldFun, Acc0, Forwardable, Opts) -> %% regardless of cluster support make_newest_fold_req(#riak_core_fold_req_v1{foldfun = - FoldFun, - acc0 = Acc0}) -> + FoldFun, + acc0 = Acc0}) -> make_fold_reqv(v2, FoldFun, Acc0, false, []); make_newest_fold_req(#riak_core_fold_req_v2{} = F) -> F. @@ -682,30 +746,32 @@ proxy_spawn(Fun) -> MRef = monitor(process, Pid), Pid ! {proxy, MRef}, receive - {proxy_reply, MRef, Result} -> - demonitor(MRef, [flush]), Result; - {'DOWN', MRef, _, _, Reason} -> {error, Reason} + {proxy_reply, MRef, Result} -> + demonitor(MRef, [flush]), + Result; + {'DOWN', MRef, _, _, Reason} -> {error, Reason} end. %% @private make_fold_reqv(_, FoldFun, Acc0, Forwardable, Opts) when is_function(FoldFun, 3) andalso - (Forwardable == true orelse Forwardable == false) - andalso is_list(Opts) -> + (Forwardable == true orelse Forwardable == false) + andalso is_list(Opts) -> #riak_core_fold_req_v2{foldfun = FoldFun, acc0 = Acc0, - forwardable = Forwardable, opts = Opts}. + forwardable = Forwardable, opts = Opts}. %% @private - used with proxy_spawn proxy(Parent, Fun) -> _ = monitor(process, Parent), receive - {proxy, MRef} -> - Result = Fun(), Parent ! {proxy_reply, MRef, Result}; - {'DOWN', _, _, _, _} -> ok + {proxy, MRef} -> + Result = Fun(), + Parent ! {proxy_reply, MRef, Result}; + {'DOWN', _, _, _, _} -> ok end. -spec enable_job_class(atom(), atom()) -> ok | - {error, term()}. + {error, term()}. %% @doc Enables the specified Application/Operation job class. %% This is the public API for use via RPC. @@ -713,13 +779,13 @@ proxy(Parent, Fun) -> %% or its complement disable_job_class/2. enable_job_class(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> enable_job_class({Application, Operation}); enable_job_class(Application, Operation) -> {error, {badarg, {Application, Operation}}}. -spec disable_job_class(atom(), atom()) -> ok | - {error, term()}. + {error, term()}. %% @doc Disables the specified Application/Operation job class. %% This is the public API for use via RPC. @@ -727,25 +793,25 @@ enable_job_class(Application, Operation) -> %% or its complement enable_job_class/2. disable_job_class(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> disable_job_class({Application, Operation}); disable_job_class(Application, Operation) -> {error, {badarg, {Application, Operation}}}. -spec job_class_enabled(atom(), atom()) -> boolean() | - {error, term()}. + {error, term()}. %% @doc Reports whether the specified Application/Operation job class is enabled. %% This is the public API for use via RPC. job_class_enabled(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> job_class_enabled({Application, Operation}); job_class_enabled(Application, Operation) -> {error, {badarg, {Application, Operation}}}. -spec enable_job_class(Class :: term()) -> ok | - {error, term()}. + {error, term()}. %% @doc Internal API to enable the specified job class. 
%% WARNING: @@ -753,23 +819,26 @@ job_class_enabled(Application, Operation) -> %% * Parameter types ARE NOT validated by the same rules as the public API! %% You are STRONGLY advised to use enable_job_class/2. enable_job_class(Class) -> - case application:get_env(riak_core, job_accept_class, - undefined) - of - [_ | _] = EnabledClasses -> - case lists:member(Class, EnabledClasses) of - true -> ok; - _ -> - application:set_env(riak_core, job_accept_class, - [Class | EnabledClasses]) - end; - _ -> - application:set_env(riak_core, job_accept_class, - [Class]) + case application:get_env(riak_core, + job_accept_class, + undefined) + of + [_ | _] = EnabledClasses -> + case lists:member(Class, EnabledClasses) of + true -> ok; + _ -> + application:set_env(riak_core, + job_accept_class, + [Class | EnabledClasses]) + end; + _ -> + application:set_env(riak_core, + job_accept_class, + [Class]) end. -spec disable_job_class(Class :: term()) -> ok | - {error, term()}. + {error, term()}. %% @doc Internal API to disable the specified job class. %% WARNING: @@ -777,17 +846,19 @@ enable_job_class(Class) -> %% * Parameter types ARE NOT validated by the same rules as the public API! %% You are STRONGLY advised to use disable_job_class/2. disable_job_class(Class) -> - case application:get_env(riak_core, job_accept_class, - undefined) - of - [_ | _] = EnabledClasses -> - case lists:member(Class, EnabledClasses) of - false -> ok; - _ -> - application:set_env(riak_core, job_accept_class, - lists:delete(Class, EnabledClasses)) - end; - _ -> ok + case application:get_env(riak_core, + job_accept_class, + undefined) + of + [_ | _] = EnabledClasses -> + case lists:member(Class, EnabledClasses) of + false -> ok; + _ -> + application:set_env(riak_core, + job_accept_class, + lists:delete(Class, EnabledClasses)) + end; + _ -> ok end. -spec job_class_enabled(Class :: term()) -> boolean(). @@ -798,26 +869,27 @@ disable_job_class(Class) -> %% * Parameter types ARE NOT validated by the same rules as the public API! %% You are STRONGLY advised to use job_class_enabled/2. job_class_enabled(Class) -> - case application:get_env(riak_core, job_accept_class, - undefined) - of - undefined -> true; - [] -> false; - [_ | _] = EnabledClasses -> - lists:member(Class, EnabledClasses); - Other -> - % Don't crash if it's not a list - that should never be the case, - % but since the value *can* be manipulated externally be more - % accommodating. If someone mucks it up, nothing's going to be - % allowed, but give them a chance to catch on instead of crashing. - _ = logger:error("riak_core.job_accept_class is not a " - "list: ~p", - [Other]), - false + case application:get_env(riak_core, + job_accept_class, + undefined) + of + undefined -> true; + [] -> false; + [_ | _] = EnabledClasses -> + lists:member(Class, EnabledClasses); + Other -> + % Don't crash if it's not a list - that should never be the case, + % but since the value *can* be manipulated externally be more + % accommodating. If someone mucks it up, nothing's going to be + % allowed, but give them a chance to catch on instead of crashing. + _ = logger:error("riak_core.job_accept_class is not a " + "list: ~p", + [Other]), + false end. -spec job_class_disabled_message(ReturnType :: atom(), - Class :: term()) -> binary() | string(). + Class :: term()) -> binary() | string(). %% @doc The error message to be returned to a client for a disabled job class. %% WARNING: @@ -825,16 +897,16 @@ job_class_enabled(Class) -> %% when the Jobs API is live. 
job_class_disabled_message(binary, Class) ->
    erlang:list_to_binary(job_class_disabled_message(text,
-                                                    Class));
+                                                    Class));
job_class_disabled_message(text, Class) ->
    lists:flatten(io_lib:format("Operation '~p' is not enabled",
-                                [Class])).
+                                [Class])).

-spec report_job_request_disposition(Accepted ::
-                                         boolean(),
-                                     Class :: term(), Mod :: module(),
-                                     Func :: atom(), Line :: pos_integer(),
-                                     Client :: term()) -> ok | {error, term()}.
+                                         boolean(),
+                                     Class :: term(), Mod :: module(),
+                                     Func :: atom(), Line :: pos_integer(),
+                                     Client :: term()) -> ok | {error, term()}.

%% @doc Report/record the disposition of an async job request.
%%
@@ -854,17 +926,17 @@ job_class_disabled_message(text, Class) ->
 %% request was received.
 %%
 report_job_request_disposition(true, Class, Mod, Func,
-                               Line, Client) ->
+                               Line, Client) ->
     logger:debug("Request '~p' accepted from ~p",
-                 [Class, Client],
-                 #{pid => erlang:self(), module => Mod, function => Func,
-                   line => Line});
+                 [Class, Client],
+                 #{pid => erlang:self(), module => Mod, function => Func,
+                   line => Line});
 report_job_request_disposition(false, Class, Mod, Func,
-                               Line, Client) ->
+                               Line, Client) ->
     logger:warning("Request '~p' disabled from ~p",
-                   [Class, Client],
-                   #{pid => erlang:self(), module => Mod, function => Func,
-                     line => Line}).
+                   [Class, Client],
+                   #{pid => erlang:self(), module => Mod, function => Func,
+                     line => Line}).

 %% ===================================================================
 %% Preflist utility functions
 %% ===================================================================
@@ -891,14 +963,14 @@ preflist_siblings(Index) ->
 %% @doc See {@link preflist_siblings/1}.
 -spec preflist_siblings(index(),
-                        riak_core_ring()) -> [index()].
+                        riak_core_ring()) -> [index()].

 preflist_siblings(Index, Ring) ->
     MaxN = determine_max_n(Ring),
     preflist_siblings(Index, MaxN, Ring).

 -spec preflist_siblings(index(), pos_integer(),
-                        riak_core_ring()) -> [index()].
+                        riak_core_ring()) -> [index()].

 preflist_siblings(Index, N, Ring) ->
     IndexBin = <<Index:160/integer>>,
@@ -916,15 +988,15 @@ responsible_preflists(Index) ->
     responsible_preflists(Index, Ring).

 -spec responsible_preflists(index(),
-                            riak_core_ring()) -> [index_n()].
+                            riak_core_ring()) -> [index_n()].

 responsible_preflists(Index, Ring) ->
     AllN = determine_all_n(Ring),
     responsible_preflists(Index, AllN, Ring).

 -spec responsible_preflists(index(),
-                            [pos_integer(), ...],
-                            riak_core_ring()) -> [index_n()].
+                            [pos_integer(), ...],
+                            riak_core_ring()) -> [index_n()].

 responsible_preflists(Index, AllN, Ring) ->
     IndexBin = <<Index:160/integer>>,
@@ -932,12 +1004,12 @@ responsible_preflists(Index, AllN, Ring) ->
     Indices = [Idx || {Idx, _} <- PL],
     RevIndices = lists:reverse(Indices),
     lists:flatmap(fun (N) ->
-                          responsible_preflists_n(RevIndices, N)
-                  end,
-                  AllN).
+                          responsible_preflists_n(RevIndices, N)
+                  end,
+                  AllN).

 -spec responsible_preflists_n([index()],
-                              pos_integer()) -> [index_n()].
+                              pos_integer()) -> [index_n()].
responsible_preflists_n(RevIndices, N) -> {Pred, _} = lists:split(N, RevIndices), @@ -955,15 +1027,17 @@ determine_max_n(Ring) -> determine_all_n(Ring) -> Buckets = riak_core_ring:get_buckets(Ring), BucketProps = [riak_core_bucket:get_bucket(Bucket, Ring) - || Bucket <- Buckets], + || Bucket <- Buckets], Default = application:get_env(riak_core, - default_bucket_props, undefined), + default_bucket_props, + undefined), DefaultN = proplists:get_value(n_val, Default), AllN = lists:foldl(fun (Props, AllN) -> - N = proplists:get_value(n_val, Props), - ordsets:add_element(N, AllN) - end, - [DefaultN], BucketProps), + N = proplists:get_value(n_val, Props), + ordsets:add_element(N, AllN) + end, + [DefaultN], + BucketProps), AllN. %% =================================================================== @@ -978,55 +1052,93 @@ moment_test() -> clientid_uniqueness_test() -> ClientIds = [mkclientid(somenode@somehost) - || _I <- lists:seq(0, 10000)], + || _I <- lists:seq(0, 10000)], length(ClientIds) =:= - length(sets:to_list(sets:from_list(ClientIds))). + length(sets:to_list(sets:from_list(ClientIds))). build_tree_test() -> - Flat = [1, 11, 12, 111, 112, 121, 122, 1111, 1112, 1121, - 1122, 1211, 1212, 1221, 1222], + Flat = [1, + 11, + 12, + 111, + 112, + 121, + 122, + 1111, + 1112, + 1121, + 1122, + 1211, + 1212, + 1221, + 1222], %% 2-ary tree decomposition - ATree = [{1, [11, 12]}, {11, [111, 112]}, - {12, [121, 122]}, {111, [1111, 1112]}, - {112, [1121, 1122]}, {121, [1211, 1212]}, - {122, [1221, 1222]}, {1111, []}, {1112, []}, {1121, []}, - {1122, []}, {1211, []}, {1212, []}, {1221, []}, - {1222, []}], + ATree = [{1, [11, 12]}, + {11, [111, 112]}, + {12, [121, 122]}, + {111, [1111, 1112]}, + {112, [1121, 1122]}, + {121, [1211, 1212]}, + {122, [1221, 1222]}, + {1111, []}, + {1112, []}, + {1121, []}, + {1122, []}, + {1211, []}, + {1212, []}, + {1221, []}, + {1222, []}], %% 2-ary tree decomposition with cyclic wrap-around - CTree = [{1, [11, 12]}, {11, [111, 112]}, - {12, [121, 122]}, {111, [1111, 1112]}, - {112, [1121, 1122]}, {121, [1211, 1212]}, - {122, [1221, 1222]}, {1111, [1, 11]}, {1112, [12, 111]}, - {1121, [112, 121]}, {1122, [122, 1111]}, - {1211, [1112, 1121]}, {1212, [1122, 1211]}, - {1221, [1212, 1221]}, {1222, [1222, 1]}], + CTree = [{1, [11, 12]}, + {11, [111, 112]}, + {12, [121, 122]}, + {111, [1111, 1112]}, + {112, [1121, 1122]}, + {121, [1211, 1212]}, + {122, [1221, 1222]}, + {1111, [1, 11]}, + {1112, [12, 111]}, + {1121, [112, 121]}, + {1122, [122, 1111]}, + {1211, [1112, 1121]}, + {1212, [1122, 1211]}, + {1221, [1212, 1221]}, + {1222, [1222, 1]}], ?assertEqual(ATree, (build_tree(2, Flat, []))), ?assertEqual(CTree, (build_tree(2, Flat, [cycles]))), ok. counter_loop(N) -> receive - {up, Pid} -> - N2 = N + 1, Pid ! {counter_value, N2}, counter_loop(N2); - down -> counter_loop(N - 1); - exit -> exit(normal) + {up, Pid} -> + N2 = N + 1, + Pid ! {counter_value, N2}, + counter_loop(N2); + down -> counter_loop(N - 1); + exit -> exit(normal) end. incr_counter(CounterPid) -> CounterPid ! {up, self()}, receive - {counter_value, N} -> N after 3000 -> ?assert(false) + {counter_value, N} -> N after 3000 -> ?assert(false) end. decr_counter(CounterPid) -> CounterPid ! down. multi_keydelete_test_() -> - Languages = [{lisp, 1958}, {ml, 1973}, {erlang, 1986}, - {haskell, 1990}, {ocaml, 1996}, {clojure, 2007}, - {elixir, 2012}], - ?_assertMatch([{lisp, _}, {ml, _}, {erlang, _}, - {haskell, _}], - (multi_keydelete([ocaml, clojure, elixir], Languages))). 
+ Languages = [{lisp, 1958}, + {ml, 1973}, + {erlang, 1986}, + {haskell, 1990}, + {ocaml, 1996}, + {clojure, 2007}, + {elixir, 2012}], + ?_assertMatch([{lisp, _}, + {ml, _}, + {erlang, _}, + {haskell, _}], + (multi_keydelete([ocaml, clojure, elixir], Languages))). compose_test_() -> Upper = fun string:to_upper/1, @@ -1036,88 +1148,91 @@ compose_test_() -> Increment = fun (N) when is_integer(N) -> N + 1 end, Double = fun (N) when is_integer(N) -> N * 2 end, Square = fun (N) when is_integer(N) -> N * N end, - SquareDoubleIncrement = compose([Increment, Double, - Square]), + SquareDoubleIncrement = compose([Increment, + Double, + Square]), CompatibleTypes = compose(Increment, - fun (X) when is_list(X) -> list_to_integer(X) - end), + fun (X) when is_list(X) -> list_to_integer(X) + end), IncompatibleTypes = compose(Increment, - fun (X) when is_binary(X) -> binary_to_list(X) - end), + fun (X) when is_binary(X) -> binary_to_list(X) + end), [?_assertEqual("DLROW OLLEH", - (StripReverseUpper("Hello world!"))), + (StripReverseUpper("Hello world!"))), ?_assertEqual((Increment(Double(Square(3)))), - (SquareDoubleIncrement(3))), + (SquareDoubleIncrement(3))), ?_assertMatch(4, (CompatibleTypes("3"))), ?_assertError(function_clause, - (IncompatibleTypes(<<"42">>))), + (IncompatibleTypes(<<"42">>))), ?_assertError(function_clause, - (compose(fun (X, Y) -> {X, Y} end, fun (X) -> X end)))]. + (compose(fun (X, Y) -> {X, Y} end, fun (X) -> X end)))]. pmap_test_() -> Fgood = fun (X) -> 2 * X end, Fbad = fun (3) -> throw(die_on_3); - (X) -> Fgood(X) - end, + (X) -> Fgood(X) + end, Lin = [1, 2, 3, 4], Lout = [2, 4, 6, 8], - {setup, fun () -> error_logger:tty(false) end, + {setup, + fun () -> error_logger:tty(false) end, fun (_) -> error_logger:tty(true) end, [fun () -> - % Test simple map case - ?assertEqual(Lout, (pmap(Fgood, Lin))), - % Verify a crashing process will not stall pmap - Parent = self(), - Pid = spawn(fun () -> - % Caller trapping exits causes stall!! - % TODO: Consider pmapping in a spawned proc - % process_flag(trap_exit, true), - pmap(Fbad, Lin), - ?debugMsg("pmap finished just fine"), - Parent ! no_crash_yo - end), - MonRef = monitor(process, Pid), - receive - {'DOWN', MonRef, _, _, _} -> ok; - no_crash_yo -> ?assert(pmap_did_not_crash_as_expected) - end + % Test simple map case + ?assertEqual(Lout, (pmap(Fgood, Lin))), + % Verify a crashing process will not stall pmap + Parent = self(), + Pid = spawn(fun () -> + % Caller trapping exits causes stall!! + % TODO: Consider pmapping in a spawned proc + % process_flag(trap_exit, true), + pmap(Fbad, Lin), + ?debugMsg("pmap finished just fine"), + Parent ! no_crash_yo + end), + MonRef = monitor(process, Pid), + receive + {'DOWN', MonRef, _, _, _} -> ok; + no_crash_yo -> ?assert(pmap_did_not_crash_as_expected) + end end]}. 
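
%% Following the TODO in pmap_test_/0 above, one way to keep a crashing
%% worker from taking the caller down is to run pmap in a middleman
%% process; a sketch (safe_pmap/2 is a hypothetical helper, not part of
%% this module):

safe_pmap(F, L) ->
    Parent = self(),
    Ref = make_ref(),
    %% the middleman, not the caller, links to the pmap workers
    {Pid, MRef} = spawn_monitor(fun () ->
                                        Parent ! {Ref, pmap(F, L)}
                                end),
    receive
        {Ref, Results} -> erlang:demonitor(MRef, [flush]), {ok, Results};
        {'DOWN', MRef, process, Pid, Reason} -> {error, Reason}
    end.
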
bounded_pmap_test_() -> Fun1 = fun (X) -> X + 2 end, Tests = fun (CountPid) -> - GFun = fun (Max) -> - fun (X) -> - ?assert((incr_counter(CountPid) =< - Max)), - timer:sleep(1), - decr_counter(CountPid), - Fun1(X) - end - end, - [fun () -> - ?assertEqual((lists:seq(Fun1(1), Fun1(N))), - (pmap(GFun(MaxP), lists:seq(1, N), - MaxP))) - end - || MaxP <- lists:seq(1, 20), N <- lists:seq(0, 10)] - end, + GFun = fun (Max) -> + fun (X) -> + ?assert((incr_counter(CountPid) =< + Max)), + timer:sleep(1), + decr_counter(CountPid), + Fun1(X) + end + end, + [fun () -> + ?assertEqual((lists:seq(Fun1(1), Fun1(N))), + (pmap(GFun(MaxP), + lists:seq(1, N), + MaxP))) + end + || MaxP <- lists:seq(1, 20), N <- lists:seq(0, 10)] + end, {setup, fun () -> - Pid = spawn_link(?MODULE, counter_loop, [0]), - monitor(process, Pid), - Pid + Pid = spawn_link(?MODULE, counter_loop, [0]), + monitor(process, Pid), + Pid end, fun (Pid) -> - Pid ! exit, - receive - {'DOWN', _Ref, process, Pid, _Info} -> ok - after 3000 -> - ?debugMsg("pmap counter process did not go down " - "in time"), - ?assert(false) - end, - ok + Pid ! exit, + receive + {'DOWN', _Ref, process, Pid, _Info} -> ok + after 3000 -> + ?debugMsg("pmap counter process did not go down " + "in time"), + ?assert(false) + end, + ok end, Tests}. @@ -1128,10 +1243,10 @@ proxy_spawn_test() -> ?assertEqual({error, killer_fun}, B), %% Ensure no errant 'DOWN' messages receive - {'DOWN', _, _, _, _} = Msg -> - throw({error, {badmsg, Msg}}); - _ -> ok - after 1000 -> ok + {'DOWN', _, _, _, _} = Msg -> + throw({error, {badmsg, Msg}}); + _ -> ok + after 1000 -> ok end. -ifdef(PROPER). @@ -1141,8 +1256,8 @@ count_test() -> prop_count_correct() -> ?FORALL(List, (list(bool())), - (count(fun (E) -> E end, List) =:= - length([E || E <- List, E]))). + (count(fun (E) -> E end, List) =:= + length([E || E <- List, E]))). -endif. %% EQC diff --git a/src/riak_core_vnode.erl b/src/riak_core_vnode.erl index bc839ab35..3656fb97e 100644 --- a/src/riak_core_vnode.erl +++ b/src/riak_core_vnode.erl @@ -22,28 +22,43 @@ -include("riak_core_vnode.hrl"). --export([start_link/3, start_link/4, wait_for_init/1, - send_command/2, send_command_after/2]). - --export([init/1, started/2, started/3, active/2, - active/3, handle_event/3, handle_sync_event/4, - handle_info/3, terminate/3, code_change/4]). +-export([start_link/3, + start_link/4, + wait_for_init/1, + send_command/2, + send_command_after/2]). + +-export([init/1, + started/2, + started/3, + active/2, + active/3, + handle_event/3, + handle_sync_event/4, + handle_info/3, + terminate/3, + code_change/4]). -export([reply/2, monitor/1]). --export([get_mod_index/1, get_modstate/1, - set_forwarding/2, trigger_handoff/2, trigger_handoff/3, - trigger_delete/1, core_status/1, handoff_error/3]). +-export([get_mod_index/1, + get_modstate/1, + set_forwarding/2, + trigger_handoff/2, + trigger_handoff/3, + trigger_delete/1, + core_status/1, + handoff_error/3]). -export([cast_finish_handoff/1, - send_an_event/2, - send_req/2, - send_all_proxy_req/2, - cancel_handoff/1, - handoff_complete/1, - resize_transfer_complete/2, - handoff_data/3, - unregistered/1]). + send_an_event/2, + send_req/2, + send_all_proxy_req/2, + cancel_handoff/1, + handoff_complete/1, + resize_transfer_complete/2, + handoff_data/3, + unregistered/1]). -ifdef(TEST). @@ -60,121 +75,121 @@ -compile({parse_transform, pulse_instrument}). -compile({pulse_replace_module, - [{gen_fsm_compat, pulse_gen_fsm}, - {gen_server, pulse_gen_server}]}). 
+ [{gen_fsm_compat, pulse_gen_fsm}, + {gen_server, pulse_gen_server}]}). -endif. -define(NORMAL_REASON(R), - R == normal orelse - R == shutdown orelse - is_tuple(R) andalso element(1, R) == shutdown). + R == normal orelse + R == shutdown orelse + is_tuple(R) andalso element(1, R) == shutdown). -export_type([vnode_opt/0, pool_opt/0]). -type vnode_opt() :: pool_opt(). -type pool_opt() :: {pool, WorkerModule :: module(), - PoolSize :: pos_integer(), WorkerArgs :: [term()]}. + PoolSize :: pos_integer(), WorkerArgs :: [term()]}. -callback init([partition()]) -> {ok, - ModState :: term()} | - {ok, ModState :: term(), [vnode_opt()]} | - {error, Reason :: term()}. + ModState :: term()} | + {ok, ModState :: term(), [vnode_opt()]} | + {error, Reason :: term()}. -callback handle_command(Request :: term(), - Sender :: sender(), ModState :: term()) -> continue | - {reply, - Reply :: - term(), - NewModState :: - term()} | - {noreply, - NewModState :: - term()} | - {async, - Work :: - function(), - From :: - sender(), - NewModState :: - term()} | - {stop, - Reason :: - term(), - NewModState :: - term()}. + Sender :: sender(), ModState :: term()) -> continue | + {reply, + Reply :: + term(), + NewModState :: + term()} | + {noreply, + NewModState :: + term()} | + {async, + Work :: + function(), + From :: + sender(), + NewModState :: + term()} | + {stop, + Reason :: + term(), + NewModState :: + term()}. -callback handle_coverage(Request :: term(), - keyspaces(), Sender :: sender(), - ModState :: term()) -> continue | - {reply, Reply :: term(), - NewModState :: term()} | - {noreply, - NewModState :: term()} | - {async, Work :: function(), - From :: sender(), - NewModState :: term()} | - {stop, Reason :: term(), - NewModState :: term()}. + keyspaces(), Sender :: sender(), + ModState :: term()) -> continue | + {reply, Reply :: term(), + NewModState :: term()} | + {noreply, + NewModState :: term()} | + {async, Work :: function(), + From :: sender(), + NewModState :: term()} | + {stop, Reason :: term(), + NewModState :: term()}. -callback handle_exit(pid(), Reason :: term(), - ModState :: term()) -> {noreply, - NewModState :: term()} | - {stop, Reason :: term(), - NewModState :: term()}. + ModState :: term()) -> {noreply, + NewModState :: term()} | + {stop, Reason :: term(), + NewModState :: term()}. -callback handoff_starting(handoff_dest(), - ModState :: term()) -> {boolean(), - NewModState :: term()}. + ModState :: term()) -> {boolean(), + NewModState :: term()}. -callback handoff_cancelled(ModState :: term()) -> {ok, - NewModState :: term()}. + NewModState :: term()}. -callback handoff_finished(handoff_dest(), - ModState :: term()) -> {ok, NewModState :: term()}. + ModState :: term()) -> {ok, NewModState :: term()}. -callback handle_handoff_command(Request :: term(), - Sender :: sender(), - ModState :: term()) -> {reply, Reply :: term(), - NewModState :: - term()} | - {noreply, - NewModState :: - term()} | - {async, - Work :: function(), - From :: sender(), - NewModState :: - term()} | - {forward, - NewModState :: - term()} | - {drop, - NewModState :: - term()} | - {stop, Reason :: term(), - NewModState :: term()}. + Sender :: sender(), + ModState :: term()) -> {reply, Reply :: term(), + NewModState :: + term()} | + {noreply, + NewModState :: + term()} | + {async, + Work :: function(), + From :: sender(), + NewModState :: + term()} | + {forward, + NewModState :: + term()} | + {drop, + NewModState :: + term()} | + {stop, Reason :: term(), + NewModState :: term()}. 
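
%% To ground the specs above, a minimal sketch of a callback module
%% implementing just init/1 and handle_command/3 (the module name, the
%% #st{} record and the map-based storage are ours; a real module must
%% cover the remaining callbacks as well):

-module(example_vnode).
-export([init/1, handle_command/3]).

-record(st, {partition, data = #{}}).

init([Partition]) -> {ok, #st{partition = Partition}}.

handle_command({put, K, V}, _Sender, St = #st{data = D}) ->
    {reply, ok, St#st{data = D#{K => V}}};
handle_command({get, K}, _Sender, St = #st{data = D}) ->
    {reply, maps:find(K, D), St}.
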
-callback handle_handoff_data(binary(), - ModState :: term()) -> {reply, - ok | - {error, Reason :: term()}, - NewModState :: term()}. + ModState :: term()) -> {reply, + ok | + {error, Reason :: term()}, + NewModState :: term()}. -callback encode_handoff_item(Key :: term(), - Value :: term()) -> corrupted | binary(). + Value :: term()) -> corrupted | binary(). -callback is_empty(ModState :: term()) -> {boolean(), - NewModState :: term()} | - {false, Size :: pos_integer(), - NewModState :: term()}. + NewModState :: term()} | + {false, Size :: pos_integer(), + NewModState :: term()}. -callback terminate(Reason :: term(), - ModState :: term()) -> ok. + ModState :: term()) -> ok. -callback delete(ModState :: term()) -> {ok, - NewModState :: term()}. + NewModState :: term()}. %% This commands are not executed inside the VNode, instead they are %% part of the vnode_proxy contract. @@ -198,10 +213,10 @@ %% for people doing that! (it's called overflowing message queue hell and is %% really nasty!) -callback handle_overload_command(Request :: term(), - Sender :: sender(), Idx :: partition()) -> ok. + Sender :: sender(), Idx :: partition()) -> ok. -callback handle_overload_info(Request :: term(), - Idx :: partition()) -> ok. + Idx :: partition()) -> ok. %% handle_exit/3 is an optional behaviour callback that can be implemented. %% It will be called in the case that a process that is linked to the vnode @@ -238,90 +253,95 @@ start_link(Mod, Index, Forward) -> start_link(Mod, Index, 0, Forward). start_link(Mod, Index, InitialInactivityTimeout, - Forward) -> + Forward) -> gen_fsm_compat:start_link(?MODULE, - [Mod, Index, InitialInactivityTimeout, Forward], - []). + [Mod, Index, InitialInactivityTimeout, Forward], + []). %% #1 - State started wait_for_init(Vnode) -> - gen_fsm_compat:sync_send_event(Vnode, wait_for_init, - infinity). + gen_fsm_compat:sync_send_event(Vnode, + wait_for_init, + infinity). %% #2 - %% Send a command message for the vnode module by Pid - %% typically to do some deferred processing after returning yourself send_command(Pid, Request) -> gen_fsm_compat:send_event(Pid, - #riak_vnode_req_v1{request = Request}). + #riak_vnode_req_v1{request = Request}). %% #3 - handoff_error(Vnode, Err, Reason) -> gen_fsm_compat:send_event(Vnode, - {handoff_error, Err, Reason}). + {handoff_error, Err, Reason}). %% #4 - get_mod_index(VNode) -> gen_fsm_compat:sync_send_all_state_event(VNode, - get_mod_index). + get_mod_index). %% #5 set_forwarding(VNode, ForwardTo) -> gen_fsm_compat:send_all_state_event(VNode, - {set_forwarding, ForwardTo}). + {set_forwarding, ForwardTo}). %% #6 trigger_handoff(VNode, TargetIdx, TargetNode) -> gen_fsm_compat:send_all_state_event(VNode, - {trigger_handoff, TargetIdx, - TargetNode}). + {trigger_handoff, + TargetIdx, + TargetNode}). %% #7 trigger_handoff(VNode, TargetNode) -> gen_fsm_compat:send_all_state_event(VNode, - {trigger_handoff, TargetNode}). + {trigger_handoff, TargetNode}). %% #8 trigger_delete(VNode) -> gen_fsm_compat:send_all_state_event(VNode, - trigger_delete). + trigger_delete). %% #9 core_status(VNode) -> gen_fsm_compat:sync_send_all_state_event(VNode, - core_status). + core_status). %% #10 %% Sends a command to the FSM that called it after Time %% has passed. -spec send_command_after(integer(), - term()) -> reference(). + term()) -> reference(). send_command_after(Time, Request) -> gen_fsm_compat:send_event_after(Time, - #riak_vnode_req_v1{request = Request}). + #riak_vnode_req_v1{request = Request}). 
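
%% A sketch of the deferred-processing pattern the send_command comment
%% above describes, as seen from a callback module (the command names and
%% the do_put/3 and compact/1 helpers are hypothetical):

handle_command({put, K, V}, _Sender, State) ->
    %% reply now, do the follow-up work in a later command to self
    riak_core_vnode:send_command(self(), compact),
    {reply, ok, do_put(K, V, State)};
handle_command(compact, _Sender, State) ->
    {noreply, compact(State)}.
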
%%%%%%% %new APIs %% #11 - riak_core_vnode_manager - handle_vnode_event cast_finish_handoff(VNode) -> - gen_fsm_compat:send_all_state_event(VNode, finish_handoff). + gen_fsm_compat:send_all_state_event(VNode, + finish_handoff). %% #12 - riak_core_vnode_manager - handle_vnode_event cancel_handoff(VNode) -> - gen_fsm_compat:send_all_state_event(VNode, cancel_handoff). + gen_fsm_compat:send_all_state_event(VNode, + cancel_handoff). %% #13 - riak_core_vnode_master - send_an_event -send_an_event(VNode, Event)-> +send_an_event(VNode, Event) -> gen_fsm_compat:send_event(VNode, Event). %% #14 - riak_core_vnode_master - handle_cast/handle_call - %riak_core_vnode_master - command2 - %riak_core_vnode_proxy - handle_call -send_req(VNode, Req)-> + +%riak_core_vnode_master - command2 +%riak_core_vnode_proxy - handle_call +send_req(VNode, Req) -> gen_fsm_compat:send_event(VNode, Req). %% #15 - riak_core_vnode_master - handle_call -send_all_proxy_req(VNode, Req)-> +send_all_proxy_req(VNode, Req) -> gen_fsm_compat:send_all_state_event(VNode, Req). %% #16 - riak:core_handoff_sender - start_fold_ @@ -330,17 +350,19 @@ handoff_complete(VNode) -> %% #17 - riak:core_handoff_sender - start_fold_ resize_transfer_complete(VNode, NotSentAcc) -> - gen_fsm_compat:send_event(VNode, {resize_transfer_complete, NotSentAcc}). + gen_fsm_compat:send_event(VNode, + {resize_transfer_complete, NotSentAcc}). %% #18 - riak_core_handoff_receiver - process_message handoff_data(VNode, MsgData, VNodeTimeout) -> - gen_fsm_compat:sync_send_all_state_event(VNode, {handoff_data, MsgData}, VNodeTimeout). + gen_fsm_compat:sync_send_all_state_event(VNode, + {handoff_data, MsgData}, + VNodeTimeout). %% #19 - riak_core_vnode_proxy - handle_cast unregistered(VNode) -> gen_fsm_compat:send_event(VNode, unregistered). - %% @doc Send a reply to a vnode request. If %% the Ref is undefined just send the reply %% for compatibility with pre-0.12 requestors. @@ -354,7 +376,7 @@ reply({fsm, undefined, From}, Reply) -> riak_core_send_msg:send_event_unreliable(From, Reply); reply({fsm, Ref, From}, Reply) -> riak_core_send_msg:send_event_unreliable(From, - {Ref, Reply}); + {Ref, Reply}); reply({server, undefined, From}, Reply) -> riak_core_send_msg:reply_unreliable(From, Reply); reply({server, Ref, From}, Reply) -> @@ -368,7 +390,7 @@ reply(ignore, _Reply) -> ok. %% a monitor on `self()' in order to return a valid (if useless) %% monitor reference. -spec monitor(Sender :: sender()) -> Monitor :: - reference(). + reference(). monitor({fsm, _, From}) -> erlang:monitor(process, From); @@ -378,62 +400,61 @@ monitor({raw, _, From}) -> erlang:monitor(process, From); monitor(ignore) -> erlang:monitor(process, self()). - %% ======================== %% ======== %% State, Mode, Init, Terminate %% ======== %% ======================== -record(state, - {index :: partition(), mod :: module(), - modstate :: term(), - forward :: node() | [{integer(), node()}], - handoff_target = none :: none | {integer(), node()}, - handoff_pid :: pid() | undefined, - handoff_type :: - riak_core_handoff_manager:ho_type() | undefined, - pool_pid :: pid() | undefined, - pool_config :: tuple() | undefined, - manager_event_timer :: reference() | undefined, - inactivity_timeout :: non_neg_integer()}). 
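
%% reply/2 and monitor/1 above are the pieces an asynchronous command
%% uses to answer its sender later; a sketch matching the
%% {async, Work, From, NewModState} shape from the handle_command spec
%% (list_keys/1 is a hypothetical accessor):

handle_command(list_keys, Sender, State) ->
    Work = fun () -> riak_core_vnode:reply(Sender, list_keys(State)) end,
    {async, Work, Sender, State}.
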
- + {index :: partition(), + mod :: module(), + modstate :: term(), + forward :: node() | [{integer(), node()}], + handoff_target = none :: none | {integer(), node()}, + handoff_pid :: pid() | undefined, + handoff_type :: + riak_core_handoff_manager:ho_type() | undefined, + pool_pid :: pid() | undefined, + pool_config :: tuple() | undefined, + manager_event_timer :: reference() | undefined, + inactivity_timeout :: non_neg_integer()}). init([Mod, Index, InitialInactivityTimeout, Forward]) -> process_flag(trap_exit, true), State = #state{index = Index, mod = Mod, - forward = Forward, - inactivity_timeout = InitialInactivityTimeout}, + forward = Forward, + inactivity_timeout = InitialInactivityTimeout}, {ok, started, State, 0}. - terminate(Reason, _StateName, - #state{mod = Mod, modstate = ModState, - pool_pid = Pool}) -> -%% Shutdown if the pool is still alive and a normal `Reason' is -%% given - there could be a race on delivery of the unregistered -%% event and successfully shutting down the pool. -try case is_pid(Pool) andalso - is_process_alive(Pool) andalso (?NORMAL_REASON(Reason)) - of - true -> - riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); - _ -> ok - end -catch -Type:Reason:Stacktrace -> - logger:error("Error while shutting down vnode worker " - "pool ~p:~p trace : ~p", - [Type, Reason, Stacktrace]) -after -case ModState of - %% Handoff completed, Mod:delete has been called, now terminate. - {deleted, ModState1} -> - Mod:terminate(Reason, ModState1); - _ -> Mod:terminate(Reason, ModState) -end -end. +terminate(Reason, _StateName, + #state{mod = Mod, modstate = ModState, + pool_pid = Pool}) -> + %% Shutdown if the pool is still alive and a normal `Reason' is + %% given - there could be a race on delivery of the unregistered + %% event and successfully shutting down the pool. + try case is_pid(Pool) andalso + is_process_alive(Pool) andalso (?NORMAL_REASON(Reason)) + of + true -> + riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); + _ -> ok + end + catch + Type:Reason:Stacktrace -> + logger:error("Error while shutting down vnode worker " + "pool ~p:~p trace : ~p", + [Type, Reason, Stacktrace]) + after + case ModState of + %% Handoff completed, Mod:delete has been called, now terminate. + {deleted, ModState1} -> + Mod:terminate(Reason, ModState1); + _ -> Mod:terminate(Reason, ModState) + end + end. code_change(_OldVsn, StateName, State, _Extra) -> -{ok, StateName, State}. + {ok, StateName, State}. %% ======================== %% ======== @@ -441,384 +462,420 @@ code_change(_OldVsn, StateName, State, _Extra) -> %% ======== %% ======================== - %% started %% ======== started(timeout, - State = #state{inactivity_timeout = - InitialInactivityTimeout}) -> + State = #state{inactivity_timeout = + InitialInactivityTimeout}) -> case do_init(State) of - {ok, State2} -> - {next_state, active, State2, InitialInactivityTimeout}; - {error, Reason} -> {stop, Reason} + {ok, State2} -> + {next_state, active, State2, InitialInactivityTimeout}; + {error, Reason} -> {stop, Reason} end. - started(wait_for_init, _From, - State = #state{inactivity_timeout = - InitialInactivityTimeout}) -> + State = #state{inactivity_timeout = + InitialInactivityTimeout}) -> case do_init(State) of - {ok, State2} -> - {reply, ok, active, State2, InitialInactivityTimeout}; - {error, Reason} -> {stop, Reason} + {ok, State2} -> + {reply, ok, active, State2, InitialInactivityTimeout}; + {error, Reason} -> {stop, Reason} end. 
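
%% The pool_pid and pool_config fields above get populated when the
%% callback module's init/1 opts into a worker pool via the pool_opt()
%% type from earlier in this module; a sketch reusing the hypothetical
%% #st{} record from the example_vnode sketch (the worker module name and
%% the empty worker args are illustrative):

init([Partition]) ->
    {ok, #st{partition = Partition},
     [{pool, example_vnode_worker, 10, []}]}.
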
%%active %%%%%%%%%%%% active(timeout, State = #state{mod = Mod, index = Idx}) -> - riak_core_vnode_manager:vnode_event(Mod, Idx, self(), - inactive), + riak_core_vnode_manager:vnode_event(Mod, + Idx, + self(), + inactive), continue(State); active(#riak_coverage_req_v1{keyspaces = KeySpaces, - request = Request, sender = Sender}, + request = Request, sender = Sender}, State) -> %% Coverage request handled in handoff and non-handoff. Will be forwarded if set. vnode_coverage(Sender, Request, KeySpaces, State); active(#riak_vnode_req_v1{sender = Sender, - request = {resize_forward, Request}}, + request = {resize_forward, Request}}, State) -> vnode_command(Sender, Request, State); active(#riak_vnode_req_v1{sender = Sender, - request = Request}, + request = Request}, State = #state{handoff_target = HT}) when HT =:= none -> forward_or_vnode_command(Sender, Request, State); active(#riak_vnode_req_v1{sender = Sender, - request = Request}, + request = Request}, State = #state{handoff_type = resize, - handoff_target = {HOIdx, HONode}, index = Index, - forward = Forward, mod = Mod}) -> + handoff_target = {HOIdx, HONode}, index = Index, + forward = Forward, mod = Mod}) -> RequestHash = Mod:request_hash(Request), case RequestHash of - %% will never have enough information to forward request so only handle locally - undefined -> vnode_command(Sender, Request, State); - _ -> - {ok, R} = riak_core_ring_manager:get_my_ring(), - FutureIndex = riak_core_ring:future_index(RequestHash, - Index, R), - case FutureIndex of - %% request for portion of keyspace currently being transferred - HOIdx -> - vnode_handoff_command(Sender, Request, {HOIdx, HONode}, - State); - %% some portions of keyspace already transferred - _Other when is_list(Forward) -> - vnode_resize_command(Sender, Request, FutureIndex, - State); - %% some portions of keyspace not already transferred - _Other -> vnode_command(Sender, Request, State) - end + %% will never have enough information to forward request so only handle locally + undefined -> vnode_command(Sender, Request, State); + _ -> + {ok, R} = riak_core_ring_manager:get_my_ring(), + FutureIndex = riak_core_ring:future_index(RequestHash, + Index, + R), + case FutureIndex of + %% request for portion of keyspace currently being transferred + HOIdx -> + vnode_handoff_command(Sender, + Request, + {HOIdx, HONode}, + State); + %% some portions of keyspace already transferred + _Other when is_list(Forward) -> + vnode_resize_command(Sender, + Request, + FutureIndex, + State); + %% some portions of keyspace not already transferred + _Other -> vnode_command(Sender, Request, State) + end end; active(#riak_vnode_req_v1{sender = Sender, - request = Request}, + request = Request}, State) -> - vnode_handoff_command(Sender, Request, - State#state.handoff_target, State); + vnode_handoff_command(Sender, + Request, + State#state.handoff_target, + State); active(handoff_complete, State) -> State2 = start_manager_event_timer(handoff_complete, - State), + State), continue(State2); active({resize_transfer_complete, SeenIdxs}, State = #state{mod = Mod, modstate = ModState, - handoff_target = Target}) -> + handoff_target = Target}) -> case Target of - none -> continue(State); - _ -> - %% TODO: refactor similarties w/ finish_handoff handle_event - {ok, NewModState} = Mod:handoff_finished(Target, - ModState), - finish_handoff(SeenIdxs, - State#state{modstate = NewModState}) + none -> continue(State); + _ -> + %% TODO: refactor similarties w/ finish_handoff handle_event + {ok, NewModState} = 
Mod:handoff_finished(Target, + ModState), + finish_handoff(SeenIdxs, + State#state{modstate = NewModState}) end; active({handoff_error, _Err, _Reason}, State) -> State2 = start_manager_event_timer(handoff_error, - State), + State), continue(State2); active({send_manager_event, Event}, State) -> State2 = start_manager_event_timer(Event, State), continue(State2); active({trigger_handoff, TargetNode}, State) -> active({trigger_handoff, State#state.index, TargetNode}, - State); + State); active({trigger_handoff, TargetIdx, TargetNode}, State) -> maybe_handoff(TargetIdx, TargetNode, State); active(trigger_delete, State = #state{mod = Mod, modstate = ModState, - index = Idx}) -> + index = Idx}) -> case mark_delete_complete(Idx, Mod) of - {ok, _NewRing} -> - {ok, NewModState} = Mod:delete(ModState), - logger:debug("~p ~p vnode deleted", [Idx, Mod]); - _ -> NewModState = ModState + {ok, _NewRing} -> + {ok, NewModState} = Mod:delete(ModState), + logger:debug("~p ~p vnode deleted", [Idx, Mod]); + _ -> NewModState = ModState end, maybe_shutdown_pool(State), riak_core_vnode_manager:unregister_vnode(Idx, Mod), continue(State#state{modstate = - {deleted, NewModState}}); + {deleted, NewModState}}); active(unregistered, State = #state{mod = Mod, index = Index}) -> %% Add exclusion so the ring handler will not try to spin this vnode %% up until it receives traffic. riak_core_handoff_manager:add_exclusion(Mod, Index), logger:debug("~p ~p vnode excluded and unregistered.", - [Index, Mod]), - {stop, normal, + [Index, Mod]), + {stop, + normal, State#state{handoff_target = none, - handoff_type = undefined, pool_pid = undefined}}. + handoff_type = undefined, pool_pid = undefined}}. active(_Event, _From, State) -> Reply = ok, - {reply, Reply, active, State, + {reply, + Reply, + active, + State, State#state.inactivity_timeout}. %% handle_event %%%%%%%%%%%%%%%% handle_event({set_forwarding, undefined}, _StateName, - State = #state{modstate = {deleted, _ModState}}) -> + State = #state{modstate = {deleted, _ModState}}) -> %% The vnode must forward requests when in the deleted state, therefore %% ignore requests to stop forwarding. 
continue(State); handle_event({set_forwarding, ForwardTo}, _StateName, - State) -> + State) -> logger:debug("vnode fwd :: ~p/~p :: ~p -> ~p~n", - [State#state.mod, State#state.index, - State#state.forward, ForwardTo]), + [State#state.mod, + State#state.index, + State#state.forward, + ForwardTo]), State2 = mod_set_forwarding(ForwardTo, State), continue(State2#state{forward = ForwardTo}); handle_event(finish_handoff, _StateName, - State = #state{modstate = {deleted, _ModState}}) -> + State = #state{modstate = {deleted, _ModState}}) -> stop_manager_event_timer(State), continue(State#state{handoff_target = none}); handle_event(finish_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState, - handoff_target = Target}) -> + State = #state{mod = Mod, modstate = ModState, + handoff_target = Target}) -> stop_manager_event_timer(State), case Target of - none -> continue(State); - _ -> - {ok, NewModState} = Mod:handoff_finished(Target, - ModState), - finish_handoff(State#state{modstate = NewModState}) + none -> continue(State); + _ -> + {ok, NewModState} = Mod:handoff_finished(Target, + ModState), + finish_handoff(State#state{modstate = NewModState}) end; handle_event(cancel_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> %% it would be nice to pass {Err, Reason} to the vnode but the %% API doesn't currently allow for that. stop_manager_event_timer(State), case State#state.handoff_target of - none -> continue(State); - _ -> - {ok, NewModState} = Mod:handoff_cancelled(ModState), - continue(State#state{handoff_target = none, - handoff_type = undefined, - modstate = NewModState}) + none -> continue(State); + _ -> + {ok, NewModState} = Mod:handoff_cancelled(ModState), + continue(State#state{handoff_target = none, + handoff_type = undefined, + modstate = NewModState}) end; handle_event({trigger_handoff, TargetNode}, StateName, - State) -> - handle_event({trigger_handoff, State#state.index, - TargetNode}, - StateName, State); + State) -> + handle_event({trigger_handoff, + State#state.index, + TargetNode}, + StateName, + State); handle_event({trigger_handoff, _TargetIdx, _TargetNode}, - _StateName, - State = #state{modstate = {deleted, _ModState}}) -> + _StateName, + State = #state{modstate = {deleted, _ModState}}) -> continue(State); -handle_event(R = {trigger_handoff, _TargetIdx, - _TargetNode}, - _StateName, State) -> +handle_event(R = {trigger_handoff, + _TargetIdx, + _TargetNode}, + _StateName, State) -> active(R, State); handle_event(trigger_delete, _StateName, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> continue(State); handle_event(trigger_delete, _StateName, State) -> active(trigger_delete, State); handle_event(R = #riak_vnode_req_v1{}, _StateName, - State) -> + State) -> active(R, State); handle_event(R = #riak_coverage_req_v1{}, _StateName, - State) -> + State) -> active(R, State). 
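
A small driver for the all-state events handled above (index and node values
are placeholders). Both casts are routed through handle_event/3; note that a
vnode in the {deleted, _} modstate still accepts a new forwarding target but,
per the first clause above, ignores {set_forwarding, undefined}.

    rebalance_demo(VNode, TargetIdx, TargetNode) ->
        riak_core_vnode:trigger_handoff(VNode, TargetIdx, TargetNode),
        riak_core_vnode:set_forwarding(VNode, TargetNode).
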
%%handle_sync_event %%%%%%%%%%%%%%%%%%%% handle_sync_event(current_state, _From, StateName, - State) -> + State) -> {reply, {StateName, State}, StateName, State}; handle_sync_event(get_mod_index, _From, StateName, - State = #state{index = Idx, mod = Mod}) -> - {reply, {Mod, Idx}, StateName, State, + State = #state{index = Idx, mod = Mod}) -> + {reply, + {Mod, Idx}, + StateName, + State, State#state.inactivity_timeout}; handle_sync_event({handoff_data, _BinObj}, _From, - StateName, - State = #state{modstate = {deleted, _ModState}}) -> - {reply, {error, vnode_exiting}, StateName, State, + StateName, + State = #state{modstate = {deleted, _ModState}}) -> + {reply, + {error, vnode_exiting}, + StateName, + State, State#state.inactivity_timeout}; handle_sync_event({handoff_data, BinObj}, _From, - StateName, - State = #state{mod = Mod, modstate = ModState}) -> + StateName, + State = #state{mod = Mod, modstate = ModState}) -> case Mod:handle_handoff_data(BinObj, ModState) of - {reply, ok, NewModState} -> - {reply, ok, StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout}; - {reply, {error, Err}, NewModState} -> - logger:error("~p failed to store handoff obj: ~p", - [Mod, Err]), - {reply, {error, Err}, StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout} + {reply, ok, NewModState} -> + {reply, + ok, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout}; + {reply, {error, Err}, NewModState} -> + logger:error("~p failed to store handoff obj: ~p", + [Mod, Err]), + {reply, + {error, Err}, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout} end; handle_sync_event(core_status, _From, StateName, - State = #state{index = Index, mod = Mod, - modstate = ModState, handoff_target = HT, - forward = FN}) -> + State = #state{index = Index, mod = Mod, + modstate = ModState, handoff_target = HT, + forward = FN}) -> Mode = case {FN, HT} of - {undefined, none} -> active; - {undefined, HT} -> handoff; - {FN, none} -> forward; - _ -> undefined - end, + {undefined, none} -> active; + {undefined, HT} -> handoff; + {FN, none} -> forward; + _ -> undefined + end, Status = [{index, Index}, {mod, Mod}] ++ - case FN of - undefined -> []; - _ -> [{forward, FN}] - end - ++ - case HT of - none -> []; - _ -> [{handoff_target, HT}] - end - ++ - case ModState of - {deleted, _} -> [deleted]; - _ -> [] - end, - {reply, {Mode, Status}, StateName, State, + case FN of + undefined -> []; + _ -> [{forward, FN}] + end + ++ + case HT of + none -> []; + _ -> [{handoff_target, HT}] + end + ++ + case ModState of + {deleted, _} -> [deleted]; + _ -> [] + end, + {reply, + {Mode, Status}, + StateName, + State, State#state.inactivity_timeout}. 
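
The {Mode, Status} reply assembled above is a plain tuple plus proplist, so a
caller can take it apart with proplists; a short consumer sketch (the printing
format is illustrative):

    print_status(VNode) ->
        {Mode, Status} = riak_core_vnode:core_status(VNode),
        Idx = proplists:get_value(index, Status),
        Mod = proplists:get_value(mod, Status),
        Fwd = proplists:get_value(forward, Status, none),
        HT  = proplists:get_value(handoff_target, Status, none),
        io:format("~p/~p mode=~p forward=~p handoff=~p~n",
                  [Mod, Idx, Mode, Fwd, HT]).
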
%%handle_info %%%%%%%%%%%%%% handle_info({'$vnode_proxy_ping', From, Ref, Msgs}, - StateName, State) -> + StateName, State) -> riak_core_vnode_proxy:cast(From, - {vnode_proxy_pong, Ref, Msgs}), - {next_state, StateName, State, + {vnode_proxy_pong, Ref, Msgs}), + {next_state, + StateName, + State, State#state.inactivity_timeout}; handle_info({'EXIT', Pid, Reason}, _StateName, - State = #state{mod = Mod, index = Index, pool_pid = Pid, - pool_config = PoolConfig}) -> + State = #state{mod = Mod, index = Index, pool_pid = Pid, + pool_config = PoolConfig}) -> case Reason of - Reason when Reason == normal; Reason == shutdown -> - continue(State#state{pool_pid = undefined}); - _ -> - logger:error("~p ~p worker pool crashed ~p\n", - [Index, Mod, Reason]), - {pool, WorkerModule, PoolSize, WorkerArgs} = PoolConfig, - logger:debug("starting worker pool ~p with size of " - "~p for vnode ~p.", - [WorkerModule, PoolSize, Index]), - {ok, NewPoolPid} = - riak_core_vnode_worker_pool:start_link(WorkerModule, - PoolSize, Index, - WorkerArgs, worker_props), - continue(State#state{pool_pid = NewPoolPid}) + Reason when Reason == normal; Reason == shutdown -> + continue(State#state{pool_pid = undefined}); + _ -> + logger:error("~p ~p worker pool crashed ~p\n", + [Index, Mod, Reason]), + {pool, WorkerModule, PoolSize, WorkerArgs} = PoolConfig, + logger:debug("starting worker pool ~p with size of " + "~p for vnode ~p.", + [WorkerModule, PoolSize, Index]), + {ok, NewPoolPid} = + riak_core_vnode_worker_pool:start_link(WorkerModule, + PoolSize, + Index, + WorkerArgs, + worker_props), + continue(State#state{pool_pid = NewPoolPid}) end; handle_info({'DOWN', _Ref, process, _Pid, normal}, - _StateName, State = #state{modstate = {deleted, _}}) -> + _StateName, State = #state{modstate = {deleted, _}}) -> %% these messages are produced by riak_kv_vnode's aae tree %% monitors; they are harmless, so don't yell about them. also %% only dustbin them in the deleted modstate, because pipe vnodes %% need them in other states continue(State); handle_info(Info, _StateName, - State = #state{mod = Mod, modstate = {deleted, _}, - index = Index}) -> + State = #state{mod = Mod, modstate = {deleted, _}, + index = Index}) -> logger:info("~p ~p ignored handle_info ~p - vnode " - "unregistering\n", - [Index, Mod, Info]), + "unregistering\n", + [Index, Mod, Info]), continue(State); handle_info({'EXIT', Pid, Reason}, StateName, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> %% A linked processes has died so use the %% handle_exit callback to allow the vnode %% process to take appropriate action. %% If the function is not implemented default %% to crashing the process. 
try case Mod:handle_exit(Pid, Reason, ModState) of - {noreply, NewModState} -> - {next_state, StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout}; - {stop, Reason1, NewModState} -> - {stop, Reason1, State#state{modstate = NewModState}} - end + {noreply, NewModState} -> + {next_state, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout}; + {stop, Reason1, NewModState} -> + {stop, Reason1, State#state{modstate = NewModState}} + end catch - _ErrorType:undef -> {stop, linked_process_crash, State} + _ErrorType:undef -> {stop, linked_process_crash, State} end; handle_info(Info, StateName, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> case erlang:function_exported(Mod, handle_info, 2) of - true -> - {ok, NewModState} = Mod:handle_info(Info, ModState), - {next_state, StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout}; - false -> - {next_state, StateName, State, - State#state.inactivity_timeout} + true -> + {ok, NewModState} = Mod:handle_info(Info, ModState), + {next_state, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout}; + false -> + {next_state, + StateName, + State, + State#state.inactivity_timeout} end. - - - %% ======================== %% ======== %% Internal Helper Functions %% ======== %% ======================== do_init(State = #state{index = Index, mod = Mod, - forward = Forward}) -> + forward = Forward}) -> {ModState, Props} = case Mod:init([Index]) of - {ok, MS} -> {MS, []}; - {ok, MS, P} -> {MS, P}; - {error, R} -> {error, R} - end, + {ok, MS} -> {MS, []}; + {ok, MS, P} -> {MS, P}; + {error, R} -> {error, R} + end, case {ModState, Props} of - {error, Reason} -> {error, Reason}; - _ -> - case lists:keyfind(pool, 1, Props) of - {pool, WorkerModule, PoolSize, WorkerArgs} = - PoolConfig -> - logger:debug("starting worker pool ~p with size of " - "~p~n", - [WorkerModule, PoolSize]), - {ok, PoolPid} = - riak_core_vnode_worker_pool:start_link(WorkerModule, - PoolSize, Index, - WorkerArgs, - worker_props); - _ -> PoolPid = PoolConfig = undefined - end, - riak_core_handoff_manager:remove_exclusion(Mod, Index), - Timeout = application:get_env(riak_core, - vnode_inactivity_timeout, - ?DEFAULT_TIMEOUT), - Timeout2 = Timeout + riak_core_rand:uniform(Timeout), - State2 = State#state{modstate = ModState, - inactivity_timeout = Timeout2, - pool_pid = PoolPid, pool_config = PoolConfig}, - logger:debug("vnode :: ~p/~p :: ~p~n", - [Mod, Index, Forward]), - State3 = mod_set_forwarding(Forward, State2), - {ok, State3} + {error, Reason} -> {error, Reason}; + _ -> + case lists:keyfind(pool, 1, Props) of + {pool, WorkerModule, PoolSize, WorkerArgs} = + PoolConfig -> + logger:debug("starting worker pool ~p with size of " + "~p~n", + [WorkerModule, PoolSize]), + {ok, PoolPid} = + riak_core_vnode_worker_pool:start_link(WorkerModule, + PoolSize, + Index, + WorkerArgs, + worker_props); + _ -> PoolPid = PoolConfig = undefined + end, + riak_core_handoff_manager:remove_exclusion(Mod, Index), + Timeout = application:get_env(riak_core, + vnode_inactivity_timeout, + ?DEFAULT_TIMEOUT), + Timeout2 = Timeout + riak_core_rand:uniform(Timeout), + State2 = State#state{modstate = ModState, + inactivity_timeout = Timeout2, + pool_pid = PoolPid, pool_config = PoolConfig}, + logger:debug("vnode :: ~p/~p :: ~p~n", + [Mod, Index, Forward]), + State3 = mod_set_forwarding(Forward, State2), + {ok, State3} end. 
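
do_init/1 accepts both return shapes from the behaviour module's init/1; a
hypothetical module that opts into the async worker pool would return the
{pool, ...} property (my_vnode_worker and the pool size are made-up values):

    init([Index]) ->
        %% {pool, WorkerModule, PoolSize, WorkerArgs} is matched out of
        %% Props by do_init/1, which then starts the worker pool.
        {ok, #{index => Index}, [{pool, my_vnode_worker, 10, []}]}.
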
- continue(State) -> - {next_state, active, State, + {next_state, + active, + State, State#state.inactivity_timeout}. continue(State, NewModState) -> @@ -853,164 +910,202 @@ continue(State, NewModState) -> %% to a partition for which the transfer has already completed, are forwarded. All other %% requests are passed to handle_handoff_command. forward_or_vnode_command(Sender, Request, - State = #state{forward = Forward, mod = Mod, - index = Index}) -> + State = #state{forward = Forward, mod = Mod, + index = Index}) -> Resizing = is_list(Forward), RequestHash = case Resizing of - true -> Mod:request_hash(Request); - false -> undefined - end, + true -> Mod:request_hash(Request); + false -> undefined + end, case {Forward, RequestHash} of - %% typical vnode operation, no forwarding set, handle request locally - {undefined, _} -> vnode_command(Sender, Request, State); - %% implicit forwarding after ownership transfer/hinted handoff - {F, _} when not is_list(F) -> - vnode_forward(implicit, {Index, Forward}, Sender, - Request, State), - continue(State); - %% during resize we can't forward a request w/o request hash, always handle locally - {_, undefined} -> vnode_command(Sender, Request, State); - %% possible forwarding during ring resizing - {_, _} -> - {ok, R} = riak_core_ring_manager:get_my_ring(), - FutureIndex = riak_core_ring:future_index(RequestHash, - Index, R), - vnode_resize_command(Sender, Request, FutureIndex, - State) + %% typical vnode operation, no forwarding set, handle request locally + {undefined, _} -> vnode_command(Sender, Request, State); + %% implicit forwarding after ownership transfer/hinted handoff + {F, _} when not is_list(F) -> + vnode_forward(implicit, + {Index, Forward}, + Sender, + Request, + State), + continue(State); + %% during resize we can't forward a request w/o request hash, always handle locally + {_, undefined} -> vnode_command(Sender, Request, State); + %% possible forwarding during ring resizing + {_, _} -> + {ok, R} = riak_core_ring_manager:get_my_ring(), + FutureIndex = riak_core_ring:future_index(RequestHash, + Index, + R), + vnode_resize_command(Sender, + Request, + FutureIndex, + State) end. 
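
The resize path above leans on the behaviour's request_hash/1. A hypothetical
callback: hash requests that carry a key, and return undefined for anything
that cannot be mapped to a single keyspace, which forces local handling in
both forward_or_vnode_command/3 and the resize clause of active/2.

    request_hash({put, Bucket, Key, _Val}) ->
        riak_core_util:chash_key({Bucket, Key});
    request_hash(_Other) ->
        %% No single future owner can be computed; handle locally.
        undefined.
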
vnode_command(_Sender, _Request, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> continue(State); vnode_command(Sender, Request, - State = #state{mod = Mod, modstate = ModState, - pool_pid = Pool}) -> + State = #state{mod = Mod, modstate = ModState, + pool_pid = Pool}) -> case catch Mod:handle_command(Request, Sender, ModState) - of - {'EXIT', ExitReason} -> - reply(Sender, {vnode_error, ExitReason}), - logger:error("~p command failed ~p", [Mod, ExitReason]), - {stop, ExitReason, State#state{modstate = ModState}}; - continue -> continue(State, ModState); - {reply, Reply, NewModState} -> - reply(Sender, Reply), continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, Work, - From), - continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, State#state{modstate = NewModState}} + of + {'EXIT', ExitReason} -> + reply(Sender, {vnode_error, ExitReason}), + logger:error("~p command failed ~p", [Mod, ExitReason]), + {stop, ExitReason, State#state{modstate = ModState}}; + continue -> continue(State, ModState); + {reply, Reply, NewModState} -> + reply(Sender, Reply), + continue(State, NewModState); + {noreply, NewModState} -> continue(State, NewModState); + {async, Work, From, NewModState} -> + %% dispatch some work to the vnode worker pool + %% the result is sent back to 'From' + riak_core_vnode_worker_pool:handle_work(Pool, + Work, + From), + continue(State, NewModState); + {stop, Reason, NewModState} -> + {stop, Reason, State#state{modstate = NewModState}} end. vnode_coverage(Sender, Request, KeySpaces, - State = #state{index = Index, mod = Mod, - modstate = ModState, pool_pid = Pool, - forward = Forward}) -> + State = #state{index = Index, mod = Mod, + modstate = ModState, pool_pid = Pool, + forward = Forward}) -> %% Check if we should forward case Forward of - undefined -> - Action = Mod:handle_coverage(Request, KeySpaces, Sender, - ModState); - %% handle coverage requests locally during ring resize - Forwards when is_list(Forwards) -> - Action = Mod:handle_coverage(Request, KeySpaces, Sender, - ModState); - NextOwner -> - logger:debug("Forwarding coverage ~p -> ~p: ~p~n", - [node(), NextOwner, Index]), - riak_core_vnode_master:coverage(Request, - {Index, NextOwner}, KeySpaces, Sender, - riak_core_vnode_master:reg_name(Mod)), - Action = continue + undefined -> + Action = Mod:handle_coverage(Request, + KeySpaces, + Sender, + ModState); + %% handle coverage requests locally during ring resize + Forwards when is_list(Forwards) -> + Action = Mod:handle_coverage(Request, + KeySpaces, + Sender, + ModState); + NextOwner -> + logger:debug("Forwarding coverage ~p -> ~p: ~p~n", + [node(), NextOwner, Index]), + riak_core_vnode_master:coverage(Request, + {Index, NextOwner}, + KeySpaces, + Sender, + riak_core_vnode_master:reg_name(Mod)), + Action = continue end, case Action of - continue -> continue(State, ModState); - {reply, Reply, NewModState} -> - reply(Sender, Reply), continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, Work, - From), - continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, 
State#state{modstate = NewModState}} + continue -> continue(State, ModState); + {reply, Reply, NewModState} -> + reply(Sender, Reply), + continue(State, NewModState); + {noreply, NewModState} -> continue(State, NewModState); + {async, Work, From, NewModState} -> + %% dispatch some work to the vnode worker pool + %% the result is sent back to 'From' + riak_core_vnode_worker_pool:handle_work(Pool, + Work, + From), + continue(State, NewModState); + {stop, Reason, NewModState} -> + {stop, Reason, State#state{modstate = NewModState}} end. vnode_handoff_command(Sender, Request, ForwardTo, - State = #state{mod = Mod, modstate = ModState, - handoff_target = HOTarget, - handoff_type = HOType, pool_pid = Pool}) -> - case Mod:handle_handoff_command(Request, Sender, - ModState) - of - {reply, Reply, NewModState} -> - reply(Sender, Reply), continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, Work, - From), - continue(State, NewModState); - {forward, NewModState} -> - forward_request(HOType, Request, HOTarget, ForwardTo, - Sender, State), - continue(State, NewModState); - {forward, NewReq, NewModState} -> - forward_request(HOType, NewReq, HOTarget, ForwardTo, - Sender, State), - continue(State, NewModState); - {drop, NewModState} -> continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, State#state{modstate = NewModState}} + State = #state{mod = Mod, modstate = ModState, + handoff_target = HOTarget, + handoff_type = HOType, pool_pid = Pool}) -> + case Mod:handle_handoff_command(Request, + Sender, + ModState) + of + {reply, Reply, NewModState} -> + reply(Sender, Reply), + continue(State, NewModState); + {noreply, NewModState} -> continue(State, NewModState); + {async, Work, From, NewModState} -> + %% dispatch some work to the vnode worker pool + %% the result is sent back to 'From' + riak_core_vnode_worker_pool:handle_work(Pool, + Work, + From), + continue(State, NewModState); + {forward, NewModState} -> + forward_request(HOType, + Request, + HOTarget, + ForwardTo, + Sender, + State), + continue(State, NewModState); + {forward, NewReq, NewModState} -> + forward_request(HOType, + NewReq, + HOTarget, + ForwardTo, + Sender, + State), + continue(State, NewModState); + {drop, NewModState} -> continue(State, NewModState); + {stop, Reason, NewModState} -> + {stop, Reason, State#state{modstate = NewModState}} end. %% @private wrap the request for resize forwards, and use the resize %% target. 
forward_request(resize, Request, _HOTarget, - ResizeTarget, Sender, State) -> + ResizeTarget, Sender, State) -> %% resize op and transfer ongoing - vnode_forward(resize, ResizeTarget, Sender, - {resize_forward, Request}, State); + vnode_forward(resize, + ResizeTarget, + Sender, + {resize_forward, Request}, + State); forward_request(undefined, Request, _HOTarget, - ResizeTarget, Sender, State) -> + ResizeTarget, Sender, State) -> %% resize op ongoing, no resize transfer ongoing, arrive here %% via forward_or_vnode_command - vnode_forward(resize, ResizeTarget, Sender, - {resize_forward, Request}, State); + vnode_forward(resize, + ResizeTarget, + Sender, + {resize_forward, Request}, + State); forward_request(_, Request, HOTarget, _ResizeTarget, - Sender, State) -> + Sender, State) -> %% normal explicit forwarding during owhership transfer - vnode_forward(explicit, HOTarget, Sender, Request, - State). + vnode_forward(explicit, + HOTarget, + Sender, + Request, + State). vnode_forward(Type, ForwardTo, Sender, Request, - State) -> + State) -> logger:debug("Forwarding (~p) {~p,~p} -> ~p~n", - [Type, State#state.index, node(), ForwardTo]), + [Type, State#state.index, node(), ForwardTo]), riak_core_vnode_master:command_unreliable(ForwardTo, - Request, Sender, - riak_core_vnode_master:reg_name(State#state.mod)). + Request, + Sender, + riak_core_vnode_master:reg_name(State#state.mod)). %% @doc during ring resizing if we have completed a transfer to the index that will %% handle request in future ring we forward to it. Otherwise we delegate %% to the local vnode like other requests during handoff vnode_resize_command(Sender, Request, FutureIndex, - State = #state{forward = Forward}) + State = #state{forward = Forward}) when is_list(Forward) -> case lists:keyfind(FutureIndex, 1, Forward) of - false -> vnode_command(Sender, Request, State); - {FutureIndex, FutureOwner} -> - vnode_handoff_command(Sender, Request, - {FutureIndex, FutureOwner}, State) + false -> vnode_command(Sender, Request, State); + {FutureIndex, FutureOwner} -> + vnode_handoff_command(Sender, + Request, + {FutureIndex, FutureOwner}, + State) end. - %% This code lives in riak_core_vnode rather than riak_core_vnode_manager %% because the ring_trans call is a synchronous call to the ring manager, %% and it is better to block an individual vnode rather than the vnode @@ -1018,141 +1113,152 @@ vnode_resize_command(Sender, Request, FutureIndex, %% to execute on multiple parallel vnodes because of the synchronization %% afforded by having all ring changes go through the single ring manager. 
mark_handoff_complete(SrcIdx, Target, SeenIdxs, Mod, - resize) -> + resize) -> Prev = node(), Source = {SrcIdx, Prev}, TransFun = fun (Ring, _) -> - Owner = riak_core_ring:index_owner(Ring, SrcIdx), - Status = riak_core_ring:resize_transfer_status(Ring, - Source, - Target, - Mod), - case {Owner, Status} of - {Prev, awaiting} -> - F = fun (SeenIdx, RingAcc) -> - riak_core_ring:schedule_resize_transfer(RingAcc, - Source, - SeenIdx) - end, - Ring2 = lists:foldl(F, Ring, - ordsets:to_list(SeenIdxs)), - Ring3 = - riak_core_ring:resize_transfer_complete(Ring2, - Source, - Target, - Mod), - %% local ring optimization (see below) - {set_only, Ring3}; - _ -> ignore - end - end, + Owner = riak_core_ring:index_owner(Ring, SrcIdx), + Status = riak_core_ring:resize_transfer_status(Ring, + Source, + Target, + Mod), + case {Owner, Status} of + {Prev, awaiting} -> + F = fun (SeenIdx, RingAcc) -> + riak_core_ring:schedule_resize_transfer(RingAcc, + Source, + SeenIdx) + end, + Ring2 = lists:foldl(F, + Ring, + ordsets:to_list(SeenIdxs)), + Ring3 = + riak_core_ring:resize_transfer_complete(Ring2, + Source, + Target, + Mod), + %% local ring optimization (see below) + {set_only, Ring3}; + _ -> ignore + end + end, Result = riak_core_ring_manager:ring_trans(TransFun, - []), + []), case Result of - {ok, _NewRing} -> resize; - _ -> continue + {ok, _NewRing} -> resize; + _ -> continue end; mark_handoff_complete(Idx, {Idx, New}, [], Mod, _) -> Prev = node(), Result = riak_core_ring_manager:ring_trans(fun (Ring, - _) -> - Owner = - riak_core_ring:index_owner(Ring, - Idx), - {_, NextOwner, Status} = - riak_core_ring:next_owner(Ring, - Idx, - Mod), - NewStatus = - riak_core_ring:member_status(Ring, - New), - case {Owner, NextOwner, - NewStatus, Status} - of - {Prev, New, _, - awaiting} -> - Ring2 = - riak_core_ring:handoff_complete(Ring, - Idx, - Mod), - %% Optimization. Only alter the local ring without - %% triggering a gossip, thus implicitly coalescing - %% multiple vnode handoff completion events. In the - %% future we should decouple vnode handoff state from - %% the ring structure in order to make gossip independent - %% of ring size. - {set_only, Ring2}; - _ -> ignore - end - end, - []), + _) -> + Owner = + riak_core_ring:index_owner(Ring, + Idx), + {_, NextOwner, Status} = + riak_core_ring:next_owner(Ring, + Idx, + Mod), + NewStatus = + riak_core_ring:member_status(Ring, + New), + case {Owner, + NextOwner, + NewStatus, + Status} + of + {Prev, + New, + _, + awaiting} -> + Ring2 = + riak_core_ring:handoff_complete(Ring, + Idx, + Mod), + %% Optimization. Only alter the local ring without + %% triggering a gossip, thus implicitly coalescing + %% multiple vnode handoff completion events. In the + %% future we should decouple vnode handoff state from + %% the ring structure in order to make gossip independent + %% of ring size. + {set_only, + Ring2}; + _ -> ignore + end + end, + []), case Result of - {ok, NewRing} -> NewRing = NewRing; - _ -> - {ok, NewRing} = riak_core_ring_manager:get_my_ring() + {ok, NewRing} -> NewRing = NewRing; + _ -> + {ok, NewRing} = riak_core_ring_manager:get_my_ring() end, Owner = riak_core_ring:index_owner(NewRing, Idx), {_, NextOwner, Status} = - riak_core_ring:next_owner(NewRing, Idx, Mod), + riak_core_ring:next_owner(NewRing, Idx, Mod), NewStatus = riak_core_ring:member_status(NewRing, New), case {Owner, NextOwner, NewStatus, Status} of - {_, _, invalid, _} -> - %% Handing off to invalid node, don't give-up data. 
- continue; - {Prev, New, _, _} -> forward; - {Prev, _, _, _} -> - %% Handoff wasn't to node that is scheduled in next, so no change. - continue; - {_, _, _, _} -> shutdown + {_, _, invalid, _} -> + %% Handing off to invalid node, don't give-up data. + continue; + {Prev, New, _, _} -> forward; + {Prev, _, _, _} -> + %% Handoff wasn't to node that is scheduled in next, so no change. + continue; + {_, _, _, _} -> shutdown end. finish_handoff(State) -> finish_handoff([], State). finish_handoff(SeenIdxs, - State = #state{mod = Mod, modstate = ModState, - index = Idx, handoff_target = Target, - handoff_type = HOType}) -> - case mark_handoff_complete(Idx, Target, SeenIdxs, Mod, - HOType) - of - continue -> - continue(State#state{handoff_target = none, - handoff_type = undefined}); - resize -> - CurrentForwarding = resize_forwarding(State), - NewForwarding = [Target | CurrentForwarding], - State2 = mod_set_forwarding(NewForwarding, State), - continue(State2#state{handoff_target = none, - handoff_type = undefined, - forward = NewForwarding}); - Res when Res == forward; Res == shutdown -> - {_, HN} = Target, - %% Have to issue the delete now. Once unregistered the - %% vnode master will spin up a new vnode on demand. - %% Shutdown the async pool beforehand, don't want callbacks - %% running on non-existant data. - maybe_shutdown_pool(State), - {ok, NewModState} = Mod:delete(ModState), - logger:debug("~p ~p vnode finished handoff and deleted.", - [Idx, Mod]), - riak_core_vnode_manager:unregister_vnode(Idx, Mod), - logger:debug("vnode hn/fwd :: ~p/~p :: ~p -> ~p~n", - [State#state.mod, State#state.index, - State#state.forward, HN]), - State2 = mod_set_forwarding(HN, State), - continue(State2#state{modstate = - {deleted, - NewModState}, % like to fail if used - handoff_target = none, handoff_type = undefined, - forward = HN}) + State = #state{mod = Mod, modstate = ModState, + index = Idx, handoff_target = Target, + handoff_type = HOType}) -> + case mark_handoff_complete(Idx, + Target, + SeenIdxs, + Mod, + HOType) + of + continue -> + continue(State#state{handoff_target = none, + handoff_type = undefined}); + resize -> + CurrentForwarding = resize_forwarding(State), + NewForwarding = [Target | CurrentForwarding], + State2 = mod_set_forwarding(NewForwarding, State), + continue(State2#state{handoff_target = none, + handoff_type = undefined, + forward = NewForwarding}); + Res when Res == forward; Res == shutdown -> + {_, HN} = Target, + %% Have to issue the delete now. Once unregistered the + %% vnode master will spin up a new vnode on demand. + %% Shutdown the async pool beforehand, don't want callbacks + %% running on non-existant data. + maybe_shutdown_pool(State), + {ok, NewModState} = Mod:delete(ModState), + logger:debug("~p ~p vnode finished handoff and deleted.", + [Idx, Mod]), + riak_core_vnode_manager:unregister_vnode(Idx, Mod), + logger:debug("vnode hn/fwd :: ~p/~p :: ~p -> ~p~n", + [State#state.mod, + State#state.index, + State#state.forward, + HN]), + State2 = mod_set_forwarding(HN, State), + continue(State2#state{modstate = + {deleted, + NewModState}, % like to fail if used + handoff_target = none, + handoff_type = undefined, forward = HN}) end. maybe_shutdown_pool(#state{pool_pid = Pool}) -> case is_pid(Pool) of - true -> - %% state.pool_pid will be cleaned up by handle_info message. - riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); - _ -> ok + true -> + %% state.pool_pid will be cleaned up by handle_info message. 
+ riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); + _ -> ok end. resize_forwarding(#state{forward = F}) @@ -1162,116 +1268,128 @@ resize_forwarding(_) -> []. mark_delete_complete(Idx, Mod) -> Result = riak_core_ring_manager:ring_trans(fun (Ring, - _) -> - Type = - riak_core_ring:vnode_type(Ring, - Idx), - {_, Next, Status} = - riak_core_ring:next_owner(Ring, - Idx), - case {Type, Next, Status} - of - {resized_primary, - '$delete', - awaiting} -> - Ring3 = - riak_core_ring:deletion_complete(Ring, - Idx, - Mod), - %% Use local ring optimization like mark_handoff_complete - {set_only, Ring3}; - {{fallback, _}, - '$delete', - awaiting} -> - Ring3 = - riak_core_ring:deletion_complete(Ring, - Idx, - Mod), - %% Use local ring optimization like mark_handoff_complete - {set_only, Ring3}; - _ -> ignore - end - end, - []), + _) -> + Type = + riak_core_ring:vnode_type(Ring, + Idx), + {_, Next, Status} = + riak_core_ring:next_owner(Ring, + Idx), + case {Type, Next, Status} + of + {resized_primary, + '$delete', + awaiting} -> + Ring3 = + riak_core_ring:deletion_complete(Ring, + Idx, + Mod), + %% Use local ring optimization like mark_handoff_complete + {set_only, + Ring3}; + {{fallback, _}, + '$delete', + awaiting} -> + Ring3 = + riak_core_ring:deletion_complete(Ring, + Idx, + Mod), + %% Use local ring optimization like mark_handoff_complete + {set_only, + Ring3}; + _ -> ignore + end + end, + []), Result. - maybe_handoff(_TargetIdx, _TargetNode, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> %% Modstate has been deleted, waiting for unregistered. No handoff. continue(State); maybe_handoff(TargetIdx, TargetNode, - State = #state{index = Idx, mod = Mod, - modstate = ModState, - handoff_target = CurrentTarget, - handoff_pid = HPid}) -> + State = #state{index = Idx, mod = Mod, + modstate = ModState, + handoff_target = CurrentTarget, + handoff_pid = HPid}) -> Target = {TargetIdx, TargetNode}, ExistingHO = is_pid(HPid) andalso - is_process_alive(HPid), + is_process_alive(HPid), ValidHN = case CurrentTarget of - none -> true; - Target -> not ExistingHO; - _ -> - logger:info("~s/~b: handoff request to ~p before " - "finishing handoff to ~p", - [Mod, Idx, Target, CurrentTarget]), - not ExistingHO - end, + none -> true; + Target -> not ExistingHO; + _ -> + logger:info("~s/~b: handoff request to ~p before " + "finishing handoff to ~p", + [Mod, Idx, Target, CurrentTarget]), + not ExistingHO + end, case ValidHN of - true -> - {ok, R} = riak_core_ring_manager:get_my_ring(), - Resizing = riak_core_ring:is_resizing(R), - Primary = riak_core_ring:is_primary(R, {Idx, node()}), - HOType = case {Resizing, Primary} of - {true, _} -> resize; - {_, true} -> ownership; - {_, false} -> hinted - end, - case Mod:handoff_starting({HOType, Target}, ModState) of - {true, NewModState} -> - start_handoff(HOType, TargetIdx, TargetNode, - State#state{modstate = NewModState}); - {false, NewModState} -> continue(State, NewModState) - end; - false -> continue(State) + true -> + {ok, R} = riak_core_ring_manager:get_my_ring(), + Resizing = riak_core_ring:is_resizing(R), + Primary = riak_core_ring:is_primary(R, {Idx, node()}), + HOType = case {Resizing, Primary} of + {true, _} -> resize; + {_, true} -> ownership; + {_, false} -> hinted + end, + case Mod:handoff_starting({HOType, Target}, ModState) of + {true, NewModState} -> + start_handoff(HOType, + TargetIdx, + TargetNode, + State#state{modstate = NewModState}); + {false, NewModState} -> continue(State, NewModState) + end; + false -> 
continue(State) end. start_handoff(HOType, TargetIdx, TargetNode, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> case Mod:is_empty(ModState) of - {true, NewModState} -> - finish_handoff(State#state{modstate = NewModState, - handoff_type = HOType, - handoff_target = {TargetIdx, TargetNode}}); - {false, Size, NewModState} -> - State2 = State#state{modstate = NewModState}, - NewState = start_outbound(HOType, TargetIdx, TargetNode, - [{size, Size}], State2), - continue(NewState); - {false, NewModState} -> - State2 = State#state{modstate = NewModState}, - NewState = start_outbound(HOType, TargetIdx, TargetNode, - [], State2), - continue(NewState) + {true, NewModState} -> + finish_handoff(State#state{modstate = NewModState, + handoff_type = HOType, + handoff_target = + {TargetIdx, TargetNode}}); + {false, Size, NewModState} -> + State2 = State#state{modstate = NewModState}, + NewState = start_outbound(HOType, + TargetIdx, + TargetNode, + [{size, Size}], + State2), + continue(NewState); + {false, NewModState} -> + State2 = State#state{modstate = NewModState}, + NewState = start_outbound(HOType, + TargetIdx, + TargetNode, + [], + State2), + continue(NewState) end. start_outbound(HOType, TargetIdx, TargetNode, Opts, - State = #state{index = Idx, mod = Mod}) -> - case riak_core_handoff_manager:add_outbound(HOType, Mod, - Idx, TargetIdx, TargetNode, - self(), Opts) - of - {ok, Pid} -> - State#state{handoff_pid = Pid, handoff_type = HOType, - handoff_target = {TargetIdx, TargetNode}}; - {error, _Reason} -> - {ok, NewModState} = - Mod:handoff_cancelled(State#state.modstate), - State#state{modstate = NewModState} + State = #state{index = Idx, mod = Mod}) -> + case riak_core_handoff_manager:add_outbound(HOType, + Mod, + Idx, + TargetIdx, + TargetNode, + self(), + Opts) + of + {ok, Pid} -> + State#state{handoff_pid = Pid, handoff_type = HOType, + handoff_target = {TargetIdx, TargetNode}}; + {error, _Reason} -> + {ok, NewModState} = + Mod:handoff_cancelled(State#state.modstate), + State#state{modstate = NewModState} end. - - %% Individual vnode processes and the vnode manager are tightly coupled. When %% vnode events occur, the vnode must ensure that the events are forwarded to %% the vnode manager, which will make a state change decision and send an @@ -1281,34 +1399,37 @@ start_outbound(HOType, TargetIdx, TargetNode, Opts, %% messages until an appropriate message is received back from the vnode %% manager. The event timer functions below implement this logic. start_manager_event_timer(Event, - State = #state{mod = Mod, index = Idx}) -> - riak_core_vnode_manager:vnode_event(Mod, Idx, self(), - Event), + State = #state{mod = Mod, index = Idx}) -> + riak_core_vnode_manager:vnode_event(Mod, + Idx, + self(), + Event), stop_manager_event_timer(State), T2 = gen_fsm_compat:send_event_after(30000, - {send_manager_event, Event}), + {send_manager_event, Event}), State#state{manager_event_timer = T2}. stop_manager_event_timer(#state{manager_event_timer = - undefined}) -> + undefined}) -> ok; stop_manager_event_timer(#state{manager_event_timer = - T}) -> - _ = gen_fsm_compat:cancel_timer(T), ok. + T}) -> + _ = gen_fsm_compat:cancel_timer(T), + ok. 
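
The timer pair above implements at-least-once notification toward the
manager. The same shape in generic, self-contained Erlang (a sketch, not
riak_core code):

    notify_until_acked(Manager, Event) ->
        Manager ! {event, self(), Event},
        TRef = erlang:send_after(30000, self(), {resend, Event}),
        receive
            {ack, Event} ->
                _ = erlang:cancel_timer(TRef),
                ok;
            {resend, Event} ->
                %% Timer fired before the ack: tell the manager again.
                notify_until_acked(Manager, Event)
        end.
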
mod_set_forwarding(_Forward, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> State; mod_set_forwarding(Forward, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> case lists:member({set_vnode_forwarding, 2}, - Mod:module_info(exports)) - of - true -> - NewModState = Mod:set_vnode_forwarding(Forward, - ModState), - State#state{modstate = NewModState}; - false -> State + Mod:module_info(exports)) + of + true -> + NewModState = Mod:set_vnode_forwarding(Forward, + ModState), + State#state{modstate = NewModState}; + false -> State end. %% =================================================================== @@ -1322,8 +1443,8 @@ mod_set_forwarding(Forward, get_modstate(Pid) -> {_StateName, State} = - gen_fsm_compat:sync_send_all_state_event(Pid, - current_state), + gen_fsm_compat:sync_send_all_state_event(Pid, + current_state), {State#state.mod, State#state.modstate}. -ifdef(TEST). @@ -1331,15 +1452,16 @@ get_modstate(Pid) -> %% Start the garbage collection server test_link(Mod, Index) -> gen_fsm_compat:start_link(?MODULE, - [Mod, Index, 0, node()], []). + [Mod, Index, 0, node()], + []). %% Get the current state of the fsm for testing inspection -spec current_state(pid()) -> {atom(), state()} | - {error, term()}. + {error, term()}. current_state(Pid) -> gen_fsm_compat:sync_send_all_state_event(Pid, - current_state). + current_state). wait_for_process_death(Pid) -> wait_for_process_death(Pid, is_process_alive(Pid)). @@ -1351,15 +1473,17 @@ wait_for_process_death(_Pid, false) -> ok. wait_for_state_update(OriginalStateData, Pid) -> {_, CurrentStateData} = (?MODULE):current_state(Pid), wait_for_state_update(OriginalStateData, - CurrentStateData, Pid). + CurrentStateData, + Pid). wait_for_state_update(OriginalStateData, - OriginalStateData, Pid) -> + OriginalStateData, Pid) -> {_, CurrentStateData} = (?MODULE):current_state(Pid), wait_for_state_update(OriginalStateData, - CurrentStateData, Pid); + CurrentStateData, + Pid); wait_for_state_update(_OriginalState, _StateData, - _Pid) -> + _Pid) -> ok. %% =================================================================== @@ -1371,14 +1495,17 @@ pool_death_test() -> error_logger:tty(false), meck:unload(), meck:new(test_vnode, [non_strict, no_link]), - meck:expect(test_vnode, init, - fun (_) -> {ok, [], [{pool, test_pool_mod, 1, []}]} - end), - meck:expect(test_vnode, terminate, - fun (_, _) -> normal end), + meck:expect(test_vnode, + init, + fun (_) -> {ok, [], [{pool, test_pool_mod, 1, []}]} + end), + meck:expect(test_vnode, + terminate, + fun (_, _) -> normal end), meck:new(test_pool_mod, [non_strict, no_link]), - meck:expect(test_pool_mod, init_worker, - fun (_, _, _) -> {ok, []} end), + meck:expect(test_pool_mod, + init_worker, + fun (_, _, _) -> {ok, []} end), {ok, Pid} = riak_core_vnode:test_link(test_vnode, 0), {_, StateData1} = riak_core_vnode:current_state(Pid), PoolPid1 = StateData1#state.pool_pid, @@ -1393,4 +1520,5 @@ pool_death_test() -> wait_for_process_death(Pid), meck:validate(test_pool_mod), meck:validate(test_vnode). + -endif. diff --git a/src/riak_core_vnode_manager.erl b/src/riak_core_vnode_manager.erl index 6dc46e05c..ab40781eb 100644 --- a/src/riak_core_vnode_manager.erl +++ b/src/riak_core_vnode_manager.erl @@ -26,17 +26,29 @@ -export([start_link/0, stop/0]). --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). 
- --export([all_vnodes/0, all_vnodes/1, - all_vnodes_status/0, force_handoffs/0, repair/3, - all_handoffs/0, repair_status/1, xfer_complete/2, - kill_repairs/1]). - --export([all_index_pid/1, get_vnode_pid/2, - start_vnode/2, unregister_vnode/2, unregister_vnode/3, - vnode_event/4]). +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +-export([all_vnodes/0, + all_vnodes/1, + all_vnodes_status/0, + force_handoffs/0, + repair/3, + all_handoffs/0, + repair_status/1, + xfer_complete/2, + kill_repairs/1]). + +-export([all_index_pid/1, + get_vnode_pid/2, + start_vnode/2, + unregister_vnode/2, + unregister_vnode/3, + vnode_event/4]). %% Field debugging -export([get_tab/0]). @@ -46,38 +58,41 @@ -record(monrec, {monref, key}). -record(xfer_status, - {status :: pending | complete, - mod_src_target :: {module(), index(), index()}}). + {status :: pending | complete, + mod_src_target :: {module(), index(), index()}}). -type xfer_status() :: #xfer_status{}. -record(repair, - {mod_partition :: mod_partition(), - filter_mod_fun :: {module(), atom()}, - minus_one_xfer :: xfer_status(), - plus_one_xfer :: xfer_status(), - pairs :: [{index(), node()}]}). + {mod_partition :: mod_partition(), + filter_mod_fun :: {module(), atom()}, + minus_one_xfer :: xfer_status(), + plus_one_xfer :: xfer_status(), + pairs :: [{index(), node()}]}). -type repair() :: #repair{}. -type repairs() :: [repair()]. -record(state, - {idxtab, forwarding :: dict:dict(), - handoff :: dict:dict(), known_modules :: [term()], - never_started :: [{integer(), term()}], - vnode_start_tokens :: integer(), - last_ring_id :: term(), repairs :: repairs()}). + {idxtab, + forwarding :: dict:dict(), + handoff :: dict:dict(), + known_modules :: [term()], + never_started :: [{integer(), term()}], + vnode_start_tokens :: integer(), + last_ring_id :: term(), + repairs :: repairs()}). -include("riak_core_handoff.hrl"). -include("riak_core_vnode.hrl"). -define(XFER_EQ(A, ModSrcTgt), - A#xfer_status.mod_src_target == ModSrcTgt). + A#xfer_status.mod_src_target == ModSrcTgt). -define(XFER_COMPLETE(X), - X#xfer_status.status == complete). + X#xfer_status.status == complete). -define(DEFAULT_OWNERSHIP_TRIGGER, 8). @@ -92,8 +107,10 @@ %% =================================================================== start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], - []). + gen_server:start_link({local, ?MODULE}, + ?MODULE, + [], + []). stop() -> gen_server:cast(?MODULE, stop). @@ -103,10 +120,10 @@ all_vnodes_status() -> %% @doc Repair the given `ModPartition' pair for `Service' using the %% given `FilterModFun' to filter keys. -spec repair(atom(), {module(), partition()}, - {module(), atom()}) -> {ok, - Pairs :: [{partition(), node()}]} | - {down, Down :: [{partition(), node()}]} | - ownership_change_in_progress. + {module(), atom()}) -> {ok, + Pairs :: [{partition(), node()}]} | + {down, Down :: [{partition(), node()}]} | + ownership_change_in_progress. repair(Service, {_Module, Partition} = ModPartition, FilterModFun) -> @@ -119,13 +136,14 @@ repair(Service, {_Module, Partition} = ModPartition, %% @doc Get the status of the repair process for a given `ModPartition'. -spec repair_status(mod_partition()) -> in_progress | - not_found. + not_found. repair_status({_Module, Partition} = ModPartition) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), Owner = riak_core_ring:index_owner(Ring, Partition), gen_server:call({?MODULE, Owner}, - {repair_status, ModPartition}, ?LONG_TIMEOUT). 
+ {repair_status, ModPartition}, + ?LONG_TIMEOUT). %% @doc Get all handoffs known by this manager. -spec all_handoffs() -> [known_handoff()]. @@ -140,7 +158,8 @@ all_handoffs() -> xfer_complete(Origin, Xfer) -> gen_server:call({?MODULE, Origin}, - {xfer_complete, Xfer}, ?LONG_TIMEOUT). + {xfer_complete, Xfer}, + ?LONG_TIMEOUT). kill_repairs(Reason) -> gen_server:cast(?MODULE, {kill_repairs, Reason}). @@ -156,22 +175,23 @@ unregister_vnode(Index, VNodeMod) -> unregister_vnode(Index, Pid, VNodeMod) -> gen_server:cast(?MODULE, - {unregister, Index, VNodeMod, Pid}). + {unregister, Index, VNodeMod, Pid}). start_vnode(Index, VNodeMod) -> gen_server:cast(?MODULE, - {Index, VNodeMod, start_vnode}). + {Index, VNodeMod, start_vnode}). vnode_event(Mod, Idx, Pid, Event) -> gen_server:cast(?MODULE, - {vnode_event, Mod, Idx, Pid, Event}). + {vnode_event, Mod, Idx, Pid, Event}). get_tab() -> gen_server:call(?MODULE, get_tab, infinity). get_vnode_pid(Index, VNodeMod) -> - gen_server:call(?MODULE, {Index, VNodeMod, get_vnode}, - infinity). + gen_server:call(?MODULE, + {Index, VNodeMod, get_vnode}, + infinity). %% =================================================================== %% ETS-based API: try to determine response by reading protected ETS @@ -181,26 +201,27 @@ get_vnode_pid(Index, VNodeMod) -> all_vnodes() -> case get_all_vnodes() of - [] -> - %% ETS error could produce empty list, call manager to be sure. - gen_server:call(?MODULE, all_vnodes, infinity); - Result -> Result + [] -> + %% ETS error could produce empty list, call manager to be sure. + gen_server:call(?MODULE, all_vnodes, infinity); + Result -> Result end. all_vnodes(Mod) -> case get_all_vnodes(Mod) of - [] -> - %% ETS error could produce empty list, call manager to be sure. - gen_server:call(?MODULE, {all_vnodes, Mod}, infinity); - Result -> Result + [] -> + %% ETS error could produce empty list, call manager to be sure. + gen_server:call(?MODULE, {all_vnodes, Mod}, infinity); + Result -> Result end. all_index_pid(VNodeMod) -> case get_all_index_pid(VNodeMod, ets_error) of - ets_error -> - gen_server:call(?MODULE, {all_index_pid, VNodeMod}, - infinity); - Result -> Result + ets_error -> + gen_server:call(?MODULE, + {all_index_pid, VNodeMod}, + infinity); + Result -> Result end. %% =================================================================== @@ -209,20 +230,20 @@ all_index_pid(VNodeMod) -> get_all_index_pid(Mod, Default) -> try [list_to_tuple(L) - || L - <- ets:match(?ETS, {idxrec, '_', '$1', Mod, '$2', '_'})] + || L + <- ets:match(?ETS, {idxrec, '_', '$1', Mod, '$2', '_'})] catch - _:_ -> Default + _:_ -> Default end. get_all_vnodes() -> Mods = [Mod - || {_App, Mod} <- riak_core:vnode_modules()], + || {_App, Mod} <- riak_core:vnode_modules()], get_all_vnodes(Mods). get_all_vnodes(Mods) when is_list(Mods) -> lists:flatmap(fun (Mod) -> get_all_vnodes(Mod) end, - Mods); + Mods); get_all_vnodes(Mod) -> IdxPids = get_all_index_pid(Mod, []), [{Mod, Idx, Pid} || {Idx, Pid} <- IdxPids]. 
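
The lookups above all follow the same read-through shape: trust a non-empty
ETS answer, otherwise fall back to a call into the manager, which also covers
the window where the table does not exist yet. In miniature (my_tab and
my_server are placeholders):

    lookup(Key) ->
        try ets:lookup(my_tab, Key) of
            [{Key, Value}] -> Value;
            [] -> gen_server:call(my_server, {lookup, Key}, infinity)
        catch
            error:badarg ->
                %% Table missing or being rebuilt: ask the owner.
                gen_server:call(my_server, {lookup, Key}, infinity)
        end.
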
@@ -234,16 +255,18 @@ get_all_vnodes(Mod) -> %% @private init(_State) -> {ok, Ring, CHBin} = - riak_core_ring_manager:get_raw_ring_chashbin(), + riak_core_ring_manager:get_raw_ring_chashbin(), Mods = [Mod || {_, Mod} <- riak_core:vnode_modules()], State = #state{forwarding = dict:new(), - handoff = dict:new(), known_modules = [], - never_started = [], vnode_start_tokens = 0, - repairs = []}, + handoff = dict:new(), known_modules = [], + never_started = [], vnode_start_tokens = 0, + repairs = []}, State2 = find_vnodes(State), AllVNodes = get_all_vnodes(Mods), - State3 = update_forwarding(AllVNodes, Mods, Ring, - State2), + State3 = update_forwarding(AllVNodes, + Mods, + Ring, + State2), State4 = update_handoff(AllVNodes, Ring, CHBin, State3), schedule_management_timer(), {ok, State4}. @@ -254,33 +277,33 @@ find_vnodes(State) -> %% to rebuild our ETS table for routing messages to the appropriate %% vnode. VnodePids = [Pid - || {_, Pid, worker, _} - <- supervisor:which_children(riak_core_vnode_sup), - is_pid(Pid) andalso is_process_alive(Pid)], + || {_, Pid, worker, _} + <- supervisor:which_children(riak_core_vnode_sup), + is_pid(Pid) andalso is_process_alive(Pid)], IdxTable = ets:new(?ETS, - [{keypos, 2}, named_table, protected]), + [{keypos, 2}, named_table, protected]), %% If the vnode manager is being restarted, scan the existing %% vnode children and work out which module and index they are %% responsible for. During startup it is possible that these %% vnodes may be shutting down as we check them if there are %% several types of vnodes active. PidIdxs = lists:flatten([try [{Pid, - riak_core_vnode:get_mod_index(Pid)}] - catch - _:_Err -> [] - end - || Pid <- VnodePids]), + riak_core_vnode:get_mod_index(Pid)}] + catch + _:_Err -> [] + end + || Pid <- VnodePids]), %% Populate the ETS table with processes running this VNodeMod (filtered %% in the list comprehension) F = fun (Pid, Idx, Mod) -> - Mref = erlang:monitor(process, Pid), - #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, - pid = Pid, monref = Mref} - end, + Mref = erlang:monitor(process, Pid), + #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, + pid = Pid, monref = Mref} + end, IdxRecs = [F(Pid, Idx, Mod) - || {Pid, {Mod, Idx}} <- PidIdxs], + || {Pid, {Mod, Idx}} <- PidIdxs], MonRecs = [#monrec{monref = Mref, key = Key} - || #idxrec{key = Key, monref = Mref} <- IdxRecs], + || #idxrec{key = Key, monref = Mref} <- IdxRecs], true = ets:insert_new(IdxTable, IdxRecs ++ MonRecs), State#state{idxtab = IdxTable}. 
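
The rows inserted by find_vnodes/1 are what get_all_index_pid/2 later matches
on. A self-contained illustration of that ets:match/2 pattern, with the
#idxrec{} layout written out as a plain tuple (values faked):

    match_demo() ->
        T = ets:new(demo_idx, [{keypos, 2}]),
        Mon = erlang:monitor(process, self()),
        true = ets:insert(T, {idxrec, {0, my_mod}, 0, my_mod, self(), Mon}),
        %% '$1'/'$2' capture index and pid; '_' skips the other fields.
        [[0, Pid]] = ets:match(T, {idxrec, '_', '$1', my_mod, '$2', '_'}),
        list_to_tuple([0, Pid]).   %% -> {0, Pid}, as in the real code
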
@@ -289,39 +312,49 @@ handle_call(all_vnodes_status, _From, State) -> Reply = get_all_vnodes_status(State), {reply, Reply, State}; handle_call(all_vnodes, _From, State) -> - Reply = get_all_vnodes(), {reply, Reply, State}; + Reply = get_all_vnodes(), + {reply, Reply, State}; handle_call({all_vnodes, Mod}, _From, State) -> - Reply = get_all_vnodes(Mod), {reply, Reply, State}; + Reply = get_all_vnodes(Mod), + {reply, Reply, State}; handle_call({all_index_pid, Mod}, _From, State) -> Reply = get_all_index_pid(Mod, []), {reply, Reply, State}; handle_call({Partition, Mod, get_vnode}, _From, - State) -> + State) -> Pid = get_vnode(Partition, Mod, State), {reply, {ok, Pid}, State}; handle_call(get_tab, _From, State) -> {reply, ets:tab2list(State#state.idxtab), State}; -handle_call({repair, Service, - {Mod, Partition} = ModPartition, FilterModFun}, - _From, #state{repairs = Repairs} = State) -> +handle_call({repair, + Service, + {Mod, Partition} = ModPartition, + FilterModFun}, + _From, #state{repairs = Repairs} = State) -> case get_repair(ModPartition, Repairs) of - none -> - maybe_create_repair(Partition, Service, ModPartition, - FilterModFun, Mod, Repairs, State); - Repair -> - Pairs = Repair#repair.pairs, {reply, {ok, Pairs}, State} + none -> + maybe_create_repair(Partition, + Service, + ModPartition, + FilterModFun, + Mod, + Repairs, + State); + Repair -> + Pairs = Repair#repair.pairs, + {reply, {ok, Pairs}, State} end; handle_call(all_handoffs, _From, - State = #state{repairs = Repairs, handoff = HO}) -> + State = #state{repairs = Repairs, handoff = HO}) -> Handoffs = dict:to_list(HO) ++ - transform_repair_records(Repairs), + transform_repair_records(Repairs), {reply, Handoffs, State}; handle_call({repair_status, ModPartition}, _From, - State) -> + State) -> Repairs = State#state.repairs, case get_repair(ModPartition, Repairs) of - none -> {reply, not_found, State}; - #repair{} -> {reply, in_progress, State} + none -> {reply, not_found, State}; + #repair{} -> {reply, in_progress, State} end; %% NOTE: The `xfer_complete' logic assumes two things: %% @@ -334,34 +367,36 @@ handle_call({xfer_complete, ModSrcTgt}, _From, State) -> {Mod, _, Partition} = ModSrcTgt, ModPartition = {Mod, Partition}, case get_repair(ModPartition, Repairs) of - none -> - logger:error("Received xfer_complete for non-existing " - "repair: ~p", - [ModPartition]), - {reply, ok, State}; - #repair{minus_one_xfer = MOX, plus_one_xfer = POX} = - R -> - R2 = if ?XFER_EQ(MOX, ModSrcTgt) -> - MOX2 = MOX#xfer_status{status = complete}, - R#repair{minus_one_xfer = MOX2}; - ?XFER_EQ(POX, ModSrcTgt) -> - POX2 = POX#xfer_status{status = complete}, - R#repair{plus_one_xfer = POX2}; - true -> - logger:error("Received xfer_complete for non-existing " - "xfer: ~p", - [ModSrcTgt]) - end, - case {?XFER_COMPLETE((R2#repair.minus_one_xfer)), - ?XFER_COMPLETE((R2#repair.plus_one_xfer))} - of - {true, true} -> - {reply, ok, - State#state{repairs = remove_repair(R2, Repairs)}}; - _ -> - {reply, ok, - State#state{repairs = replace_repair(R2, Repairs)}} - end + none -> + logger:error("Received xfer_complete for non-existing " + "repair: ~p", + [ModPartition]), + {reply, ok, State}; + #repair{minus_one_xfer = MOX, plus_one_xfer = POX} = + R -> + R2 = if ?XFER_EQ(MOX, ModSrcTgt) -> + MOX2 = MOX#xfer_status{status = complete}, + R#repair{minus_one_xfer = MOX2}; + ?XFER_EQ(POX, ModSrcTgt) -> + POX2 = POX#xfer_status{status = complete}, + R#repair{plus_one_xfer = POX2}; + true -> + logger:error("Received xfer_complete for non-existing " + "xfer: ~p", + 
[ModSrcTgt]) + end, + case {?XFER_COMPLETE((R2#repair.minus_one_xfer)), + ?XFER_COMPLETE((R2#repair.plus_one_xfer))} + of + {true, true} -> + {reply, + ok, + State#state{repairs = remove_repair(R2, Repairs)}}; + _ -> + {reply, + ok, + State#state{repairs = replace_repair(R2, Repairs)}} + end end; handle_call(_, _From, State) -> {reply, ok, State}. @@ -370,54 +405,64 @@ transform_repair_records(Repairs) -> %% module/node values in the `pairs' field against %% `minus_one_xfer' and `plus_one_xfer' lists:flatten(lists:map(fun (#repair{pairs = - [{M1SrcIdx, Mnode}, _FixPartition, - {P1SrcIdx, Pnode}], - minus_one_xfer = - #xfer_status{mod_src_target = - {M1Mod, M1SrcIdx, - _M1DstIdx}}, - plus_one_xfer = - #xfer_status{mod_src_target = - {P1Mod, P1SrcIdx, - _P1DstIdx}}}) -> - [{{M1Mod, M1SrcIdx}, - {repair, inbound, Mnode}}, - {{P1Mod, P1SrcIdx}, - {repair, inbound, Pnode}}] - end, - Repairs)). + [{M1SrcIdx, Mnode}, + _FixPartition, + {P1SrcIdx, Pnode}], + minus_one_xfer = + #xfer_status{mod_src_target = + {M1Mod, + M1SrcIdx, + _M1DstIdx}}, + plus_one_xfer = + #xfer_status{mod_src_target = + {P1Mod, + P1SrcIdx, + _P1DstIdx}}}) -> + [{{M1Mod, M1SrcIdx}, + {repair, inbound, Mnode}}, + {{P1Mod, P1SrcIdx}, + {repair, inbound, Pnode}}] + end, + Repairs)). maybe_create_repair(Partition, Service, ModPartition, - FilterModFun, Mod, Repairs, State) -> + FilterModFun, Mod, Repairs, State) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), case riak_core_ring:pending_changes(Ring) of - [] -> - UpNodes = riak_core_node_watcher:nodes(Service), - Pairs = repair_pairs(Ring, Partition), - case check_up(Pairs, UpNodes) of - true -> - create_repair(Pairs, ModPartition, FilterModFun, Mod, - Partition, Repairs, State); - {false, Down} -> {reply, {down, Down}, State} - end; - _ -> {reply, ownership_change_in_progress, State} + [] -> + UpNodes = riak_core_node_watcher:nodes(Service), + Pairs = repair_pairs(Ring, Partition), + case check_up(Pairs, UpNodes) of + true -> + create_repair(Pairs, + ModPartition, + FilterModFun, + Mod, + Partition, + Repairs, + State); + {false, Down} -> {reply, {down, Down}, State} + end; + _ -> {reply, ownership_change_in_progress, State} end. 
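For readers tracing the `xfer_complete' clause above: each repair owns exactly two transfers, one per neighbouring partition, and is dropped from the manager's list only once both report complete. A minimal, self-contained sketch of that bookkeeping follows; the module and record names are illustrative stand-ins, not the actual riak_core definitions.

-module(repair_sketch).

-export([mark_complete/2, all_complete/1]).

-record(xfer, {src_target :: term(),
               status = pending :: pending | complete}).

-record(repair, {minus_one :: #xfer{}, plus_one :: #xfer{}}).

%% Mark whichever of the two transfers matches SrcTarget as complete.
%% An unknown SrcTarget fails the case, mirroring the logged-error
%% path in the real handler.
mark_complete(SrcTarget, R = #repair{minus_one = MO, plus_one = PO}) ->
    case {MO#xfer.src_target, PO#xfer.src_target} of
        {SrcTarget, _} -> R#repair{minus_one = MO#xfer{status = complete}};
        {_, SrcTarget} -> R#repair{plus_one = PO#xfer{status = complete}}
    end.

%% A repair is finished once both of its transfers have completed;
%% only then may it be removed from the repair list.
all_complete(#repair{minus_one = #xfer{status = S1},
                     plus_one = #xfer{status = S2}}) ->
    S1 =:= complete andalso S2 =:= complete.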
create_repair(Pairs, ModPartition, FilterModFun, Mod, - Partition, Repairs, State) -> + Partition, Repairs, State) -> {MOP, _} = MinusOne = get_minus_one(Pairs), {POP, _} = PlusOne = get_plus_one(Pairs), - riak_core_handoff_manager:xfer(MinusOne, ModPartition, - FilterModFun), - riak_core_handoff_manager:xfer(PlusOne, ModPartition, - FilterModFun), + riak_core_handoff_manager:xfer(MinusOne, + ModPartition, + FilterModFun), + riak_core_handoff_manager:xfer(PlusOne, + ModPartition, + FilterModFun), MOXStatus = #xfer_status{status = pending, - mod_src_target = {Mod, MOP, Partition}}, + mod_src_target = {Mod, MOP, Partition}}, POXStatus = #xfer_status{status = pending, - mod_src_target = {Mod, POP, Partition}}, + mod_src_target = {Mod, POP, Partition}}, Repair = #repair{mod_partition = ModPartition, - filter_mod_fun = FilterModFun, pairs = Pairs, - minus_one_xfer = MOXStatus, plus_one_xfer = POXStatus}, + filter_mod_fun = FilterModFun, pairs = Pairs, + minus_one_xfer = MOXStatus, plus_one_xfer = POXStatus}, Repairs2 = Repairs ++ [Repair], State2 = State#state{repairs = Repairs2}, logger:debug("add repair ~p", [ModPartition]), @@ -425,29 +470,30 @@ create_repair(Pairs, ModPartition, FilterModFun, Mod, %% @private handle_cast({Partition, Mod, start_vnode}, State) -> - _ = get_vnode(Partition, Mod, State), {noreply, State}; + _ = get_vnode(Partition, Mod, State), + {noreply, State}; handle_cast({unregister, Index, Mod, Pid}, - #state{idxtab = T} = State) -> + #state{idxtab = T} = State) -> %% Update forwarding state to ensure vnode is not restarted in %% incorrect forwarding state if next request arrives before next %% ring event. {ok, Ring} = riak_core_ring_manager:get_my_ring(), State2 = update_forwarding({Mod, Index}, Ring, State), ets:match_delete(T, - {idxrec, {Index, Mod}, Index, Mod, Pid, '_'}), + {idxrec, {Index, Mod}, Index, Mod, Pid, '_'}), _ = unregister_vnode_stats(Mod, Index), riak_core_vnode_proxy:unregister_vnode(Mod, Index, Pid), {noreply, State2}; handle_cast({vnode_event, Mod, Idx, Pid, Event}, - State) -> + State) -> handle_vnode_event(Event, Mod, Idx, Pid, State); handle_cast(force_handoffs, State) -> AllVNodes = get_all_vnodes(), {ok, Ring, CHBin} = - riak_core_ring_manager:get_raw_ring_chashbin(), + riak_core_ring_manager:get_raw_ring_chashbin(), State2 = update_handoff(AllVNodes, Ring, CHBin, State), _ = [maybe_trigger_handoff(Mod, Idx, Pid, State2) - || {Mod, Idx, Pid} <- AllVNodes], + || {Mod, Idx, Pid} <- AllVNodes], {noreply, State2}; handle_cast(maybe_start_vnodes, State) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), @@ -463,7 +509,7 @@ handle_info(management_tick, State0) -> schedule_management_timer(), RingID = riak_core_ring_manager:get_ring_id(), {ok, Ring, CHBin} = - riak_core_ring_manager:get_raw_ring_chashbin(), + riak_core_ring_manager:get_raw_ring_chashbin(), State = maybe_ring_changed(RingID, Ring, CHBin, State0), Mods = [Mod || {_, Mod} <- riak_core:vnode_modules()], AllVNodes = get_all_vnodes(Mods), @@ -471,33 +517,36 @@ handle_info(management_tick, State0) -> Transfers = riak_core_ring:pending_changes(Ring), %% Kill/cancel any repairs during ownership changes State3 = case Transfers of - [] -> State2; - _ -> - Repairs = State#state.repairs, - kill_repairs(Repairs, ownership_change), - trigger_ownership_handoff(Transfers, Mods, Ring, - State2), - State2#state{repairs = []} - end, + [] -> State2; + _ -> + Repairs = State#state.repairs, + kill_repairs(Repairs, ownership_change), + trigger_ownership_handoff(Transfers, + Mods, + Ring, + State2), + 
State2#state{repairs = []} + end, State4 = State3#state{vnode_start_tokens = - ?DEFAULT_VNODE_ROLLING_START}, + ?DEFAULT_VNODE_ROLLING_START}, State5 = maybe_start_vnodes(Ring, State4), Repairs2 = check_repairs(State4#state.repairs), {noreply, State5#state{repairs = Repairs2}}; handle_info({'DOWN', MonRef, process, _P, _I}, State) -> - delmon(MonRef, State), {noreply, State}. + delmon(MonRef, State), + {noreply, State}. %% @private handle_vnode_event(inactive, Mod, Idx, Pid, State) -> maybe_trigger_handoff(Mod, Idx, Pid, State), {noreply, State}; handle_vnode_event(handoff_complete, Mod, Idx, Pid, - State) -> + State) -> NewHO = dict:erase({Mod, Idx}, State#state.handoff), riak_core_vnode:cast_finish_handoff(Pid), {noreply, State#state{handoff = NewHO}}; handle_vnode_event(handoff_error, Mod, Idx, Pid, - State) -> + State) -> NewHO = dict:erase({Mod, Idx}, State#state.handoff), riak_core_vnode:cancel_handoff(Pid), {noreply, State#state{handoff = NewHO}}. @@ -513,69 +562,79 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% =================================================================== maybe_ring_changed(RingID, Ring, CHBin, - State = #state{last_ring_id = LastID}) -> + State = #state{last_ring_id = LastID}) -> case RingID of - LastID -> maybe_ensure_vnodes_started(Ring), State; - _ -> - ensure_vnodes_started(Ring), - State2 = ring_changed(Ring, CHBin, State), - State2#state{last_ring_id = RingID} + LastID -> + maybe_ensure_vnodes_started(Ring), + State; + _ -> + ensure_vnodes_started(Ring), + State2 = ring_changed(Ring, CHBin, State), + State2#state{last_ring_id = RingID} end. ring_changed(Ring, CHBin, State) -> %% Update vnode forwarding state AllVNodes = get_all_vnodes(), Mods = [Mod || {_, Mod} <- riak_core:vnode_modules()], - State2 = update_forwarding(AllVNodes, Mods, Ring, - State), + State2 = update_forwarding(AllVNodes, + Mods, + Ring, + State), %% Update handoff state State3 = update_handoff(AllVNodes, Ring, CHBin, State2), %% Trigger ownership transfers. Transfers = riak_core_ring:pending_changes(Ring), - trigger_ownership_handoff(Transfers, Mods, Ring, - State3), + trigger_ownership_handoff(Transfers, + Mods, + Ring, + State3), State3. maybe_ensure_vnodes_started(Ring) -> ExitingStates = [leaving, exiting, invalid], Status = riak_core_ring:member_status(Ring, node()), case lists:member(Status, ExitingStates) of - true -> ensure_vnodes_started(Ring), ok; - _ -> ok + true -> + ensure_vnodes_started(Ring), + ok; + _ -> ok end. ensure_vnodes_started(Ring) -> spawn(fun () -> - try riak_core_ring_handler:ensure_vnodes_started(Ring) - catch - Type:Reason:Stacktrace -> - logger:error("~p", [{Type, Reason, Stacktrace}]) - end - end). + try riak_core_ring_handler:ensure_vnodes_started(Ring) + catch + Type:Reason:Stacktrace -> + logger:error("~p", [{Type, Reason, Stacktrace}]) + end + end). schedule_management_timer() -> ManagementTick = application:get_env(riak_core, - vnode_management_timer, 10000), - erlang:send_after(ManagementTick, ?MODULE, - management_tick). + vnode_management_timer, + 10000), + erlang:send_after(ManagementTick, + ?MODULE, + management_tick). 
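schedule_management_timer/0 above re-arms a one-shot timer on each tick rather than using a recurring timer, so a slow tick handler can delay the next tick but never let tick messages pile up. A minimal gen_server sketch of the same pattern; the module name and application env key here are invented for illustration (the real code sends to the manager's registered name, this sketch sends to self()).

-module(tick_sketch).

-behaviour(gen_server).

-export([start_link/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2]).

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

init([]) ->
    schedule_tick(),
    {ok, #{}}.

handle_info(tick, State) ->
    schedule_tick(),            % re-arm first, then do the periodic work
    %% ... periodic work goes here ...
    {noreply, State};
handle_info(_Other, State) -> {noreply, State}.

handle_call(_Req, _From, State) -> {reply, ok, State}.

handle_cast(_Msg, State) -> {noreply, State}.

schedule_tick() ->
    Interval = application:get_env(tick_sketch, tick_interval, 10000),
    erlang:send_after(Interval, self(), tick).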
trigger_ownership_handoff(Transfers, Mods, Ring, - State) -> + State) -> IsResizing = riak_core_ring:is_resizing(Ring), Throttle = limit_ownership_handoff(Transfers, - IsResizing), + IsResizing), Awaiting = [{Mod, Idx} - || {Idx, Node, _, CMods, S} <- Throttle, Mod <- Mods, - S =:= awaiting, Node =:= node(), - not lists:member(Mod, CMods)], + || {Idx, Node, _, CMods, S} <- Throttle, Mod <- Mods, + S =:= awaiting, Node =:= node(), + not lists:member(Mod, CMods)], _ = [maybe_trigger_handoff(Mod, Idx, State) - || {Mod, Idx} <- Awaiting], + || {Mod, Idx} <- Awaiting], ok. limit_ownership_handoff(Transfers, IsResizing) -> Limit = application:get_env(riak_core, - forced_ownership_handoff, - ?DEFAULT_OWNERSHIP_TRIGGER), + forced_ownership_handoff, + ?DEFAULT_OWNERSHIP_TRIGGER), limit_ownership_handoff(Limit, Transfers, IsResizing). limit_ownership_handoff(Limit, Transfers, false) -> @@ -585,28 +644,28 @@ limit_ownership_handoff(Limit, Transfers, true) -> %% since they remain in the list until all are complete. then %% treat transfers as normal Filtered = [Transfer - || {_, _, _, _, Status} = Transfer <- Transfers, - Status =:= awaiting], + || {_, _, _, _, Status} = Transfer <- Transfers, + Status =:= awaiting], limit_ownership_handoff(Limit, Filtered, false). %% @private idx2vnode(Idx, Mod, _State = #state{idxtab = T}) -> case ets:lookup(T, {Idx, Mod}) of - [I] -> I#idxrec.pid; - [] -> no_match + [I] -> I#idxrec.pid; + [] -> no_match end. %% @private delmon(MonRef, _State = #state{idxtab = T}) -> case ets:lookup(T, MonRef) of - [#monrec{key = {Index, Mod} = Key}] -> - _ = unregister_vnode_stats(Mod, Index), - ets:match_delete(T, - {idxrec, Key, '_', '_', '_', MonRef}), - ets:delete(T, MonRef); - [] -> - ets:match_delete(T, - {idxrec, '_', '_', '_', '_', MonRef}) + [#monrec{key = {Index, Mod} = Key}] -> + _ = unregister_vnode_stats(Mod, Index), + ets:match_delete(T, + {idxrec, Key, '_', '_', '_', MonRef}), + ets:delete(T, MonRef); + [] -> + ets:match_delete(T, + {idxrec, '_', '_', '_', '_', MonRef}) end. 
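limit_ownership_handoff/2,3 above throttles forced ownership handoffs: only transfers still in the `awaiting' state are considered, and the non-resizing clause (its body falls outside this hunk) caps the list at `Limit' entries. A simplified sketch of that filter-then-cap step, assuming three-element transfer tuples in place of the ring's five-element ones:

-module(throttle_sketch).

-export([limit/2]).

%% Keep only transfers still awaiting, then force at most Limit of
%% them this tick.
limit(Limit, Transfers) ->
    Awaiting = [T || {_Idx, _Node, Status} = T <- Transfers,
                     Status =:= awaiting],
    lists:sublist(Awaiting, Limit).

%% Example: limit(1, [{0, n1, awaiting}, {1, n2, complete},
%%                    {2, n3, awaiting}]) =:= [{0, n1, awaiting}]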
%% @private @@ -615,179 +674,201 @@ add_vnode_rec(I, _State = #state{idxtab = T}) -> %% @private get_vnode(Idx, Mod, State) when not is_list(Idx) -> - [Result] = get_vnode([Idx], Mod, State), Result; + [Result] = get_vnode([Idx], Mod, State), + Result; get_vnode(IdxList, Mod, State) -> Initial = [case idx2vnode(Idx, Mod, State) of - no_match -> Idx; - Pid -> {Idx, Pid} - end - || Idx <- IdxList], + no_match -> Idx; + Pid -> {Idx, Pid} + end + || Idx <- IdxList], {NotStarted, Started} = - lists:partition(fun erlang:is_integer/1, Initial), + lists:partition(fun erlang:is_integer/1, Initial), StartFun = fun (Idx) -> - ForwardTo = get_forward(Mod, Idx, State), - logger:debug("Will start VNode for partition ~p", - [Idx]), - {ok, Pid} = riak_core_vnode_sup:start_vnode(Mod, Idx, - ForwardTo), - register_vnode_stats(Mod, Idx, Pid), - logger:debug("Started VNode, waiting for initialization " - "to\n complete " - "~p, ~p ", - [Pid, Idx]), - ok = riak_core_vnode:wait_for_init(Pid), - logger:debug("VNode initialization ready ~p, ~p", - [Pid, Idx]), - {Idx, Pid} - end, + ForwardTo = get_forward(Mod, Idx, State), + logger:debug("Will start VNode for partition ~p", + [Idx]), + {ok, Pid} = riak_core_vnode_sup:start_vnode(Mod, + Idx, + ForwardTo), + register_vnode_stats(Mod, Idx, Pid), + logger:debug("Started VNode, waiting for initialization " + "to\n complete " + "~p, ~p ", + [Pid, Idx]), + ok = riak_core_vnode:wait_for_init(Pid), + logger:debug("VNode initialization ready ~p, ~p", + [Pid, Idx]), + {Idx, Pid} + end, Pairs = Started ++ - riak_core_util:pmap(StartFun, NotStarted, - ?DEFAULT_VNODE_ROLLING_START), + riak_core_util:pmap(StartFun, + NotStarted, + ?DEFAULT_VNODE_ROLLING_START), %% Return Pids in same order as input PairsDict = dict:from_list(Pairs), _ = [begin - Pid = dict:fetch(Idx, PairsDict), - MonRef = erlang:monitor(process, Pid), - IdxRec = #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, - pid = Pid, monref = MonRef}, - MonRec = #monrec{monref = MonRef, key = {Idx, Mod}}, - add_vnode_rec([IdxRec, MonRec], State) - end - || Idx <- NotStarted], + Pid = dict:fetch(Idx, PairsDict), + MonRef = erlang:monitor(process, Pid), + IdxRec = #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, + pid = Pid, monref = MonRef}, + MonRec = #monrec{monref = MonRef, key = {Idx, Mod}}, + add_vnode_rec([IdxRec, MonRec], State) + end + || Idx <- NotStarted], [dict:fetch(Idx, PairsDict) || Idx <- IdxList]. get_forward(Mod, Idx, #state{forwarding = Fwd}) -> case dict:find({Mod, Idx}, Fwd) of - {ok, ForwardTo} -> ForwardTo; - _ -> undefined + {ok, ForwardTo} -> ForwardTo; + _ -> undefined end. check_forward(Ring, Mod, Index) -> Node = node(), case riak_core_ring:next_owner(Ring, Index, Mod) of - {Node, '$resize', _} -> - Complete = - riak_core_ring:complete_resize_transfers(Ring, - {Index, Node}, Mod), - {{Mod, Index}, Complete}; - {Node, '$delete', _} -> {{Mod, Index}, undefined}; - {Node, NextOwner, complete} -> - {{Mod, Index}, NextOwner}; - _ -> {{Mod, Index}, undefined} + {Node, '$resize', _} -> + Complete = + riak_core_ring:complete_resize_transfers(Ring, + {Index, Node}, + Mod), + {{Mod, Index}, Complete}; + {Node, '$delete', _} -> {{Mod, Index}, undefined}; + {Node, NextOwner, complete} -> + {{Mod, Index}, NextOwner}; + _ -> {{Mod, Index}, undefined} end. 
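get_vnode/3 above starts missing vnodes in bulk: it partitions the requested indices into already-running and not-yet-started, starts the latter in parallel (riak_core_util:pmap/3 in the real code), then answers in the caller's original order via a lookup table. A condensed sketch of the idiom, using maps and a dummy start function in place of the ETS table and supervisor call:

-module(batch_start_sketch).

-export([ensure_all/2]).

%% Keys: the indices the caller asked for; Running: map of known pids.
ensure_all(Keys, Running) ->
    {Missing, _Found} =
        lists:partition(fun (K) -> not maps:is_key(K, Running) end, Keys),
    %% riak_core_util:pmap/3 performs this start step in parallel.
    Started = [{K, start_one(K)} || K <- Missing],
    All = maps:merge(Running, maps:from_list(Started)),
    %% Answer in the caller's original order, as get_vnode/3 does.
    [{K, maps:get(K, All)} || K <- Keys].

%% Dummy stand-in for riak_core_vnode_sup:start_vnode/3.
start_one(_K) -> spawn(fun () -> receive stop -> ok end end).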
check_forward_precomputed(Completed, Mod, Index, Node, - Ring) -> + Ring) -> case dict:find({Mod, Index}, Completed) of - {ok, '$resize'} -> - Complete = - riak_core_ring:complete_resize_transfers(Ring, - {Index, Node}, Mod), - {{Mod, Index}, Complete}; - {ok, '$delete'} -> {{Mod, Index}, undefined}; - {ok, NextOwner} -> {{Mod, Index}, NextOwner}; - _ -> {{Mod, Index}, undefined} + {ok, '$resize'} -> + Complete = + riak_core_ring:complete_resize_transfers(Ring, + {Index, Node}, + Mod), + {{Mod, Index}, Complete}; + {ok, '$delete'} -> {{Mod, Index}, undefined}; + {ok, NextOwner} -> {{Mod, Index}, NextOwner}; + _ -> {{Mod, Index}, undefined} end. compute_forwarding(Mods, Ring) -> Node = node(), CL = [{{Mod, Idx}, NextOwner} - || Mod <- Mods, - {Idx, Owner, NextOwner} - <- riak_core_ring:completed_next_owners(Mod, Ring), - Owner =:= Node], + || Mod <- Mods, + {Idx, Owner, NextOwner} + <- riak_core_ring:completed_next_owners(Mod, Ring), + Owner =:= Node], Completed = dict:from_list(CL), - Forwarding = [check_forward_precomputed(Completed, Mod, - I, N, Ring) - || {I, N} <- riak_core_ring:all_owners(Ring), - Mod <- Mods], + Forwarding = [check_forward_precomputed(Completed, + Mod, + I, + N, + Ring) + || {I, N} <- riak_core_ring:all_owners(Ring), + Mod <- Mods], dict:from_list(Forwarding). update_forwarding(AllVNodes, Mods, Ring, - State = #state{forwarding = Forwarding}) -> + State = #state{forwarding = Forwarding}) -> NewForwarding = compute_forwarding(Mods, Ring), %% Inform vnodes that have changed forwarding status VNodes = dict:from_list([{{Mod, Idx}, Pid} - || {Mod, Idx, Pid} <- AllVNodes]), + || {Mod, Idx, Pid} <- AllVNodes]), Diff = dict:filter(fun (K, V) -> - dict:find(K, Forwarding) /= {ok, V} - end, - NewForwarding), + dict:find(K, Forwarding) /= {ok, V} + end, + NewForwarding), dict:fold(fun ({Mod, Idx}, ForwardTo, _) -> - change_forward(VNodes, Mod, Idx, ForwardTo), ok - end, - ok, Diff), + change_forward(VNodes, Mod, Idx, ForwardTo), + ok + end, + ok, + Diff), State#state{forwarding = NewForwarding}. update_forwarding({Mod, Idx}, Ring, - State = #state{forwarding = Forwarding}) -> + State = #state{forwarding = Forwarding}) -> {_, ForwardTo} = check_forward(Ring, Mod, Idx), - NewForwarding = dict:store({Mod, Idx}, ForwardTo, - Forwarding), + NewForwarding = dict:store({Mod, Idx}, + ForwardTo, + Forwarding), State#state{forwarding = NewForwarding}. change_forward(VNodes, Mod, Idx, ForwardTo) -> case dict:find({Mod, Idx}, VNodes) of - error -> ok; - {ok, Pid} -> - riak_core_vnode:set_forwarding(Pid, ForwardTo), ok + error -> ok; + {ok, Pid} -> + riak_core_vnode:set_forwarding(Pid, ForwardTo), + ok end. 
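update_forwarding/4 above pushes new forwarding targets only to vnodes whose entry actually changed, by filtering the freshly computed table against the previous one. The comparison in isolation, as a standalone sketch using the same stdlib `dict' API:

-module(fwd_diff_sketch).

-export([changed/2]).

%% Returns the {Key, NewValue} pairs in New whose value differs from,
%% or is absent in, Old -- exactly the entries that need to be pushed
%% to running vnodes. dict:find/2 returns {ok, V} or error, so both
%% changed values and brand-new keys fail the /= test.
changed(Old, New) ->
    Diff = dict:filter(fun (K, V) -> dict:find(K, Old) /= {ok, V} end,
                       New),
    dict:to_list(Diff).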
update_handoff(AllVNodes, Ring, CHBin, State) -> case riak_core_ring:ring_ready(Ring) of - false -> State; - true -> - NewHO = lists:flatten([case should_handoff(Ring, CHBin, - Mod, Idx) - of - false -> []; - {true, primary, TargetNode} -> - [{{Mod, Idx}, - {ownership, outbound, TargetNode}}]; - {true, {fallback, _Node}, TargetNode} -> - [{{Mod, Idx}, - {hinted, outbound, TargetNode}}]; - {true, '$resize' = Action} -> - [{{Mod, Idx}, - {resize, outbound, Action}}]; - {true, '$delete' = Action} -> - [{{Mod, Idx}, {delete, local, Action}}] - end - || {Mod, Idx, _Pid} <- AllVNodes]), - State#state{handoff = dict:from_list(NewHO)} + false -> State; + true -> + NewHO = lists:flatten([case should_handoff(Ring, + CHBin, + Mod, + Idx) + of + false -> []; + {true, primary, TargetNode} -> + [{{Mod, Idx}, + {ownership, + outbound, + TargetNode}}]; + {true, {fallback, _Node}, TargetNode} -> + [{{Mod, Idx}, + {hinted, outbound, TargetNode}}]; + {true, '$resize' = Action} -> + [{{Mod, Idx}, + {resize, outbound, Action}}]; + {true, '$delete' = Action} -> + [{{Mod, Idx}, + {delete, local, Action}}] + end + || {Mod, Idx, _Pid} <- AllVNodes]), + State#state{handoff = dict:from_list(NewHO)} end. should_handoff(Ring, _CHBin, Mod, Idx) -> {_, NextOwner, _} = riak_core_ring:next_owner(Ring, - Idx), + Idx), Type = riak_core_ring:vnode_type(Ring, Idx), Ready = riak_core_ring:ring_ready(Ring), IsResizing = riak_core_ring:is_resizing(Ring), - case determine_handoff_target(Type, NextOwner, Ready, - IsResizing) - of - undefined -> false; - Action - when Action =:= '$resize' orelse Action =:= '$delete' -> - {true, Action}; - TargetNode -> - case app_for_vnode_module(Mod) of - undefined -> false; - {ok, App} -> - case lists:member(TargetNode, - riak_core_node_watcher:nodes(App)) - of - false -> false; - true -> {true, Type, TargetNode} - end - end + case determine_handoff_target(Type, + NextOwner, + Ready, + IsResizing) + of + undefined -> false; + Action + when Action =:= '$resize' orelse Action =:= '$delete' -> + {true, Action}; + TargetNode -> + case app_for_vnode_module(Mod) of + undefined -> false; + {ok, App} -> + case lists:member(TargetNode, + riak_core_node_watcher:nodes(App)) + of + false -> false; + true -> {true, Type, TargetNode} + end + end end. determine_handoff_target(Type, NextOwner, RingReady, - IsResize) -> + IsResize) -> Me = node(), - determine_handoff_target(Type, NextOwner, RingReady, - IsResize, NextOwner =:= Me). + determine_handoff_target(Type, + NextOwner, + RingReady, + IsResize, + NextOwner =:= Me). determine_handoff_target(primary, _, _, _, true) -> %% Never hand off to myself as a primary @@ -796,7 +877,7 @@ determine_handoff_target(primary, undefined, _, _, _) -> %% No ring change indicated for this partition undefined; determine_handoff_target(primary, NextOwner, true, _, - _) -> + _) -> %% Primary, ring is ready, go. This may be a node or a `$resize' %% action NextOwner; @@ -804,23 +885,23 @@ determine_handoff_target(primary, _, false, _, _) -> %% Ring isn't ready, no matter what, don't do a primary handoff undefined; determine_handoff_target({fallback, _Target}, - '$delete' = Action, _, _, _) -> + '$delete' = Action, _, _, _) -> %% partitions moved during resize and scheduled for deletion, indexes %% that exist in both the original and resized ring that were moved appear %% as fallbacks. 
Action; determine_handoff_target(resized_primary, - '$delete' = Action, _, _, _) -> + '$delete' = Action, _, _, _) -> %% partitions that no longer exist after the ring has been resized (shrunk) %% scheduled for deletion Action; determine_handoff_target(resized_primary, _, _, false, - _) -> + _) -> %% partitions that would have existed in a ring whose expansion was aborted %% and are still running need to be cleaned up after and shutdown '$delete'; determine_handoff_target({fallback, For}, undefined, _, - _, _) -> + _, _) -> %% Fallback vnode target is primary (hinted handoff). `For' can %% technically be a `$resize' action but unclear it ever would be For; @@ -828,12 +909,12 @@ determine_handoff_target(_, _, _, _, _) -> undefined. app_for_vnode_module(Mod) when is_atom(Mod) -> case application:get_env(riak_core, vnode_modules) of - {ok, Mods} -> - case lists:keysearch(Mod, 2, Mods) of - {value, {App, Mod}} -> {ok, App}; - false -> undefined - end; - undefined -> undefined + {ok, Mods} -> + case lists:keysearch(Mod, 2, Mods) of + {value, {App, Mod}} -> {ok, App}; + false -> undefined + end; + undefined -> undefined end. maybe_trigger_handoff(Mod, Idx, State) -> @@ -841,88 +922,93 @@ maybe_trigger_handoff(Mod, Idx, State) -> maybe_trigger_handoff(Mod, Idx, Pid, State). maybe_trigger_handoff(Mod, Idx, Pid, - _State = #state{handoff = HO}) -> + _State = #state{handoff = HO}) -> case dict:find({Mod, Idx}, HO) of - {ok, {resize, _Direction, '$resize'}} -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - case riak_core_ring:awaiting_resize_transfer(Ring, - {Idx, node()}, Mod) - of - undefined -> ok; - {TargetIdx, TargetNode} -> - riak_core_vnode:trigger_handoff(Pid, TargetIdx, - TargetNode) - end; - {ok, {delete, local, '$delete'}} -> - riak_core_vnode:trigger_delete(Pid); - {ok, {_Type, _Direction, TargetNode}} -> - riak_core_vnode:trigger_handoff(Pid, TargetNode), ok; - error -> ok + {ok, {resize, _Direction, '$resize'}} -> + {ok, Ring} = riak_core_ring_manager:get_my_ring(), + case riak_core_ring:awaiting_resize_transfer(Ring, + {Idx, node()}, + Mod) + of + undefined -> ok; + {TargetIdx, TargetNode} -> + riak_core_vnode:trigger_handoff(Pid, + TargetIdx, + TargetNode) + end; + {ok, {delete, local, '$delete'}} -> + riak_core_vnode:trigger_delete(Pid); + {ok, {_Type, _Direction, TargetNode}} -> + riak_core_vnode:trigger_handoff(Pid, TargetNode), + ok; + error -> ok end. 
get_all_vnodes_status(#state{forwarding = Forwarding, - handoff = HO}) -> + handoff = HO}) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), Owners = riak_core_ring:all_owners(Ring), VNodes = get_all_vnodes(), Mods = [Mod - || {_App, Mod} <- riak_core:vnode_modules()], + || {_App, Mod} <- riak_core:vnode_modules()], ThisNode = node(), Types = [case Owner of - ThisNode -> {{Mod, Idx}, {type, primary}}; - _ -> {{Mod, Idx}, {type, secondary}} - end - || {Idx, Owner} <- Owners, Mod <- Mods], + ThisNode -> {{Mod, Idx}, {type, primary}}; + _ -> {{Mod, Idx}, {type, secondary}} + end + || {Idx, Owner} <- Owners, Mod <- Mods], Types2 = lists:keysort(1, Types), Pids = [{{Mod, Idx}, {pid, Pid}} - || {Mod, Idx, Pid} <- VNodes], + || {Mod, Idx, Pid} <- VNodes], Pids2 = lists:keysort(1, Pids), Forwarding1 = lists:sort(dict:to_list(Forwarding)), Forwarding2 = [{MI, {forwarding, Node}} - || {MI, Node} <- Forwarding1, Node /= undefined], + || {MI, Node} <- Forwarding1, Node /= undefined], Handoff1 = lists:sort(dict:to_list(HO)), Handoff2 = [{MI, {should_handoff, Node}} - || {MI, {_Type, _Direction, Node}} <- Handoff1], + || {MI, {_Type, _Direction, Node}} <- Handoff1], MergeFn = fun (_, V1, V2) - when is_list(V1) and is_list(V2) -> - V1 ++ V2; - (_, V1, V2) when is_list(V1) -> V1 ++ [V2]; - (_, V1, V2) -> [V1, V2] - end, + when is_list(V1) and is_list(V2) -> + V1 ++ V2; + (_, V1, V2) when is_list(V1) -> V1 ++ [V2]; + (_, V1, V2) -> [V1, V2] + end, Status = lists:foldl(fun (B, A) -> - orddict:merge(MergeFn, A, B) - end, - Types2, [Pids2, Forwarding2, Handoff2]), + orddict:merge(MergeFn, A, B) + end, + Types2, + [Pids2, Forwarding2, Handoff2]), Status. update_never_started(Ring, - State = #state{known_modules = KnownMods}) -> + State = #state{known_modules = KnownMods}) -> UnknownMods = [Mod - || {_App, Mod} <- riak_core:vnode_modules(), - not lists:member(Mod, KnownMods)], + || {_App, Mod} <- riak_core:vnode_modules(), + not lists:member(Mod, KnownMods)], case UnknownMods of - [] -> State; - _ -> - Indices = [Idx - || {Idx, _} <- riak_core_ring:all_owners(Ring)], - lists:foldl(fun (Mod, StateAcc) -> - update_never_started(Mod, Indices, StateAcc) - end, - State, UnknownMods) + [] -> State; + _ -> + Indices = [Idx + || {Idx, _} <- riak_core_ring:all_owners(Ring)], + lists:foldl(fun (Mod, StateAcc) -> + update_never_started(Mod, Indices, StateAcc) + end, + State, + UnknownMods) end. update_never_started(Mod, Indices, State) -> IdxPids = get_all_index_pid(Mod, []), AlreadyStarted = [Idx || {Idx, _Pid} <- IdxPids], NeverStarted = - ordsets:subtract(ordsets:from_list(Indices), - ordsets:from_list(AlreadyStarted)), + ordsets:subtract(ordsets:from_list(Indices), + ordsets:from_list(AlreadyStarted)), NeverStarted2 = [{Idx, Mod} || Idx <- NeverStarted], NeverStarted3 = NeverStarted2 ++ - State#state.never_started, + State#state.never_started, KnownModules = [Mod | State#state.known_modules], State#state{known_modules = KnownModules, - never_started = NeverStarted3}. + never_started = NeverStarted3}. maybe_start_vnodes(Ring, State) -> State2 = update_never_started(Ring, State), @@ -930,70 +1016,71 @@ maybe_start_vnodes(Ring, State) -> State3. 
maybe_start_vnodes(State = #state{vnode_start_tokens = - Tokens, - never_started = NeverStarted}) -> + Tokens, + never_started = NeverStarted}) -> case {Tokens, NeverStarted} of - {0, _} -> State; - {_, []} -> State; - {_, [{Idx, Mod} | NeverStarted2]} -> - _ = get_vnode(Idx, Mod, State), - gen_server:cast(?MODULE, maybe_start_vnodes), - State#state{vnode_start_tokens = Tokens - 1, - never_started = NeverStarted2} + {0, _} -> State; + {_, []} -> State; + {_, [{Idx, Mod} | NeverStarted2]} -> + _ = get_vnode(Idx, Mod, State), + gen_server:cast(?MODULE, maybe_start_vnodes), + State#state{vnode_start_tokens = Tokens - 1, + never_started = NeverStarted2} end. -spec check_repairs(repairs()) -> Repairs2 :: repairs(). check_repairs(Repairs) -> Check = fun (R = #repair{minus_one_xfer = MOX, - plus_one_xfer = POX}, - Repairs2) -> - Pairs = R#repair.pairs, - MO = get_minus_one(Pairs), - PO = get_plus_one(Pairs), - MOX2 = maybe_retry(R, MO, MOX), - POX2 = maybe_retry(R, PO, POX), - if (?XFER_COMPLETE(MOX2)) andalso - (?XFER_COMPLETE(POX2)) -> - Repairs2; - true -> - R2 = R#repair{minus_one_xfer = MOX2, - plus_one_xfer = POX2}, - [R2 | Repairs2] - end - end, + plus_one_xfer = POX}, + Repairs2) -> + Pairs = R#repair.pairs, + MO = get_minus_one(Pairs), + PO = get_plus_one(Pairs), + MOX2 = maybe_retry(R, MO, MOX), + POX2 = maybe_retry(R, PO, POX), + if (?XFER_COMPLETE(MOX2)) andalso + (?XFER_COMPLETE(POX2)) -> + Repairs2; + true -> + R2 = R#repair{minus_one_xfer = MOX2, + plus_one_xfer = POX2}, + [R2 | Repairs2] + end + end, lists:reverse(lists:foldl(Check, [], Repairs)). %% TODO: get all this repair, xfer status and Src business figured out. -spec maybe_retry(repair(), tuple(), - xfer_status()) -> Xfer2 :: xfer_status(). + xfer_status()) -> Xfer2 :: xfer_status(). maybe_retry(R, {SrcPartition, _} = Src, Xfer) -> case Xfer#xfer_status.status of - complete -> Xfer; - pending -> - {Mod, _, Partition} = Xfer#xfer_status.mod_src_target, - FilterModFun = R#repair.filter_mod_fun, - riak_core_handoff_manager:xfer(Src, {Mod, Partition}, - FilterModFun), - #xfer_status{status = pending, - mod_src_target = {Mod, SrcPartition, Partition}} + complete -> Xfer; + pending -> + {Mod, _, Partition} = Xfer#xfer_status.mod_src_target, + FilterModFun = R#repair.filter_mod_fun, + riak_core_handoff_manager:xfer(Src, + {Mod, Partition}, + FilterModFun), + #xfer_status{status = pending, + mod_src_target = {Mod, SrcPartition, Partition}} end. %% @private %% %% @doc Verify that all nodes are up involved in the repair. -spec check_up([{non_neg_integer(), node()}], - [node()]) -> true | - {false, Down :: [{non_neg_integer(), node()}]}. + [node()]) -> true | + {false, Down :: [{non_neg_integer(), node()}]}. check_up(Pairs, UpNodes) -> Down = [Pair - || {_Partition, Owner} = Pair <- Pairs, - not lists:member(Owner, UpNodes)], + || {_Partition, Owner} = Pair <- Pairs, + not lists:member(Owner, UpNodes)], case Down of - [] -> true; - _ -> {false, Down} + [] -> true; + _ -> {false, Down} end. %% @private @@ -1001,16 +1088,17 @@ check_up(Pairs, UpNodes) -> %% @doc Get the three `{Partition, Owner}' pairs involved in a repair %% operation for the given `Ring' and `Partition'. -spec repair_pairs(riak_core_ring:riak_core_ring(), - non_neg_integer()) -> [{Partition :: non_neg_integer(), - Owner :: node()}]. + non_neg_integer()) -> [{Partition :: non_neg_integer(), + Owner :: node()}]. 
repair_pairs(Ring, Partition) ->
     Owner = riak_core_ring:index_owner(Ring, Partition),
     CH = riak_core_ring:chash(Ring),
     [_, Before] =
-      chash:predecessors(<<Partition:160/integer>>, CH, 2),
+      chash:predecessors(<<Partition:160/integer>>, CH, 2),
     [After] = chash:successors(<<Partition:160/integer>>,
-                               CH, 1),
+                               CH,
+                               1),
     [Before, {Partition, Owner}, After].

 %% @private
@@ -1018,14 +1106,15 @@ repair_pairs(Ring, Partition) ->
 %% @doc Get the corresponding repair entry in `Repairs', if one
 %% exists, for the given `ModPartition'.
 -spec get_repair(mod_partition(),
-                 repairs()) -> repair() | none.
+                 repairs()) -> repair() | none.

 get_repair(ModPartition, Repairs) ->
-    case lists:keyfind(ModPartition, #repair.mod_partition,
-                       Repairs)
-        of
-      false -> none;
-      Val -> Val
+    case lists:keyfind(ModPartition,
+                       #repair.mod_partition,
+                       Repairs)
+        of
+        false -> none;
+        Val -> Val
     end.

 %% @private
@@ -1035,7 +1124,8 @@ get_repair(ModPartition, Repairs) ->

 remove_repair(Repair, Repairs) ->
     lists:keydelete(Repair#repair.mod_partition,
-                    #repair.mod_partition, Repairs).
+                    #repair.mod_partition,
+                    Repairs).

 %% @private
 %%
@@ -1044,14 +1134,16 @@ remove_repair(Repair, Repairs) ->

 replace_repair(Repair, Repairs) ->
     lists:keyreplace(Repair#repair.mod_partition,
-                     #repair.mod_partition, Repairs, Repair).
+                     #repair.mod_partition,
+                     Repairs,
+                     Repair).

 %% @private
 %%
 %% @doc Get the `{Partition, Owner}' pair that comes before the
 %% partition under repair.
 -spec get_minus_one([{index(), node()}]) -> {index(),
-                                             node()}.
+                                             node()}.

 get_minus_one([MinusOne, _, _]) -> MinusOne.

@@ -1060,7 +1152,7 @@ get_minus_one([MinusOne, _, _]) -> MinusOne.
 %% @doc Get the `{Partition, Owner}' pair that comes after the
 %% partition under repair.
 -spec get_plus_one([{index(), node()}]) -> {index(),
-                                            node()}.
+                                            node()}.

 get_plus_one([_, _, PlusOne]) -> PlusOne.

@@ -1085,12 +1177,15 @@ kill_repair(Repair, Reason) ->
     POModSrcTarget = POX#xfer_status.mod_src_target,
     %% Kill the remote senders
     riak_core_handoff_manager:kill_xfer(MOOwner,
-                                        MOModSrcTarget, Reason),
+                                        MOModSrcTarget,
+                                        Reason),
     riak_core_handoff_manager:kill_xfer(POOwner,
-                                        POModSrcTarget, Reason),
+                                        POModSrcTarget,
+                                        Reason),
     %% Kill the local receivers
     riak_core_handoff_manager:kill_xfer(node(),
-                                        {Mod, undefined, Partition}, Reason).
+                                        {Mod, undefined, Partition},
+                                        Reason).

 register_vnode_stats(_Mod, _Index, _Pid) ->
     %% STATS
diff --git a/src/riak_core_vnode_master.erl b/src/riak_core_vnode_master.erl
index 7088eee83..9c563123f 100644
--- a/src/riak_core_vnode_master.erl
+++ b/src/riak_core_vnode_master.erl
@@ -28,16 +28,31 @@

 -behaviour(gen_server).

--export([start_link/1, start_link/2, start_link/3,
-         get_vnode_pid/2, start_vnode/2, command/3, command/4,
-         command_unreliable/3, command_unreliable/4,
-         sync_command/3, sync_command/4, coverage/5,
-         command_return_vnode/4, sync_spawn_command/3,
-         make_request/3, make_coverage_request/4, all_nodes/1,
-         reg_name/1]).
-
--export([init/1, handle_call/3, handle_cast/2,
-         handle_info/2, terminate/2, code_change/3]).
+-export([start_link/1,
+         start_link/2,
+         start_link/3,
+         get_vnode_pid/2,
+         start_vnode/2,
+         command/3,
+         command/4,
+         command_unreliable/3,
+         command_unreliable/4,
+         sync_command/3,
+         sync_command/4,
+         coverage/5,
+         command_return_vnode/4,
+         sync_spawn_command/3,
+         make_request/3,
+         make_coverage_request/4,
+         all_nodes/1,
+         reg_name/1]).
+
+-export([init/1,
+         handle_call/3,
+         handle_cast/2,
+         handle_info/2,
+         terminate/2,
+         code_change/3]).

 -record(state, {idxtab, sup_name, vnode_mod, legacy}).
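Stepping back to repair_pairs/2 in riak_core_vnode_manager above: it obtains the ring neighbours of the partition under repair from chash. The same neighbour selection over a plain sorted `{Partition, Owner}' list, with explicit wraparound, is sketched below; this is a toy stand-in for the consistent-hash arithmetic, not the real ring maths.

-module(repair_pairs_sketch).

-export([pairs/2]).

%% Owners is the ring's sorted [{Partition, Owner}] list; return the
%% pair before, the pair under repair, and the pair after, wrapping
%% around the ends of the ring.
pairs(Owners, Partition) ->
    N = length(Owners),
    Idx = index_of(Partition, Owners, 1),
    Before = lists:nth(wrap(Idx - 1, N), Owners),
    After  = lists:nth(wrap(Idx + 1, N), Owners),
    {Partition, Owner} = lists:nth(Idx, Owners),
    [Before, {Partition, Owner}, After].

index_of(P, [{P, _} | _], I) -> I;
index_of(P, [_ | Rest], I) -> index_of(P, Rest, I + 1).

wrap(0, N) -> N;                       % step back off the front
wrap(I, N) when I > N -> I - N;        % step forward off the end
wrap(I, _) -> I.

%% Example: pairs([{0, n1}, {10, n2}, {20, n3}], 0)
%%   =:= [{20, n3}, {0, n1}, {10, n2}]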
@@ -64,8 +79,10 @@ start_link(VNodeMod, LegacyMod) -> start_link(VNodeMod, LegacyMod, Service) -> RegName = reg_name(VNodeMod), - gen_server:start_link({local, RegName}, ?MODULE, - [Service, VNodeMod, LegacyMod, RegName], []). + gen_server:start_link({local, RegName}, + ?MODULE, + [Service, VNodeMod, LegacyMod, RegName], + []). start_vnode(Index, VNodeMod) -> riak_core_vnode_manager:start_vnode(Index, VNodeMod). @@ -83,28 +100,32 @@ command(PrefListOrCmd, Msg, Sender, VMaster) -> command2(PrefListOrCmd, Msg, Sender, VMaster, normal). command_unreliable(PrefListOrCmd, Msg, Sender, - VMaster) -> - command2(PrefListOrCmd, Msg, Sender, VMaster, - unreliable). + VMaster) -> + command2(PrefListOrCmd, + Msg, + Sender, + VMaster, + unreliable). %% Send the command to the preflist given with responses going to Sender command2([], _Msg, _Sender, _VMaster, _How) -> ok; command2([{Index, Pid} | Rest], Msg, Sender, VMaster, - How = normal) + How = normal) when is_pid(Pid) -> - Request= make_request(Msg, Sender, Index), - riak_core_vnode:send_req(Pid, Request), - command2(Rest, Msg, Sender, VMaster, How); + Request = make_request(Msg, Sender, Index), + riak_core_vnode:send_req(Pid, Request), + command2(Rest, Msg, Sender, VMaster, How); command2([{Index, Pid} | Rest], Msg, Sender, VMaster, - How = unreliable) + How = unreliable) when is_pid(Pid) -> riak_core_send_msg:send_event_unreliable(Pid, - make_request(Msg, Sender, Index)), + make_request(Msg, Sender, Index)), command2(Rest, Msg, Sender, VMaster, How); command2([{Index, Node} | Rest], Msg, Sender, VMaster, - How) -> + How) -> proxy_cast({VMaster, Node}, - make_request(Msg, Sender, Index), How), + make_request(Msg, Sender, Index), + How), command2(Rest, Msg, Sender, VMaster, How); command2(DestTuple, Msg, Sender, VMaster, How) when is_tuple(DestTuple) -> @@ -114,27 +135,29 @@ command2(DestTuple, Msg, Sender, VMaster, How) %% Send a command to a covering set of vnodes coverage(Msg, CoverageVNodes, Keyspaces, - {Type, Ref, From}, VMaster) + {Type, Ref, From}, VMaster) when is_list(CoverageVNodes) -> [proxy_cast({VMaster, Node}, - make_coverage_request(Msg, Keyspaces, - {Type, {Ref, {Index, Node}}, From}, - Index)) + make_coverage_request(Msg, + Keyspaces, + {Type, {Ref, {Index, Node}}, From}, + Index)) || {Index, Node} <- CoverageVNodes]; coverage(Msg, {Index, Node}, Keyspaces, Sender, - VMaster) -> + VMaster) -> proxy_cast({VMaster, Node}, - make_coverage_request(Msg, Keyspaces, Sender, Index)). + make_coverage_request(Msg, Keyspaces, Sender, Index)). %% Send the command to an individual Index/Node combination, but also %% return the pid for the vnode handling the request, as `{ok, VnodePid}'. command_return_vnode({Index, Node}, Msg, Sender, - VMaster) -> + VMaster) -> Req = make_request(Msg, Sender, Index), Mod = vmaster_to_vmod(VMaster), - riak_core_vnode_proxy:command_return_vnode({Mod, Index, - Node}, - Req). + riak_core_vnode_proxy:command_return_vnode({Mod, + Index, + Node}, + Req). %% Send a synchronous command to an individual Index/Node combination. %% Will not return until the vnode has returned @@ -146,12 +169,13 @@ sync_command({Index, Node}, Msg, VMaster, Timeout) -> %% the From for handle_call so that the {reply} return gets %% sent here. 
Request = make_request(Msg, - {server, undefined, undefined}, Index), + {server, undefined, undefined}, + Index), case gen_server:call({VMaster, Node}, Request, Timeout) - of - {vnode_error, {Error, _Args}} -> error(Error); - {vnode_error, Error} -> error(Error); - Else -> Else + of + {vnode_error, {Error, _Args}} -> error(Error); + {vnode_error, Error} -> error(Error); + Else -> Else end. %% Send a synchronous spawned command to an individual Index/Node combination. @@ -159,32 +183,34 @@ sync_command({Index, Node}, Msg, VMaster, Timeout) -> %% continue to handle requests. sync_spawn_command({Index, Node}, Msg, VMaster) -> Request = make_request(Msg, - {server, undefined, undefined}, Index), - case gen_server:call({VMaster, Node}, {spawn, Request}, - infinity) - of - {vnode_error, {Error, _Args}} -> error(Error); - {vnode_error, Error} -> error(Error); - Else -> Else + {server, undefined, undefined}, + Index), + case gen_server:call({VMaster, Node}, + {spawn, Request}, + infinity) + of + {vnode_error, {Error, _Args}} -> error(Error); + {vnode_error, Error} -> error(Error); + Else -> Else end. %% Make a request record - exported for use by legacy modules -spec make_request(vnode_req(), sender(), - partition()) -> riak_vnode_req_v1(). + partition()) -> riak_vnode_req_v1(). make_request(Request, Sender, Index) -> #riak_vnode_req_v1{index = Index, sender = Sender, - request = Request}. + request = Request}. %% Make a request record - exported for use by legacy modules -spec make_coverage_request(vnode_req(), keyspaces(), - sender(), partition()) -> riak_coverage_req_v1(). + sender(), partition()) -> riak_coverage_req_v1(). make_coverage_request(Request, KeySpaces, Sender, - Index) -> + Index) -> #riak_coverage_req_v1{index = Index, - keyspaces = KeySpaces, sender = Sender, - request = Request}. + keyspaces = KeySpaces, sender = Sender, + request = Request}. %% Request a list of Pids for all vnodes %% @deprecated @@ -200,7 +226,7 @@ init([Service, VNodeMod, LegacyMod, _RegName]) -> gen_server:cast(self(), {wait_for_service, Service}), {ok, #state{idxtab = undefined, vnode_mod = VNodeMod, - legacy = LegacyMod}}. + legacy = LegacyMod}}. proxy_cast(Who, Req) -> proxy_cast(Who, Req, normal). @@ -208,13 +234,13 @@ proxy_cast({VMaster, Node}, Req, How) -> do_proxy_cast({VMaster, Node}, Req, How). 
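command2/5 above fans a request out over the preflist, sending directly to local pids and going through proxy_cast/3 to a registered name on the remote node for `{Index, Node}' entries. A stripped-down sketch of that routing split; proxy_name/1 here is a crude illustrative stand-in for riak_core_vnode_proxy:reg_name/2.

-module(fanout_sketch).

-export([fanout/2]).

%% Route one message to every preflist entry.
fanout(Preflist, Msg) ->
    _ = [route(Entry, Msg) || Entry <- Preflist],
    ok.

%% Local pid: deliver directly. Remote entry: send to the partition's
%% registered proxy name on that node.
route({_Index, Pid}, Msg) when is_pid(Pid) -> Pid ! Msg;
route({Index, Node}, Msg) ->
    {proxy_name(Index), Node} ! Msg.

proxy_name(Index) ->
    list_to_atom("proxy_" ++ integer_to_list(Index)).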
do_proxy_cast({VMaster, Node}, - Req = #riak_vnode_req_v1{index = Idx}, How) -> + Req = #riak_vnode_req_v1{index = Idx}, How) -> Mod = vmaster_to_vmod(VMaster), Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx, Node), send_an_event(Proxy, Req, How), ok; do_proxy_cast({VMaster, Node}, - Req = #riak_coverage_req_v1{index = Idx}, How) -> + Req = #riak_coverage_req_v1{index = Idx}, How) -> Mod = vmaster_to_vmod(VMaster), Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx, Node), send_an_event(Proxy, Req, How), @@ -227,66 +253,69 @@ send_an_event(Dest, Event, unreliable) -> handle_cast({wait_for_service, Service}, State) -> case Service of - undefined -> ok; - _ -> - logger:debug("Waiting for service: ~p", [Service]), - riak_core:wait_for_service(Service) + undefined -> ok; + _ -> + logger:debug("Waiting for service: ~p", [Service]), + riak_core:wait_for_service(Service) end, {noreply, State}; handle_cast(Req = #riak_vnode_req_v1{index = Idx}, - State = #state{vnode_mod = Mod}) -> + State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, Req), {noreply, State}; handle_cast(Req = #riak_coverage_req_v1{index = Idx}, - State = #state{vnode_mod = Mod}) -> + State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, Req), {noreply, State}; handle_cast(Other, State = #state{legacy = Legacy}) when Legacy =/= undefined -> case catch Legacy:rewrite_cast(Other) of - {ok, #riak_vnode_req_v1{} = Req} -> - handle_cast(Req, State); - _ -> {noreply, State} + {ok, #riak_vnode_req_v1{} = Req} -> + handle_cast(Req, State); + _ -> {noreply, State} end. handle_call({return_vnode, - Req = #riak_vnode_req_v1{index = Idx}}, - _From, State = #state{vnode_mod = Mod}) -> + Req = #riak_vnode_req_v1{index = Idx}}, + _From, State = #state{vnode_mod = Mod}) -> {ok, Pid} = - riak_core_vnode_proxy:command_return_vnode({Mod, Idx, - node()}, - Req), + riak_core_vnode_proxy:command_return_vnode({Mod, + Idx, + node()}, + Req), {reply, {ok, Pid}, State}; handle_call(Req = #riak_vnode_req_v1{index = Idx, - sender = {server, undefined, undefined}}, - From, State = #state{vnode_mod = Mod}) -> + sender = {server, undefined, undefined}}, + From, State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, - Req#riak_vnode_req_v1{sender = - {server, undefined, - From}}), + Req#riak_vnode_req_v1{sender = + {server, + undefined, + From}}), {noreply, State}; handle_call({spawn, - Req = #riak_vnode_req_v1{index = Idx, - sender = {server, undefined, undefined}}}, - From, State = #state{vnode_mod = Mod}) -> + Req = #riak_vnode_req_v1{index = Idx, + sender = {server, undefined, undefined}}}, + From, State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), Sender = {server, undefined, From}, spawn_link(fun () -> - riak_core_vnode:send_all_proxy_req(Proxy, Req#riak_vnode_req_v1{sender - = - Sender}) - end), + riak_core_vnode:send_all_proxy_req(Proxy, + Req#riak_vnode_req_v1{sender + = + Sender}) + end), {noreply, State}; handle_call(Other, From, - State = #state{legacy = Legacy}) + State = #state{legacy = Legacy}) when Legacy =/= undefined -> case catch Legacy:rewrite_call(Other, From) of - {ok, #riak_vnode_req_v1{} = Req} -> - handle_call(Req, From, State); - _ -> {noreply, State} + {ok, #riak_vnode_req_v1{} = Req} -> + handle_call(Req, From, State); + _ -> {noreply, State} end. handle_info(_Info, State) -> {noreply, State}. 
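sync_command/4 and the handle_call clauses above rely on a deferred-reply trick: the master stashes the gen_server `From' tuple inside the request's sender field and returns `{noreply, ...}', so whichever process eventually handles the request answers the original caller with gen_server:reply/2. A self-contained sketch of the pattern (module name invented):

-module(deferred_reply_sketch).

-behaviour(gen_server).

-export([start_link/0, ask/1]).
-export([init/1, handle_call/3, handle_cast/2]).

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

ask(Question) ->
    gen_server:call(?MODULE, {ask, Question}).

init([]) -> {ok, #{}}.

handle_call({ask, Question}, From, State) ->
    %% Hand the caller's address to a worker; the reply comes from
    %% there, not from this server.
    spawn_link(fun () ->
                       Answer = {echo, Question},
                       gen_server:reply(From, Answer)
               end),
    {noreply, State}.   % the server itself never replies

handle_cast(_Msg, State) -> {noreply, State}.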
diff --git a/src/riak_core_vnode_proxy.erl b/src/riak_core_vnode_proxy.erl index bf78ada04..76fbdc4bb 100644 --- a/src/riak_core_vnode_proxy.erl +++ b/src/riak_core_vnode_proxy.erl @@ -18,12 +18,20 @@ %% ------------------------------------------------------------------- -module(riak_core_vnode_proxy). --export([start_link/2, init/1, reg_name/2, reg_name/3, - call/2, call/3, cast/2, unregister_vnode/3, - command_return_vnode/2, overloaded/1]). - --export([system_continue/3, system_terminate/4, - system_code_change/4]). +-export([start_link/2, + init/1, + reg_name/2, + reg_name/3, + call/2, + call/3, + cast/2, + unregister_vnode/3, + command_return_vnode/2, + overloaded/1]). + +-export([system_continue/3, + system_terminate/4, + system_code_change/4]). -include("riak_core_vnode.hrl"). @@ -34,15 +42,16 @@ -endif. -record(state, - {mod :: atom(), index :: partition(), - vnode_pid :: pid() | undefined, - vnode_mref :: reference() | undefined, - check_mailbox :: non_neg_integer(), - check_threshold :: pos_integer() | undefined, - check_counter :: non_neg_integer(), - check_interval :: pos_integer(), - check_request_interval :: non_neg_integer(), - check_request :: undefined | sent | ignore}). + {mod :: atom(), + index :: partition(), + vnode_pid :: pid() | undefined, + vnode_mref :: reference() | undefined, + check_mailbox :: non_neg_integer(), + check_threshold :: pos_integer() | undefined, + check_counter :: non_neg_integer(), + check_interval :: pos_integer(), + check_request_interval :: non_neg_integer(), + check_request :: undefined | sent | ignore}). -define(DEFAULT_CHECK_INTERVAL, 5000). @@ -52,7 +61,7 @@ reg_name(Mod, Index) -> ModBin = atom_to_binary(Mod, latin1), IdxBin = list_to_binary(integer_to_list(Index)), AllBin = <<$p, $r, $o, $x, $y, $_, ModBin/binary, $_, - IdxBin/binary>>, + IdxBin/binary>>, binary_to_atom(AllBin, latin1). reg_name(Mod, Index, Node) -> @@ -60,54 +69,55 @@ reg_name(Mod, Index, Node) -> start_link(Mod, Index) -> RegName = reg_name(Mod, Index), - proc_lib:start_link(?MODULE, init, - [[self(), RegName, Mod, Index]]). + proc_lib:start_link(?MODULE, + init, + [[self(), RegName, Mod, Index]]). 
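Note that the proxy is deliberately not a gen_server: start_link/2 above boots it with proc_lib, and the module implements the sys protocol by hand (system_continue/3 and friends further down) to keep the receive loop as cheap as possible. A minimal skeleton of such a "special process", under the same assumptions:

-module(special_proc_sketch).

-export([start_link/0, init/1]).
-export([system_continue/3, system_terminate/4, system_code_change/4]).

start_link() ->
    proc_lib:start_link(?MODULE, init, [self()]).

init(Parent) ->
    register(?MODULE, self()),
    proc_lib:init_ack(Parent, {ok, self()}),   % tell the parent we are up
    loop(Parent, #{}).

loop(Parent, State) ->
    receive
        {system, From, Msg} ->
            %% Delegate sys messages (suspend, code change, ...);
            %% control returns via system_continue/3. Debug opts empty.
            sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], State);
        _Other ->
            loop(Parent, State)
    end.

system_continue(Parent, _Debug, State) -> loop(Parent, State).

system_terminate(Reason, _Parent, _Debug, _State) -> exit(Reason).

system_code_change(State, _Module, _OldVsn, _Extra) -> {ok, State}.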
init([Parent, RegName, Mod, Index]) -> erlang:register(RegName, self()), proc_lib:init_ack(Parent, {ok, self()}), Interval = application:get_env(riak_core, - vnode_check_interval, - ?DEFAULT_CHECK_INTERVAL), + vnode_check_interval, + ?DEFAULT_CHECK_INTERVAL), RequestInterval = application:get_env(riak_core, - vnode_check_request_interval, - Interval div 2), + vnode_check_request_interval, + Interval div 2), Threshold = application:get_env(riak_core, - vnode_overload_threshold, - ?DEFAULT_OVERLOAD_THRESHOLD), + vnode_overload_threshold, + ?DEFAULT_OVERLOAD_THRESHOLD), SafeInterval = case Threshold == undefined orelse - Interval < Threshold - of - true -> Interval; - false -> - logger:warning("Setting riak_core/vnode_check_interval " - "to ~b", - [Threshold div 2]), - Threshold div 2 - end, + Interval < Threshold + of + true -> Interval; + false -> + logger:warning("Setting riak_core/vnode_check_interval " + "to ~b", + [Threshold div 2]), + Threshold div 2 + end, SafeRequestInterval = case RequestInterval < - SafeInterval - of - true -> RequestInterval; - false -> - logger:warning("Setting riak_core/vnode_check_request_interva" - "l to ~b", - [SafeInterval div 2]), - SafeInterval div 2 - end, + SafeInterval + of + true -> RequestInterval; + false -> + logger:warning("Setting riak_core/vnode_check_request_interva" + "l to ~b", + [SafeInterval div 2]), + SafeInterval div 2 + end, State = #state{mod = Mod, index = Index, - check_mailbox = 0, check_counter = 0, - check_threshold = Threshold, - check_interval = SafeInterval, - check_request_interval = SafeRequestInterval}, + check_mailbox = 0, check_counter = 0, + check_threshold = Threshold, + check_interval = SafeInterval, + check_request_interval = SafeRequestInterval}, loop(Parent, State). unregister_vnode(Mod, Index, Pid) -> cast(reg_name(Mod, Index), {unregister_vnode, Pid}). -spec command_return_vnode({atom(), non_neg_integer(), - atom()}, - term()) -> {ok, pid()} | {error, term()}. + atom()}, + term()) -> {ok, pid()} | {error, term()}. command_return_vnode({Mod, Index, Node}, Req) -> call(reg_name(Mod, Index, Node), {return_vnode, Req}). @@ -118,12 +128,15 @@ overloaded({Mod, Index, Node}) -> overloaded(Pid) -> call(Pid, overloaded). call(Name, Msg) -> - call_reply(catch gen:call(Name, '$vnode_proxy_call', - Msg)). + call_reply(catch gen:call(Name, + '$vnode_proxy_call', + Msg)). call(Name, Msg, Timeout) -> - call_reply(catch gen:call(Name, '$vnode_proxy_call', - Msg, Timeout)). + call_reply(catch gen:call(Name, + '$vnode_proxy_call', + Msg, + Timeout)). -spec call_reply({atom(), term()}) -> term(). @@ -131,7 +144,8 @@ call_reply({ok, Res}) -> Res; call_reply({'EXIT', Reason}) -> {error, Reason}. cast(Name, Msg) -> - catch erlang:send(Name, {'$vnode_proxy_cast', Msg}), ok. + catch erlang:send(Name, {'$vnode_proxy_cast', Msg}), + ok. system_continue(Parent, _, State) -> loop(Parent, State). @@ -144,22 +158,28 @@ system_code_change(State, _, _, _) -> {ok, State}. 
%% @private loop(Parent, State) -> receive - {'$vnode_proxy_call', From, Msg} -> - {reply, Reply, NewState} = handle_call(Msg, From, - State), - {_, Reply} = gen:reply(From, Reply), - loop(Parent, NewState); - {'$vnode_proxy_cast', Msg} -> - {noreply, NewState} = handle_cast(Msg, State), - loop(Parent, NewState); - {'DOWN', _Mref, process, _Pid, _} -> - NewState = forget_vnode(State), loop(Parent, NewState); - {system, From, Msg} -> - sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], - State); - Msg -> - {noreply, NewState} = handle_proxy(Msg, State), - loop(Parent, NewState) + {'$vnode_proxy_call', From, Msg} -> + {reply, Reply, NewState} = handle_call(Msg, + From, + State), + {_, Reply} = gen:reply(From, Reply), + loop(Parent, NewState); + {'$vnode_proxy_cast', Msg} -> + {noreply, NewState} = handle_cast(Msg, State), + loop(Parent, NewState); + {'DOWN', _Mref, process, _Pid, _} -> + NewState = forget_vnode(State), + loop(Parent, NewState); + {system, From, Msg} -> + sys:handle_system_msg(Msg, + From, + Parent, + ?MODULE, + [], + State); + Msg -> + {noreply, NewState} = handle_proxy(Msg, State), + loop(Parent, NewState) end. %% @private @@ -168,9 +188,10 @@ handle_call({return_vnode, Req}, _From, State) -> riak_core_vnode:send_req(Pid, Req), {reply, {ok, Pid}, NewState}; handle_call(overloaded, _From, - State = #state{check_mailbox = Mailbox, - check_threshold = Threshold}) -> - Result = Mailbox > Threshold, {reply, Result, State}; + State = #state{check_mailbox = Mailbox, + check_threshold = Threshold}) -> + Result = Mailbox > Threshold, + {reply, Result, State}; handle_call(_Msg, _From, State) -> {reply, ok, State}. %% @private @@ -182,29 +203,30 @@ handle_cast({unregister_vnode, Pid}, State) -> NewState = forget_vnode(State), {noreply, NewState}; handle_cast({vnode_proxy_pong, Ref, Msgs}, - State = #state{check_request = RequestState, - check_mailbox = Mailbox}) -> + State = #state{check_request = RequestState, + check_mailbox = Mailbox}) -> NewState = case Ref of - RequestState -> - State#state{check_mailbox = Mailbox - Msgs, - check_request = undefined, check_counter = 0}; - _ -> State - end, + RequestState -> + State#state{check_mailbox = Mailbox - Msgs, + check_request = undefined, + check_counter = 0}; + _ -> State + end, {noreply, NewState}; handle_cast(_Msg, State) -> {noreply, State}. %% @private handle_proxy(Msg, - State = #state{check_threshold = undefined}) -> + State = #state{check_threshold = undefined}) -> {Pid, NewState} = get_vnode_pid(State), Pid ! Msg, {noreply, NewState}; handle_proxy(Msg, - State = #state{check_counter = Counter, - check_mailbox = Mailbox, check_interval = Interval, - check_request_interval = RequestInterval, - check_request = RequestState, - check_threshold = Threshold}) -> + State = #state{check_counter = Counter, + check_mailbox = Mailbox, check_interval = Interval, + check_request_interval = RequestInterval, + check_request = RequestState, + check_threshold = Threshold}) -> %% %% NOTE: This function is intentionally written as it is for performance %% reasons -- the vnode proxy is on the critical path of Riak and @@ -222,88 +244,97 @@ handle_proxy(Msg, %% ensure unnecessary work is not being performed needlessly. %% case State#state.vnode_pid of - undefined -> {Pid, State2} = get_vnode_pid(State); - KnownPid -> Pid = KnownPid, State2 = State + undefined -> {Pid, State2} = get_vnode_pid(State); + KnownPid -> + Pid = KnownPid, + State2 = State end, Mailbox2 = case Mailbox =< Threshold of - true -> Pid ! 
Msg, Mailbox + 1; - false -> handle_overload(Msg, State), Mailbox - end, + true -> + Pid ! Msg, + Mailbox + 1; + false -> + handle_overload(Msg, State), + Mailbox + end, Counter2 = Counter + 1, case Counter2 of - RequestInterval -> - %% Ping the vnode in hopes that we get a pong back before hitting - %% the hard query interval and triggering an expensive process_info - %% call. A successful pong from the vnode means that all messages - %% sent before the ping have already been handled and therefore - %% we can adjust our mailbox estimate accordingly. - case RequestState of - undefined -> - RequestState2 = send_proxy_ping(Pid, Mailbox2); - _ -> RequestState2 = RequestState - end, - Mailbox3 = Mailbox2, - Counter3 = Counter2; - Interval -> - %% Time to directly check the mailbox size. This operation may - %% be extremely expensive. If the vnode is currently active, - %% the proxy will be descheduled until the vnode finishes - %% execution and becomes descheduled itself. - {_, L} = erlang:process_info(Pid, message_queue_len), - Counter3 = 0, - Mailbox3 = L + 1, - %% Send a new proxy ping so that if the new length is above the - %% threshold then the proxy will detect the work is completed, - %% rather than being stuck in overload state until the interval - %% counts are reached. - RequestState2 = send_proxy_ping(Pid, Mailbox3); - _ -> - Mailbox3 = Mailbox2, - Counter3 = Counter2, - RequestState2 = RequestState + RequestInterval -> + %% Ping the vnode in hopes that we get a pong back before hitting + %% the hard query interval and triggering an expensive process_info + %% call. A successful pong from the vnode means that all messages + %% sent before the ping have already been handled and therefore + %% we can adjust our mailbox estimate accordingly. + case RequestState of + undefined -> + RequestState2 = send_proxy_ping(Pid, Mailbox2); + _ -> RequestState2 = RequestState + end, + Mailbox3 = Mailbox2, + Counter3 = Counter2; + Interval -> + %% Time to directly check the mailbox size. This operation may + %% be extremely expensive. If the vnode is currently active, + %% the proxy will be descheduled until the vnode finishes + %% execution and becomes descheduled itself. + {_, L} = erlang:process_info(Pid, message_queue_len), + Counter3 = 0, + Mailbox3 = L + 1, + %% Send a new proxy ping so that if the new length is above the + %% threshold then the proxy will detect the work is completed, + %% rather than being stuck in overload state until the interval + %% counts are reached. + RequestState2 = send_proxy_ping(Pid, Mailbox3); + _ -> + Mailbox3 = Mailbox2, + Counter3 = Counter2, + RequestState2 = RequestState end, {noreply, State2#state{check_counter = Counter3, - check_mailbox = Mailbox3, - check_request = RequestState2}}. + check_mailbox = Mailbox3, + check_request = RequestState2}}. 
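handle_proxy/2 above maintains a cheap running estimate of the vnode's mailbox: +1 per forwarded message, corrected downwards when a ping is answered, and resynchronised from process_info/2 only every `Interval' messages. The same accounting as a small pure-state sketch, with a map standing in for #state{}:

-module(mailbox_estimate_sketch).

-export([new/2, forwarded/1, pong/2, probe/2]).

new(Threshold, Interval) ->
    #{estimate => 0, counter => 0,
      threshold => Threshold, interval => Interval}.

%% Called for every message handed on to the vnode.
forwarded(S = #{estimate := E, counter := C}) ->
    S#{estimate := E + 1, counter := C + 1}.

%% A pong says Msgs messages sent before the ping are now handled, so
%% the estimate can be reduced and the counter reset.
pong(Msgs, S = #{estimate := E}) ->
    S#{estimate := max(0, E - Msgs), counter := 0}.

%% Hard resync from the vnode's real queue length; expensive, which is
%% why the proxy does it only once per interval. (The real proxy also
%% adds one for the message it is about to forward.)
probe(Pid, S) ->
    {message_queue_len, L} = erlang:process_info(Pid, message_queue_len),
    S#{estimate := L, counter := 0}.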
handle_overload(Msg, - #state{mod = Mod, index = Index}) -> + #state{mod = Mod, index = Index}) -> %% STATS %riak_core_stat:update(dropped_vnode_requests), case Msg of - {'$gen_event', - #riak_vnode_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, Sender, - Index); - {'$gen_all_state_event', - #riak_vnode_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, Sender, - Index); - {'$gen_event', - #riak_coverage_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, Sender, - Index); - _ -> catch Mod:handle_overload_info(Msg, Index) + {'$gen_event', + #riak_vnode_req_v1{sender = Sender, + request = Request}} -> + catch Mod:handle_overload_command(Request, + Sender, + Index); + {'$gen_all_state_event', + #riak_vnode_req_v1{sender = Sender, + request = Request}} -> + catch Mod:handle_overload_command(Request, + Sender, + Index); + {'$gen_event', + #riak_coverage_req_v1{sender = Sender, + request = Request}} -> + catch Mod:handle_overload_command(Request, + Sender, + Index); + _ -> catch Mod:handle_overload_info(Msg, Index) end. %% @private forget_vnode(State) -> State#state{vnode_pid = undefined, - vnode_mref = undefined, check_mailbox = 0, - check_counter = 0, check_request = undefined}. + vnode_mref = undefined, check_mailbox = 0, + check_counter = 0, check_request = undefined}. %% @private get_vnode_pid(State = #state{mod = Mod, index = Index, - vnode_pid = undefined}) -> + vnode_pid = undefined}) -> {ok, Pid} = riak_core_vnode_manager:get_vnode_pid(Index, - Mod), + Mod), Mref = erlang:monitor(process, Pid), NewState = State#state{vnode_pid = Pid, - vnode_mref = Mref}, + vnode_mref = Mref}, {Pid, NewState}; get_vnode_pid(State = #state{vnode_pid = Pid}) -> {Pid, State}. @@ -312,126 +343,150 @@ get_vnode_pid(State = #state{vnode_pid = Pid}) -> send_proxy_ping(Pid, MailboxSizeAfterPing) -> Ref = make_ref(), Pid ! - {'$vnode_proxy_ping', self(), Ref, - MailboxSizeAfterPing}, + {'$vnode_proxy_ping', + self(), + Ref, + MailboxSizeAfterPing}, Ref. -ifdef(TEST). update_msg_counter() -> Count = case erlang:get(count) of - undefined -> 0; - Val -> Val - end, + undefined -> 0; + Val -> Val + end, put(count, Count + 1). fake_loop() -> receive - block -> fake_loop_block(); - slow -> fake_loop_slow(); - {get_count, Pid} -> - Pid ! {count, erlang:get(count)}, fake_loop(); - %% Original tests do not expect replies - the - %% results below expect the pings to be counted - %% towards messages received. If you ever wanted - %% to re-instance, uncomment below. - %% {'$vnode_proxy_ping', ReplyTo, Ref, Msgs} -> - %% ReplyTo ! {Ref, Msgs}, - %% fake_loop(); - _Msg -> update_msg_counter(), fake_loop() + block -> fake_loop_block(); + slow -> fake_loop_slow(); + {get_count, Pid} -> + Pid ! {count, erlang:get(count)}, + fake_loop(); + %% Original tests do not expect replies - the + %% results below expect the pings to be counted + %% towards messages received. If you ever wanted + %% to re-instance, uncomment below. + %% {'$vnode_proxy_ping', ReplyTo, Ref, Msgs} -> + %% ReplyTo ! {Ref, Msgs}, + %% fake_loop(); + _Msg -> + update_msg_counter(), + fake_loop() end. fake_loop_slow() -> timer:sleep(100), receive - _Msg -> update_msg_counter(), fake_loop_slow() + _Msg -> + update_msg_counter(), + fake_loop_slow() end. fake_loop_block() -> receive unblock -> fake_loop() end. 
overload_test_() ->
-    {timeout, 900,
+    {timeout,
+     900,
     {foreach,
     fun () ->
-            VnodePid = spawn(fun fake_loop/0),
-            meck:unload(),
-            meck:new(riak_core_vnode_manager, [passthrough]),
-            meck:expect(riak_core_vnode_manager, get_vnode_pid,
-                        fun (_Index, fakemod) -> {ok, VnodePid};
-                            (Index, Mod) -> meck:passthrough([Index, Mod])
-                        end),
-            meck:new(fakemod, [non_strict]),
-            meck:expect(fakemod, handle_overload_info,
-                        fun (hello, _Idx) -> ok end),
-            {ok, ProxyPid} =
-                riak_core_vnode_proxy:start_link(fakemod, 0),
-            unlink(ProxyPid),
-            {VnodePid, ProxyPid}
+            VnodePid = spawn(fun fake_loop/0),
+            meck:unload(),
+            meck:new(riak_core_vnode_manager, [passthrough]),
+            meck:expect(riak_core_vnode_manager,
+                        get_vnode_pid,
+                        fun (_Index, fakemod) -> {ok, VnodePid};
+                            (Index, Mod) -> meck:passthrough([Index, Mod])
+                        end),
+            meck:new(fakemod, [non_strict]),
+            meck:expect(fakemod,
+                        handle_overload_info,
+                        fun (hello, _Idx) -> ok end),
+            {ok, ProxyPid} =
+                riak_core_vnode_proxy:start_link(fakemod, 0),
+            unlink(ProxyPid),
+            {VnodePid, ProxyPid}
      end,
      fun ({VnodePid, ProxyPid}) ->
-            unlink(VnodePid),
-            unlink(ProxyPid),
-            exit(VnodePid, kill),
-            exit(ProxyPid, kill)
+            unlink(VnodePid),
+            unlink(ProxyPid),
+            exit(VnodePid, kill),
+            exit(ProxyPid, kill)
      end,
      [fun ({_VnodePid, ProxyPid}) ->
-             {"should not discard in normal operation", timeout, 60,
-              fun () ->
-                      ToSend = (?DEFAULT_OVERLOAD_THRESHOLD),
-                      [ProxyPid ! hello || _ <- lists:seq(1, ToSend)],
-                      %% synchronize on the proxy and the mailbox
-                      {ok, ok} = gen:call(ProxyPid, '$vnode_proxy_call', sync,
-                                          infinity),
-                      ProxyPid ! {get_count, self()},
-                      receive
-                        {count, Count} ->
-                            %% First will hit the request check interval,
-                            %% then will check message queue every interval
-                            %% (no new ping will be resubmitted after the first
-                            %% as the request will already have a reference)
-                            PingReqs = 1
-                                         + % for first request intarval
-                                         ToSend div (?DEFAULT_CHECK_INTERVAL),
-                            ?assertEqual((ToSend + PingReqs), Count)
-                      end
-              end}
+             {"should not discard in normal operation",
+              timeout,
+              60,
+              fun () ->
+                      ToSend = (?DEFAULT_OVERLOAD_THRESHOLD),
+                      [ProxyPid ! hello || _ <- lists:seq(1, ToSend)],
+                      %% synchronize on the proxy and the mailbox
+                      {ok, ok} = gen:call(ProxyPid,
+                                          '$vnode_proxy_call',
+                                          sync,
+                                          infinity),
+                      ProxyPid ! {get_count, self()},
+                      receive
+                          {count, Count} ->
+                              %% First will hit the request check interval,
+                              %% then will check message queue every interval
+                              %% (no new ping will be resubmitted after the first
+                              %% as the request will already have a reference)
+                              PingReqs = 1
+                                             +   % for first request interval
+                                             ToSend div
+                                                 (?DEFAULT_CHECK_INTERVAL),
+                              ?assertEqual((ToSend + PingReqs), Count)
+                      end
+              end}
      end,
      fun ({VnodePid, ProxyPid}) ->
-             {"should discard during overflow", timeout, 60,
-              fun () ->
-                      VnodePid ! block,
-                      [ProxyPid ! hello || _ <- lists:seq(1, 50000)],
-                      %% synchronize on the mailbox - no-op that hits msg catchall
-                      Reply = gen:call(ProxyPid, '$vnode_proxy_call', sync,
-                                       infinity),
-                      ?assertEqual({ok, ok}, Reply),
-                      VnodePid ! unblock,
-                      VnodePid ! {get_count, self()},
-                      receive
-                        {count, Count} ->
-                            %% Threshold + 10 unanswered vnode_proxy_ping
-                            ?assertEqual(((?DEFAULT_OVERLOAD_THRESHOLD) + 10),
-                                         Count)
-                      end
-              end}
+             {"should discard during overflow",
+              timeout,
+              60,
+              fun () ->
+                      VnodePid ! block,
+                      [ProxyPid ! hello || _ <- lists:seq(1, 50000)],
+                      %% synchronize on the mailbox - no-op that hits msg catchall
+                      Reply = gen:call(ProxyPid,
+                                       '$vnode_proxy_call',
+                                       sync,
+                                       infinity),
+                      ?assertEqual({ok, ok}, Reply),
+                      VnodePid !
unblock, + VnodePid ! {get_count, self()}, + receive + {count, Count} -> + %% Threshold + 10 unanswered vnode_proxy_ping + ?assertEqual(((?DEFAULT_OVERLOAD_THRESHOLD) + + 10), + Count) + end + end} end, fun ({VnodePid, ProxyPid}) -> - {"should tolerate slow vnodes", timeout, 60, - fun () -> - VnodePid ! slow, - [ProxyPid ! hello || _ <- lists:seq(1, 50000)], - %% synchronize on the mailbox - no-op that hits msg catchall - Reply = gen:call(ProxyPid, '$vnode_proxy_call', sync, - infinity), - ?assertEqual({ok, ok}, Reply), - %% check that the outstanding message count is - %% reasonable - {message_queue_len, L} = erlang:process_info(VnodePid, - message_queue_len), - %% Threshold + 2 unanswered vnode_proxy_ping (one - %% for first ping, second after process_info check) - ?assert((L =< (?DEFAULT_OVERLOAD_THRESHOLD) + 2)) - end} + {"should tolerate slow vnodes", + timeout, + 60, + fun () -> + VnodePid ! slow, + [ProxyPid ! hello || _ <- lists:seq(1, 50000)], + %% synchronize on the mailbox - no-op that hits msg catchall + Reply = gen:call(ProxyPid, + '$vnode_proxy_call', + sync, + infinity), + ?assertEqual({ok, ok}, Reply), + %% check that the outstanding message count is + %% reasonable + {message_queue_len, L} = erlang:process_info(VnodePid, + message_queue_len), + %% Threshold + 2 unanswered vnode_proxy_ping (one + %% for first ping, second after process_info check) + ?assert((L =< (?DEFAULT_OVERLOAD_THRESHOLD) + 2)) + end} end]}}. -endif. diff --git a/src/riak_core_vnode_proxy_sup.erl b/src/riak_core_vnode_proxy_sup.erl index 2478769a6..c9b0801f4 100644 --- a/src/riak_core_vnode_proxy_sup.erl +++ b/src/riak_core_vnode_proxy_sup.erl @@ -33,15 +33,15 @@ init([]) -> Indices = get_indices(), VMods = riak_core:vnode_modules(), Proxies = [proxy_ref(Mod, Index) - || {_, Mod} <- VMods, Index <- Indices], + || {_, Mod} <- VMods, Index <- Indices], {ok, {{one_for_one, 5, 10}, Proxies}}. start_proxy(Mod, Index) -> Ref = proxy_ref(Mod, Index), Pid = case supervisor:start_child(?MODULE, Ref) of - {ok, Child} -> Child; - {error, {already_started, Child}} -> Child - end, + {ok, Child} -> Child; + {error, {already_started, Child}} -> Child + end, Pid. stop_proxy(Mod, Index) -> @@ -59,7 +59,10 @@ start_proxies(Mod) -> proxy_ref(Mod, Index) -> {{Mod, Index}, {riak_core_vnode_proxy, start_link, [Mod, Index]}, - permanent, 5000, worker, [riak_core_vnode_proxy]}. + permanent, + 5000, + worker, + [riak_core_vnode_proxy]}. %% @private get_indices() -> diff --git a/src/riak_core_vnode_sup.erl b/src/riak_core_vnode_sup.erl index 29637df14..f4bfb7527 100644 --- a/src/riak_core_vnode_sup.erl +++ b/src/riak_core_vnode_sup.erl @@ -33,7 +33,7 @@ start_vnode(Mod, Index, ForwardTo) when is_integer(Index) -> supervisor:start_child(?MODULE, - [Mod, Index, ForwardTo]). + [Mod, Index, ForwardTo]). start_link() -> %% This simple_one_for_one supervisor can do a controlled shutdown. @@ -46,5 +46,9 @@ start_link() -> init([]) -> {ok, {{simple_one_for_one, 10, 10}, - [{undefined, {riak_core_vnode, start_link, []}, - temporary, 300000, worker, dynamic}]}}. + [{undefined, + {riak_core_vnode, start_link, []}, + temporary, + 300000, + worker, + dynamic}]}}. diff --git a/src/riak_core_vnode_worker.erl b/src/riak_core_vnode_worker.erl index a682fae20..0a7aa7fa8 100644 --- a/src/riak_core_vnode_worker.erl +++ b/src/riak_core_vnode_worker.erl @@ -23,8 +23,12 @@ -include("riak_core_vnode.hrl"). % gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, - handle_info/2, terminate/2, code_change/3]). 
+-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). % API -export([start_link/1, handle_work/3, handle_work/4]). @@ -32,24 +36,27 @@ -type mod_state() :: term(). -record(state, - {module :: atom(), modstate :: mod_state()}). + {module :: atom(), modstate :: mod_state()}). -callback init_worker(partition(), Args :: term(), - Props :: [{atom(), term()}]) -> {ok, mod_state()}. + Props :: [{atom(), term()}]) -> {ok, mod_state()}. -callback handle_work(Work :: term(), sender(), - mod_state()) -> {reply, Reply :: term(), mod_state()} | - {noreply, mod_state()}. + mod_state()) -> {reply, Reply :: term(), mod_state()} | + {noreply, mod_state()}. start_link(Args) -> WorkerMod = proplists:get_value(worker_callback_mod, - Args), + Args), [VNodeIndex, WorkerArgs, WorkerProps, Caller] = - proplists:get_value(worker_args, Args), + proplists:get_value(worker_args, Args), gen_server:start_link(?MODULE, - [WorkerMod, VNodeIndex, WorkerArgs, WorkerProps, - Caller], - []). + [WorkerMod, + VNodeIndex, + WorkerArgs, + WorkerProps, + Caller], + []). handle_work(Worker, Work, From) -> handle_work(Worker, Work, From, self()). @@ -57,32 +64,38 @@ handle_work(Worker, Work, From) -> handle_work(Worker, Work, From, Caller) -> gen_server:cast(Worker, {work, Work, From, Caller}). -init([Module, VNodeIndex, WorkerArgs, WorkerProps, +init([Module, + VNodeIndex, + WorkerArgs, + WorkerProps, Caller]) -> {ok, WorkerState} = Module:init_worker(VNodeIndex, - WorkerArgs, WorkerProps), + WorkerArgs, + WorkerProps), %% let the pool queue manager know there might be a worker to checkout riak_core_vnode_worker_pool:worker_started(Caller), {ok, #state{module = Module, modstate = WorkerState}}. handle_call(Event, _From, State) -> logger:debug("Vnode worker received synchronous event: " - "~p.", - [Event]), + "~p.", + [Event]), {reply, ok, State}. handle_cast({work, Work, WorkFrom, Caller}, - #state{module = Mod, modstate = ModState} = State) -> - NewModState = case Mod:handle_work(Work, WorkFrom, - ModState) - of - {reply, Reply, NS} -> - riak_core_vnode:reply(WorkFrom, Reply), NS; - {noreply, NS} -> NS - end, + #state{module = Mod, modstate = ModState} = State) -> + NewModState = case Mod:handle_work(Work, + WorkFrom, + ModState) + of + {reply, Reply, NS} -> + riak_core_vnode:reply(WorkFrom, Reply), + NS; + {noreply, NS} -> NS + end, %% check the worker back into the pool riak_core_vnode_worker_pool:checkin_worker(Caller, - self()), + self()), {noreply, State#state{modstate = NewModState}}; handle_cast(_Event, State) -> {noreply, State}. diff --git a/src/riak_core_vnode_worker_pool.erl b/src/riak_core_vnode_worker_pool.erl index b750c929f..a59f77262 100644 --- a/src/riak_core_vnode_worker_pool.erl +++ b/src/riak_core_vnode_worker_pool.erl @@ -42,13 +42,19 @@ -behaviour(gen_statem). %% API --export([start_link/5, start_link/6, stop/2, - shutdown_pool/2, handle_work/3, worker_started/1, - checkin_worker/2]). +-export([start_link/5, + start_link/6, + stop/2, + shutdown_pool/2, + handle_work/3, + worker_started/1, + checkin_worker/2]). %% gen_statem callbacks --export([init/1, terminate/3, code_change/4, - callback_mode/0]). +-export([init/1, + terminate/3, + code_change/4, + callback_mode/0]). %% gen_statem states -export([ready/3, queue/3, shutdown/3]). @@ -58,16 +64,24 @@ %% ======== start_link(WorkerMod, PoolSize, VNodeIndex, WorkerArgs, - WorkerProps) -> - start_link(WorkerMod, PoolSize, VNodeIndex, WorkerArgs, - WorkerProps, []). 
+ WorkerProps) -> + start_link(WorkerMod, + PoolSize, + VNodeIndex, + WorkerArgs, + WorkerProps, + []). start_link(WorkerMod, PoolSize, VNodeIndex, WorkerArgs, - WorkerProps, Opts) -> + WorkerProps, Opts) -> gen_statem:start_link(?MODULE, - [WorkerMod, PoolSize, VNodeIndex, WorkerArgs, - WorkerProps, Opts], - []). + [WorkerMod, + PoolSize, + VNodeIndex, + WorkerArgs, + WorkerProps, + Opts], + []). % #1 cast handle_work(Pid, Work, From) -> @@ -88,7 +102,7 @@ stop(Pid, Reason) -> % #5 call %% wait for all the workers to finish any current work -spec shutdown_pool(pid(), integer()) -> ok | - {error, vnode_shutdown}. + {error, vnode_shutdown}. shutdown_pool(Pid, Wait) -> gen_statem:call(Pid, {shutdown, Wait}, infinity). @@ -100,33 +114,43 @@ shutdown_pool(Pid, Wait) -> %% ======================== -record(state, - {queue :: queue:queue() | list(), pool :: pid(), - monitors = [] :: list(), - queue_strategy = fifo :: fifo | filo, - shutdown :: undefined | {pid(), reference()}}). + {queue :: queue:queue() | list(), + pool :: pid(), + monitors = [] :: list(), + queue_strategy = fifo :: fifo | filo, + shutdown :: undefined | {pid(), reference()}}). callback_mode() -> [state_functions, state_enter]. -init([WorkerMod, PoolSize, VNodeIndex, WorkerArgs, - WorkerProps, Opts]) -> +init([WorkerMod, + PoolSize, + VNodeIndex, + WorkerArgs, + WorkerProps, + Opts]) -> {ok, Pid} = poolboy:start_link([{worker_module, - riak_core_vnode_worker}, - {worker_args, - [VNodeIndex, WorkerArgs, WorkerProps, - self()]}, - {worker_callback_mod, WorkerMod}, - {size, PoolSize}, {max_overflow, 0}]), + riak_core_vnode_worker}, + {worker_args, + [VNodeIndex, + WorkerArgs, + WorkerProps, + self()]}, + {worker_callback_mod, WorkerMod}, + {size, PoolSize}, + {max_overflow, 0}]), DefaultStrategy = application:get_env(riak_core, - queue_worker_strategy, fifo), - State = case proplists:get_value(strategy, Opts, - DefaultStrategy) - of - fifo -> - #state{pool = Pid, queue = queue:new(), - queue_strategy = fifo}; - filo -> - #state{pool = Pid, queue = [], queue_strategy = filo} - end, + queue_worker_strategy, + fifo), + State = case proplists:get_value(strategy, + Opts, + DefaultStrategy) + of + fifo -> + #state{pool = Pid, queue = queue:new(), + queue_strategy = fifo}; + filo -> + #state{pool = Pid, queue = [], queue_strategy = filo} + end, {ok, ready, State}. 
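Two knobs feed the strategy selection in init/1 above: the riak_core application env queue_worker_strategy supplies the default, and a {strategy, fifo | filo} entry in the pool's Opts overrides it. A hedged sketch of the difference (the worker module name and pool size are invented):

start_filo_pool(VNodeIndex) ->
    %% global default, consulted only when Opts carries no strategy entry
    application:set_env(riak_core, queue_worker_strategy, fifo),
    %% this particular pool still queues last-in/first-out, because the
    %% per-pool option wins over the application env in init/1
    riak_core_vnode_worker_pool:start_link(my_worker_mod, 10, VNodeIndex,
                                           [], [], [{strategy, filo}]).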
% #4 call @@ -153,11 +177,11 @@ ready(enter, _, State) -> {keep_state, State}; ready(cast, {work, Work, From} = Msg, #state{pool = Pool, monitors = Monitors} = State) -> case poolboy:checkout(Pool, false) of - full -> {next_state, queue, in(Msg, State)}; - Pid when is_pid(Pid) -> - NewMonitors = monitor_worker(Pid, From, Work, Monitors), - riak_core_vnode_worker:handle_work(Pid, Work, From), - {next_state, ready, State#state{monitors = NewMonitors}} + full -> {next_state, queue, in(Msg, State)}; + Pid when is_pid(Pid) -> + NewMonitors = monitor_worker(Pid, From, Work, Monitors), + riak_core_vnode_worker:handle_work(Pid, Work, From), + {next_state, ready, State#state{monitors = NewMonitors}} end; %% #2 ready(cast, worker_start, State) -> @@ -168,7 +192,9 @@ ready(cast, {checkin, WorkerPid}, State) -> %% #5 ready({call, From}, {shutdown, Wait}, State) -> %% change to shutdown state with a state_timeout of 'Wait' ms, force after timeout expires - {next_state, shutdown, State#state{shutdown = From}, + {next_state, + shutdown, + State#state{shutdown = From}, [{state_timeout, Wait, force_shutdown}]}; %% info EXIT signal of erlang:monitor(process, Worker) ready(info, {'DOWN', _Ref, _Type, Pid, Info}, State) -> @@ -191,7 +217,9 @@ queue(cast, {checkin, WorkerPid}, State) -> %% #5 queue({call, From}, {shutdown, Wait}, State) -> %% change to shutdown state with a state_timeout of 'Wait' ms, force after timeout expires - {next_state, shutdown, State#state{shutdown = From}, + {next_state, + shutdown, + State#state{shutdown = From}, [{state_timeout, Wait, force_shutdown}]}; %% info EXIT signal of erlang:monitor(process, Worker) queue(info, {'DOWN', _Ref, _Type, Pid, Info}, State) -> @@ -203,20 +231,21 @@ queue(info, {'DOWN', _Ref, _Type, Pid, Info}, State) -> %% enter shutdown(enter, _, - #state{monitors = Monitors, shutdown = From} = State) -> + #state{monitors = Monitors, shutdown = From} = State) -> discard_queued_work(State), case Monitors of - [] -> {stop_and_reply, shutdown, [{reply, From, ok}]}; - _ -> {keep_state, State#state{queue = new(State)}} + [] -> {stop_and_reply, shutdown, [{reply, From, ok}]}; + _ -> {keep_state, State#state{queue = new(State)}} end; %% force shutdown timeout shutdown(state_timeout, _, - #state{monitors = Monitors, shutdown = FromOrigin}) -> + #state{monitors = Monitors, shutdown = FromOrigin}) -> %% we've waited too long to shutdown, time to force the issue. _ = [riak_core_vnode:reply(From, - {error, vnode_shutdown}) - || {_, _, From, _} <- Monitors], - {stop_and_reply, shutdown, + {error, vnode_shutdown}) + || {_, _, From, _} <- Monitors], + {stop_and_reply, + shutdown, [{reply, FromOrigin, {error, vnode_shutdown}}]}; %% #1 shutdown(cast, {work, _Work, From}, State) -> @@ -227,20 +256,21 @@ shutdown(cast, worker_start, State) -> worker_started(State, shutdown); %% #3 shutdown(cast, {checkin, Pid}, - #state{pool = Pool, monitors = Monitors0, - shutdown = From} = - State) -> + #state{pool = Pool, monitors = Monitors0, + shutdown = From} = + State) -> Monitors = demonitor_worker(Pid, Monitors0), poolboy:checkin(Pool, Pid), case Monitors of - [] -> %% work all done, time to exit! - {stop_and_reply, shutdown, [{reply, From, ok}]}; - _ -> {keep_state, State#state{monitors = Monitors}} + [] -> %% work all done, time to exit! 
+ {stop_and_reply, shutdown, [{reply, From, ok}]}; + _ -> {keep_state, State#state{monitors = Monitors}} end; %% #5 shutdown({call, From}, {shutdown, _Wait}, State) -> %% duplicate shutdown call - {keep_state, State, + {keep_state, + State, [{reply, From, {error, vnode_shutdown}}]}; %% info EXIT signal of erlang:monitor(process, Worker) shutdown(info, {'DOWN', _Ref, _, Pid, Info}, State) -> @@ -257,31 +287,33 @@ shutdown(info, {'DOWN', _Ref, _, Pid, Info}, State) -> %% worker. Only active workers are tracked monitor_worker(Worker, From, Work, Monitors) -> case lists:keyfind(Worker, 1, Monitors) of - {Worker, Ref, _OldFrom, _OldWork} -> - %% reuse old monitor and just update the from & work - lists:keyreplace(Worker, 1, Monitors, - {Worker, Ref, From, Work}); - false -> - Ref = erlang:monitor(process, Worker), - [{Worker, Ref, From, Work} | Monitors] + {Worker, Ref, _OldFrom, _OldWork} -> + %% reuse old monitor and just update the from & work + lists:keyreplace(Worker, + 1, + Monitors, + {Worker, Ref, From, Work}); + false -> + Ref = erlang:monitor(process, Worker), + [{Worker, Ref, From, Work} | Monitors] end. demonitor_worker(Worker, Monitors) -> case lists:keyfind(Worker, 1, Monitors) of - {Worker, Ref, _From, _Work} -> - erlang:demonitor(Ref), - lists:keydelete(Worker, 1, Monitors); - false -> - %% not monitored? - Monitors + {Worker, Ref, _From, _Work} -> + erlang:demonitor(Ref), + lists:keydelete(Worker, 1, Monitors); + false -> + %% not monitored? + Monitors end. discard_queued_work(State) -> case out(State) of - {{value, {work, _Work, From}}, Rem} -> - riak_core_vnode:reply(From, {error, vnode_shutdown}), - discard_queued_work(State#state{queue = Rem}); - {empty, _Empty} -> ok + {{value, {work, _Work, From}}, Rem} -> + riak_core_vnode:reply(From, {error, vnode_shutdown}), + discard_queued_work(State#state{queue = Rem}); + {empty, _Empty} -> ok end. in(Msg, @@ -302,60 +334,65 @@ new(#state{queue_strategy = fifo}) -> queue:new(); new(#state{queue_strategy = filo}) -> []. worker_started(#state{pool = Pool, - monitors = Monitors} = - State, - StateName) -> + monitors = Monitors} = + State, + StateName) -> %% a new worker just started - if we have work pending, try to do it case out(State) of - {{value, {work, Work, From}}, Rem} -> - case poolboy:checkout(Pool, false) of - full -> {next_state, queue, State}; - Pid when is_pid(Pid) -> - NewMonitors = monitor_worker(Pid, From, Work, Monitors), - riak_core_vnode_worker:handle_work(Pid, Work, From), - {next_state, queue, - State#state{queue = Rem, monitors = NewMonitors}} - end; - {empty, _} -> - {next_state, - %% If we are in state queueing with nothing in the queue, - %% move to the ready state so that the next incoming job - %% checks out the new worker from poolboy. - if StateName == queue -> ready; - true -> StateName - end, - State} + {{value, {work, Work, From}}, Rem} -> + case poolboy:checkout(Pool, false) of + full -> {next_state, queue, State}; + Pid when is_pid(Pid) -> + NewMonitors = monitor_worker(Pid, From, Work, Monitors), + riak_core_vnode_worker:handle_work(Pid, Work, From), + {next_state, + queue, + State#state{queue = Rem, monitors = NewMonitors}} + end; + {empty, _} -> + {next_state, + %% If we are in state queueing with nothing in the queue, + %% move to the ready state so that the next incoming job + %% checks out the new worker from poolboy. + if StateName == queue -> ready; + true -> StateName + end, + State} end. 
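Seen from a caller, the shutdown choreography above reduces to a single call with two outcomes. A hedged usage sketch (Pool is the pid returned by start_link/5,6; the 5000 ms grace period is arbitrary):

drain_pool(Pool) ->
    case riak_core_vnode_worker_pool:shutdown_pool(Pool, 5000) of
        %% every monitored worker checked back in before the timeout
        ok -> ok;
        %% the state_timeout forced the issue (or this was a duplicate
        %% shutdown call); queued and in-flight work was answered with
        %% {error, vnode_shutdown}
        {error, vnode_shutdown} -> ok
    end.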
checkin(#state{pool = Pool, monitors = Monitors} = - State, - Worker) -> + State, + Worker) -> case out(State) of - {{value, {work, Work, From}}, Rem} -> - %% there is outstanding work to do - instead of checking - %% the worker back in, just hand it more work to do - NewMonitors = monitor_worker(Worker, From, Work, - Monitors), - riak_core_vnode_worker:handle_work(Worker, Work, From), - {next_state, queue, - State#state{queue = Rem, monitors = NewMonitors}}; - {empty, Empty} -> - NewMonitors = demonitor_worker(Worker, Monitors), - poolboy:checkin(Pool, Worker), - {next_state, ready, - State#state{queue = Empty, monitors = NewMonitors}} + {{value, {work, Work, From}}, Rem} -> + %% there is outstanding work to do - instead of checking + %% the worker back in, just hand it more work to do + NewMonitors = monitor_worker(Worker, + From, + Work, + Monitors), + riak_core_vnode_worker:handle_work(Worker, Work, From), + {next_state, + queue, + State#state{queue = Rem, monitors = NewMonitors}}; + {empty, Empty} -> + NewMonitors = demonitor_worker(Worker, Monitors), + poolboy:checkin(Pool, Worker), + {next_state, + ready, + State#state{queue = Empty, monitors = NewMonitors}} end. exit_worker(#state{monitors = Monitors} = State, Pid, - Info) -> + Info) -> %% remove the listing for the dead worker case lists:keyfind(Pid, 1, Monitors) of - {Pid, _, From, Work} -> - riak_core_vnode:reply(From, - {error, {worker_crash, Info, Work}}), - NewMonitors = lists:keydelete(Pid, 1, Monitors), - %% trigger to do more work will be 'worker_start' message - %% when poolboy replaces this worker (if not a 'checkin' or 'handle_work') - {ok, State#state{monitors = NewMonitors}}; - false -> {ok, State} + {Pid, _, From, Work} -> + riak_core_vnode:reply(From, + {error, {worker_crash, Info, Work}}), + NewMonitors = lists:keydelete(Pid, 1, Monitors), + %% trigger to do more work will be 'worker_start' message + %% when poolboy replaces this worker (if not a 'checkin' or 'handle_work') + {ok, State#state{monitors = NewMonitors}}; + false -> {ok, State} end. diff --git a/src/vclock.erl b/src/vclock.erl index 95d2287ea..742064d89 100644 --- a/src/vclock.erl +++ b/src/vclock.erl @@ -31,11 +31,23 @@ -module(vclock). --export([fresh/0, fresh/2, descends/2, dominates/2, - descends_dot/2, pure_dot/1, merge/1, get_counter/2, - get_timestamp/2, get_dot/2, valid_dot/1, increment/2, - increment/3, all_nodes/1, equal/2, prune/3, - timestamp/0]). +-export([fresh/0, + fresh/2, + descends/2, + dominates/2, + descends_dot/2, + pure_dot/1, + merge/1, + get_counter/2, + get_timestamp/2, + get_dot/2, + valid_dot/1, + increment/2, + increment/3, + all_nodes/1, + equal/2, + prune/3, + timestamp/0]). -ifdef(TEST). @@ -43,13 +55,16 @@ -endif. --export_type([vclock/0, timestamp/0, vclock_node/0, - dot/0, pure_dot/0]). +-export_type([vclock/0, + timestamp/0, + vclock_node/0, + dot/0, + pure_dot/0]). -type vclock() :: [dot()]. -type dot() :: {vclock_node(), - {counter(), timestamp()}}. + {counter(), timestamp()}}. -type pure_dot() :: {vclock_node(), counter()}. @@ -72,7 +87,7 @@ fresh(Node, Count) -> [{Node, {Count, timestamp()}}]. %% @doc Return true if Va is a direct descendant of Vb, %% else false -- remember, a vclock is its own descendant! -spec descends(Va :: vclock(), - Vb :: vclock()) -> boolean(). + Vb :: vclock()) -> boolean(). 
descends(_, []) -> % all vclocks descend from the empty vclock @@ -80,9 +95,9 @@ descends(_, []) -> descends(Va, Vb) -> [{NodeB, {CtrB, _T}} | RestB] = Vb, case lists:keyfind(NodeB, 1, Va) of - false -> false; - {_, {CtrA, _TSA}} -> - CtrA >= CtrB andalso descends(Va, RestB) + false -> false; + {_, {CtrA, _TSA}} -> + CtrA >= CtrB andalso descends(Va, RestB) end. %% @doc does the given `vclock()' descend from the given `dot()'. The @@ -137,7 +152,7 @@ merge([First | Rest]) -> merge([], NClock) -> NClock; merge([AClock | VClocks], NClock) -> merge(VClocks, - merge(lists:keysort(1, AClock), NClock, [])). + merge(lists:keysort(1, AClock), NClock, [])). merge([], [], AccClock) -> lists:reverse(AccClock); merge([], Left, AccClock) -> @@ -150,41 +165,41 @@ merge(V = [{Node1, {Ctr1, TS1} = CT1} = NCT1 | VClock], if Node1 < Node2 -> merge(VClock, N, [NCT1 | AccClock]); Node1 > Node2 -> merge(V, NClock, [NCT2 | AccClock]); true -> - ({_Ctr, _TS} = CT) = if Ctr1 > Ctr2 -> CT1; - Ctr1 < Ctr2 -> CT2; - true -> {Ctr1, erlang:max(TS1, TS2)} - end, - merge(VClock, NClock, [{Node1, CT} | AccClock]) + ({_Ctr, _TS} = CT) = if Ctr1 > Ctr2 -> CT1; + Ctr1 < Ctr2 -> CT2; + true -> {Ctr1, erlang:max(TS1, TS2)} + end, + merge(VClock, NClock, [{Node1, CT} | AccClock]) end. % @doc Get the counter value in VClock set from Node. -spec get_counter(Node :: vclock_node(), - VClock :: vclock()) -> counter(). + VClock :: vclock()) -> counter(). get_counter(Node, VClock) -> case lists:keyfind(Node, 1, VClock) of - {_, {Ctr, _TS}} -> Ctr; - false -> 0 + {_, {Ctr, _TS}} -> Ctr; + false -> 0 end. % @doc Get the timestamp value in a VClock set from Node. -spec get_timestamp(Node :: vclock_node(), - VClock :: vclock()) -> timestamp() | undefined. + VClock :: vclock()) -> timestamp() | undefined. get_timestamp(Node, VClock) -> case lists:keyfind(Node, 1, VClock) of - {_, {_Ctr, TS}} -> TS; - false -> undefined + {_, {_Ctr, TS}} -> TS; + false -> undefined end. % @doc Get the entry `dot()' for `vclock_node()' from `vclock()'. -spec get_dot(Node :: vclock_node(), - VClock :: vclock()) -> {ok, dot()} | undefined. + VClock :: vclock()) -> {ok, dot()} | undefined. get_dot(Node, VClock) -> case lists:keyfind(Node, 1, VClock) of - false -> undefined; - Entry -> {ok, Entry} + false -> undefined; + Entry -> {ok, Entry} end. %% @doc is the given argument a valid dot, or entry? @@ -197,23 +212,24 @@ valid_dot(_) -> false. % @doc Increment VClock at Node. -spec increment(Node :: vclock_node(), - VClock :: vclock()) -> vclock(). + VClock :: vclock()) -> vclock(). increment(Node, VClock) -> increment(Node, timestamp(), VClock). % @doc Increment VClock at Node. -spec increment(Node :: vclock_node(), - IncTs :: timestamp(), VClock :: vclock()) -> vclock(). + IncTs :: timestamp(), VClock :: vclock()) -> vclock(). increment(Node, IncTs, VClock) -> - {{_Ctr, _TS} = C1, NewV} = case lists:keytake(Node, 1, - VClock) - of - false -> {{1, IncTs}, VClock}; - {value, {_N, {C, _T}}, ModV} -> - {{C + 1, IncTs}, ModV} - end, + {{_Ctr, _TS} = C1, NewV} = case lists:keytake(Node, + 1, + VClock) + of + false -> {{1, IncTs}, VClock}; + {value, {_N, {C, _T}}, ModV} -> + {{C + 1, IncTs}, ModV} + end, [{Node, C1} | NewV]. % @doc Return the list of all nodes that have ever incremented VClock. @@ -222,11 +238,11 @@ increment(Node, IncTs, VClock) -> all_nodes(VClock) -> [X || {X, {_, _}} <- VClock]. -define(DAYS_FROM_GREGORIAN_BASE_TO_EPOCH, - 1970 * 365 + 478). + 1970 * 365 + 478). 
-define(SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH, - (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH) * 24 * 60 * - 60). %% == calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}}) + (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH) * 24 * 60 * + 60). %% == calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}}) % @doc Return a timestamp for a vector clock -spec timestamp() -> timestamp(). @@ -236,55 +252,55 @@ timestamp() -> %% but significantly faster. {MegaSeconds, Seconds, _} = os:timestamp(), (?SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH) + - MegaSeconds * 1000000 - + Seconds. + MegaSeconds * 1000000 + + Seconds. % @doc Compares two VClocks for equality. -spec equal(VClockA :: vclock(), - VClockB :: vclock()) -> boolean(). + VClockB :: vclock()) -> boolean(). equal(VA, VB) -> lists:sort(VA) =:= lists:sort(VB). % @doc Possibly shrink the size of a vclock, depending on current age and size. -spec prune(V :: vclock(), Now :: integer(), - BucketProps :: term()) -> vclock(). + BucketProps :: term()) -> vclock(). prune(V, Now, BucketProps) -> %% This sort need to be deterministic, to avoid spurious merge conflicts later. %% We achieve this by using the node ID as secondary key. SortV = lists:sort(fun ({N1, {_, T1}}, {N2, {_, T2}}) -> - {T1, N1} < {T2, N2} - end, - V), + {T1, N1} < {T2, N2} + end, + V), prune_vclock1(SortV, Now, BucketProps). % @private prune_vclock1(V, Now, BProps) -> case length(V) =< get_property(small_vclock, BProps) of - true -> V; - false -> - {_, {_, HeadTime}} = hd(V), - case Now - HeadTime < get_property(young_vclock, BProps) - of - true -> V; - false -> prune_vclock1(V, Now, BProps, HeadTime) - end + true -> V; + false -> + {_, {_, HeadTime}} = hd(V), + case Now - HeadTime < get_property(young_vclock, BProps) + of + true -> V; + false -> prune_vclock1(V, Now, BProps, HeadTime) + end end. % @private prune_vclock1(V, Now, BProps, HeadTime) -> % has a precondition that V is longer than small and older than young case length(V) > get_property(big_vclock, BProps) orelse - Now - HeadTime > get_property(old_vclock, BProps) - of - true -> prune_vclock1(tl(V), Now, BProps); - false -> V + Now - HeadTime > get_property(old_vclock, BProps) + of + true -> prune_vclock1(tl(V), Now, BProps); + false -> V end. get_property(Key, PairList) -> case lists:keyfind(Key, 1, PairList) of - {_Key, Value} -> Value; - false -> undefined + {_Key, Value} -> Value; + false -> undefined end. %% =================================================================== @@ -315,30 +331,35 @@ prune_small_test() -> Now = riak_core_util:moment(), OldTime = Now - 32000000, SmallVC = [{<<"1">>, {1, OldTime}}, - {<<"2">>, {2, OldTime}}, {<<"3">>, {3, OldTime}}], + {<<"2">>, {2, OldTime}}, + {<<"3">>, {3, OldTime}}], Props = [{small_vclock, 4}], ?assertEqual((lists:sort(SmallVC)), - (lists:sort(prune(SmallVC, Now, Props)))). + (lists:sort(prune(SmallVC, Now, Props)))). prune_young_test() -> % vclock with all entries younger than young_vclock will be untouched Now = riak_core_util:moment(), NewTime = Now - 1, - VC = [{<<"1">>, {1, NewTime}}, {<<"2">>, {2, NewTime}}, - {<<"3">>, {3, NewTime}}], + VC = [{<<"1">>, {1, NewTime}}, + {<<"2">>, {2, NewTime}}, + {<<"3">>, {3, NewTime}}], Props = [{small_vclock, 1}, {young_vclock, 1000}], ?assertEqual((lists:sort(VC)), - (lists:sort(prune(VC, Now, Props)))). + (lists:sort(prune(VC, Now, Props)))). 
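The prune_*_test cases around this point pin down the four thresholds that prune/3 and prune_vclock1/3,4 read from the bucket properties via get_property/2. A hedged summary sketch (the numbers are illustrative only; times are in seconds, as produced by timestamp/0 above):

prune_example(VC) ->
    Props = [{small_vclock, 10},   % never prune a clock of 10 or fewer entries
             {young_vclock, 20},   % keep everything while the oldest entry is < 20 s old
             {big_vclock, 50},     % otherwise drop oldest-first while > 50 entries remain
             {old_vclock, 86400}], % ...or while the oldest entry is > 86400 s old
    vclock:prune(VC, vclock:timestamp(), Props).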
prune_big_test() -> % vclock not preserved by small or young will be pruned down to % no larger than big_vclock entries Now = riak_core_util:moment(), NewTime = Now - 1000, - VC = [{<<"1">>, {1, NewTime}}, {<<"2">>, {2, NewTime}}, - {<<"3">>, {3, NewTime}}], - Props = [{small_vclock, 1}, {young_vclock, 1}, - {big_vclock, 2}, {old_vclock, 100000}], + VC = [{<<"1">>, {1, NewTime}}, + {<<"2">>, {2, NewTime}}, + {<<"3">>, {3, NewTime}}], + Props = [{small_vclock, 1}, + {young_vclock, 1}, + {big_vclock, 2}, + {old_vclock, 100000}], ?assert((length(prune(VC, Now, Props)) =:= 2)). prune_old_test() -> @@ -347,10 +368,13 @@ prune_old_test() -> Now = riak_core_util:moment(), NewTime = Now - 1000, OldTime = Now - 100000, - VC = [{<<"1">>, {1, NewTime}}, {<<"2">>, {2, OldTime}}, - {<<"3">>, {3, OldTime}}], - Props = [{small_vclock, 1}, {young_vclock, 1}, - {big_vclock, 2}, {old_vclock, 10000}], + VC = [{<<"1">>, {1, NewTime}}, + {<<"2">>, {2, OldTime}}, + {<<"3">>, {3, OldTime}}], + Props = [{small_vclock, 1}, + {young_vclock, 1}, + {big_vclock, 2}, + {old_vclock, 10000}], ?assert((length(prune(VC, Now, Props)) =:= 1)). prune_order_test() -> @@ -359,12 +383,14 @@ prune_order_test() -> Now = riak_core_util:moment(), OldTime = Now - 100000, VC1 = [{<<"1">>, {1, OldTime}}, - {<<"2">>, {2, OldTime}}], + {<<"2">>, {2, OldTime}}], VC2 = lists:reverse(VC1), - Props = [{small_vclock, 1}, {young_vclock, 1}, - {big_vclock, 2}, {old_vclock, 10000}], + Props = [{small_vclock, 1}, + {young_vclock, 1}, + {big_vclock, 2}, + {old_vclock, 10000}], ?assertEqual((prune(VC1, Now, Props)), - (prune(VC2, Now, Props))). + (prune(VC2, Now, Props))). accessor_test() -> VC = [{<<"1">>, {1, 1}}, {<<"2">>, {2, 2}}], @@ -377,39 +403,45 @@ accessor_test() -> ?assertEqual([<<"1">>, <<"2">>], (all_nodes(VC))). merge_test() -> - VC1 = [{<<"1">>, {1, 1}}, {<<"2">>, {2, 2}}, - {<<"4">>, {4, 4}}], + VC1 = [{<<"1">>, {1, 1}}, + {<<"2">>, {2, 2}}, + {<<"4">>, {4, 4}}], VC2 = [{<<"3">>, {3, 3}}, {<<"4">>, {3, 3}}], ?assertEqual([], (merge(vclock:fresh()))), - ?assertEqual([{<<"1">>, {1, 1}}, {<<"2">>, {2, 2}}, - {<<"3">>, {3, 3}}, {<<"4">>, {4, 4}}], - (merge([VC1, VC2]))). + ?assertEqual([{<<"1">>, {1, 1}}, + {<<"2">>, {2, 2}}, + {<<"3">>, {3, 3}}, + {<<"4">>, {4, 4}}], + (merge([VC1, VC2]))). merge_less_left_test() -> VC1 = [{<<"5">>, {5, 5}}], VC2 = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}], - ?assertEqual([{<<"5">>, {5, 5}}, {<<"6">>, {6, 6}}, - {<<"7">>, {7, 7}}], - (vclock:merge([VC1, VC2]))). + ?assertEqual([{<<"5">>, {5, 5}}, + {<<"6">>, {6, 6}}, + {<<"7">>, {7, 7}}], + (vclock:merge([VC1, VC2]))). merge_less_right_test() -> VC1 = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}], VC2 = [{<<"5">>, {5, 5}}], - ?assertEqual([{<<"5">>, {5, 5}}, {<<"6">>, {6, 6}}, - {<<"7">>, {7, 7}}], - (vclock:merge([VC1, VC2]))). + ?assertEqual([{<<"5">>, {5, 5}}, + {<<"6">>, {6, 6}}, + {<<"7">>, {7, 7}}], + (vclock:merge([VC1, VC2]))). merge_same_id_test() -> VC1 = [{<<"1">>, {1, 2}}, {<<"2">>, {1, 4}}], VC2 = [{<<"1">>, {1, 3}}, {<<"3">>, {1, 5}}], - ?assertEqual([{<<"1">>, {1, 3}}, {<<"2">>, {1, 4}}, - {<<"3">>, {1, 5}}], - (vclock:merge([VC1, VC2]))). + ?assertEqual([{<<"1">>, {1, 3}}, + {<<"2">>, {1, 4}}, + {<<"3">>, {1, 5}}], + (vclock:merge([VC1, VC2]))). 
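merge_same_id_test/0 above exercises the last clause of merge/3: for a node present in both clocks, the larger counter wins outright, and on a counter tie the larger timestamp is kept. The expected shell results below are derived from those clauses (the actor name n1 is invented):

1> vclock:merge([[{n1, {1, 2}}], [{n1, {1, 3}}]]).
[{n1,{1,3}}]
2> vclock:merge([[{n1, {2, 2}}], [{n1, {1, 9}}]]).
[{n1,{2,2}}]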
get_entry_test() -> VC = vclock:fresh(), VC1 = increment(a, - increment(c, increment(b, increment(a, VC)))), + increment(c, increment(b, increment(a, VC)))), ?assertMatch({ok, {a, {2, _}}}, (get_dot(a, VC1))), ?assertMatch({ok, {b, {1, _}}}, (get_dot(b, VC1))), ?assertMatch({ok, {c, {1, _}}}, (get_dot(c, VC1))), @@ -419,7 +451,7 @@ valid_entry_test() -> VC = vclock:fresh(), VC1 = increment(c, increment(b, increment(a, VC))), [begin - {ok, E} = get_dot(Actor, VC1), ?assert((valid_dot(E))) + {ok, E} = get_dot(Actor, VC1), ?assert((valid_dot(E))) end || Actor <- [a, b, c]], ?assertNot((valid_dot(undefined))), From ff1714c3d2d809d33286aa3a270f4a04f6f59575 Mon Sep 17 00:00:00 2001 From: woelki Date: Tue, 29 Sep 2020 13:56:20 +0200 Subject: [PATCH 3/5] format with Erlang version 23 --- src/chash.erl | 98 +- src/chashbin.erl | 124 +- src/gen_fsm_compat.erl | 1256 +++++++------- src/gen_nb_server.erl | 278 ++-- src/riak_core.erl | 416 ++--- src/riak_core_apl.erl | 596 +++---- src/riak_core_app.erl | 34 +- src/riak_core_base64url.erl | 20 +- src/riak_core_bucket.erl | 94 +- src/riak_core_bucket_props.erl | 316 ++-- src/riak_core_claim.erl | 632 +++---- src/riak_core_claim_util.erl | 488 +++--- src/riak_core_claimant.erl | 1142 ++++++------- src/riak_core_eventhandler_guard.erl | 28 +- src/riak_core_eventhandler_sup.erl | 52 +- src/riak_core_gossip.erl | 354 ++-- src/riak_core_handoff_listener.erl | 52 +- src/riak_core_handoff_listener_sup.erl | 12 +- src/riak_core_handoff_manager.erl | 980 +++++------ src/riak_core_handoff_receiver.erl | 180 +- src/riak_core_handoff_receiver_sup.erl | 12 +- src/riak_core_handoff_sender.erl | 738 ++++----- src/riak_core_handoff_sender_sup.erl | 16 +- src/riak_core_handoff_sup.erl | 12 +- src/riak_core_node_watcher.erl | 614 +++---- src/riak_core_node_watcher_events.erl | 40 +- src/riak_core_priority_queue.erl | 156 +- src/riak_core_rand.erl | 14 +- src/riak_core_ring.erl | 2104 ++++++++++++------------ src/riak_core_ring_events.erl | 46 +- src/riak_core_ring_handler.erl | 222 +-- src/riak_core_ring_manager.erl | 708 ++++---- src/riak_core_ring_util.erl | 106 +- src/riak_core_send_msg.erl | 16 +- src/riak_core_status.erl | 192 +-- src/riak_core_sup.erl | 36 +- src/riak_core_test_util.erl | 86 +- src/riak_core_util.erl | 904 +++++----- src/riak_core_vnode.erl | 1586 +++++++++--------- src/riak_core_vnode_manager.erl | 1028 ++++++------ src/riak_core_vnode_master.erl | 206 +-- src/riak_core_vnode_proxy.erl | 554 +++---- src/riak_core_vnode_proxy_sup.erl | 8 +- src/riak_core_vnode_sup.erl | 12 +- src/riak_core_vnode_worker.erl | 62 +- src/riak_core_vnode_worker_pool.erl | 270 +-- src/vclock.erl | 222 +-- 47 files changed, 8561 insertions(+), 8561 deletions(-) diff --git a/src/chash.erl b/src/chash.erl index d238dc6d0..f7afa4efe 100644 --- a/src/chash.erl +++ b/src/chash.erl @@ -37,25 +37,25 @@ -module(chash). -export([contains_name/2, - fresh/2, - lookup/2, - key_of/1, - members/1, - merge_rings/2, - next_index/2, - nodes/1, - predecessors/2, - predecessors/3, - ring_increment/1, - size/1, - successors/2, - successors/3, - update/3]). + fresh/2, + lookup/2, + key_of/1, + members/1, + merge_rings/2, + next_index/2, + nodes/1, + predecessors/2, + predecessors/3, + ring_increment/1, + size/1, + successors/2, + successors/3, + update/3]). -export_type([chash/0, index/0, index_as_int/0]). -define(RINGTOP, - trunc(math:pow(2, 160) - 1)). % SHA-1 space + trunc(math:pow(2, 160) - 1)). % SHA-1 space -ifdef(TEST). 
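Since ?RINGTOP is the top of the SHA-1 space, chash:fresh/2 carves it into NumPartitions equal increments of ring_increment(NumPartitions) each, starting at index 0 (update_test/0 and merge_test/0 below rely on that layout). A hedged shell sketch (the seed node name is invented):

1> {8, [{0, seed} | _]} = chash:fresh(8, seed).
2> Inc = chash:ring_increment(8).   % (?RINGTOP) div 8
3> true = chash:contains_name(seed, chash:fresh(8, seed)).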
@@ -86,7 +86,7 @@
 %% @doc Return true if named Node owns any partitions in the ring, else false.
 -spec contains_name(Name :: chash_node(),
-		    CHash :: chash()) -> boolean().
+                    CHash :: chash()) -> boolean().

 contains_name(Name, CHash) ->
     {_NumPartitions, Nodes} = CHash,
@@ -97,7 +97,7 @@ contains_name(Name, CHash) ->
 %% is not much larger than the intended eventual number of
 %% participating nodes, then performance will suffer.
 -spec fresh(NumPartitions :: num_partitions(),
-	    SeedNode :: chash_node()) -> chash().
+            SeedNode :: chash_node()) -> chash().

 fresh(NumPartitions, SeedNode) ->
     Inc = ring_increment(NumPartitions),
@@ -107,7 +107,7 @@ fresh(NumPartitions, SeedNode) ->
 %% @doc Find the Node that owns the partition identified by IndexAsInt.
 -spec lookup(IndexAsInt :: index_as_int(),
-	     CHash :: chash()) -> chash_node().
+             CHash :: chash()) -> chash_node().

 lookup(IndexAsInt, CHash) ->
     {_NumPartitions, Nodes} = CHash,
@@ -134,7 +134,7 @@ members(CHash) ->
 %% If multiple nodes are actively claiming nodes in the same
 %% time period, churn will occur. Be prepared to live with it.
 -spec merge_rings(CHashA :: chash(),
-		  CHashB :: chash()) -> chash().
+                  CHashB :: chash()) -> chash().

 merge_rings(CHashA, CHashB) ->
     {NumPartitions, NodesA} = CHashA,
@@ -146,7 +146,7 @@ merge_rings(CHashA, CHashB) ->
 %% @doc Given the integer representation of a chash key,
 %%      return the next ring index integer value.
 -spec next_index(IntegerKey :: integer(),
-		 CHash :: chash()) -> index_as_int().
+                 CHash :: chash()) -> index_as_int().

 next_index(IntegerKey, {NumPartitions, _}) ->
     Inc = ring_increment(NumPartitions),
@@ -161,7 +161,7 @@ nodes(CHash) ->
 %% @doc Given an object key, return all NodeEntries in order starting at Index.
 -spec ordered_from(Index :: index(),
-		   CHash :: chash()) -> [node_entry()].
+                   CHash :: chash()) -> [node_entry()].

 ordered_from(Index, {NumPartitions, Nodes}) ->
     <<IndexAsInt:160/integer>> = Index,
@@ -172,7 +172,7 @@ ordered_from(Index, {NumPartitions, Nodes}) ->
 %% @doc Given an object key, return all NodeEntries in reverse order
 %% starting at Index.
 -spec predecessors(Index :: index() | index_as_int(),
-		   CHash :: chash()) -> [node_entry()].
+                   CHash :: chash()) -> [node_entry()].

 predecessors(Index, CHash) ->
     {NumPartitions, _Nodes} = CHash,
@@ -181,20 +181,20 @@ predecessors(Index, CHash) ->
 %% @doc Given an object key, return the next N NodeEntries in reverse order
 %% starting at Index.
 -spec predecessors(Index :: index() | index_as_int(),
-		   CHash :: chash(), N :: integer()) -> [node_entry()].
+                   CHash :: chash(), N :: integer()) -> [node_entry()].

 predecessors(Index, CHash, N) when is_integer(Index) ->
     predecessors(<<Index:160/integer>>, CHash, N);
 predecessors(Index, CHash, N) ->
     Num = max_n(N, CHash),
     {Res, _} = lists:split(Num,
-			   lists:reverse(ordered_from(Index, CHash))),
+                           lists:reverse(ordered_from(Index, CHash))),
     Res.

 %% @doc Return increment between ring indexes given
 %% the number of ring partitions.
 -spec ring_increment(NumPartitions ::
-			 pos_integer()) -> pos_integer().
+                         pos_integer()) -> pos_integer().

 ring_increment(NumPartitions) ->
     (?RINGTOP) div NumPartitions.
@@ -208,7 +208,7 @@ size(CHash) ->
 %% @doc Given an object key, return all NodeEntries in order starting at Index.
 -spec successors(Index :: index(),
-		 CHash :: chash()) -> [node_entry()].
+                 CHash :: chash()) -> [node_entry()].

 successors(Index, CHash) ->
     {NumPartitions, _Nodes} = CHash,
@@ -217,7 +217,7 @@ successors(Index, CHash) ->
 %% @doc Given an object key, return the next N NodeEntries in order
 %% starting at Index.
-spec successors(Index :: index(), CHash :: chash(), - N :: integer()) -> [node_entry()]. + N :: integer()) -> [node_entry()]. successors(Index, CHash, N) -> Num = max_n(N, CHash), @@ -225,20 +225,20 @@ successors(Index, CHash, N) -> {NumPartitions, _Nodes} = CHash, if Num =:= NumPartitions -> Ordered; true -> - {Res, _} = lists:split(Num, Ordered), - Res + {Res, _} = lists:split(Num, Ordered), + Res end. %% @doc Make the partition beginning at IndexAsInt owned by Name'd node. -spec update(IndexAsInt :: index_as_int(), - Name :: chash_node(), CHash :: chash()) -> chash(). + Name :: chash_node(), CHash :: chash()) -> chash(). update(IndexAsInt, Name, CHash) -> {NumPartitions, Nodes} = CHash, NewNodes = lists:keyreplace(IndexAsInt, - 1, - Nodes, - {IndexAsInt, Name}), + 1, + Nodes, + {IndexAsInt, Name}), {NumPartitions, NewNodes}. %% ==================================================================== @@ -249,14 +249,14 @@ update(IndexAsInt, Name, CHash) -> %% @doc Return either N or the number of partitions in the ring, whichever %% is lesser. -spec max_n(N :: integer(), - CHash :: chash()) -> integer(). + CHash :: chash()) -> integer(). max_n(N, {NumPartitions, _Nodes}) -> erlang:min(N, NumPartitions). %% @private -spec random_node(NodeA :: chash_node(), - NodeB :: chash_node()) -> chash_node(). + NodeB :: chash_node()) -> chash_node(). random_node(NodeA, NodeA) -> NodeA; random_node(NodeA, NodeB) -> @@ -273,9 +273,9 @@ update_test() -> % Create a fresh ring... CHash = chash:fresh(5, Node), GetNthIndex = fun (N, {_, Nodes}) -> - {Index, _} = lists:nth(N, Nodes), - Index - end, + {Index, _} = lists:nth(N, Nodes), + Index + end, % Test update... FirstIndex = GetNthIndex(1, CHash), ThirdIndex = GetNthIndex(3, CHash), @@ -286,7 +286,7 @@ update_test() -> {_, Node}, {_, Node}, {_, Node}]} = - update(FirstIndex, NewNode, CHash), + update(FirstIndex, NewNode, CHash), {5, [{_, Node}, {_, Node}, @@ -294,13 +294,13 @@ update_test() -> {_, Node}, {_, Node}, {_, Node}]} = - update(ThirdIndex, NewNode, CHash). + update(ThirdIndex, NewNode, CHash). contains_test() -> CHash = chash:fresh(8, the_node), ?assertEqual(true, (contains_name(the_node, CHash))), ?assertEqual(false, - (contains_name(some_other_node, CHash))). + (contains_name(some_other_node, CHash))). max_n_test() -> CHash = chash:fresh(8, the_node), @@ -309,27 +309,27 @@ max_n_test() -> simple_size_test() -> ?assertEqual(8, - (length(chash:nodes(chash:fresh(8, the_node))))). + (length(chash:nodes(chash:fresh(8, the_node))))). successors_length_test() -> ?assertEqual(8, - (length(chash:successors(chash:key_of(0), - chash:fresh(8, the_node))))). + (length(chash:successors(chash:key_of(0), + chash:fresh(8, the_node))))). inverse_pred_test() -> CHash = chash:fresh(8, the_node), S = [I - || {I, _} <- chash:successors(chash:key_of(4), CHash)], + || {I, _} <- chash:successors(chash:key_of(4), CHash)], P = [I - || {I, _} - <- chash:predecessors(chash:key_of(4), CHash)], + || {I, _} + <- chash:predecessors(chash:key_of(4), CHash)], ?assertEqual(S, (lists:reverse(P))). merge_test() -> CHashA = chash:fresh(8, node_one), CHashB = chash:update(0, - node_one, - chash:fresh(8, node_two)), + node_one, + chash:fresh(8, node_two)), CHash = chash:merge_rings(CHashA, CHashB), ?assertEqual(node_one, (chash:lookup(0, CHash))). diff --git a/src/chashbin.erl b/src/chashbin.erl index 689cde58c..7e956de17 100644 --- a/src/chashbin.erl +++ b/src/chashbin.erl @@ -24,20 +24,20 @@ -module(chashbin). 
 -export([create/1,
-	 to_chash/1,
-	 to_list/1,
-	 to_list_filter/2,
-	 responsible_index/2,
-	 responsible_position/2,
-	 index_owner/2,
-	 num_partitions/1]).
+         to_chash/1,
+         to_list/1,
+         to_list_filter/2,
+         responsible_index/2,
+         responsible_position/2,
+         index_owner/2,
+         num_partitions/1]).

 -export([iterator/2,
-	 exact_iterator/2,
-	 itr_value/1,
-	 itr_pop/2,
-	 itr_next/1,
-	 itr_next_while/2]).
+         exact_iterator/2,
+         itr_value/1,
+         itr_pop/2,
+         itr_next/1,
+         itr_next_while/2]).

 -export_type([chashbin/0]).

@@ -52,32 +52,32 @@
 -type index() :: chash:index_as_int().

 -type pred_fun() :: fun(({index(),
-			  node()}) -> boolean()).
+                          node()}) -> boolean()).

 -type chash_key() :: index() | chash:index().

 -ifndef(namespaced_types).

 -record(chashbin,
-	{size :: pos_integer(),
-	 owners :: owners_bin(),
-	 nodes :: erlang:tuple(node())}).
+        {size :: pos_integer(),
+         owners :: owners_bin(),
+         nodes :: erlang:tuple(node())}).

 -else.

 -record(chashbin,
-	{size :: pos_integer(),
-	 owners :: owners_bin(),
-	 nodes :: erlang:tuple(node())}).
+        {size :: pos_integer(),
+         owners :: owners_bin(),
+         nodes :: erlang:tuple(node())}).

 -endif.

 -type chashbin() :: #chashbin{}.

 -record(iterator,
-	{pos :: non_neg_integer(),
-	 start :: non_neg_integer(),
-	 chbin :: chashbin()}).
+        {pos :: non_neg_integer(),
+         start :: non_neg_integer(),
+         chbin :: chashbin()}).

 -type iterator() :: #iterator{}.

@@ -92,10 +92,10 @@ create({Size, Owners}) ->
     Nodes1 = [Node || {_, Node} <- Owners],
     Nodes2 = lists:usort(Nodes1),
     Nodes3 = lists:zip(Nodes2,
-		       lists:seq(1, length(Nodes2))),
+                       lists:seq(1, length(Nodes2))),
     Bin = create_bin(Owners, Nodes3, <<>>),
     #chashbin{size = Size, owners = Bin,
-	      nodes = list_to_tuple(Nodes2)}.
+              nodes = list_to_tuple(Nodes2)}.

 %% @doc Convert a `chashbin' back to a `chash'
 -spec to_chash(chashbin()) -> chash:chash().
@@ -115,17 +115,17 @@ to_list(#chashbin{owners = Bin, nodes = Nodes}) ->
 %% Convert a `chashbin' to a list of `{Index, Owner}' pairs for
 %% which `Pred({Index, Owner})' returns `true'
 -spec to_list_filter(pred_fun(),
-		     chashbin()) -> [{index(), node()}].
+                     chashbin()) -> [{index(), node()}].

 to_list_filter(Pred,
-	       #chashbin{owners = Bin, nodes = Nodes}) ->
+               #chashbin{owners = Bin, nodes = Nodes}) ->
     [{Idx, element(Id, Nodes)}
      || <<Idx:160/integer, Id:16/integer>> <= Bin,
-	Pred({Idx, element(Id, Nodes)})].
+        Pred({Idx, element(Id, Nodes)})].

 %% @doc Determine the ring index responsible for a given chash key
 -spec responsible_index(chash_key(),
-			chashbin()) -> index().
+                        chashbin()) -> index().

 responsible_index(<<HashKey:160/integer>>, CHBin) ->
     responsible_index(HashKey, CHBin);
 responsible_index(HashKey, #chashbin{size = Size}) ->
@@ -135,7 +135,7 @@ responsible_index(HashKey, #chashbin{size = Size}) ->

 %% @doc Determine the ring position responsible for a given chash key
 -spec responsible_position(chash_key(),
-			   chashbin()) -> non_neg_integer().
+                           chashbin()) -> non_neg_integer().

 responsible_position(<<HashKey:160/integer>>, CHBin) ->
     responsible_position(HashKey, CHBin);
 responsible_position(HashKey, #chashbin{size = Size}) ->
@@ -148,10 +148,10 @@ responsible_position(HashKey, #chashbin{size = Size}) ->

 index_owner(Idx, CHBin) ->
     case itr_value(exact_iterator(Idx, CHBin)) of
-      {Idx, Owner} -> Owner;
-      _ ->
-	  %% Match the behavior for riak_core_ring:index_owner/2
-	  exit({badmatch, false})
+        {Idx, Owner} -> Owner;
+        _ ->
+            %% Match the behavior for riak_core_ring:index_owner/2
+            exit({badmatch, false})
     end.

 %% @doc Return the number of partitions in a given `chashbin'
@@ -166,7 +166,7 @@ num_partitions(#chashbin{size = Size}) -> Size.
 %% @doc
 %% Return an iterator pointing to the index responsible for the given chash key
 -spec iterator(first | chash_key(),
-	       chashbin()) -> iterator().
+               chashbin()) -> iterator().

 iterator(first, CHBin) ->
     #iterator{pos = 0, start = 0, chbin = CHBin};
@@ -180,10 +180,10 @@ iterator(HashKey, CHBin) ->

 -spec itr_value(iterator()) -> {index(), node()}.

 itr_value(#iterator{pos = Pos,
-		    chbin = #chashbin{owners = Bin, nodes = Nodes}}) ->
+                    chbin = #chashbin{owners = Bin, nodes = Nodes}}) ->
     <<_:Pos/binary-unit:176, Idx:160/integer, Id:16/integer, _/binary>> =
-	Bin,
+        Bin,
     Owner = element(Id, Nodes),
     {Idx, Owner}.

@@ -191,52 +191,52 @@ itr_value(#iterator{pos = Pos,
 -spec itr_next(iterator()) -> iterator() | done.

 itr_next(Itr = #iterator{pos = Pos, start = Start,
-			 chbin = CHBin}) ->
+                         chbin = CHBin}) ->
     Pos2 = (Pos + 1) rem CHBin#chashbin.size,
     case Pos2 of
-      Start -> done;
-      _ -> Itr#iterator{pos = Pos2}
+        Start -> done;
+        _ -> Itr#iterator{pos = Pos2}
     end.

 %% @doc
 %% Advance the iterator `N' times, returning a list of the traversed
 %% `{Index, Owner}' pairs as well as the new iterator state
 -spec itr_pop(pos_integer(), iterator()) -> {[{index(),
-					       node()}],
-					     iterator()}.
+                                               node()}],
+                                             iterator()}.

 itr_pop(N, Itr = #iterator{pos = Pos, chbin = CHBin}) ->
     #chashbin{size = Size, owners = Bin, nodes = Nodes} =
-	CHBin,
+        CHBin,
     L = case Bin of
-	  <<_:Pos/binary-unit:176, Bin2:N/binary-unit:176,
-	    _/binary>> ->
-	      [{Idx, element(Id, Nodes)}
-	       || <<Idx:160/integer, Id:16/integer>> <= Bin2];
-	  _ ->
-	      Left = N + Pos - Size,
-	      Skip = Pos - Left,
-	      <<Bin3:Left/binary-unit:176, _:Skip/binary-unit:176, Bin2/binary>> =
-		  Bin,
-	      L1 = [{Idx, element(Id, Nodes)}
-		    || <<Idx:160/integer, Id:16/integer>> <= Bin2],
-	      L2 = [{Idx, element(Id, Nodes)}
-		    || <<Idx:160/integer, Id:16/integer>> <= Bin3],
-	      L1 ++ L2
-	end,
+        <<_:Pos/binary-unit:176, Bin2:N/binary-unit:176,
+          _/binary>> ->
+            [{Idx, element(Id, Nodes)}
+             || <<Idx:160/integer, Id:16/integer>> <= Bin2];
+        _ ->
+            Left = N + Pos - Size,
+            Skip = Pos - Left,
+            <<Bin3:Left/binary-unit:176, _:Skip/binary-unit:176, Bin2/binary>> =
+                Bin,
+            L1 = [{Idx, element(Id, Nodes)}
+                  || <<Idx:160/integer, Id:16/integer>> <= Bin2],
+            L2 = [{Idx, element(Id, Nodes)}
+                  || <<Idx:160/integer, Id:16/integer>> <= Bin3],
+            L1 ++ L2
+        end,
     Pos2 = (Pos + N) rem Size,
     Itr2 = Itr#iterator{pos = Pos2},
     {L, Itr2}.

 %% @doc Advance the iterator while `Pred({Index, Owner})' returns `true'
 -spec itr_next_while(pred_fun(),
-		     iterator()) -> iterator().
+                     iterator()) -> iterator().

 itr_next_while(Pred, Itr) ->
     case Pred(itr_value(Itr)) of
-      false -> Itr;
-      true -> itr_next_while(Pred, itr_next(Itr))
+        false -> Itr;
+        true -> itr_next_while(Pred, itr_next(Itr))
    end.

 %% ===================================================================
@@ -245,7 +245,7 @@ itr_next_while(Pred, Itr) ->
 %% Convert list of {Index, Owner} pairs into `chashbin' binary representation
 -spec create_bin([{index(), node()}],
-		 [{node(), pos_integer()}], binary()) -> owners_bin().
+                 [{node(), pos_integer()}], binary()) -> owners_bin().

 create_bin([], _, Bin) -> Bin;
 create_bin([{Idx, Owner} | Owners], Nodes, Bin) ->
diff --git a/src/gen_fsm_compat.erl b/src/gen_fsm_compat.erl
index 5bfed9577..7622c14b6 100644
--- a/src/gen_fsm_compat.erl
+++ b/src/gen_fsm_compat.erl
@@ -106,34 +106,34 @@
 %%% ---------------------------------------------------

 -export([start/3,
-	 start/4,
-	 start_link/3,
-	 start_link/4,
-	 stop/1,
-	 stop/3,
-	 send_event/2,
-	 sync_send_event/2,
-	 sync_send_event/3,
-	 send_all_state_event/2,
-	 sync_send_all_state_event/2,
-	 sync_send_all_state_event/3,
-	 reply/2,
-	 start_timer/2,
-	 send_event_after/2,
-	 cancel_timer/1,
-	 enter_loop/4,
-	 enter_loop/5,
-	 enter_loop/6,
-	 wake_hib/7]).
+ start/4, + start_link/3, + start_link/4, + stop/1, + stop/3, + send_event/2, + sync_send_event/2, + sync_send_event/3, + send_all_state_event/2, + sync_send_all_state_event/2, + sync_send_all_state_event/3, + reply/2, + start_timer/2, + send_event_after/2, + cancel_timer/1, + enter_loop/4, + enter_loop/5, + enter_loop/6, + wake_hib/7]). %% Internal exports -export([init_it/6, - system_continue/3, - system_terminate/4, - system_code_change/4, - system_get_state/1, - system_replace_state/2, - format_status/2]). + system_continue/3, + system_terminate/4, + system_code_change/4, + system_get_state/1, + system_replace_state/2, + format_status/2]). -import(error_logger, [format/2]). @@ -142,82 +142,82 @@ %%% --------------------------------------------------- -callback init(Args :: term()) -> {ok, - StateName :: atom(), StateData :: term()} | - {ok, StateName :: atom(), StateData :: term(), - timeout() | hibernate} | - {stop, Reason :: term()} | - ignore. + StateName :: atom(), StateData :: term()} | + {ok, StateName :: atom(), StateData :: term(), + timeout() | hibernate} | + {stop, Reason :: term()} | + ignore. -callback handle_event(Event :: term(), - StateName :: atom(), - StateData :: term()) -> {next_state, - NextStateName :: atom(), - NewStateData :: term()} | - {next_state, - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {stop, Reason :: term(), - NewStateData :: term()}. + StateName :: atom(), + StateData :: term()) -> {next_state, + NextStateName :: atom(), + NewStateData :: term()} | + {next_state, + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {stop, Reason :: term(), + NewStateData :: term()}. -callback handle_sync_event(Event :: term(), - From :: {pid(), Tag :: term()}, StateName :: atom(), - StateData :: term()) -> {reply, Reply :: term(), - NextStateName :: atom(), - NewStateData :: term()} | - {reply, Reply :: term(), - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {next_state, - NextStateName :: atom(), - NewStateData :: term()} | - {next_state, - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {stop, Reason :: term(), - Reply :: term(), - NewStateData :: term()} | - {stop, Reason :: term(), - NewStateData :: term()}. + From :: {pid(), Tag :: term()}, StateName :: atom(), + StateData :: term()) -> {reply, Reply :: term(), + NextStateName :: atom(), + NewStateData :: term()} | + {reply, Reply :: term(), + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {next_state, + NextStateName :: atom(), + NewStateData :: term()} | + {next_state, + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {stop, Reason :: term(), + Reply :: term(), + NewStateData :: term()} | + {stop, Reason :: term(), + NewStateData :: term()}. -callback handle_info(Info :: term(), - StateName :: atom(), - StateData :: term()) -> {next_state, - NextStateName :: atom(), - NewStateData :: term()} | - {next_state, - NextStateName :: atom(), - NewStateData :: term(), - timeout() | hibernate} | - {stop, Reason :: normal | term(), - NewStateData :: term()}. + StateName :: atom(), + StateData :: term()) -> {next_state, + NextStateName :: atom(), + NewStateData :: term()} | + {next_state, + NextStateName :: atom(), + NewStateData :: term(), + timeout() | hibernate} | + {stop, Reason :: normal | term(), + NewStateData :: term()}. 
-callback terminate(Reason :: normal | - shutdown | - {shutdown, term()} | - term(), - StateName :: atom(), StateData :: term()) -> term(). + shutdown | + {shutdown, term()} | + term(), + StateName :: atom(), StateData :: term()) -> term(). -callback code_change(OldVsn :: term() | {down, term()}, - StateName :: atom(), StateData :: term(), - Extra :: term()) -> {ok, NextStateName :: atom(), - NewStateData :: term()}. + StateName :: atom(), StateData :: term(), + Extra :: term()) -> {ok, NextStateName :: atom(), + NewStateData :: term()}. -callback format_status(Opt, - StatusData) -> Status when Opt :: normal | terminate, - StatusData :: [PDict | - State], - PDict :: [{Key :: term(), - Value :: term()}], - State :: term(), - Status :: term(). + StatusData) -> Status when Opt :: normal | terminate, + StatusData :: [PDict | + State], + PDict :: [{Key :: term(), + Value :: term()}], + State :: term(), + Status :: term(). -optional_callbacks([handle_info/3, - terminate/3, - code_change/4, - format_status/2]). + terminate/3, + code_change/4, + format_status/2]). %%% --------------------------------------------------- %%% Starts a generic state machine. @@ -264,27 +264,27 @@ send_event(Name, Event) -> sync_send_event(Name, Event) -> case catch gen:call(Name, '$gen_sync_event', Event) of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_event, [Name, Event]}}) + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_event, [Name, Event]}}) end. sync_send_event(Name, Event, Timeout) -> case catch gen:call(Name, - '$gen_sync_event', - Event, - Timeout) - of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_event, [Name, Event, Timeout]}}) + '$gen_sync_event', + Event, + Timeout) + of + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_event, [Name, Event, Timeout]}}) end. send_all_state_event({global, Name}, Event) -> catch global:send(Name, - {'$gen_all_state_event', Event}), + {'$gen_all_state_event', Event}), ok; send_all_state_event({via, Mod, Name}, Event) -> catch Mod:send(Name, {'$gen_all_state_event', Event}), @@ -295,27 +295,27 @@ send_all_state_event(Name, Event) -> sync_send_all_state_event(Name, Event) -> case catch gen:call(Name, - '$gen_sync_all_state_event', - Event) - of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_all_state_event, [Name, Event]}}) + '$gen_sync_all_state_event', + Event) + of + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_all_state_event, [Name, Event]}}) end. sync_send_all_state_event(Name, Event, Timeout) -> case catch gen:call(Name, - '$gen_sync_all_state_event', - Event, - Timeout) - of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, - sync_send_all_state_event, - [Name, Event, Timeout]}}) + '$gen_sync_all_state_event', + Event, + Timeout) + of + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, + sync_send_all_state_event, + [Name, Event, Timeout]}}) end. %% Designed to be only callable within one of the callbacks @@ -337,9 +337,9 @@ send_event_after(Time, Event) -> %% an active timer/send_event_after, false otherwise. cancel_timer(Ref) -> case erlang:cancel_timer(Ref) of - false -> - receive {timeout, Ref, _} -> 0 after 0 -> false end; - RemainingTime -> RemainingTime + false -> + receive {timeout, Ref, _} -> 0 after 0 -> false end; + RemainingTime -> RemainingTime end. 
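The three timer helpers above differ only in what the state function eventually sees: start_timer/2 delivers {timeout, Ref, Msg}, send_event_after/2 delivers the bare event, and cancel_timer/1 additionally flushes a timeout that already fired. A hedged sketch of a callback module using them (state, event, and field names are invented; StateData is assumed to be a map):

idle({start_work, Job}, StateData) ->
    %% arrange a {timeout, TRef, {job_timeout, Job}} event in 5 s
    TRef = gen_fsm_compat:start_timer(5000, {job_timeout, Job}),
    {next_state, working, StateData#{tref => TRef}};
working({timeout, TRef, {job_timeout, Job}}, #{tref := TRef} = StateData) ->
    {stop, {job_timed_out, Job}, StateData};
working(job_done, #{tref := TRef} = StateData) ->
    %% remaining ms, or 0/false if the timer already fired (see above)
    _Remaining = gen_fsm_compat:cancel_timer(TRef),
    {next_state, idle, StateData}.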
%% enter_loop/4,5,6 @@ -352,52 +352,52 @@ cancel_timer(Ref) -> %% including registering a name for it. enter_loop(Mod, Options, StateName, StateData) -> enter_loop(Mod, - Options, - StateName, - StateData, - self(), - infinity). + Options, + StateName, + StateData, + self(), + infinity). enter_loop(Mod, Options, StateName, StateData, - {Scope, _} = ServerName) + {Scope, _} = ServerName) when Scope == local; Scope == global -> enter_loop(Mod, - Options, - StateName, - StateData, - ServerName, - infinity); + Options, + StateName, + StateData, + ServerName, + infinity); enter_loop(Mod, Options, StateName, StateData, - {via, _, _} = ServerName) -> + {via, _, _} = ServerName) -> enter_loop(Mod, - Options, - StateName, - StateData, - ServerName, - infinity); + Options, + StateName, + StateData, + ServerName, + infinity); enter_loop(Mod, Options, StateName, StateData, - Timeout) -> + Timeout) -> enter_loop(Mod, - Options, - StateName, - StateData, - self(), - Timeout). + Options, + StateName, + StateData, + self(), + Timeout). enter_loop(Mod, Options, StateName, StateData, - ServerName, Timeout) -> + ServerName, Timeout) -> Name = gen:get_proc_name(ServerName), Parent = gen:get_parent(), Debug = gen:debug_options(Name, Options), HibernateAfterTimeout = gen:hibernate_after(Options), loop(Parent, - Name, - StateName, - StateData, - Mod, - Timeout, - HibernateAfterTimeout, - Debug). + Name, + StateName, + StateData, + Mod, + Timeout, + HibernateAfterTimeout, + Debug). %%% --------------------------------------------------- %%% Initiate the new process. @@ -413,42 +413,42 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> Debug = gen:debug_options(Name, Options), HibernateAfterTimeout = gen:hibernate_after(Options), case catch Mod:init(Args) of - {ok, StateName, StateData} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, - Name, - StateName, - StateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug); - {ok, StateName, StateData, Timeout} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, - Name, - StateName, - StateData, - Mod, - Timeout, - HibernateAfterTimeout, - Debug); - {stop, Reason} -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - ignore -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, ignore), - exit(normal); - {'EXIT', Reason} -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - Else -> - Error = {bad_return_value, Else}, - proc_lib:init_ack(Starter, {error, Error}), - exit(Error) + {ok, StateName, StateData} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, + Name, + StateName, + StateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug); + {ok, StateName, StateData, Timeout} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, + Name, + StateName, + StateData, + Mod, + Timeout, + HibernateAfterTimeout, + Debug); + {stop, Reason} -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + ignore -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, ignore), + exit(normal); + {'EXIT', Reason} -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + Else -> + Error = {bad_return_value, Else}, + proc_lib:init_ack(Starter, {error, Error}), + exit(Error) end. 
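init_it/6 above doubles as a catalogue of what Mod:init/1 may return. A hedged sketch of the three common forms (the argument shapes are invented):

init({idle, Args}) ->
    %% enter 'idle'; the main loop then waits with Timeout = infinity
    {ok, idle, Args};
init({idle_timed, Args, T}) ->
    %% same, but the loop turns silence into a 'timeout' event after T ms
    %% (see the {'$gen_event', timeout} clause in loop/8 below)
    {ok, idle, Args, T};
init(bad_config) ->
    %% init_it unregisters the name, and start/start_link
    %% return {error, bad_config}
    {stop, bad_config}.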
%%----------------------------------------------------------------- @@ -457,194 +457,194 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> loop(Parent, Name, StateName, StateData, Mod, hibernate, HibernateAfterTimeout, Debug) -> proc_lib:hibernate(?MODULE, - wake_hib, - [Parent, - Name, - StateName, - StateData, - Mod, - HibernateAfterTimeout, - Debug]); + wake_hib, + [Parent, + Name, + StateName, + StateData, + Mod, + HibernateAfterTimeout, + Debug]); loop(Parent, Name, StateName, StateData, Mod, infinity, HibernateAfterTimeout, Debug) -> receive - Msg -> - decode_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug, - false) - after HibernateAfterTimeout -> - loop(Parent, - Name, - StateName, - StateData, - Mod, - hibernate, - HibernateAfterTimeout, - Debug) + Msg -> + decode_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug, + false) + after HibernateAfterTimeout -> + loop(Parent, + Name, + StateName, + StateData, + Mod, + hibernate, + HibernateAfterTimeout, + Debug) end; loop(Parent, Name, StateName, StateData, Mod, Time, HibernateAfterTimeout, Debug) -> Msg = receive - Input -> Input after Time -> {'$gen_event', timeout} - end, + Input -> Input after Time -> {'$gen_event', timeout} + end, decode_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout, - Debug, - false). + Parent, + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout, + Debug, + false). wake_hib(Parent, Name, StateName, StateData, Mod, - HibernateAfterTimeout, Debug) -> + HibernateAfterTimeout, Debug) -> Msg = receive Input -> Input end, decode_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - hibernate, - HibernateAfterTimeout, - Debug, - true). + Parent, + Name, + StateName, + StateData, + Mod, + hibernate, + HibernateAfterTimeout, + Debug, + true). decode_msg(Msg, Parent, Name, StateName, StateData, Mod, - Time, HibernateAfterTimeout, Debug, Hib) -> + Time, HibernateAfterTimeout, Debug, Hib) -> case Msg of - {system, From, Req} -> - sys:handle_system_msg(Req, - From, - Parent, - ?MODULE, - Debug, - [Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout], - Hib); - {'EXIT', Parent, Reason} -> - terminate(Reason, - Name, - Msg, - Mod, - StateName, - StateData, - Debug); - _Msg when Debug =:= [] -> - handle_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout); - _Msg -> - Debug1 = sys:handle_debug(Debug, - fun print_event/3, - {Name, StateName}, - {in, Msg}), - handle_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout, - Debug1) + {system, From, Req} -> + sys:handle_system_msg(Req, + From, + Parent, + ?MODULE, + Debug, + [Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout], + Hib); + {'EXIT', Parent, Reason} -> + terminate(Reason, + Name, + Msg, + Mod, + StateName, + StateData, + Debug); + _Msg when Debug =:= [] -> + handle_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout); + _Msg -> + Debug1 = sys:handle_debug(Debug, + fun print_event/3, + {Name, StateName}, + {in, Msg}), + handle_msg(Msg, + Parent, + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout, + Debug1) end. %%----------------------------------------------------------------- %% Callback functions for system messages handling. 
%%----------------------------------------------------------------- system_continue(Parent, Debug, - [Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout]) -> + [Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout]) -> loop(Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout, - Debug). + Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout, + Debug). -spec system_terminate(term(), _, _, - [term(), ...]) -> no_return(). + [term(), ...]) -> no_return(). system_terminate(Reason, _Parent, Debug, - [Name, - StateName, - StateData, - Mod, - _Time, - _HibernateAfterTimeout]) -> + [Name, + StateName, + StateData, + Mod, + _Time, + _HibernateAfterTimeout]) -> terminate(Reason, - Name, - [], - Mod, - StateName, - StateData, - Debug). + Name, + [], + Mod, + StateName, + StateData, + Debug). system_code_change([Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout], - _Module, OldVsn, Extra) -> + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout], + _Module, OldVsn, Extra) -> case catch Mod:code_change(OldVsn, - StateName, - StateData, - Extra) - of - {ok, NewStateName, NewStateData} -> - {ok, - [Name, - NewStateName, - NewStateData, - Mod, - Time, - HibernateAfterTimeout]}; - Else -> Else + StateName, + StateData, + Extra) + of + {ok, NewStateName, NewStateData} -> + {ok, + [Name, + NewStateName, + NewStateData, + Mod, + Time, + HibernateAfterTimeout]}; + Else -> Else end. system_get_state([_Name, - StateName, - StateData, - _Mod, - _Time, - _HibernateAfterTimeout]) -> + StateName, + StateData, + _Mod, + _Time, + _HibernateAfterTimeout]) -> {ok, {StateName, StateData}}. system_replace_state(StateFun, - [Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout]) -> + [Name, + StateName, + StateData, + Mod, + Time, + HibernateAfterTimeout]) -> Result = {NStateName, NStateData} = StateFun({StateName, - StateData}), + StateData}), {ok, Result, [Name, @@ -660,242 +660,242 @@ system_replace_state(StateFun, %%----------------------------------------------------------------- print_event(Dev, {in, Msg}, {Name, StateName}) -> case Msg of - {'$gen_event', Event} -> - io:format(Dev, - "*DBG* ~tp got event ~tp in state ~tw~n", - [Name, Event, StateName]); - {'$gen_all_state_event', Event} -> - io:format(Dev, - "*DBG* ~tp got all_state_event ~tp in " - "state ~tw~n", - [Name, Event, StateName]); - {timeout, Ref, {'$gen_timer', Message}} -> - io:format(Dev, - "*DBG* ~tp got timer ~tp in state ~tw~n", - [Name, {timeout, Ref, Message}, StateName]); - {timeout, _Ref, {'$gen_event', Event}} -> - io:format(Dev, - "*DBG* ~tp got timer ~tp in state ~tw~n", - [Name, Event, StateName]); - _ -> - io:format(Dev, - "*DBG* ~tp got ~tp in state ~tw~n", - [Name, Msg, StateName]) + {'$gen_event', Event} -> + io:format(Dev, + "*DBG* ~tp got event ~tp in state ~tw~n", + [Name, Event, StateName]); + {'$gen_all_state_event', Event} -> + io:format(Dev, + "*DBG* ~tp got all_state_event ~tp in " + "state ~tw~n", + [Name, Event, StateName]); + {timeout, Ref, {'$gen_timer', Message}} -> + io:format(Dev, + "*DBG* ~tp got timer ~tp in state ~tw~n", + [Name, {timeout, Ref, Message}, StateName]); + {timeout, _Ref, {'$gen_event', Event}} -> + io:format(Dev, + "*DBG* ~tp got timer ~tp in state ~tw~n", + [Name, Event, StateName]); + _ -> + io:format(Dev, + "*DBG* ~tp got ~tp in state ~tw~n", + [Name, Msg, StateName]) end; print_event(Dev, {out, Msg, To, StateName}, Name) -> io:format(Dev, - "*DBG* ~tp sent ~tp to ~tw~n 
and " - "switched to state ~tw~n", - [Name, Msg, To, StateName]); + "*DBG* ~tp sent ~tp to ~tw~n and " + "switched to state ~tw~n", + [Name, Msg, To, StateName]); print_event(Dev, return, {Name, StateName}) -> io:format(Dev, - "*DBG* ~tp switched to state ~tw~n", - [Name, StateName]). + "*DBG* ~tp switched to state ~tw~n", + [Name, StateName]). handle_msg(Msg, Parent, Name, StateName, StateData, Mod, - _Time, HibernateAfterTimeout) -> + _Time, HibernateAfterTimeout) -> %No debug here From = from(Msg), case catch dispatch(Msg, Mod, StateName, StateData) of - {next_state, NStateName, NStateData} -> - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - []); - {next_state, NStateName, NStateData, Time1} -> - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - []); - {reply, Reply, NStateName, NStateData} - when From =/= undefined -> - reply(From, Reply), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - []); - {reply, Reply, NStateName, NStateData, Time1} - when From =/= undefined -> - reply(From, Reply), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - []); - {stop, Reason, NStateData} -> - terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - []); - {stop, Reason, Reply, NStateData} - when From =/= undefined -> - {'EXIT', R} = (catch terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - [])), - reply(From, Reply), - exit(R); - {'EXIT', - {undef, [{Mod, handle_info, [_, _, _], _} | _]}} -> - error_logger:warning_msg("** Undefined handle_info in ~p~n** Unhandled " - "message: ~tp~n", - [Mod, Msg]), - loop(Parent, - Name, - StateName, - StateData, - Mod, - infinity, - HibernateAfterTimeout, - []); - {'EXIT', What} -> - terminate(What, - Name, - Msg, - Mod, - StateName, - StateData, - []); - Reply -> - terminate({bad_return_value, Reply}, - Name, - Msg, - Mod, - StateName, - StateData, - []) + {next_state, NStateName, NStateData} -> + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + []); + {next_state, NStateName, NStateData, Time1} -> + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + []); + {reply, Reply, NStateName, NStateData} + when From =/= undefined -> + reply(From, Reply), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + []); + {reply, Reply, NStateName, NStateData, Time1} + when From =/= undefined -> + reply(From, Reply), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + []); + {stop, Reason, NStateData} -> + terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + []); + {stop, Reason, Reply, NStateData} + when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + [])), + reply(From, Reply), + exit(R); + {'EXIT', + {undef, [{Mod, handle_info, [_, _, _], _} | _]}} -> + error_logger:warning_msg("** Undefined handle_info in ~p~n** Unhandled " + "message: ~tp~n", + [Mod, Msg]), + loop(Parent, + Name, + StateName, + StateData, + Mod, + infinity, + HibernateAfterTimeout, + []); + {'EXIT', What} -> + terminate(What, + Name, + Msg, + Mod, + StateName, + StateData, + []); + Reply -> + terminate({bad_return_value, Reply}, + Name, + Msg, + Mod, + StateName, + StateData, + []) end. 
handle_msg(Msg, Parent, Name, StateName, StateData, Mod, - _Time, HibernateAfterTimeout, Debug) -> + _Time, HibernateAfterTimeout, Debug) -> From = from(Msg), case catch dispatch(Msg, Mod, StateName, StateData) of - {next_state, NStateName, NStateData} -> - Debug1 = sys:handle_debug(Debug, - fun print_event/3, - {Name, NStateName}, - return), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug1); - {next_state, NStateName, NStateData, Time1} -> - Debug1 = sys:handle_debug(Debug, - fun print_event/3, - {Name, NStateName}, - return), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - Debug1); - {reply, Reply, NStateName, NStateData} - when From =/= undefined -> - Debug1 = reply(Name, From, Reply, Debug, NStateName), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug1); - {reply, Reply, NStateName, NStateData, Time1} - when From =/= undefined -> - Debug1 = reply(Name, From, Reply, Debug, NStateName), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - Debug1); - {stop, Reason, NStateData} -> - terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - Debug); - {stop, Reason, Reply, NStateData} - when From =/= undefined -> - {'EXIT', R} = (catch terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - Debug)), - _ = reply(Name, From, Reply, Debug, StateName), - exit(R); - {'EXIT', What} -> - terminate(What, - Name, - Msg, - Mod, - StateName, - StateData, - Debug); - Reply -> - terminate({bad_return_value, Reply}, - Name, - Msg, - Mod, - StateName, - StateData, - Debug) + {next_state, NStateName, NStateData} -> + Debug1 = sys:handle_debug(Debug, + fun print_event/3, + {Name, NStateName}, + return), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug1); + {next_state, NStateName, NStateData, Time1} -> + Debug1 = sys:handle_debug(Debug, + fun print_event/3, + {Name, NStateName}, + return), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + Debug1); + {reply, Reply, NStateName, NStateData} + when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + infinity, + HibernateAfterTimeout, + Debug1); + {reply, Reply, NStateName, NStateData, Time1} + when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, + Name, + NStateName, + NStateData, + Mod, + Time1, + HibernateAfterTimeout, + Debug1); + {stop, Reason, NStateData} -> + terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + Debug); + {stop, Reason, Reply, NStateData} + when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, + Name, + Msg, + Mod, + StateName, + NStateData, + Debug)), + _ = reply(Name, From, Reply, Debug, StateName), + exit(R); + {'EXIT', What} -> + terminate(What, + Name, + Msg, + Mod, + StateName, + StateData, + Debug); + Reply -> + terminate({bad_return_value, Reply}, + Name, + Msg, + Mod, + StateName, + StateData, + Debug) end. 
dispatch({'$gen_event', Event}, Mod, StateName, - StateData) -> + StateData) -> Mod:StateName(Event, StateData); dispatch({'$gen_all_state_event', Event}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:handle_event(Event, StateName, StateData); dispatch({'$gen_sync_event', From, Event}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:StateName(Event, From, StateData); dispatch({'$gen_sync_all_state_event', From, Event}, - Mod, StateName, StateData) -> + Mod, StateName, StateData) -> Mod:handle_sync_event(Event, - From, - StateName, - StateData); + From, + StateName, + StateData); dispatch({timeout, Ref, {'$gen_timer', Msg}}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:StateName({timeout, Ref, Msg}, StateData); dispatch({timeout, _Ref, {'$gen_event', Event}}, Mod, - StateName, StateData) -> + StateName, StateData) -> Mod:StateName(Event, StateData); dispatch(Info, Mod, StateName, StateData) -> Mod:handle_info(Info, StateName, StateData). @@ -911,82 +911,82 @@ reply({To, Tag}, Reply) -> catch To ! {Tag, Reply}. reply(Name, {To, Tag}, Reply, Debug, StateName) -> reply({To, Tag}, Reply), sys:handle_debug(Debug, - fun print_event/3, - Name, - {out, Reply, To, StateName}). + fun print_event/3, + Name, + {out, Reply, To, StateName}). %%% --------------------------------------------------- %%% Terminate the server. %%% --------------------------------------------------- -spec terminate(term(), _, _, atom(), _, _, - _) -> no_return(). + _) -> no_return(). terminate(Reason, Name, Msg, Mod, StateName, StateData, - Debug) -> + Debug) -> case erlang:function_exported(Mod, terminate, 3) of - true -> - case catch Mod:terminate(Reason, StateName, StateData) - of - {'EXIT', R} -> - FmtStateData = format_status(terminate, - Mod, - get(), - StateData), - error_info(R, - Name, - Msg, - StateName, - FmtStateData, - Debug), - exit(R); - _ -> ok - end; - false -> ok + true -> + case catch Mod:terminate(Reason, StateName, StateData) + of + {'EXIT', R} -> + FmtStateData = format_status(terminate, + Mod, + get(), + StateData), + error_info(R, + Name, + Msg, + StateName, + FmtStateData, + Debug), + exit(R); + _ -> ok + end; + false -> ok end, case Reason of - normal -> exit(normal); - shutdown -> exit(shutdown); - {shutdown, _} = Shutdown -> exit(Shutdown); - _ -> - FmtStateData1 = format_status(terminate, - Mod, - get(), - StateData), - error_info(Reason, - Name, - Msg, - StateName, - FmtStateData1, - Debug), - exit(Reason) + normal -> exit(normal); + shutdown -> exit(shutdown); + {shutdown, _} = Shutdown -> exit(Shutdown); + _ -> + FmtStateData1 = format_status(terminate, + Mod, + get(), + StateData), + error_info(Reason, + Name, + Msg, + StateName, + FmtStateData1, + Debug), + exit(Reason) end. 
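dispatch/4 above is the whole callback contract in one place: '$gen_event' goes to Mod:StateName/2, '$gen_sync_event' to Mod:StateName/3, the all-state variants to handle_event/3 and handle_sync_event/4, timer messages back into the state function, and everything else to handle_info/3; terminate/7 likewise prefers the callback's optional terminate/3. A minimal callback module satisfying that contract (a sketch, not part of the patch):

-module(echo_fsm).
-export([init/1, idle/2, idle/3, handle_event/3,
         handle_sync_event/4, handle_info/3, format_status/2]).

init([]) -> {ok, idle, 0}.

idle(bump, N) -> {next_state, idle, N + 1}.            %% '$gen_event'

idle(peek, _From, N) -> {reply, N, idle, N}.           %% '$gen_sync_event'

handle_event(reset, StateName, _N) ->                  %% '$gen_all_state_event'
    {next_state, StateName, 0}.

handle_sync_event(peek, _From, StateName, N) ->        %% sync all-state event
    {reply, N, StateName, N}.

handle_info(_Info, StateName, N) ->                    %% bare messages
    {next_state, StateName, N}.

format_status(_Opt, [_PDict, N]) ->                    %% optional, see below
    [{data, [{"Counter", N}]}].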
error_info(Reason, Name, Msg, StateName, StateData, - Debug) -> + Debug) -> Reason1 = case Reason of - {undef, [{M, F, A, L} | MFAs]} -> - case code:is_loaded(M) of - false -> - {'module could not be loaded', - [{M, F, A, L} | MFAs]}; - _ -> - case erlang:function_exported(M, F, length(A)) of - true -> Reason; - false -> - {'function not exported', - [{M, F, A, L} | MFAs]} - end - end; - _ -> Reason - end, + {undef, [{M, F, A, L} | MFAs]} -> + case code:is_loaded(M) of + false -> + {'module could not be loaded', + [{M, F, A, L} | MFAs]}; + _ -> + case erlang:function_exported(M, F, length(A)) of + true -> Reason; + false -> + {'function not exported', + [{M, F, A, L} | MFAs]} + end + end; + _ -> Reason + end, Str = "** State machine ~tp terminating \n" ++ - get_msg_str(Msg) ++ - "** When State == ~tp~n** Data " - "== ~tp~n** Reason for termination = " - "~n** ~tp~n", + get_msg_str(Msg) ++ + "** When State == ~tp~n** Data " + "== ~tp~n** Reason for termination = " + "~n** ~tp~n", format(Str, - [Name, get_msg(Msg), StateName, StateData, Reason1]), + [Name, get_msg(Msg), StateName, StateData, Reason1]), sys:print_log(Debug), ok. @@ -1032,17 +1032,17 @@ format_status(Opt, StatusData) -> Mod, _Time, _HibernateAfterTimeout]] = - StatusData, + StatusData, Header = - gen:format_status_header("Status for state machine", - Name), + gen:format_status_header("Status for state machine", + Name), Log = sys:get_log(Debug), Specfic = format_status(Opt, Mod, PDict, StateData), Specfic = case format_status(Opt, Mod, PDict, StateData) - of - S when is_list(S) -> S; - S -> [S] - end, + of + S when is_list(S) -> S; + S -> [S] + end, [{header, Header}, {data, [{"Status", SysState}, @@ -1064,18 +1064,18 @@ format_status(Opt, StatusData) -> Mod, _Time, _HibernateAfterTimeout]] = - StatusData, + StatusData, Header = - gen:format_status_header("Status for state machine", - Name), + gen:format_status_header("Status for state machine", + Name), %% Log = sys:get_log(Debug), Log = sys:get_debug(log, Debug, []), Specfic = format_status(Opt, Mod, PDict, StateData), Specfic = case format_status(Opt, Mod, PDict, StateData) - of - S when is_list(S) -> S; - S -> [S] - end, + of + S when is_list(S) -> S; + S -> [S] + end, [{header, Header}, {data, [{"Status", SysState}, @@ -1088,14 +1088,14 @@ format_status(Opt, StatusData) -> format_status(Opt, Mod, PDict, State) -> DefStatus = case Opt of - terminate -> State; - _ -> [{data, [{"StateData", State}]}] - end, + terminate -> State; + _ -> [{data, [{"StateData", State}]}] + end, case erlang:function_exported(Mod, format_status, 2) of - true -> - case catch Mod:format_status(Opt, [PDict, State]) of - {'EXIT', _} -> DefStatus; - Else -> Else - end; - _ -> DefStatus + true -> + case catch Mod:format_status(Opt, [PDict, State]) of + {'EXIT', _} -> DefStatus; + Else -> Else + end; + _ -> DefStatus end. diff --git a/src/gen_nb_server.erl b/src/gen_nb_server.erl index 90c884ba8..3fb24d90f 100644 --- a/src/gen_nb_server.erl +++ b/src/gen_nb_server.erl @@ -29,70 +29,70 @@ %% gen_server callbacks -export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -define(SERVER, ?MODULE). -record(state, {cb, sock, server_state}). -callback init(InitArgs :: list()) -> {ok, - State :: term()} | - {error, Reason :: term()}. + State :: term()} | + {error, Reason :: term()}. 
-callback handle_call(Msg :: term(), - From :: {pid(), term()}, State :: term()) -> {reply, - Reply :: - term(), - State :: - term()} | - {reply, - Reply :: - term(), - State :: - term(), - number() | - hibernate} | - {noreply, - State :: - term()} | - {noreply, - State :: - term(), - number() | - hibernate} | - {stop, - Reason :: - term(), - State :: - term()}. + From :: {pid(), term()}, State :: term()) -> {reply, + Reply :: + term(), + State :: + term()} | + {reply, + Reply :: + term(), + State :: + term(), + number() | + hibernate} | + {noreply, + State :: + term()} | + {noreply, + State :: + term(), + number() | + hibernate} | + {stop, + Reason :: + term(), + State :: + term()}. -callback handle_cast(Msg :: term(), - State :: term()) -> {noreply, State :: term()} | - {noreply, State :: term(), - number() | hibernate} | - {stop, Reason :: term(), - State :: term()}. + State :: term()) -> {noreply, State :: term()} | + {noreply, State :: term(), + number() | hibernate} | + {stop, Reason :: term(), + State :: term()}. -callback handle_info(Msg :: term(), - State :: term()) -> {noreply, State :: term()} | - {noreply, State :: term(), - number() | hibernate} | - {stop, Reason :: term(), - State :: term()}. + State :: term()) -> {noreply, State :: term()} | + {noreply, State :: term(), + number() | hibernate} | + {stop, Reason :: term(), + State :: term()}. -callback terminate(Reason :: term(), - State :: term()) -> ok. + State :: term()) -> ok. -callback sock_opts() -> [gen_tcp:listen_option()]. -callback new_connection(inet:socket(), - State :: term()) -> {ok, NewState :: term()} | - {stop, Reason :: term(), - NewState :: term()}. + State :: term()) -> {ok, NewState :: term()} | + {stop, Reason :: term(), + NewState :: term()}. %% @spec start_link(CallbackModule, IpAddr, Port, InitParams) -> Result %% CallbackModule = atom() @@ -103,114 +103,114 @@ %% @doc Start server listening on IpAddr:Port start_link(CallbackModule, IpAddr, Port, InitParams) -> gen_server:start_link(?MODULE, - [CallbackModule, IpAddr, Port, InitParams], - []). + [CallbackModule, IpAddr, Port, InitParams], + []). %% @hidden init([CallbackModule, IpAddr, Port, InitParams]) -> case CallbackModule:init(InitParams) of - {ok, ServerState} -> - case listen_on(CallbackModule, IpAddr, Port) of - {ok, Sock} -> - {ok, - #state{cb = CallbackModule, sock = Sock, - server_state = ServerState}}; - Error -> - CallbackModule:terminate(Error, ServerState), - Error - end; - Err -> Err + {ok, ServerState} -> + case listen_on(CallbackModule, IpAddr, Port) of + {ok, Sock} -> + {ok, + #state{cb = CallbackModule, sock = Sock, + server_state = ServerState}}; + Error -> + CallbackModule:terminate(Error, ServerState), + Error + end; + Err -> Err end. 
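Together these callbacks define a small non-blocking TCP acceptor behaviour: sock_opts/0 supplies the listen options, new_connection/2 runs once per accepted socket, and the gen_server-shaped callbacks carry user state between calls. A sketch of a trivial line-echo implementation; the module name and connection handling are illustrative, and option inheritance on accepted sockets is assumed:

-module(echo_server).
-export([init/1, sock_opts/0, new_connection/2, handle_call/3,
         handle_cast/2, handle_info/2, terminate/2]).

init([]) -> {ok, #{conns => 0}}.

sock_opts() -> [binary, {packet, line}, {reuseaddr, true}].

new_connection(Sock, State = #{conns := N}) ->
    %% Hand each accepted socket to a dedicated process.
    Pid = spawn(fun () -> echo_loop(Sock) end),
    ok = gen_tcp:controlling_process(Sock, Pid),
    ok = inet:setopts(Sock, [{active, once}]),
    {ok, State#{conns := N + 1}}.

handle_call(_Msg, _From, State) -> {reply, ok, State}.
handle_cast(_Msg, State) -> {noreply, State}.
handle_info(_Msg, State) -> {noreply, State}.
terminate(_Reason, _State) -> ok.

echo_loop(Sock) ->
    receive
        {tcp, Sock, Data} ->
            ok = gen_tcp:send(Sock, Data),
            ok = inet:setopts(Sock, [{active, once}]),
            echo_loop(Sock);
        {tcp_closed, Sock} -> ok
    end.

It would be started through the start_link/4 shown below, e.g. gen_nb_server:start_link(echo_server, "127.0.0.1", 5555, []); listen_on/3 then posts a single asynchronous accept, and the {inet_async, ...} clause re-arms it after every accepted connection so exactly one accept is outstanding at a time.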
%% @hidden handle_call(Request, From, - #state{cb = Callback, server_state = ServerState} = - State) -> + #state{cb = Callback, server_state = ServerState} = + State) -> case Callback:handle_call(Request, From, ServerState) of - {reply, Reply, NewServerState} -> - {reply, - Reply, - State#state{server_state = NewServerState}}; - {reply, Reply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {reply, - Reply, - State#state{server_state = NewServerState}, - Arg}; - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, - State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}}; - {stop, Reason, Reply, NewServerState} -> - {stop, - Reason, - Reply, - State#state{server_state = NewServerState}} + {reply, Reply, NewServerState} -> + {reply, + Reply, + State#state{server_state = NewServerState}}; + {reply, Reply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {reply, + Reply, + State#state{server_state = NewServerState}, + Arg}; + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, + State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}}; + {stop, Reason, Reply, NewServerState} -> + {stop, + Reason, + Reply, + State#state{server_state = NewServerState}} end. %% @hidden handle_cast(Msg, - #state{cb = Callback, server_state = ServerState} = - State) -> + #state{cb = Callback, server_state = ServerState} = + State) -> case Callback:handle_cast(Msg, ServerState) of - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, - State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}} + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, + State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}} end. 
%% @hidden handle_info({inet_async, - ListSock, - _Ref, - {ok, CliSocket}}, - #state{cb = Callback, server_state = ServerState} = - State) -> + ListSock, + _Ref, + {ok, CliSocket}}, + #state{cb = Callback, server_state = ServerState} = + State) -> inet_db:register_socket(CliSocket, inet_tcp), case Callback:new_connection(CliSocket, ServerState) of - {ok, NewServerState} -> - {ok, _} = prim_inet:async_accept(ListSock, -1), - {noreply, State#state{server_state = NewServerState}}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}} + {ok, NewServerState} -> + {ok, _} = prim_inet:async_accept(ListSock, -1), + {noreply, State#state{server_state = NewServerState}}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}} end; handle_info(Info, - #state{cb = Callback, server_state = ServerState} = - State) -> + #state{cb = Callback, server_state = ServerState} = + State) -> case Callback:handle_info(Info, ServerState) of - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, - State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}} + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, + State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, + Reason, + State#state{server_state = NewServerState}} end. %% @hidden terminate(Reason, - #state{cb = Callback, sock = Sock, - server_state = ServerState}) -> + #state{cb = Callback, sock = Sock, + server_state = ServerState}) -> gen_tcp:close(Sock), Callback:terminate(Reason, ServerState), ok. @@ -228,21 +228,21 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% Result = {ok, port()} | {error, any()} listen_on(CallbackModule, IpAddr, Port) when is_tuple(IpAddr) andalso - (8 =:= size(IpAddr) orelse 4 =:= size(IpAddr)) -> + (8 =:= size(IpAddr) orelse 4 =:= size(IpAddr)) -> SockOpts = [{ip, IpAddr} | CallbackModule:sock_opts()], case gen_tcp:listen(Port, SockOpts) of - {ok, LSock} -> - {ok, _Ref} = prim_inet:async_accept(LSock, -1), - {ok, LSock}; - Err -> Err + {ok, LSock} -> + {ok, _Ref} = prim_inet:async_accept(LSock, -1), + {ok, LSock}; + Err -> Err end; listen_on(CallbackModule, IpAddrStr, Port) -> case inet_parse:address(IpAddrStr) of - {ok, IpAddr} -> listen_on(CallbackModule, IpAddr, Port); - Err -> - logger:critical("Cannot start listener for ~p\n " - " on invalid address " - "~p:~p", - [CallbackModule, IpAddrStr, Port]), - Err + {ok, IpAddr} -> listen_on(CallbackModule, IpAddr, Port); + Err -> + logger:critical("Cannot start listener for ~p\n " + " on invalid address " + "~p:~p", + [CallbackModule, IpAddrStr, Port]), + Err end. diff --git a/src/riak_core.erl b/src/riak_core.erl index fd3670f2b..e3a303bed 100644 --- a/src/riak_core.erl +++ b/src/riak_core.erl @@ -22,26 +22,26 @@ -module(riak_core). -export([stop/0, - stop/1, - join/1, - join/4, - staged_join/1, - remove/1, - down/1, - leave/0, - remove_from_cluster/1]). + stop/1, + join/1, + join/4, + staged_join/1, + remove/1, + down/1, + leave/0, + remove_from_cluster/1]). -export([vnode_modules/0, health_check/1]). -export([register/1, - register/2, - bucket_fixups/0, - bucket_validators/0]). 
+ register/2, + bucket_fixups/0, + bucket_validators/0]). -export([stat_mods/0, stat_prefix/0]). -export([add_guarded_event_handler/3, - add_guarded_event_handler/4]). + add_guarded_event_handler/4]). -export([delete_guarded_event_handler/3]). @@ -95,24 +95,24 @@ join(_, Node, Auto) -> join(node(), Node, false, Auto). join(_, Node, Rejoin, Auto) -> case net_adm:ping(Node) of - pang -> {error, not_reachable}; - pong -> standard_join(Node, Rejoin, Auto) + pang -> {error, not_reachable}; + pong -> standard_join(Node, Rejoin, Auto) end. get_other_ring(Node) -> riak_core_util:safe_rpc(Node, - riak_core_ring_manager, - get_raw_ring, - []). + riak_core_ring_manager, + get_raw_ring, + []). standard_join(Node, Rejoin, Auto) when is_atom(Node) -> case net_adm:ping(Node) of - pong -> - case get_other_ring(Node) of - {ok, Ring} -> standard_join(Node, Ring, Rejoin, Auto); - _ -> {error, unable_to_get_join_ring} - end; - pang -> {error, not_reachable} + pong -> + case get_other_ring(Node) of + {ok, Ring} -> standard_join(Node, Ring, Rejoin, Auto); + _ -> {error, unable_to_get_join_ring} + end; + pang -> {error, not_reachable} end. %% `init:get_status/0' will return a 2-tuple reflecting the init @@ -127,106 +127,106 @@ standard_join(Node, Ring, Rejoin, Auto) -> {ok, MyRing} = riak_core_ring_manager:get_raw_ring(), InitComplete = init_complete(init:get_status()), SameSize = riak_core_ring:num_partitions(MyRing) =:= - riak_core_ring:num_partitions(Ring), + riak_core_ring:num_partitions(Ring), Singleton = [node()] =:= - riak_core_ring:all_members(MyRing), + riak_core_ring:all_members(MyRing), case {InitComplete, Rejoin or Singleton, SameSize} of - {false, _, _} -> {error, node_still_starting}; - {_, false, _} -> {error, not_single_node}; - {_, _, false} -> {error, different_ring_sizes}; - _ -> - Ring2 = riak_core_ring:add_member(node(), Ring, node()), - Ring3 = riak_core_ring:set_owner(Ring2, node()), - Ring4 = riak_core_ring:update_member_meta(node(), - Ring3, - node(), - gossip_vsn, - 2), - Ring5 = Ring4, - Ring6 = maybe_auto_join(Auto, node(), Ring5), - riak_core_ring_manager:set_my_ring(Ring6), - riak_core_gossip:send_ring(Node, node()) + {false, _, _} -> {error, node_still_starting}; + {_, false, _} -> {error, not_single_node}; + {_, _, false} -> {error, different_ring_sizes}; + _ -> + Ring2 = riak_core_ring:add_member(node(), Ring, node()), + Ring3 = riak_core_ring:set_owner(Ring2, node()), + Ring4 = riak_core_ring:update_member_meta(node(), + Ring3, + node(), + gossip_vsn, + 2), + Ring5 = Ring4, + Ring6 = maybe_auto_join(Auto, node(), Ring5), + riak_core_ring_manager:set_my_ring(Ring6), + riak_core_gossip:send_ring(Node, node()) end. maybe_auto_join(false, _Node, Ring) -> Ring; maybe_auto_join(true, Node, Ring) -> riak_core_ring:update_member_meta(Node, - Ring, - Node, - '$autojoin', - true). + Ring, + Node, + '$autojoin', + true). remove(Node) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - _ -> standard_remove(Node) + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + _ -> standard_remove(Node) end. 
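The join path above is deliberately conservative: the target must answer net_adm:ping/1, its raw ring must be fetchable over RPC, and standard_join/4 refuses unless this node has finished init, is (re)joining from a singleton ring, and both rings agree on partition count. A sketch of handling the tagged errors from the shell of a fresh node (node name and result atoms illustrative):

case riak_core:join('dev1@127.0.0.1') of
    {error, not_reachable}        -> retry_later;       %% target down or unconnected
    {error, node_still_starting}  -> retry_later;       %% local init incomplete
    {error, not_single_node}      -> already_in_cluster;
    {error, different_ring_sizes} -> check_ring_size;   %% ring_creation_size mismatch
    {error, Other}                -> {give_up, Other};
    _Ok                           -> joined
end.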
standard_remove(Node) -> riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:remove_member(node(), - Ring2, - Node), - Ring4 = - riak_core_ring:ring_changed(node(), - Ring3), - {new_ring, Ring4} - end, - []), + Ring3 = + riak_core_ring:remove_member(node(), + Ring2, + Node), + Ring4 = + riak_core_ring:ring_changed(node(), + Ring3), + {new_ring, Ring4} + end, + []), ok. down(Node) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case net_adm:ping(Node) of - pong -> {error, is_up}; - pang -> - case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - _ -> - riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:down_member(node(), - Ring2, - Node), - Ring4 = - riak_core_ring:ring_changed(node(), - Ring3), - {new_ring, Ring4} - end, - []), - ok - end + pong -> {error, is_up}; + pang -> + case {riak_core_ring:all_members(Ring), + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + _ -> + riak_core_ring_manager:ring_trans(fun (Ring2, _) -> + Ring3 = + riak_core_ring:down_member(node(), + Ring2, + Node), + Ring4 = + riak_core_ring:ring_changed(node(), + Ring3), + {new_ring, Ring4} + end, + []), + ok + end end. leave() -> Node = node(), {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - {_, valid} -> standard_leave(Node); - {_, _} -> {error, already_leaving} + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + {_, valid} -> standard_leave(Node); + {_, _} -> {error, already_leaving} end. standard_leave(Node) -> riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:leave_member(Node, - Ring2, - Node), - {new_ring, Ring3} - end, - []), + Ring3 = + riak_core_ring:leave_member(Node, + Ring2, + Node), + {new_ring, Ring3} + end, + []), ok. %% @spec remove_from_cluster(ExitingNode :: atom()) -> term() @@ -238,46 +238,46 @@ remove_from_cluster(ExitingNode) vnode_modules() -> case application:get_env(riak_core, vnode_modules) of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. bucket_fixups() -> case application:get_env(riak_core, bucket_fixups) of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. bucket_validators() -> case application:get_env(riak_core, bucket_validators) - of - undefined -> []; - {ok, Mods} -> Mods + of + undefined -> []; + {ok, Mods} -> Mods end. stat_mods() -> case application:get_env(riak_core, stat_mods) of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. health_check(App) -> case application:get_env(riak_core, health_checks) of - undefined -> undefined; - {ok, Mods} -> - case lists:keyfind(App, 1, Mods) of - false -> undefined; - {App, MFA} -> MFA - end + undefined -> undefined; + {ok, Mods} -> + case lists:keyfind(App, 1, Mods) of + false -> undefined; + {App, MFA} -> MFA + end end. 
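remove/1, down/1 and leave/0 all follow one pattern: mutate the ring inside riak_core_ring_manager:ring_trans/2 and return {new_ring, Ring}, so the ring manager applies and gossips the change atomically. The same shape works for any ring edit; a sketch using update_member_meta/5 (seen in standard_join above) with a hypothetical metadata key:

riak_core_ring_manager:ring_trans(
    fun (Ring, _Arg) ->
            Ring2 = riak_core_ring:update_member_meta(node(), Ring, node(),
                                                      my_flag, true),
            {new_ring, Ring2}
    end,
    []).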
%% Get the application name if not supplied, first by get_application %% then by searching by module name get_app(undefined, Module) -> {ok, App} = case application:get_application(self()) of - {ok, AppName} -> {ok, AppName}; - undefined -> app_for_module(Module) - end, + {ok, AppName} -> {ok, AppName}; + undefined -> app_for_module(Module) + end, App; get_app(App, _Module) -> App. @@ -290,37 +290,37 @@ register(_App, []) -> %% to ensure the new fixups are run against %% the ring. {ok, _R} = riak_core_ring_manager:ring_trans(fun (R, - _A) -> - {new_ring, R} - end, - undefined), + _A) -> + {new_ring, R} + end, + undefined), riak_core_ring_events:force_sync_update(), ok; register(App, [{bucket_fixup, FixupMod} | T]) -> register_mod(get_app(App, FixupMod), - FixupMod, - bucket_fixups), + FixupMod, + bucket_fixups), register(App, T); register(App, [{repl_helper, FixupMod} | T]) -> register_mod(get_app(App, FixupMod), - FixupMod, - repl_helper), + FixupMod, + repl_helper), register(App, T); register(App, [{vnode_module, VNodeMod} | T]) -> register_mod(get_app(App, VNodeMod), - VNodeMod, - vnode_modules), + VNodeMod, + vnode_modules), register(App, T); register(App, [{health_check, HealthMFA} | T]) -> register_metadata(get_app(App, HealthMFA), - HealthMFA, - health_checks), + HealthMFA, + health_checks), register(App, T); register(App, - [{bucket_validator, ValidationMod} | T]) -> + [{bucket_validator, ValidationMod} | T]) -> register_mod(get_app(App, ValidationMod), - ValidationMod, - bucket_validators), + ValidationMod, + bucket_validators), register(App, T); register(App, [{stat_mod, StatMod} | T]) -> register_mod(App, StatMod, stat_mods), @@ -334,42 +334,42 @@ register(App, [{auth_mod, {AuthType, AuthMod}} | T]) -> register_mod(App, Module, Type) when is_atom(Type) -> case Type of - vnode_modules -> - riak_core_vnode_proxy_sup:start_proxies(Module); - stat_mods -> - %% STATS - %% riak_core_stats_sup:start_server(Module); - logger:warning("Metric collection disabled"), - ok; - _ -> ok + vnode_modules -> + riak_core_vnode_proxy_sup:start_proxies(Module); + stat_mods -> + %% STATS + %% riak_core_stats_sup:start_server(Module); + logger:warning("Metric collection disabled"), + ok; + _ -> ok end, case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{App, Module}]); - {ok, Mods} -> - application:set_env(riak_core, - Type, - lists:usort([{App, Module} | Mods])) + undefined -> + application:set_env(riak_core, Type, [{App, Module}]); + {ok, Mods} -> + application:set_env(riak_core, + Type, + lists:usort([{App, Module} | Mods])) end. register_metadata(App, Value, Type) -> case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{App, Value}]); - {ok, Values} -> - application:set_env(riak_core, - Type, - lists:usort([{App, Value} | Values])) + undefined -> + application:set_env(riak_core, Type, [{App, Value}]); + {ok, Values} -> + application:set_env(riak_core, + Type, + lists:usort([{App, Value} | Values])) end. register_proplist({Key, Value}, Type) -> case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{Key, Value}]); - {ok, Values} -> - application:set_env(riak_core, - Type, - lists:keystore(Key, 1, Values, {Key, Value})) + undefined -> + application:set_env(riak_core, Type, [{Key, Value}]); + {ok, Values} -> + application:set_env(riak_core, + Type, + lists:keystore(Key, 1, Values, {Key, Value})) end. 
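register/1,2 is how an application plugs into riak_core: each {vnode_module, Mod} entry lands in riak_core's application env and, per register_mod/3 above, starts the vnode proxies; pairing it with a guarded ring-event handler (see the functions below) is the usual start-up shape. A sketch for a hypothetical application myapp, assuming the usual riak_core_node_watcher service registration:

%% In myapp_app:start/2, once the top supervisor is running:
ok = riak_core:register(myapp, [{vnode_module, myapp_vnode}]),
riak_core_node_watcher:service_up(myapp, self()),
riak_core:add_guarded_event_handler(riak_core_ring_events,
                                    myapp_ring_handler, []).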
%% @spec add_guarded_event_handler(HandlerMod, Handler, Args) -> AddResult @@ -379,9 +379,9 @@ register_proplist({Key, Value}, Type) -> %% AddResult = ok | {error, Reason::term()} add_guarded_event_handler(HandlerMod, Handler, Args) -> add_guarded_event_handler(HandlerMod, - Handler, - Args, - undefined). + Handler, + Args, + undefined). %% @spec add_guarded_event_handler(HandlerMod, Handler, Args, ExitFun) -> AddResult %% HandlerMod = module() @@ -396,11 +396,11 @@ add_guarded_event_handler(HandlerMod, Handler, Args) -> %% init() callback and exits when the handler crashes so it can be %% restarted by the supervisor. add_guarded_event_handler(HandlerMod, Handler, Args, - ExitFun) -> + ExitFun) -> riak_core_eventhandler_sup:start_guarded_handler(HandlerMod, - Handler, - Args, - ExitFun). + Handler, + Args, + ExitFun). %% @spec delete_guarded_event_handler(HandlerMod, Handler, Args) -> Result %% HandlerMod = module() @@ -419,10 +419,10 @@ add_guarded_event_handler(HandlerMod, Handler, Args, %% {error,module_not_found}. If the callback function fails with Reason, %% the function returns {'EXIT',Reason}. delete_guarded_event_handler(HandlerMod, Handler, - Args) -> + Args) -> riak_core_eventhandler_sup:stop_guarded_handler(HandlerMod, - Handler, - Args). + Handler, + Args). app_for_module(Mod) -> app_for_module(application:which_applications(), Mod). @@ -431,8 +431,8 @@ app_for_module([], _Mod) -> {ok, undefined}; app_for_module([{App, _, _} | T], Mod) -> {ok, Mods} = application:get_key(App, modules), case lists:member(Mod, Mods) of - true -> {ok, App}; - false -> app_for_module(T, Mod) + true -> {ok, App}; + false -> app_for_module(T, Mod) end. wait_for_application(App) -> @@ -440,29 +440,29 @@ wait_for_application(App) -> wait_for_application(App, Elapsed) -> case lists:keymember(App, - 1, - application:which_applications()) - of - true when Elapsed == 0 -> ok; - true when Elapsed > 0 -> - logger:info("Wait complete for application ~p (~p " - "seconds)", - [App, Elapsed div 1000]), - ok; - false -> - %% Possibly print a notice. - ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, - case ShouldPrint of - true -> - logger:info("Waiting for application ~p to start\n " - " " - "(~p seconds).", - [App, Elapsed div 1000]); - false -> skip - end, - timer:sleep(?WAIT_POLL_INTERVAL), - wait_for_application(App, - Elapsed + (?WAIT_POLL_INTERVAL)) + 1, + application:which_applications()) + of + true when Elapsed == 0 -> ok; + true when Elapsed > 0 -> + logger:info("Wait complete for application ~p (~p " + "seconds)", + [App, Elapsed div 1000]), + ok; + false -> + %% Possibly print a notice. + ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, + case ShouldPrint of + true -> + logger:info("Waiting for application ~p to start\n " + " " + "(~p seconds).", + [App, Elapsed div 1000]); + false -> skip + end, + timer:sleep(?WAIT_POLL_INTERVAL), + wait_for_application(App, + Elapsed + (?WAIT_POLL_INTERVAL)) end. wait_for_service(Service) -> @@ -470,27 +470,27 @@ wait_for_service(Service) -> wait_for_service(Service, Elapsed) -> case lists:member(Service, - riak_core_node_watcher:services(node())) - of - true when Elapsed == 0 -> ok; - true when Elapsed > 0 -> - logger:info("Wait complete for service ~p (~p seconds)", - [Service, Elapsed div 1000]), - ok; - false -> - %% Possibly print a notice. 
- ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, - case ShouldPrint of - true -> - logger:info("Waiting for service ~p to start\n " - " (~p " - "seconds)", - [Service, Elapsed div 1000]); - false -> skip - end, - timer:sleep(?WAIT_POLL_INTERVAL), - wait_for_service(Service, - Elapsed + (?WAIT_POLL_INTERVAL)) + riak_core_node_watcher:services(node())) + of + true when Elapsed == 0 -> ok; + true when Elapsed > 0 -> + logger:info("Wait complete for service ~p (~p seconds)", + [Service, Elapsed div 1000]), + ok; + false -> + %% Possibly print a notice. + ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, + case ShouldPrint of + true -> + logger:info("Waiting for service ~p to start\n " + " (~p " + "seconds)", + [Service, Elapsed div 1000]); + false -> skip + end, + timer:sleep(?WAIT_POLL_INTERVAL), + wait_for_service(Service, + Elapsed + (?WAIT_POLL_INTERVAL)) end. stat_prefix() -> diff --git a/src/riak_core_apl.erl b/src/riak_core_apl.erl index 3366fd93d..d693e7db5 100644 --- a/src/riak_core_apl.erl +++ b/src/riak_core_apl.erl @@ -25,23 +25,23 @@ -module(riak_core_apl). -export([active_owners/1, - active_owners/2, - get_apl/3, - get_apl/4, - get_apl_ann/2, - get_apl_ann/3, - get_apl_ann/4, - get_apl_ann_with_pnum/1, - get_primary_apl/3, - get_primary_apl/4, - get_primary_apl_chbin/4, - first_up/2, - offline_owners/1, - offline_owners/2]). + active_owners/2, + get_apl/3, + get_apl/4, + get_apl_ann/2, + get_apl_ann/3, + get_apl_ann/4, + get_apl_ann_with_pnum/1, + get_primary_apl/3, + get_primary_apl/4, + get_primary_apl_chbin/4, + first_up/2, + offline_owners/1, + offline_owners/2]). -export_type([preflist/0, - preflist_ann/0, - preflist_with_pnum_ann/0]). + preflist_ann/0, + preflist_with_pnum_ann/0]). -ifdef(TEST). @@ -58,15 +58,15 @@ -type preflist() :: [{index(), node()}]. -type preflist_ann() :: [{{index(), node()}, - primary | fallback}]. + primary | fallback}]. %% @type preflist_with_pnum_ann(). %% Annotated preflist where the partition value is an id/number %% (0 to ring_size-1) instead of a hash. -type preflist_with_pnum_ann() :: [{{riak_core_ring:partition_id(), - node()}, - primary | fallback}]. + node()}, + primary | fallback}]. -type iterator() :: term(). @@ -82,7 +82,7 @@ active_owners(Service) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), active_owners(Ring, - riak_core_node_watcher:nodes(Service)). + riak_core_node_watcher:nodes(Service)). -spec active_owners(ring(), [node()]) -> preflist_ann(). @@ -98,29 +98,29 @@ active_owners(Ring, UpNodes) -> get_apl(DocIdx, N, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), get_apl_chbin(DocIdx, - N, - CHBin, - riak_core_node_watcher:nodes(Service)). + N, + CHBin, + riak_core_node_watcher:nodes(Service)). %% @doc Get the active preflist taking account of which nodes are up %% for a given chash/upnodes list. -spec get_apl_chbin(docidx(), n_val(), - chashbin:chashbin(), [node()]) -> preflist(). + chashbin:chashbin(), [node()]) -> preflist(). get_apl_chbin(DocIdx, N, CHBin, UpNodes) -> [{Partition, Node} || {{Partition, Node}, _Type} - <- get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes)]. + <- get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes)]. %% @doc Get the active preflist taking account of which nodes are up %% for a given ring/upnodes list. -spec get_apl(docidx(), n_val(), ring(), - [node()]) -> preflist(). + [node()]) -> preflist(). get_apl(DocIdx, N, Ring, UpNodes) -> [{Partition, Node} || {{Partition, Node}, _Type} - <- get_apl_ann(DocIdx, N, Ring, UpNodes)]. + <- get_apl_ann(DocIdx, N, Ring, UpNodes)]. 
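get_apl/3 answers the core routing question: given a hashed key, which N partition/node pairs should handle the request right now, with down nodes already replaced by fallbacks. Typical use, with bucket, key, service and vnode-master names illustrative:

DocIdx = riak_core_util:chash_key({<<"groceries">>, <<"mine">>}),
%% Preflist :: [{Partition, Node}]
Preflist = riak_core_apl:get_apl(DocIdx, 3, myapp_service),
[riak_core_vnode_master:command({Idx, Node}, ping, myapp_vnode_master)
 || {Idx, Node} <- Preflist].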
%% @doc Get the active preflist taking account of which nodes are up for a given %% chash/upnodes list and annotate each node with type of primary/fallback. @@ -132,7 +132,7 @@ get_apl_ann(DocIdx, N, UpNodes) -> %% for a given ring/upnodes list and annotate each node with type of %% primary/fallback. -spec get_apl_ann(binary(), n_val(), ring(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_apl_ann(DocIdx, N, Ring, UpNodes) -> UpNodes1 = UpNodes, @@ -144,7 +144,7 @@ get_apl_ann(DocIdx, N, Ring, UpNodes) -> %% @doc Get the active preflist for a given {bucket, key} and list of nodes %% and annotate each node with type of primary/fallback. -spec get_apl_ann(riak_core_bucket:bucket(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_apl_ann({Bucket, Key}, UpNodes) -> BucketProps = riak_core_bucket:get_bucket(Bucket), @@ -169,7 +169,7 @@ get_apl_ann_with_pnum(BKey) -> %% for a given chash/upnodes list and annotate each node with type of %% primary/fallback. -spec get_apl_ann_chbin(binary(), n_val(), chashbin(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes) -> UpNodes1 = UpNodes, @@ -180,18 +180,18 @@ get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes) -> %% @doc Same as get_apl, but returns only the primaries. -spec get_primary_apl(binary(), n_val(), - atom()) -> preflist_ann(). + atom()) -> preflist_ann(). get_primary_apl(DocIdx, N, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), get_primary_apl_chbin(DocIdx, - N, - CHBin, - riak_core_node_watcher:nodes(Service)). + N, + CHBin, + riak_core_node_watcher:nodes(Service)). %% @doc Same as get_apl, but returns only the primaries. -spec get_primary_apl_chbin(binary(), n_val(), - chashbin(), [node()]) -> preflist_ann(). + chashbin(), [node()]) -> preflist_ann(). get_primary_apl_chbin(DocIdx, N, CHBin, UpNodes) -> UpNodes1 = UpNodes, @@ -202,7 +202,7 @@ get_primary_apl_chbin(DocIdx, N, CHBin, UpNodes) -> %% @doc Same as get_apl, but returns only the primaries. -spec get_primary_apl(binary(), n_val(), ring(), - [node()]) -> preflist_ann(). + [node()]) -> preflist_ann(). get_primary_apl(DocIdx, N, Ring, UpNodes) -> UpNodes1 = UpNodes, @@ -217,11 +217,11 @@ first_up(DocIdx, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), Itr = chashbin:iterator(DocIdx, CHBin), UpSet = - ordsets:from_list(riak_core_node_watcher:nodes(Service)), + ordsets:from_list(riak_core_node_watcher:nodes(Service)), Itr2 = chashbin:itr_next_while(fun ({_P, Node}) -> - not ordsets:is_element(Node, UpSet) - end, - Itr), + not ordsets:is_element(Node, UpSet) + end, + Itr), chashbin:itr_value(Itr2). offline_owners(Service) -> @@ -230,77 +230,77 @@ offline_owners(Service) -> offline_owners(Service, CHBin) when is_atom(Service) -> UpSet = - ordsets:from_list(riak_core_node_watcher:nodes(Service)), + ordsets:from_list(riak_core_node_watcher:nodes(Service)), offline_owners(UpSet, CHBin); offline_owners(UpSet, CHBin) when is_list(UpSet) -> %% UpSet is an ordset of available nodes DownVNodes = chashbin:to_list_filter(fun ({_Index, - Node}) -> - not is_up(Node, UpSet) - end, - CHBin), + Node}) -> + not is_up(Node, UpSet) + end, + CHBin), DownVNodes. %% @doc Split a preference list into up and down lists. -spec check_up(preflist(), [node()], preflist_ann(), - preflist()) -> {preflist_ann(), preflist()}. + preflist()) -> {preflist_ann(), preflist()}. 
check_up([], _UpNodes, Up, Pangs) -> {lists:reverse(Up), lists:reverse(Pangs)}; check_up([{Partition, Node} | Rest], UpNodes, Up, - Pangs) -> + Pangs) -> case is_up(Node, UpNodes) of - true -> - check_up(Rest, - UpNodes, - [{{Partition, Node}, primary} | Up], - Pangs); - false -> - check_up(Rest, UpNodes, Up, [{Partition, Node} | Pangs]) + true -> + check_up(Rest, + UpNodes, + [{{Partition, Node}, primary} | Up], + Pangs); + false -> + check_up(Rest, UpNodes, Up, [{Partition, Node} | Pangs]) end. %% @doc Find fallbacks for downed nodes in the preference list. -spec find_fallbacks(preflist(), preflist(), [node()], - preflist_ann()) -> preflist_ann(). + preflist_ann()) -> preflist_ann(). find_fallbacks(_Pangs, [], _UpNodes, Secondaries) -> lists:reverse(Secondaries); find_fallbacks([], _Fallbacks, _UpNodes, Secondaries) -> lists:reverse(Secondaries); find_fallbacks([{Partition, _Node} | Rest] = Pangs, - [{_, FN} | Fallbacks], UpNodes, Secondaries) -> + [{_, FN} | Fallbacks], UpNodes, Secondaries) -> case is_up(FN, UpNodes) of - true -> - find_fallbacks(Rest, - Fallbacks, - UpNodes, - [{{Partition, FN}, fallback} | Secondaries]); - false -> - find_fallbacks(Pangs, Fallbacks, UpNodes, Secondaries) + true -> + find_fallbacks(Rest, + Fallbacks, + UpNodes, + [{{Partition, FN}, fallback} | Secondaries]); + false -> + find_fallbacks(Pangs, Fallbacks, UpNodes, Secondaries) end. %% @doc Find fallbacks for downed nodes in the preference list. -spec find_fallbacks_chbin(preflist(), iterator(), - [node()], preflist_ann()) -> preflist_ann(). + [node()], preflist_ann()) -> preflist_ann(). find_fallbacks_chbin([], _Fallbacks, _UpNodes, - Secondaries) -> + Secondaries) -> lists:reverse(Secondaries); find_fallbacks_chbin(_, done, _UpNodes, Secondaries) -> lists:reverse(Secondaries); find_fallbacks_chbin([{Partition, _Node} | Rest] = - Pangs, - Itr, UpNodes, Secondaries) -> + Pangs, + Itr, UpNodes, Secondaries) -> {_, FN} = chashbin:itr_value(Itr), Itr2 = chashbin:itr_next(Itr), case is_up(FN, UpNodes) of - true -> - find_fallbacks_chbin(Rest, - Itr2, - UpNodes, - [{{Partition, FN}, fallback} | Secondaries]); - false -> - find_fallbacks_chbin(Pangs, Itr2, UpNodes, Secondaries) + true -> + find_fallbacks_chbin(Rest, + Itr2, + UpNodes, + [{{Partition, FN}, fallback} | Secondaries]); + false -> + find_fallbacks_chbin(Pangs, Itr2, UpNodes, Secondaries) end. %% @doc Return true if a node is up. @@ -308,7 +308,7 @@ is_up(Node, UpNodes) -> lists:member(Node, UpNodes). %% @doc Return annotated preflist with partition ids/nums instead of hashes. -spec apl_with_partition_nums(preflist_ann(), - riak_core_ring:ring_size()) -> preflist_with_pnum_ann(). + riak_core_ring:ring_size()) -> preflist_with_pnum_ann(). apl_with_partition_nums(Apl, Size) -> [{{riak_core_ring_util:hash_to_partition_id(Hash, Size), @@ -321,41 +321,41 @@ apl_with_partition_nums(Apl, Size) -> smallest_test() -> Ring = riak_core_ring:fresh(1, node()), ?assertEqual([{0, node()}], - (get_apl(last_in_ring(), 1, Ring, [node()]))). + (get_apl(last_in_ring(), 1, Ring, [node()]))). 
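A concrete trace of the two passes above may help: check_up/4 keeps the first N owners that are up, annotated primary, and collects the down entries as pangs in ring order; find_fallbacks/4 (or the chbin variant) then walks the remaining owners and gives each pang's partition index to the first up node it meets, annotated fallback. Illustrative values only:

%% Owners in preflist order: [{1,a},{2,b},{3,c}], remaining owners [{4,d},{5,e}],
%% UpNodes = [a,c,d], N = 3.
%%
%% check_up([{1,a},{2,b},{3,c}], [a,c,d], [], []) =>
%%     Up    = [{{1,a},primary}, {{3,c},primary}]
%%     Pangs = [{2,b}]
%% find_fallbacks([{2,b}], [{4,d},{5,e}], [a,c,d], []) =>
%%     [{{2,d},fallback}]     %% partition 2 keeps its index, now served by d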
four_node_test() -> Nodes = [nodea, nodeb, nodec, noded], Ring = perfect_ring(8, Nodes), ?assertEqual([{0, nodea}, - {182687704666362864775460604089535377456991567872, - nodeb}, - {365375409332725729550921208179070754913983135744, - nodec}], - (get_apl(last_in_ring(), 3, Ring, Nodes))), + {182687704666362864775460604089535377456991567872, + nodeb}, + {365375409332725729550921208179070754913983135744, + nodec}], + (get_apl(last_in_ring(), 3, Ring, Nodes))), %% With a node down ?assertEqual([{182687704666362864775460604089535377456991567872, - nodeb}, - {365375409332725729550921208179070754913983135744, - nodec}, - {0, noded}], - (get_apl(last_in_ring(), - 3, - Ring, - [nodeb, nodec, noded]))), + nodeb}, + {365375409332725729550921208179070754913983135744, + nodec}, + {0, noded}], + (get_apl(last_in_ring(), + 3, + Ring, + [nodeb, nodec, noded]))), %% With two nodes down ?assertEqual([{365375409332725729550921208179070754913983135744, - nodec}, - {0, noded}, - {182687704666362864775460604089535377456991567872, - nodec}], - (get_apl(last_in_ring(), 3, Ring, [nodec, noded]))), + nodec}, + {0, noded}, + {182687704666362864775460604089535377456991567872, + nodec}], + (get_apl(last_in_ring(), 3, Ring, [nodec, noded]))), %% With the other two nodes down ?assertEqual([{0, nodea}, - {182687704666362864775460604089535377456991567872, - nodeb}, - {365375409332725729550921208179070754913983135744, - nodea}], - (get_apl(last_in_ring(), 3, Ring, [nodea, nodeb]))). + {182687704666362864775460604089535377456991567872, + nodeb}, + {365375409332725729550921208179070754913983135744, + nodea}], + (get_apl(last_in_ring(), 3, Ring, [nodea, nodeb]))). %% Create a perfect ring - RingSize must be a multiple of nodes perfect_ring(RingSize, Nodes) @@ -363,13 +363,13 @@ perfect_ring(RingSize, Nodes) Ring = riak_core_ring:fresh(RingSize, node()), Owners = riak_core_ring:all_owners(Ring), TransferNode = fun ({Idx, _CurOwner}, - {Ring0, [NewOwner | Rest]}) -> - {riak_core_ring:transfer_node(Idx, NewOwner, Ring0), - Rest ++ [NewOwner]} - end, + {Ring0, [NewOwner | Rest]}) -> + {riak_core_ring:transfer_node(Idx, NewOwner, Ring0), + Rest ++ [NewOwner]} + end, {PerfectRing, _} = lists:foldl(TransferNode, - {Ring, Nodes}, - Owners), + {Ring, Nodes}, + Owners), PerfectRing. last_in_ring() -> @@ -381,222 +381,222 @@ six_node_test() -> {ok, [Ring]} = file:consult("test/my_ring"), %DocIdx = riak_core_util:chash_key({<<"foo">>, <<"bar">>}), DocIdx = <<73, 212, 27, 234, 104, 13, 150, 207, 0, 82, - 86, 183, 125, 225, 172, 154, 135, 46, 6, 112>>, + 86, 183, 125, 225, 172, 154, 135, 46, 6, 112>>, Nodes = ['dev1@127.0.0.1', - 'dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1', - 'dev5@127.0.0.1', - 'dev6@127.0.0.1'], + 'dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1', + 'dev5@127.0.0.1', + 'dev6@127.0.0.1'], %% Fallbacks should be selected by finding the next-highest partition after %% the DocIdx of the key, in this case the 433883 partition. The N %% partitions at that point are the primary partitions. If any of the primaries %% are down, the next up node found by walking the preflist is used as the %% fallback for that partition. 
?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev3@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}], - (get_apl(DocIdx, 3, Ring, Nodes))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev3@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev4@127.0.0.1'}], + (get_apl(DocIdx, 3, Ring, Nodes))), ?assertEqual([{456719261665907161938651510223838443642478919680, - 'dev3@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}, - {433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- ['dev2@127.0.0.1']))), + 'dev3@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev4@127.0.0.1'}, + {433883298582611803841718934712646521460354973696, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev2@127.0.0.1']))), ?assertEqual([{479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}, - {433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev6@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- ['dev2@127.0.0.1', 'dev3@127.0.0.1']))), + 'dev4@127.0.0.1'}, + {433883298582611803841718934712646521460354973696, + 'dev5@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev6@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev2@127.0.0.1', 'dev3@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev6@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev1@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- - ['dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1']))), + 'dev5@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev6@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev1@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev5@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev6@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- - ['dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1', - 'dev1@127.0.0.1']))), + 'dev5@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev6@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1', + 'dev1@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev3@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- ['dev4@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev3@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev4@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 
'dev5@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev6@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- ['dev4@127.0.0.1', 'dev3@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev6@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev4@127.0.0.1', 'dev3@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev5@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev1@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- - ['dev4@127.0.0.1', - 'dev3@127.0.0.1', - 'dev6@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev1@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev4@127.0.0.1', + 'dev3@127.0.0.1', + 'dev6@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev5@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev2@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- - ['dev4@127.0.0.1', - 'dev3@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev2@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev4@127.0.0.1', + 'dev3@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev2@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev2@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- - ['dev4@127.0.0.1', - 'dev3@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev5@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev2@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev2@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- + ['dev4@127.0.0.1', + 'dev3@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev5@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, - 'dev2@127.0.0.1'}, - {479555224749202520035584085735030365824602865664, - 'dev4@127.0.0.1'}, - {456719261665907161938651510223838443642478919680, - 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, - Nodes -- ['dev3@127.0.0.1']))), + 'dev2@127.0.0.1'}, + {479555224749202520035584085735030365824602865664, + 'dev4@127.0.0.1'}, + {456719261665907161938651510223838443642478919680, + 'dev5@127.0.0.1'}], + (get_apl(DocIdx, + 3, + Ring, + Nodes -- ['dev3@127.0.0.1']))), ok. 
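The fallback selection exercised above can be reproduced in a shell. A minimal sketch, assuming these tests live in riak_core_apl (so get_apl/4 is that module's own export) and that the test/my_ring fixture is on disk:

    {ok, [Ring]} = file:consult("test/my_ring"),
    DocIdx = <<73, 212, 27, 234, 104, 13, 150, 207, 0, 82,
               86, 183, 125, 225, 172, 154, 135, 46, 6, 112>>,
    Nodes = ['dev1@127.0.0.1', 'dev2@127.0.0.1', 'dev3@127.0.0.1',
             'dev4@127.0.0.1', 'dev5@127.0.0.1', 'dev6@127.0.0.1'],
    %% With dev2 down, dev5 becomes the fallback owner of the
    %% 433883... partition, matching the second assertion above.
    riak_core_apl:get_apl(DocIdx, 3, Ring, Nodes -- ['dev2@127.0.0.1']).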
six_node_bucket_key_ann_test() -> {ok, [Ring]} = file:consult("test/my_ring"), Nodes = ['dev1@127.0.0.1', - 'dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1', - 'dev5@127.0.0.1', - 'dev6@127.0.0.1'], + 'dev2@127.0.0.1', + 'dev3@127.0.0.1', + 'dev4@127.0.0.1', + 'dev5@127.0.0.1', + 'dev6@127.0.0.1'], Bucket = <<"favorite">>, Key = <<"jethrotull">>, application:set_env(riak_core, - default_bucket_props, - [{n_val, 3}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}]), + default_bucket_props, + [{n_val, 3}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}]), riak_core_ring_manager:setup_ets(test), riak_core_ring_manager:set_ring_global(Ring), Size = riak_core_ring:num_partitions(Ring), ?assertEqual([{{34, 'dev5@127.0.0.1'}, primary}, - {{35, 'dev6@127.0.0.1'}, primary}, - {{36, 'dev1@127.0.0.1'}, primary}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes), - Size))), + {{35, 'dev6@127.0.0.1'}, primary}, + {{36, 'dev1@127.0.0.1'}, primary}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes), + Size))), ?assertEqual([{{35, 'dev6@127.0.0.1'}, primary}, - {{36, 'dev1@127.0.0.1'}, primary}, - {{34, 'dev2@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1']), - Size))), + {{36, 'dev1@127.0.0.1'}, primary}, + {{34, 'dev2@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1']), + Size))), ?assertEqual([{{36, 'dev1@127.0.0.1'}, primary}, - {{34, 'dev2@127.0.0.1'}, fallback}, - {{35, 'dev3@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1']), - Size))), + {{34, 'dev2@127.0.0.1'}, fallback}, + {{35, 'dev3@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1']), + Size))), ?assertEqual([{{34, 'dev2@127.0.0.1'}, fallback}, - {{35, 'dev3@127.0.0.1'}, fallback}, - {{36, 'dev4@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1']), - Size))), + {{35, 'dev3@127.0.0.1'}, fallback}, + {{36, 'dev4@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1']), + Size))), ?assertEqual([{{34, 'dev3@127.0.0.1'}, fallback}, - {{35, 'dev4@127.0.0.1'}, fallback}, - {{36, 'dev3@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev2@127.0.0.1']), - Size))), + {{35, 'dev4@127.0.0.1'}, fallback}, + {{36, 'dev3@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev2@127.0.0.1']), + Size))), ?assertEqual([{{34, 'dev4@127.0.0.1'}, fallback}, - {{35, 'dev4@127.0.0.1'}, fallback}, - {{36, 'dev4@127.0.0.1'}, fallback}], - (apl_with_partition_nums(get_apl_ann({Bucket, Key}, - Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev2@127.0.0.1', - 'dev3@127.0.0.1']), - Size))), + {{35, 'dev4@127.0.0.1'}, fallback}, + {{36, 'dev4@127.0.0.1'}, fallback}], + (apl_with_partition_nums(get_apl_ann({Bucket, Key}, + Nodes -- + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev2@127.0.0.1', + 'dev3@127.0.0.1']), + Size))), ?assertEqual([{{34, 'dev5@127.0.0.1'}, primary}, - {{35, 'dev6@127.0.0.1'}, primary}, - 
{{36, 'dev3@127.0.0.1'}, fallback}],
-            (apl_with_partition_nums(get_apl_ann({Bucket, Key},
-                                                 Nodes --
-                                                   ['dev1@127.0.0.1',
-                                                    'dev2@127.0.0.1']),
-                                     Size))),
+                 {{35, 'dev6@127.0.0.1'}, primary},
+                 {{36, 'dev3@127.0.0.1'}, fallback}],
+                (apl_with_partition_nums(get_apl_ann({Bucket, Key},
+                                                     Nodes --
+                                                       ['dev1@127.0.0.1',
+                                                        'dev2@127.0.0.1']),
+                                         Size))),
 riak_core_ring_manager:cleanup_ets(test),
 ok.

@@ -606,7 +606,7 @@ chbin_test_() ->

 chbin_test_scenario() ->
     [chbin_test_scenario(Size, NumNodes)
      || Size <- [32, 64, 128],
-        NumNodes <- [1, 2, 3, 4, 5, 8, Size div 4]],
+         NumNodes <- [1, 2, 3, 4, 5, 8, Size div 4]],
     ok.

 chbin_test_scenario(Size, NumNodes) ->
@@ -617,23 +617,23 @@ chbin_test_scenario(Size, NumNodes) ->
     RingTop = 1 bsl 160,
     Ring = riak_core_test_util:fake_ring(Size, NumNodes),
     Nodes = riak_core_ring:all_members(Ring),
     CHash = riak_core_ring:chash(Ring),
     CHBin = chashbin:create(CHash),
     Inc = chash:ring_increment(Size),
     HashKeys = [<<X:160/integer>>
-                || X <- lists:seq(0, RingTop, Inc div 2)],
+                 || X <- lists:seq(0, RingTop, Inc div 2)],
     Shuffled = riak_core_util:shuffle(Nodes),
     _ = CHBin,
     [begin
-         Up = max(0, NumNodes - Down),
-         UpNodes = lists:sublist(Shuffled, Up),
-         ?assertEqual((get_apl(HashKey, N, Ring, UpNodes)),
-                      (get_apl_chbin(HashKey, N, CHBin, UpNodes))),
-         ?assertEqual((get_primary_apl(HashKey,
-                                       N,
-                                       Ring,
-                                       UpNodes)),
-                      (get_primary_apl_chbin(HashKey, N, CHBin, UpNodes))),
-         ok
+         Up = max(0, NumNodes - Down),
+         UpNodes = lists:sublist(Shuffled, Up),
+         ?assertEqual((get_apl(HashKey, N, Ring, UpNodes)),
+                      (get_apl_chbin(HashKey, N, CHBin, UpNodes))),
+         ?assertEqual((get_primary_apl(HashKey,
+                                       N,
+                                       Ring,
+                                       UpNodes)),
+                      (get_primary_apl_chbin(HashKey, N, CHBin, UpNodes))),
+         ok
      end
      || HashKey <- HashKeys, N <- [1, 2, 3, 4],
-        Down <- [0, 1, 2, Size div 2, Size - 1, Size]],
+         Down <- [0, 1, 2, Size div 2, Size - 1, Size]],
     ok.

 -endif.
diff --git a/src/riak_core_app.erl b/src/riak_core_app.erl
index dea09a9e3..9fb23a656 100644
--- a/src/riak_core_app.erl
+++ b/src/riak_core_app.erl
@@ -42,32 +42,32 @@ stop(_State) ->

 validate_ring_state_directory_exists() ->
     riak_core_util:start_app_deps(riak_core),
     {ok, RingStateDir} = application:get_env(riak_core,
-                                             ring_state_dir),
+                                             ring_state_dir),
     case filelib:ensure_dir(filename:join(RingStateDir,
-                                          "dummy"))
-        of
-      ok -> ok;
-      {error, RingReason} ->
-          logger:critical("Ring state directory ~p does not exist, "
-                          "and could not be created: ~p",
-                          [RingStateDir,
-                           riak_core_util:posix_error(RingReason)]),
-          throw({error, invalid_ring_state_dir})
+                                          "dummy"))
+        of
+      ok -> ok;
+      {error, RingReason} ->
+          logger:critical("Ring state directory ~p does not exist, "
+                          "and could not be created: ~p",
+                          [RingStateDir,
+                           riak_core_util:posix_error(RingReason)]),
+          throw({error, invalid_ring_state_dir})
     end.

 start_riak_core_sup() ->
     %% Spin up the supervisor; prune ring files as necessary
     case riak_core_sup:start_link() of
-      {ok, Pid} ->
-          ok = register_applications(),
-          ok = add_ring_event_handler(),
-          {ok, Pid};
-      {error, Reason} -> {error, Reason}
+      {ok, Pid} ->
+          ok = register_applications(),
+          ok = add_ring_event_handler(),
+          {ok, Pid};
+      {error, Reason} -> {error, Reason}
     end.

 register_applications() -> ok.

 add_ring_event_handler() ->
     ok =
-        riak_core_ring_events:add_guarded_handler(riak_core_ring_handler,
-                                                  []).
+        riak_core_ring_events:add_guarded_handler(riak_core_ring_handler,
+                                                  []).
diff --git a/src/riak_core_base64url.erl b/src/riak_core_base64url.erl
index 7337c0742..15c1261ff 100644
--- a/src/riak_core_base64url.erl
+++ b/src/riak_core_base64url.erl
@@ -28,11 +28,11 @@
 -module(riak_core_base64url).
 -export([decode/1,
-         decode_to_string/1,
-         encode/1,
-         encode_to_string/1,
-         mime_decode/1,
-         mime_decode_to_string/1]).
+         decode_to_string/1,
+         encode/1,
+         encode_to_string/1,
+         mime_decode/1,
+         mime_decode_to_string/1]).

 -spec decode(iodata()) -> binary().

@@ -68,7 +68,7 @@ urlencode(Base64) when is_list(Base64) ->
     string:strip(Padded, both, $=);
 urlencode(Base64) when is_binary(Base64) ->
     Padded = << <<(urlencode_digit(D))>>
-                || <<D>> <= Base64 >>,
+                 || <<D>> <= Base64 >>,
     binary:replace(Padded, <<"=">>, <<"">>, [global]).

 urldecode(Base64url) when is_list(Base64url) ->
@@ -77,15 +77,15 @@ urldecode(Base64url) when is_list(Base64url) ->
     Prepad = [urldecode_digit(D) || D <- Base64url],
     Padding = padding(Prepad),
     Prepad ++ Padding;
 urldecode(Base64url) when is_binary(Base64url) ->
     Prepad = << <<(urldecode_digit(D))>>
-                || <<D>> <= Base64url >>,
+                 || <<D>> <= Base64url >>,
     Padding = padding(Prepad),
     <<Prepad/binary, Padding/binary>>.

 padding(Base64) when is_binary(Base64) ->
     case byte_size(Base64) rem 4 of
-      2 -> <<"==">>;
-      3 -> <<"=">>;
-      _ -> <<"">>
+      2 -> <<"==">>;
+      3 -> <<"=">>;
+      _ -> <<"">>
     end;
 padding(Base64) when is_list(Base64) ->
     binary_to_list(padding(list_to_binary(Base64))).
diff --git a/src/riak_core_bucket.erl b/src/riak_core_bucket.erl
index b688dbaca..5b547539b 100644
--- a/src/riak_core_bucket.erl
+++ b/src/riak_core_bucket.erl
@@ -24,17 +24,17 @@
 -module(riak_core_bucket).

 -export([append_bucket_defaults/1,
-         set_bucket/2,
-         get_bucket/1,
-         get_bucket/2,
-         reset_bucket/1,
-         get_buckets/1,
-         bucket_nval_map/1,
-         default_object_nval/0,
-         merge_props/2,
-         name/1,
-         n_val/1,
-         get_value/2]).
+         set_bucket/2,
+         get_bucket/1,
+         get_bucket/2,
+         reset_bucket/1,
+         get_buckets/1,
+         bucket_nval_map/1,
+         default_object_nval/0,
+         merge_props/2,
+         name/1,
+         n_val/1,
+         get_value/2]).

 -ifdef(TEST).

@@ -43,7 +43,7 @@

 -endif.

 -type property() :: {PropName :: atom(),
-                     PropValue :: any()}.
+                     PropValue :: any()}.

 -type properties() :: [property()].

@@ -57,9 +57,9 @@

 -type bucket() :: binary() | {bucket_type(), binary()}.

 -export_type([property/0,
-              properties/0,
-              bucket/0,
-              nval_set/0]).
+              properties/0,
+              bucket/0,
+              nval_set/0]).

 %% @doc Add a list of defaults to global list of defaults for new
 %%      buckets. If any item is in Items is already set in the

@@ -73,42 +73,42 @@ append_bucket_defaults(Items) when is_list(Items) ->
 %% @doc Set the given BucketProps in Bucket or {BucketType, Bucket}. If BucketType does not
 %% exist, or is not active, {error, no_type} is returned.
 -spec set_bucket(bucket(), [{atom(), any()}]) -> ok |
-                                                 {error,
-                                                  no_type | [{atom(), atom()}]}.
+                                                 {error,
+                                                  no_type | [{atom(), atom()}]}.

 set_bucket({<<"default">>, Name}, BucketProps) ->
     set_bucket(Name, BucketProps);
 set_bucket(Name, BucketProps0) ->
     set_bucket(fun set_bucket_in_ring/2,
-               Name,
-               BucketProps0).
+               Name,
+               BucketProps0).

 set_bucket(StoreFun, Bucket, BucketProps0) ->
     OldBucket = get_bucket(Bucket),
     case riak_core_bucket_props:validate(update,
-                                         Bucket,
-                                         OldBucket,
-                                         BucketProps0)
-        of
-      {ok, BucketProps} ->
-          NewBucket = merge_props(BucketProps, OldBucket),
-          StoreFun(Bucket, NewBucket);
-      {error, Details} ->
-          logger:error("Bucket properties validation failed "
-                       "~p~n",
-                       [Details]),
-          {error, Details}
+                                         Bucket,
+                                         OldBucket,
+                                         BucketProps0)
+        of
+      {ok, BucketProps} ->
+          NewBucket = merge_props(BucketProps, OldBucket),
+          StoreFun(Bucket, NewBucket);
+      {error, Details} ->
+          logger:error("Bucket properties validation failed "
+                       "~p~n",
+                       [Details]),
+          {error, Details}
    end.
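As a usage sketch of the validation path in set_bucket/3 above (the bucket name and property list are hypothetical; a running riak_core node is assumed so the ring transaction can be applied):

    case riak_core_bucket:set_bucket(<<"my_bucket">>, [{n_val, 5}]) of
        ok -> ok;
        {error, Details} ->
            %% validation failures arrive as [{PropName, Reason}] pairs
            logger:warning("invalid bucket props: ~p", [Details])
    end.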
set_bucket_in_ring(Bucket, BucketMeta) -> F = fun (Ring, _Args) -> - {new_ring, - riak_core_ring:update_meta(bucket_key(Bucket), - BucketMeta, - Ring)} - end, + {new_ring, + riak_core_ring:update_meta(bucket_key(Bucket), + BucketMeta, + Ring)} + end, {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), + undefined), ok. %% @spec merge_props(list(), list()) -> list() @@ -152,11 +152,11 @@ reset_bucket({<<"default">>, Name}) -> reset_bucket(Name); reset_bucket(Bucket) -> F = fun (Ring, _Args) -> - {new_ring, - riak_core_ring:remove_meta(bucket_key(Bucket), Ring)} - end, + {new_ring, + riak_core_ring:remove_meta(bucket_key(Bucket), Ring)} + end, {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), + undefined), ok. %% @doc Get bucket properties `Props' for all the buckets in the given @@ -166,12 +166,12 @@ reset_bucket(Bucket) -> get_buckets(Ring) -> RingNames = riak_core_ring:get_buckets(Ring), RingBuckets = [get_bucket(Name, Ring) - || Name <- RingNames], + || Name <- RingNames], RingBuckets. %% @doc returns a proplist containing all buckets and their respective N values -spec bucket_nval_map(riak_core_ring()) -> [{binary(), - integer()}]. + integer()}]. bucket_nval_map(Ring) -> [{riak_core_bucket:name(B), riak_core_bucket:n_val(B)} @@ -192,8 +192,8 @@ n_val(BProps) -> get_value(n_val, BProps). get_value(Key, Proplist) -> case lists:keyfind(Key, 1, Proplist) of - {Key, Value} -> Value; - _ -> undefined + {Key, Value} -> Value; + _ -> undefined end. bucket_key({<<"default">>, Name}) -> bucket_key(Name); @@ -208,8 +208,8 @@ bucket_key(Name) -> {bucket, Name}. simple_set_test() -> application:load(riak_core), application:set_env(riak_core, - ring_state_dir, - "_build/test/tmp"), + ring_state_dir, + "_build/test/tmp"), %% appending an empty list of defaults makes up for the fact that %% riak_core_app:start/2 is not called during eunit runs %% (that's where the usual defaults are set at startup), diff --git a/src/riak_core_bucket_props.erl b/src/riak_core_bucket_props.erl index 19021fe2f..368163a8c 100644 --- a/src/riak_core_bucket_props.erl +++ b/src/riak_core_bucket_props.erl @@ -20,10 +20,10 @@ -module(riak_core_bucket_props). -export([merge/2, - validate/4, - resolve/2, - defaults/0, - append_defaults/1]). + validate/4, + resolve/2, + defaults/0, + append_defaults/1]). -ifdef(TEST). @@ -32,100 +32,100 @@ -endif. -spec merge([{atom(), any()}], - [{atom(), any()}]) -> [{atom(), any()}]. + [{atom(), any()}]) -> [{atom(), any()}]. merge(Overriding, Other) -> lists:ukeymerge(1, - lists:ukeysort(1, Overriding), - lists:ukeysort(1, Other)). + lists:ukeysort(1, Overriding), + lists:ukeysort(1, Other)). -spec validate(create | update, - {riak_core_bucket:bucket_type(), undefined | binary()} | - binary(), - undefined | [{atom(), any()}], - [{atom(), any()}]) -> {ok, [{atom(), any()}]} | - {error, [{atom(), atom()}]}. + {riak_core_bucket:bucket_type(), undefined | binary()} | + binary(), + undefined | [{atom(), any()}], + [{atom(), any()}]) -> {ok, [{atom(), any()}]} | + {error, [{atom(), atom()}]}. validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps) -> + BucketProps) -> ReservedErrors = validate_reserved_names(Bucket), CoreErrors = validate_core_props(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps), + Bucket, + ExistingProps, + BucketProps), validate(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps, - riak_core:bucket_validators(), - [ReservedErrors, CoreErrors]). 
+ Bucket, + ExistingProps, + BucketProps, + riak_core:bucket_validators(), + [ReservedErrors, CoreErrors]). validate(_CreateOrUpdate, _Bucket, _ExistingProps, - Props, [], ErrorLists) -> + Props, [], ErrorLists) -> case lists:flatten(ErrorLists) of - [] -> {ok, Props}; - Errors -> {error, Errors} + [] -> {ok, Props}; + Errors -> {error, Errors} end; validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps0, [{_App, Validator} | T], Errors0) -> + BucketProps0, [{_App, Validator} | T], Errors0) -> {BucketProps, Errors} = - Validator:validate(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps0), + Validator:validate(CreateOrUpdate, + Bucket, + ExistingProps, + BucketProps0), validate(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps, - T, - [Errors | Errors0]). + Bucket, + ExistingProps, + BucketProps, + T, + [Errors | Errors0]). validate_core_props(CreateOrUpdate, Bucket, - ExistingProps, BucketProps) -> + ExistingProps, BucketProps) -> lists:foldl(fun (Prop, Errors) -> - case validate_core_prop(CreateOrUpdate, - Bucket, - ExistingProps, - Prop) - of - true -> Errors; - Error -> [Error | Errors] - end - end, - [], - BucketProps). + case validate_core_prop(CreateOrUpdate, + Bucket, + ExistingProps, + Prop) + of + true -> Errors; + Error -> [Error | Errors] + end + end, + [], + BucketProps). validate_core_prop(create, {_Bucket, undefined}, - undefined, {claimant, Claimant}) + undefined, {claimant, Claimant}) when Claimant =:= node() -> %% claimant valid on first call to create if claimant is this node true; validate_core_prop(create, {_Bucket, undefined}, - undefined, {claimant, _BadClaimant}) -> + undefined, {claimant, _BadClaimant}) -> %% claimant not valid on first call to create if claimant is not this node {claimant, "Invalid claimant"}; validate_core_prop(create, {_Bucket, undefined}, - Existing, {claimant, Claimant}) -> + Existing, {claimant, Claimant}) -> %% subsequent creation calls cannot modify claimant and it should exist case lists:keyfind(claimant, 1, Existing) of - false -> - {claimant, - "No claimant details found in existing " - "properties"}; - {claimant, Claimant} -> true; - {claimant, _Other} -> - {claimant, "Cannot modify claimant property"} + false -> + {claimant, + "No claimant details found in existing " + "properties"}; + {claimant, Claimant} -> true; + {claimant, _Other} -> + {claimant, "Cannot modify claimant property"} end; validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {claimant, _Claimant}) -> + _Existing, {claimant, _Claimant}) -> %% cannot update claimant {claimant, "Cannot update claimant property"}; validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {ddl, _DDL}) -> + _Existing, {ddl, _DDL}) -> %% cannot update time series DDL {ddl, "Cannot update time series data definition"}; validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {table_def, _DDL}) -> + _Existing, {table_def, _DDL}) -> %% cannot update time series DDL (or, if it slips past riak_kv_console, %% the table_def SQL(ish) code that is parsed to make a DDL) %% @@ -133,19 +133,19 @@ validate_core_prop(update, {_Bucket, _BucketName}, %% errors from existing_atom functions {ddl, "Cannot update time series data definition"}; validate_core_prop(create, {_Bucket, undefined}, - undefined, {active, false}) -> + undefined, {active, false}) -> %% first creation call that sets active to false is always valid true; validate_core_prop(create, {_Bucket, undefined}, - _Existing, {active, false}) -> + _Existing, {active, false}) -> %% subsequent creation calls 
that leaves active false is valid true; validate_core_prop(update, {_Bucket, _}, _Existing, - {active, true}) -> + {active, true}) -> %% calls to update that do not modify active are valid true; validate_core_prop(_, {_Bucket, _}, _Existing, - {active, _}) -> + {active, _}) -> %% subsequent creation calls or update calls cannot modify active (it is modified directly %% by riak_core_claimant) {active, "Cannot modify active property"}; @@ -155,8 +155,8 @@ validate_core_prop(_, _, _, _) -> validate_reserved_names(Bucket) -> case validate_reserved_name(Bucket) of - ok -> []; - ErrStr -> [{reserved_name, ErrStr}] + ok -> []; + ErrStr -> [{reserved_name, ErrStr}] end. validate_reserved_name({<<"any">>, _}) -> @@ -168,35 +168,35 @@ validate_reserved_name(_) -> ok. defaults() -> application:get_env(riak_core, - default_bucket_props, - undefined). + default_bucket_props, + undefined). -spec append_defaults([{atom(), any()}]) -> ok. append_defaults(Items) when is_list(Items) -> OldDefaults = application:get_env(riak_core, - default_bucket_props, - []), + default_bucket_props, + []), NewDefaults = merge(OldDefaults, Items), FixedDefaults = case riak_core:bucket_fixups() of - [] -> NewDefaults; - Fixups -> - riak_core_ring_manager:run_fixups(Fixups, - default, - NewDefaults) - end, + [] -> NewDefaults; + Fixups -> + riak_core_ring_manager:run_fixups(Fixups, + default, + NewDefaults) + end, application:set_env(riak_core, - default_bucket_props, - FixedDefaults), + default_bucket_props, + FixedDefaults), %% do a noop transform on the ring, to make the fixups re-run catch riak_core_ring_manager:ring_trans(fun (Ring, _) -> - {new_ring, Ring} - end, - undefined), + {new_ring, Ring} + end, + undefined), ok. -spec resolve([{atom(), any()}], - [{atom(), any()}]) -> [{atom(), any()}]. + [{atom(), any()}]) -> [{atom(), any()}]. %%noinspection ErlangUnusedVariable resolve(PropsA, PropsB) @@ -204,37 +204,37 @@ resolve(PropsA, PropsB) PropsASorted = lists:ukeysort(1, PropsA), PropsBSorted = lists:ukeysort(1, PropsB), {_, Resolved} = lists:foldl(fun ({KeyA, _} = PropA, - {[{KeyA, _} = PropB | RestB], Acc}) -> - {RestB, - [{KeyA, resolve_prop(PropA, PropB)} - | Acc]}; - (PropA, {RestB, Acc}) -> - {RestB, [PropA | Acc]} - end, - {PropsBSorted, []}, - PropsASorted), + {[{KeyA, _} = PropB | RestB], Acc}) -> + {RestB, + [{KeyA, resolve_prop(PropA, PropB)} + | Acc]}; + (PropA, {RestB, Acc}) -> + {RestB, [PropA | Acc]} + end, + {PropsBSorted, []}, + PropsASorted), Resolved. 
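A worked example of resolve/2, using the per-property rules that follow (allow_mult resolves with orelse, n_val and w with max); the property lists are hypothetical:

    PropsA = [{allow_mult, false}, {n_val, 3}, {w, 2}],
    PropsB = [{allow_mult, true}, {n_val, 5}, {w, 1}],
    Resolved = riak_core_bucket_props:resolve(PropsA, PropsB),
    %% allow_mult: false orelse true -> true
    %% n_val:      max(3, 5)         -> 5
    %% w:          max(2, 1)         -> 2
    [{allow_mult, true}, {n_val, 5}, {w, 2}] =
        lists:ukeysort(1, Resolved).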
resolve_prop({allow_mult, Mult1}, - {allow_mult, Mult2}) -> + {allow_mult, Mult2}) -> Mult1 orelse - Mult2; %% assumes allow_mult=true is default + Mult2; %% assumes allow_mult=true is default resolve_prop({basic_quorum, Basic1}, - {basic_quorum, Basic2}) -> + {basic_quorum, Basic2}) -> Basic1 andalso Basic2; resolve_prop({big_vclock, Big1}, {big_vclock, Big2}) -> max(Big1, Big2); resolve_prop({chash_keyfun, KeyFun1}, - {chash_keyfun, _KeyFun2}) -> + {chash_keyfun, _KeyFun2}) -> KeyFun1; %% arbitrary choice resolve_prop({dw, DW1}, {dw, DW2}) -> %% 'quorum' wins over set numbers max(DW1, DW2); resolve_prop({last_write_wins, LWW1}, - {last_write_wins, LWW2}) -> + {last_write_wins, LWW2}) -> LWW1 andalso LWW2; resolve_prop({linkfun, LinkFun1}, - {linkfun, _LinkFun2}) -> + {linkfun, _LinkFun2}) -> LinkFun1; %% arbitrary choice resolve_prop({n_val, N1}, {n_val, N2}) -> max(N1, N2); resolve_prop({notfound_ok, NF1}, {notfound_ok, NF2}) -> @@ -250,11 +250,11 @@ resolve_prop({pw, PW1}, {pw, PW2}) -> max(PW1, PW2); resolve_prop({r, R1}, {r, R2}) -> max(R1, R2); resolve_prop({rw, RW1}, {rw, RW2}) -> max(RW1, RW2); resolve_prop({small_vclock, Small1}, - {small_vclock, Small2}) -> + {small_vclock, Small2}) -> max(Small1, Small2); resolve_prop({w, W1}, {w, W2}) -> max(W1, W2); resolve_prop({young_vclock, Young1}, - {young_vclock, Young2}) -> + {young_vclock, Young2}) -> max(Young1, Young2); resolve_prop({_, V1}, {_, _V2}) -> V1. @@ -269,68 +269,68 @@ resolve_hooks(Hooks1, Hooks2) -> simple_resolve_test() -> Props1 = [{name, <<"test">>}, - {allow_mult, false}, - {basic_quorum, false}, - {big_vclock, 50}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, quorum}, - {last_write_wins, false}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 3}, - {notfound_ok, true}, - {old_vclock, 86400}, - {postcommit, []}, - {pr, 0}, - {precommit, [{a, b}]}, - {pw, 0}, - {r, quorum}, - {rw, quorum}, - {small_vclock, 50}, - {w, quorum}, - {young_vclock, 20}], + {allow_mult, false}, + {basic_quorum, false}, + {big_vclock, 50}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}, + {dw, quorum}, + {last_write_wins, false}, + {linkfun, + {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, + {n_val, 3}, + {notfound_ok, true}, + {old_vclock, 86400}, + {postcommit, []}, + {pr, 0}, + {precommit, [{a, b}]}, + {pw, 0}, + {r, quorum}, + {rw, quorum}, + {small_vclock, 50}, + {w, quorum}, + {young_vclock, 20}], Props2 = [{name, <<"test">>}, - {allow_mult, true}, - {basic_quorum, true}, - {big_vclock, 60}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, 3}, - {last_write_wins, true}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 5}, - {notfound_ok, false}, - {old_vclock, 86401}, - {postcommit, [{a, b}]}, - {pr, 1}, - {precommit, [{c, d}]}, - {pw, 3}, - {r, 3}, - {rw, 3}, - {w, 1}, - {young_vclock, 30}], + {allow_mult, true}, + {basic_quorum, true}, + {big_vclock, 60}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}, + {dw, 3}, + {last_write_wins, true}, + {linkfun, + {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, + {n_val, 5}, + {notfound_ok, false}, + {old_vclock, 86401}, + {postcommit, [{a, b}]}, + {pr, 1}, + {precommit, [{c, d}]}, + {pw, 3}, + {r, 3}, + {rw, 3}, + {w, 1}, + {young_vclock, 30}], Expected = [{name, <<"test">>}, - {allow_mult, true}, - {basic_quorum, false}, - {big_vclock, 60}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, quorum}, - {last_write_wins, false}, - {linkfun, - {modfun, riak_kv_wm_link_walker, 
mapreduce_linkfun}}, - {n_val, 5}, - {notfound_ok, true}, - {old_vclock, 86401}, - {postcommit, [{a, b}]}, - {pr, 1}, - {precommit, [{a, b}, {c, d}]}, - {pw, 3}, - {r, quorum}, - {rw, quorum}, - {small_vclock, 50}, - {w, quorum}, - {young_vclock, 30}], + {allow_mult, true}, + {basic_quorum, false}, + {big_vclock, 60}, + {chash_keyfun, {riak_core_util, chash_std_keyfun}}, + {dw, quorum}, + {last_write_wins, false}, + {linkfun, + {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, + {n_val, 5}, + {notfound_ok, true}, + {old_vclock, 86401}, + {postcommit, [{a, b}]}, + {pr, 1}, + {precommit, [{a, b}, {c, d}]}, + {pw, 3}, + {r, quorum}, + {rw, quorum}, + {small_vclock, 50}, + {w, quorum}, + {young_vclock, 30}], ?assertEqual((lists:ukeysort(1, Expected)), - (lists:ukeysort(1, resolve(Props1, Props2)))). + (lists:ukeysort(1, resolve(Props1, Props2)))). -endif. diff --git a/src/riak_core_claim.erl b/src/riak_core_claim.erl index c6aba2b6d..bb6efa098 100644 --- a/src/riak_core_claim.erl +++ b/src/riak_core_claim.erl @@ -54,33 +54,33 @@ -module(riak_core_claim). -export([claim/1, - claim/3, - claim_until_balanced/2, - claim_until_balanced/4]). + claim/3, + claim_until_balanced/2, + claim_until_balanced/4]). -export([default_wants_claim/1, - default_wants_claim/2, - default_choose_claim/1, - default_choose_claim/2, - default_choose_claim/3, - never_wants_claim/1, - never_wants_claim/2, - random_choose_claim/1, - random_choose_claim/2, - random_choose_claim/3]). + default_wants_claim/2, + default_choose_claim/1, + default_choose_claim/2, + default_choose_claim/3, + never_wants_claim/1, + never_wants_claim/2, + random_choose_claim/1, + random_choose_claim/2, + random_choose_claim/3]). -export([wants_claim_v2/1, - wants_claim_v2/2, - choose_claim_v2/1, - choose_claim_v2/2, - choose_claim_v2/3, - claim_rebalance_n/2, - claim_diversify/3, - claim_diagonal/3, - wants/1, - wants_owns_diff/2, - meets_target_n/2, - diagonal_stripe/2]). + wants_claim_v2/2, + choose_claim_v2/1, + choose_claim_v2/2, + choose_claim_v2/3, + claim_rebalance_n/2, + claim_diversify/3, + claim_diagonal/3, + wants/1, + wants_owns_diff/2, + meets_target_n/2, + diagonal_stripe/2]). -define(DEF_TARGET_N, 4). @@ -89,10 +89,10 @@ claim(Ring) -> claim(Ring, want, choose). claim(Ring, _, _) -> Members = riak_core_ring:claiming_members(Ring), lists:foldl(fun (Node, Ring0) -> - claim_until_balanced(Ring0, Node, want, choose) - end, - Ring, - Members). + claim_until_balanced(Ring0, Node, want, choose) + end, + Ring, + Members). claim_until_balanced(Ring, Node) -> claim_until_balanced(Ring, Node, want, choose). @@ -100,10 +100,10 @@ claim_until_balanced(Ring, Node) -> claim_until_balanced(Ring, Node, want, choose) -> NeedsIndexes = wants_claim_v2(Ring, Node), case NeedsIndexes of - no -> Ring; - {yes, _NumToClaim} -> - NewRing = choose_claim_v2(Ring, Node), - claim_until_balanced(NewRing, Node, want, choose) + no -> Ring; + {yes, _NumToClaim} -> + NewRing = choose_claim_v2(Ring, Node), + claim_until_balanced(NewRing, Node, want, choose) end. %% =================================================================== @@ -140,8 +140,8 @@ wants_claim_v2(Ring, Node) -> Avg = RingSize div NodeCount, Count = proplists:get_value(Node, Counts, 0), case Count < Avg of - false -> no; - true -> {yes, Avg - Count} + false -> no; + true -> {yes, Avg - Count} end. %% Provide default choose parameters if none given @@ -149,12 +149,12 @@ default_choose_params() -> default_choose_params([]). 
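To make the wants arithmetic in wants_claim_v2/2 above concrete, a small sketch with hypothetical sizes:

    %% 64 partitions over 5 claiming members: Avg = 64 div 5 = 12.
    Avg = 64 div 5,
    Count = 9,                      %% this node currently owns 9
    case Count < Avg of
        true -> {yes, Avg - Count}; %% -> {yes, 3}: it wants 3 more
        false -> no
    end.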
default_choose_params(Params) -> case proplists:get_value(target_n_val, Params) of - undefined -> - TN = application:get_env(riak_core, - target_n_val, - ?DEF_TARGET_N), - [{target_n_val, TN} | Params]; - _ -> Params + undefined -> + TN = application:get_env(riak_core, + target_n_val, + ?DEF_TARGET_N), + [{target_n_val, TN} | Params]; + _ -> Params end. choose_claim_v2(Ring) -> choose_claim_v2(Ring, node()). @@ -175,78 +175,78 @@ choose_claim_v2(Ring, Node, Params0) -> NodeCount = erlang:length(Active), %% Deltas::[node(), integer()] Deltas = get_deltas(RingSize, - NodeCount, - Owners, - Counts), + NodeCount, + Owners, + Counts), {_, Want} = lists:keyfind(Node, 1, Deltas), TargetN = proplists:get_value(target_n_val, Params), AllIndices = lists:zip(lists:seq(0, length(Owners) - 1), - [Idx || {Idx, _} <- Owners]), + [Idx || {Idx, _} <- Owners]), EnoughNodes = (NodeCount > TargetN) or - (NodeCount == TargetN) and (RingSize rem TargetN =:= 0), + (NodeCount == TargetN) and (RingSize rem TargetN =:= 0), case EnoughNodes of - true -> - %% If we have enough nodes to meet target_n, then we prefer to - %% claim indices that are currently causing violations, and then - %% fallback to indices in linear order. The filtering steps below - %% will ensure no new violations are introduced. - Violated = lists:flatten(find_violations(Ring, - TargetN)), - Violated2 = [lists:keyfind(Idx, 2, AllIndices) - || Idx <- Violated], - Indices = Violated2 ++ AllIndices -- Violated2; - false -> - %% If we do not have enough nodes to meet target_n, then we prefer - %% claiming the same indices that would occur during a - %% re-diagonalization of the ring with target_n nodes, falling - %% back to linear offsets off these preferred indices when the - %% number of indices desired is less than the computed set. - Padding = lists:duplicate(TargetN, undefined), - Expanded = lists:sublist(Active ++ Padding, TargetN), - PreferredClaim = riak_core_claim:diagonal_stripe(Ring, - Expanded), - PreferredNth = [begin - {Nth, Idx} = lists:keyfind(Idx, 2, AllIndices), - Nth - end - || {Idx, Owner} <- PreferredClaim, Owner =:= Node], - Offsets = lists:seq(0, - RingSize div length(PreferredNth)), - AllNth = lists:sublist([(X + Y) rem RingSize - || Y <- Offsets, X <- PreferredNth], - RingSize), - Indices = [lists:keyfind(Nth, 1, AllIndices) - || Nth <- AllNth] + true -> + %% If we have enough nodes to meet target_n, then we prefer to + %% claim indices that are currently causing violations, and then + %% fallback to indices in linear order. The filtering steps below + %% will ensure no new violations are introduced. + Violated = lists:flatten(find_violations(Ring, + TargetN)), + Violated2 = [lists:keyfind(Idx, 2, AllIndices) + || Idx <- Violated], + Indices = Violated2 ++ AllIndices -- Violated2; + false -> + %% If we do not have enough nodes to meet target_n, then we prefer + %% claiming the same indices that would occur during a + %% re-diagonalization of the ring with target_n nodes, falling + %% back to linear offsets off these preferred indices when the + %% number of indices desired is less than the computed set. 
+ Padding = lists:duplicate(TargetN, undefined), + Expanded = lists:sublist(Active ++ Padding, TargetN), + PreferredClaim = riak_core_claim:diagonal_stripe(Ring, + Expanded), + PreferredNth = [begin + {Nth, Idx} = lists:keyfind(Idx, 2, AllIndices), + Nth + end + || {Idx, Owner} <- PreferredClaim, Owner =:= Node], + Offsets = lists:seq(0, + RingSize div length(PreferredNth)), + AllNth = lists:sublist([(X + Y) rem RingSize + || Y <- Offsets, X <- PreferredNth], + RingSize), + Indices = [lists:keyfind(Nth, 1, AllIndices) + || Nth <- AllNth] end, %% Filter out indices that conflict with the node's existing ownership Indices2 = prefilter_violations(Ring, - Node, - AllIndices, - Indices, - TargetN, - RingSize), + Node, + AllIndices, + Indices, + TargetN, + RingSize), %% Claim indices from the remaining candidate set Claim = select_indices(Owners, - Deltas, - Indices2, - TargetN, - RingSize), + Deltas, + Indices2, + TargetN, + RingSize), Claim2 = lists:sublist(Claim, Want), NewRing = lists:foldl(fun (Idx, Ring0) -> - riak_core_ring:transfer_node(Idx, Node, Ring0) - end, - Ring, - Claim2), + riak_core_ring:transfer_node(Idx, Node, Ring0) + end, + Ring, + Claim2), RingChanged = [] /= Claim2, RingMeetsTargetN = meets_target_n(NewRing, TargetN), case {RingChanged, EnoughNodes, RingMeetsTargetN} of - {false, _, _} -> - %% Unable to claim, fallback to re-diagonalization - sequential_claim(Ring, Node, TargetN); - {_, true, false} -> - %% Failed to meet target_n, fallback to re-diagonalization - sequential_claim(Ring, Node, TargetN); - _ -> NewRing + {false, _, _} -> + %% Unable to claim, fallback to re-diagonalization + sequential_claim(Ring, Node, TargetN); + {_, true, false} -> + %% Failed to meet target_n, fallback to re-diagonalization + sequential_claim(Ring, Node, TargetN); + _ -> NewRing end. %% @private for each node in owners return a tuple of owner and delta @@ -255,23 +255,23 @@ choose_claim_v2(Ring, Node, Params0) -> %% that many more partitions, a negative means the owner can lose that %% many paritions. -spec get_deltas(RingSize :: pos_integer(), - NodeCount :: pos_integer(), - Owners :: [{Index :: non_neg_integer(), node()}], - Counts :: [{node(), non_neg_integer()}]) -> Deltas :: - [{node(), - integer()}]. + NodeCount :: pos_integer(), + Owners :: [{Index :: non_neg_integer(), node()}], + Counts :: [{node(), non_neg_integer()}]) -> Deltas :: + [{node(), + integer()}]. get_deltas(RingSize, NodeCount, Owners, Counts) -> Avg = RingSize / NodeCount, %% the most any node should own Max = ceiling(RingSize / NodeCount), ActiveDeltas = [{Member, - Count, - normalise_delta(Avg - Count)} - || {Member, Count} <- Counts], + Count, + normalise_delta(Avg - Count)} + || {Member, Count} <- Counts], BalancedDeltas = rebalance_deltas(ActiveDeltas, - Max, - RingSize), + Max, + RingSize), add_default_deltas(Owners, BalancedDeltas, 0). %% @private a node can only claim whole partitions, but if RingSize @@ -293,55 +293,55 @@ normalise_delta(Delta) -> %% 6}, {n4, 8}, {n5,6} we rebalance the deltas so that select_indices %% doesn't leave some node not giving up enough partitions -spec rebalance_deltas([{node(), integer()}], - pos_integer(), pos_integer()) -> [{node(), integer()}]. + pos_integer(), pos_integer()) -> [{node(), integer()}]. 
rebalance_deltas(NodeDeltas, Max, RingSize) -> AppliedDeltas = [Own + Delta - || {_, Own, Delta} <- NodeDeltas], + || {_, Own, Delta} <- NodeDeltas], case lists:sum(AppliedDeltas) - RingSize of - 0 -> - [{Node, Delta} || {Node, _Cnt, Delta} <- NodeDeltas]; - N when N < 0 -> increase_keeps(NodeDeltas, N, Max, []) + 0 -> + [{Node, Delta} || {Node, _Cnt, Delta} <- NodeDeltas]; + N when N < 0 -> increase_keeps(NodeDeltas, N, Max, []) end. %% @private increases the delta for (some) nodes giving away %% partitions to the max they can keep -spec increase_keeps(Deltas :: [{node(), integer()}], - WantsError :: integer(), Max :: pos_integer(), - Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), - integer()}]. + WantsError :: integer(), Max :: pos_integer(), + Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), + integer()}]. increase_keeps(Rest, 0, _Max, Acc) -> [{Node, Delta} || {Node, _Own, Delta} - <- lists:usort(lists:append(Rest, Acc))]; + <- lists:usort(lists:append(Rest, Acc))]; increase_keeps([], N, Max, Acc) when N < 0 -> increase_takes(lists:reverse(Acc), N, Max, []); increase_keeps([{Node, Own, Delta} | Rest], N, Max, Acc) when Delta < 0 -> WouldOwn = Own + Delta, Additive = case WouldOwn + 1 =< Max of - true -> 1; - false -> 0 - end, + true -> 1; + false -> 0 + end, increase_keeps(Rest, - N + Additive, - Max, - [{Node, Own + Delta + Additive} | Acc]); + N + Additive, + Max, + [{Node, Own + Delta + Additive} | Acc]); increase_keeps([NodeDelta | Rest], N, Max, Acc) -> increase_keeps(Rest, N, Max, [NodeDelta | Acc]). %% @private increases the delta for (some) nodes taking partitions to the max %% they can ask for -spec increase_takes(Deltas :: [{node(), integer()}], - WantsError :: integer(), Max :: pos_integer(), - Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), - integer()}]. + WantsError :: integer(), Max :: pos_integer(), + Acc :: [{node(), integer()}]) -> Rebalanced :: [{node(), + integer()}]. increase_takes(Rest, 0, _Max, Acc) -> [{Node, Delta} || {Node, _Own, Delta} - <- lists:usort(lists:append(Rest, Acc))]; + <- lists:usort(lists:append(Rest, Acc))]; increase_takes([], N, _Max, Acc) when N < 0 -> [{Node, Delta} || {Node, _Own, Delta} <- lists:usort(Acc)]; @@ -349,53 +349,53 @@ increase_takes([{Node, Own, Delta} | Rest], N, Max, Acc) when Delta > 0 -> WouldOwn = Own + Delta, Additive = case WouldOwn + 1 =< Max of - true -> 1; - false -> 0 - end, + true -> 1; + false -> 0 + end, increase_takes(Rest, - N + Additive, - Max, - [{Node, Own, Delta + Additive} | Acc]); + N + Additive, + Max, + [{Node, Own, Delta + Additive} | Acc]); increase_takes([NodeDelta | Rest], N, Max, Acc) -> increase_takes(Rest, N, Max, [NodeDelta | Acc]). meets_target_n(Ring, TargetN) -> Owners = lists:keysort(1, - riak_core_ring:all_owners(Ring)), + riak_core_ring:all_owners(Ring)), meets_target_n(Owners, TargetN, 0, [], []). 
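The delta arithmetic above is easiest to see with numbers; a sketch with a hypothetical ownership spread (Ceiling mirrors the module's private ceiling/1 helper defined further down):

    Ceiling = fun (F) ->
                  T = trunc(F),
                  case F - T == 0 of true -> T; false -> T + 1 end
              end,
    %% 32 partitions over 5 nodes: fractional fair share and hard cap.
    Avg = 32 / 5,       %% 6.4
    Max = Ceiling(Avg), %% 7 -- the most any single node should own
    %% A node owning 9 has delta 6.4 - 9 = -2.6 (a giver); one owning 4
    %% has delta +2.4 (a taker). Only whole partitions can move, so the
    %% normalised deltas can sum short of the ring size; rebalance_deltas/3
    %% and the increase_* helpers above close exactly that gap.
    {Avg, Max}.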
meets_target_n([{Part, Node} | Rest], TargetN, Index, - First, Last) -> + First, Last) -> case lists:keytake(Node, 1, Last) of - {value, {Node, LastIndex, _}, NewLast} -> - if Index - LastIndex >= TargetN -> - %% node repeat respects TargetN - meets_target_n(Rest, - TargetN, - Index + 1, - First, - [{Node, Index, Part} | NewLast]); - true -> - %% violation of TargetN - false - end; - false -> - %% haven't seen this node yet - meets_target_n(Rest, - TargetN, - Index + 1, - [{Node, Index} | First], - [{Node, Index, Part} | Last]) + {value, {Node, LastIndex, _}, NewLast} -> + if Index - LastIndex >= TargetN -> + %% node repeat respects TargetN + meets_target_n(Rest, + TargetN, + Index + 1, + First, + [{Node, Index, Part} | NewLast]); + true -> + %% violation of TargetN + false + end; + false -> + %% haven't seen this node yet + meets_target_n(Rest, + TargetN, + Index + 1, + [{Node, Index} | First], + [{Node, Index, Part} | Last]) end; meets_target_n([], TargetN, Index, First, Last) -> %% start through end guarantees TargetN %% compute violations at wrap around, but don't fail %% because of them: handle during reclaim Violations = lists:filter(fun ({Node, L, _}) -> - {Node, F} = proplists:lookup(Node, First), - Index - L + F < TargetN - end, - Last), + {Node, F} = proplists:lookup(Node, First), + Index - L + F < TargetN + end, + Last), {true, [Part || {_, _, Part} <- Violations]}. %% Claim diversify tries to build a perfectly diverse ownership list that meets @@ -405,22 +405,22 @@ meets_target_n([], TargetN, Index, First, Last) -> %% node is added and uses it to drive the selection of the next nodes. claim_diversify(Wants, Owners, Params) -> TN = proplists:get_value(target_n_val, - Params, - ?DEF_TARGET_N), + Params, + ?DEF_TARGET_N), Q = length(Owners), Claiming = [N || {N, W} <- Wants, W > 0], {ok, NewOwners, _AM} = - riak_core_claim_util:construct(riak_core_claim_util:gen_complete_len(Q), - Claiming, - TN), + riak_core_claim_util:construct(riak_core_claim_util:gen_complete_len(Q), + Claiming, + TN), {NewOwners, [diversified]}. %% Claim nodes in seq a,b,c,a,b,c trying to handle the wraparound %% case to meet target N claim_diagonal(Wants, Owners, Params) -> TN = proplists:get_value(target_n_val, - Params, - ?DEF_TARGET_N), + Params, + ?DEF_TARGET_N), Claiming = lists:sort([N || {N, W} <- Wants, W > 0]), S = length(Claiming), Q = length(Owners), @@ -430,10 +430,10 @@ claim_diagonal(Wants, Owners, Params) -> %% are available. Tail = Q - Reps * S, Last = case S >= TN + Tail of - true -> % If number wanted can be filled excluding first TN nodes - lists:sublist(lists:nthtail(TN - Tail, Claiming), Tail); - _ -> lists:sublist(Claiming, Tail) - end, + true -> % If number wanted can be filled excluding first TN nodes + lists:sublist(lists:nthtail(TN - Tail, Claiming), Tail); + _ -> lists:sublist(Claiming, Tail) + end, {lists:flatten([lists:duplicate(Reps, Claiming), Last]), [diagonalized]}. @@ -443,41 +443,41 @@ claim_diagonal(Wants, Owners, Params) -> %% attempts to eliminate tail violations (for example a ring that %% starts/ends n1 | n2 | ...| n3 | n4 | n1) -spec sequential_claim(riak_core_ring:riak_core_ring(), - node(), integer()) -> riak_core_ring:riak_core_ring(). + node(), integer()) -> riak_core_ring:riak_core_ring(). 
sequential_claim(Ring, Node, TargetN) -> Nodes = lists:usort([Node - | riak_core_ring:claiming_members(Ring)]), + | riak_core_ring:claiming_members(Ring)]), NodeCount = length(Nodes), RingSize = riak_core_ring:num_partitions(Ring), Overhang = RingSize rem NodeCount, HasTailViolation = Overhang > 0 andalso - Overhang < TargetN, + Overhang < TargetN, Shortfall = TargetN - Overhang, CompleteSequences = RingSize div NodeCount, MaxFetchesPerSeq = NodeCount - TargetN, MinFetchesPerSeq = ceiling(Shortfall / - CompleteSequences), + CompleteSequences), CanSolveViolation = CompleteSequences * MaxFetchesPerSeq - >= Shortfall, + >= Shortfall, Zipped = case HasTailViolation andalso CanSolveViolation - of - true -> - Partitions = lists:sort([I - || {I, _} - <- riak_core_ring:all_owners(Ring)]), - Nodelist = solve_tail_violations(RingSize, - Nodes, - Shortfall, - MinFetchesPerSeq), - lists:zip(Partitions, lists:flatten(Nodelist)); - false -> diagonal_stripe(Ring, Nodes) - end, + of + true -> + Partitions = lists:sort([I + || {I, _} + <- riak_core_ring:all_owners(Ring)]), + Nodelist = solve_tail_violations(RingSize, + Nodes, + Shortfall, + MinFetchesPerSeq), + lists:zip(Partitions, lists:flatten(Nodelist)); + false -> diagonal_stripe(Ring, Nodes) + end, lists:foldl(fun ({P, N}, Acc) -> - riak_core_ring:transfer_node(P, N, Acc) - end, - Ring, - Zipped). + riak_core_ring:transfer_node(P, N, Acc) + end, + Ring, + Zipped). %% @private every module has a ceiling function -spec ceiling(float()) -> integer(). @@ -485,104 +485,104 @@ sequential_claim(Ring, Node, TargetN) -> ceiling(F) -> T = trunc(F), case F - T == 0 of - true -> T; - false -> T + 1 + true -> T; + false -> T + 1 end. %% @private rem_fill increase the tail so that there is no wrap around %% preflist violation, by taking a `Shortfall' number nodes from %% earlier in the preflist -spec solve_tail_violations(integer(), [node()], - integer(), integer()) -> [node()]. + integer(), integer()) -> [node()]. solve_tail_violations(RingSize, Nodes, Shortfall, - MinFetchesPerSeq) -> + MinFetchesPerSeq) -> StartingNode = RingSize rem length(Nodes) + 1, build_nodelist(RingSize, - Nodes, - Shortfall, - StartingNode, - MinFetchesPerSeq, - []). + Nodes, + Shortfall, + StartingNode, + MinFetchesPerSeq, + []). %% @private build the node list by building tail to satisfy TargetN, then removing %% the added nodes from earlier segments -spec build_nodelist(integer(), [node()], integer(), - integer(), integer(), [node()]) -> [node()]. + integer(), integer(), [node()]) -> [node()]. 
build_nodelist(RingSize, Nodes, _Shortfall = 0, - _NodeCounter, _MinFetchesPerSeq, Acc) -> + _NodeCounter, _MinFetchesPerSeq, Acc) -> %% Finished shuffling, backfill if required ShuffledRing = lists:flatten(Acc), backfill_ring(RingSize, - Nodes, - (RingSize - length(ShuffledRing)) div length(Nodes), - Acc); + Nodes, + (RingSize - length(ShuffledRing)) div length(Nodes), + Acc); build_nodelist(RingSize, Nodes, Shortfall, NodeCounter, - MinFetchesPerSeq, _Acc = []) -> + MinFetchesPerSeq, _Acc = []) -> %% Build the tail with sufficient nodes to satisfy TargetN NodeCount = length(Nodes), LastSegLength = RingSize rem NodeCount + Shortfall, NewSeq = lists:sublist(Nodes, 1, LastSegLength), build_nodelist(RingSize, - Nodes, - Shortfall, - NodeCounter, - MinFetchesPerSeq, - NewSeq); + Nodes, + Shortfall, + NodeCounter, + MinFetchesPerSeq, + NewSeq); build_nodelist(RingSize, Nodes, Shortfall, NodeCounter, - MinFetchesPerSeq, Acc) -> + MinFetchesPerSeq, Acc) -> %% Build rest of list, subtracting minimum of MinFetchesPerSeq, Shortfall %% or (NodeCount - NodeCounter) each time NodeCount = length(Nodes), NodesToRemove = min(min(MinFetchesPerSeq, Shortfall), - NodeCount - NodeCounter), + NodeCount - NodeCounter), RemovalList = lists:sublist(Nodes, - NodeCounter, - NodesToRemove), + NodeCounter, + NodesToRemove), NewSeq = lists:subtract(Nodes, RemovalList), NewNodeCounter = NodeCounter + NodesToRemove, build_nodelist(RingSize, - Nodes, - Shortfall - NodesToRemove, - NewNodeCounter, - MinFetchesPerSeq, - [NewSeq | Acc]). + Nodes, + Shortfall - NodesToRemove, + NewNodeCounter, + MinFetchesPerSeq, + [NewSeq | Acc]). %% @private Backfill the ring with full sequences -spec backfill_ring(integer(), [node()], integer(), - [node()]) -> [node()]. + [node()]) -> [node()]. backfill_ring(_RingSize, _Nodes, _Remaining = 0, Acc) -> Acc; backfill_ring(RingSize, Nodes, Remaining, Acc) -> backfill_ring(RingSize, - Nodes, - Remaining - 1, - [Nodes | Acc]). + Nodes, + Remaining - 1, + [Nodes | Acc]). claim_rebalance_n(Ring, Node) -> Nodes = lists:usort([Node - | riak_core_ring:claiming_members(Ring)]), + | riak_core_ring:claiming_members(Ring)]), Zipped = diagonal_stripe(Ring, Nodes), lists:foldl(fun ({P, N}, Acc) -> - riak_core_ring:transfer_node(P, N, Acc) - end, - Ring, - Zipped). + riak_core_ring:transfer_node(P, N, Acc) + end, + Ring, + Zipped). diagonal_stripe(Ring, Nodes) -> %% diagonal stripes guarantee most disperse data Partitions = lists:sort([I - || {I, _} <- riak_core_ring:all_owners(Ring)]), + || {I, _} <- riak_core_ring:all_owners(Ring)]), Zipped = lists:zip(Partitions, - lists:sublist(lists:flatten(lists:duplicate(1 + - length(Partitions) - div - length(Nodes), - Nodes)), - 1, - length(Partitions))), + lists:sublist(lists:flatten(lists:duplicate(1 + + length(Partitions) + div + length(Nodes), + Nodes)), + 1, + length(Partitions))), Zipped. random_choose_claim(Ring) -> @@ -593,8 +593,8 @@ random_choose_claim(Ring, Node) -> random_choose_claim(Ring, Node, _Params) -> riak_core_ring:transfer_node(riak_core_ring:random_other_index(Ring), - Node, - Ring). + Node, + Ring). %% @spec never_wants_claim(riak_core_ring()) -> no %% @doc For use by nodes that should not claim any partitions. 
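The striping behind claim_rebalance_n/2 and diagonal_stripe/2 above reduces to round-robin assignment; a self-contained sketch with hypothetical nodes:

    Nodes = [a, b, c],
    NumParts = 8,
    Striped = lists:sublist(
                lists:flatten(
                  lists:duplicate(1 + NumParts div length(Nodes), Nodes)),
                1, NumParts),
    %% Striped =:= [a, b, c, a, b, c, a, b]; the short a, b tail is the
    %% overhang that sequential_claim/3 repairs when it would violate
    %% target_n.
    Striped.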
@@ -616,36 +616,36 @@ find_violations(Ring, TargetN) -> Owners2 = Owners ++ Suffix, %% Use a sliding window to determine violations {Bad, _} = lists:foldl(fun (P = {Idx, Owner}, - {Out, Window}) -> - Window2 = lists:sublist([P | Window], - TargetN - 1), - case lists:keyfind(Owner, 2, Window) of - {PrevIdx, Owner} -> - {[[PrevIdx, Idx] | Out], Window2}; - false -> {Out, Window2} - end - end, - {[], []}, - Owners2), + {Out, Window}) -> + Window2 = lists:sublist([P | Window], + TargetN - 1), + case lists:keyfind(Owner, 2, Window) of + {PrevIdx, Owner} -> + {[[PrevIdx, Idx] | Out], Window2}; + false -> {Out, Window2} + end + end, + {[], []}, + Owners2), lists:reverse(Bad). %% @private %% %% @doc Counts up the number of partitions owned by each node. -spec get_counts([node()], - [{integer(), _}]) -> [{node(), non_neg_integer()}]. + [{integer(), _}]) -> [{node(), non_neg_integer()}]. get_counts(Nodes, Ring) -> Empty = [{Node, 0} || Node <- Nodes], Counts = lists:foldl(fun ({_Idx, Node}, Counts) -> - case lists:member(Node, Nodes) of - true -> - dict:update_counter(Node, 1, Counts); - false -> Counts - end - end, - dict:from_list(Empty), - Ring), + case lists:member(Node, Nodes) of + true -> + dict:update_counter(Node, 1, Counts); + false -> Counts + end + end, + dict:from_list(Empty), + Ring), dict:to_list(Counts). %% @private @@ -660,16 +660,16 @@ add_default_deltas(IdxOwners, Deltas, Default) -> %% @doc Filter out candidate indices that would violate target_n given %% a node's current partition ownership. prefilter_violations(Ring, Node, AllIndices, Indices, - TargetN, RingSize) -> + TargetN, RingSize) -> CurrentIndices = riak_core_ring:indices(Ring, Node), CurrentNth = [lists:keyfind(Idx, 2, AllIndices) - || Idx <- CurrentIndices], + || Idx <- CurrentIndices], [{Nth, Idx} || {Nth, Idx} <- Indices, - lists:all(fun ({CNth, _}) -> - spaced_by_n(CNth, Nth, TargetN, RingSize) - end, - CurrentNth)]. + lists:all(fun ({CNth, _}) -> + spaced_by_n(CNth, Nth, TargetN, RingSize) + end, + CurrentNth)]. %% @private %% @@ -684,10 +684,10 @@ prefilter_violations(Ring, Node, AllIndices, Indices, %% the desired ownership is 3, then we try to claim at most 2 partitions %% from A. select_indices(_Owners, _Deltas, [], _TargetN, - _RingSize) -> + _RingSize) -> []; select_indices(Owners, Deltas, Indices, TargetN, - RingSize) -> + RingSize) -> OwnerDT = dict:from_list(Owners), {FirstNth, _} = hd(Indices), %% The `First' symbol indicates whether or not this is the first @@ -697,34 +697,34 @@ select_indices(Owners, Deltas, Indices, TargetN, %% is willing to part with. It's the subsequent partitions %% claimed by this node that must not break the target_n invariant. 
{Claim, _, _, _} = lists:foldl(fun ({Nth, Idx}, - {Out, LastNth, DeltaDT, First}) -> - Owner = dict:fetch(Idx, OwnerDT), - Delta = dict:fetch(Owner, DeltaDT), - MeetsTN = spaced_by_n(LastNth, - Nth, - TargetN, - RingSize), - case (Delta < 0) and - (First or MeetsTN) - of - true -> - NextDeltaDT = - dict:update_counter(Owner, - 1, - DeltaDT), - {[Idx | Out], - Nth, - NextDeltaDT, - false}; - false -> - {Out, - LastNth, - DeltaDT, - First} - end - end, - {[], FirstNth, dict:from_list(Deltas), true}, - Indices), + {Out, LastNth, DeltaDT, First}) -> + Owner = dict:fetch(Idx, OwnerDT), + Delta = dict:fetch(Owner, DeltaDT), + MeetsTN = spaced_by_n(LastNth, + Nth, + TargetN, + RingSize), + case (Delta < 0) and + (First or MeetsTN) + of + true -> + NextDeltaDT = + dict:update_counter(Owner, + 1, + DeltaDT), + {[Idx | Out], + Nth, + NextDeltaDT, + false}; + false -> + {Out, + LastNth, + DeltaDT, + First} + end + end, + {[], FirstNth, dict:from_list(Deltas), true}, + Indices), lists:reverse(Claim). %% @private @@ -732,12 +732,12 @@ select_indices(Owners, Deltas, Indices, TargetN, %% @doc Determine if two positions in the ring meet target_n spacing. spaced_by_n(NthA, NthB, TargetN, RingSize) -> case NthA > NthB of - true -> - NFwd = NthA - NthB, - NBack = NthB - NthA + RingSize; - false -> - NFwd = NthA - NthB + RingSize, - NBack = NthB - NthA + true -> + NFwd = NthA - NthB, + NBack = NthB - NthA + RingSize; + false -> + NFwd = NthA - NthB + RingSize, + NBack = NthB - NthA end, (NFwd >= TargetN) and (NBack >= TargetN). @@ -745,8 +745,8 @@ spaced_by_n(NthA, NthB, TargetN, RingSize) -> %% overloaded by (negative) compared to what it owns. wants_owns_diff(Wants, Owns) -> [case lists:keyfind(N, 1, Owns) of - {N, O} -> {N, W - O}; - false -> {N, W} + {N, O} -> {N, W - O}; + false -> {N, W} end || {N, W} <- Wants]. @@ -754,11 +754,11 @@ wants_owns_diff(Wants, Owns) -> %% considered balanced wants(Ring) -> Active = - lists:sort(riak_core_ring:claiming_members(Ring)), + lists:sort(riak_core_ring:claiming_members(Ring)), Inactive = riak_core_ring:all_members(Ring) -- Active, Q = riak_core_ring:num_partitions(Ring), ActiveWants = lists:zip(Active, - wants_counts(length(Active), Q)), + wants_counts(length(Active), Q)), InactiveWants = [{N, 0} || N <- Inactive], lists:sort(ActiveWants ++ InactiveWants). @@ -768,19 +768,19 @@ wants(Ring) -> wants_counts(S, Q) -> Max = roundup(Q / S), case S * Max - Q of - 0 -> lists:duplicate(S, Max); - X -> - lists:duplicate(X, Max - 1) ++ - lists:duplicate(S - X, Max) + 0 -> lists:duplicate(S, Max); + X -> + lists:duplicate(X, Max - 1) ++ + lists:duplicate(S - X, Max) end. %% Round up to next whole integer - ceil roundup(I) when I >= 0 -> T = erlang:trunc(I), case I - T of - Neg when Neg < 0 -> T; - Pos when Pos > 0 -> T + 1; - _ -> T + Neg when Neg < 0 -> T; + Pos when Pos > 0 -> T + 1; + _ -> T end. %% =================================================================== @@ -803,18 +803,18 @@ wants_claim_test() -> %% @private console helper function to return node lists for claiming %% partitions -spec gen_diag(pos_integer(), pos_integer()) -> [Node :: - atom()]. + atom()]. 
gen_diag(RingSize, NodeCount) -> Nodes = [list_to_atom(lists:concat(["n_", N])) - || N <- lists:seq(1, NodeCount)], + || N <- lists:seq(1, NodeCount)], {HeadNode, RestNodes} = {hd(Nodes), tl(Nodes)}, R0 = riak_core_ring:fresh(RingSize, HeadNode), RAdded = lists:foldl(fun (Node, Racc) -> - riak_core_ring:add_member(HeadNode, Racc, Node) - end, - R0, - RestNodes), + riak_core_ring:add_member(HeadNode, Racc, Node) + end, + R0, + RestNodes), Diag = diagonal_stripe(RAdded, Nodes), {_P, N} = lists:unzip(Diag), N. @@ -828,6 +828,6 @@ has_violations(Diag) -> NC = length(lists:usort(Diag)), Overhang = RS rem NC, Overhang > 0 andalso - Overhang < 4. %% hardcoded target n of 4 + Overhang < 4. %% hardcoded target n of 4 -endif. diff --git a/src/riak_core_claim_util.erl b/src/riak_core_claim_util.erl index a209223e0..d6f113699 100644 --- a/src/riak_core_claim_util.erl +++ b/src/riak_core_claim_util.erl @@ -25,52 +25,52 @@ -module(riak_core_claim_util). -export([ring_stats/2, - violation_stats/2, - balance_stats/1, - diversity_stats/2]). + violation_stats/2, + balance_stats/1, + diversity_stats/2]). -export([node_load/3, - print_analysis/1, - print_analysis/2, - sort_by_down_fbmax/1]). + print_analysis/1, + print_analysis/2, + sort_by_down_fbmax/1]). -export([adjacency_matrix/1, - summarize_am/1, - adjacency_matrix_from_al/1, - adjacency_list/1, - fixup_dam/2, - score_am/2, - count/2, - rms/1]). + summarize_am/1, + adjacency_matrix_from_al/1, + adjacency_list/1, + fixup_dam/2, + score_am/2, + count/2, + rms/1]). -export([make_ring/1, - gen_complete_diverse/1, - gen_complete_len/1, - construct/3]). + gen_complete_diverse/1, + gen_complete_len/1, + construct/3]). -export([num_perms/2, - num_combs/2, - fac/1, - perm_gen/1, - down_combos/2, - rotations/1, - substitutions/2]). + num_combs/2, + fac/1, + perm_gen/1, + down_combos/2, + rotations/1, + substitutions/2]). -record(load, - {node, % Node name - num_pri, % Number of primaries - num_fb, % Number of fallbacks - norm_fb}). % Normalised fallbacks - ratio of how many there are + {node, % Node name + num_pri, % Number of primaries + num_fb, % Number of fallbacks + norm_fb}). % Normalised fallbacks - ratio of how many there are -record(failure, - {down = [], % List of downed nodes - load = [], % List of #load{} records per up node - fbmin, - fbmean, - fbstddev, - fb10, - fb90, - fbmax}). + {down = [], % List of downed nodes + load = [], % List of #load{} records per up node + fbmin, + fbmean, + fbstddev, + fb10, + fb90, + fbmax}). %% ------------------------------------------------------------------- %% Ring statistics @@ -78,7 +78,7 @@ ring_stats(R, TN) -> violation_stats(R, TN) ++ - balance_stats(R) ++ diversity_stats(R, TN). + balance_stats(R) ++ diversity_stats(R, TN). %% TargetN violations violation_stats(R, TN) -> @@ -90,15 +90,15 @@ balance_stats(R) -> M = length(riak_core_ring:claiming_members(R)), AllOwners = riak_core_ring:all_owners(R), Counts = lists:foldl(fun ({_, N}, A) -> - orddict:update_counter(N, 1, A) - end, - [], - AllOwners), + orddict:update_counter(N, 1, A) + end, + [], + AllOwners), Avg = Q / M, Balance = lists:sum([begin - Delta = trunc(Avg - Count), Delta * Delta - end - || {_, Count} <- Counts]), + Delta = trunc(Avg - Count), Delta * Delta + end + || {_, Count} <- Counts]), [{balance, Balance}, {ownership, Counts}]. 
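A worked instance of the balance statistic above, with hypothetical counts (Q = 16 partitions, M = 3 members, so Avg = 16/3):

    Counts = [6, 5, 5],
    Avg = 16 / 3,
    Balance = lists:sum([begin D = trunc(Avg - C), D * D end
                         || C <- Counts]),
    %% trunc(-0.67) = 0 and trunc(0.33) = 0, so Balance =:= 0 here.
    %% A skewed 8/4/4 split gives trunc(-2.67) = -2 and trunc(1.33) = 1,
    %% i.e. Balance = 4 + 1 + 1 = 6: larger means less balanced.
    Balance.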
diversity_stats(R, TN) -> @@ -106,7 +106,7 @@ diversity_stats(R, TN) -> AM = adjacency_matrix(Owners), try [{diversity, riak_core_claim_util:score_am(AM, TN)}] catch - _:empty_list -> [{diversity, undefined}] + _:empty_list -> [{diversity, undefined}] end. %% ------------------------------------------------------------------- @@ -123,27 +123,27 @@ node_load(R, NVal, DownNodes) -> VL = vnode_load(R, NVal, DownNodes), TotFBs = lists:sum([NumFBs || {_N, _, NumFBs} <- VL]), [#load{node = N, num_pri = NumPris, num_fb = NumFBs, - norm_fb = norm_fb(NumFBs, TotFBs)} + norm_fb = norm_fb(NumFBs, TotFBs)} || {N, NumPris, NumFBs} <- VL]. vnode_load(R, NVal, DownNodes) -> UpNodes = riak_core_ring:all_members(R) -- DownNodes, Keys = [<<(I + 1):160/integer>> - || {I, _Owner} <- riak_core_ring:all_owners(R)], + || {I, _Owner} <- riak_core_ring:all_owners(R)], %% NValParts = Nval * riak_core_ring:num_partitions(R), AllPLs = [riak_core_apl:get_apl_ann(Key, - NVal, - R, - UpNodes) - || Key <- Keys], + NVal, + R, + UpNodes) + || Key <- Keys], FlatPLs = lists:flatten(AllPLs), [begin - Pris = lists:usort([Idx - || {{Idx, PN}, primary} <- FlatPLs, PN == N]), - FBs = lists:usort([Idx - || {{Idx, FN}, fallback} <- FlatPLs, FN == N]) - -- Pris, - {N, length(Pris), length(FBs)} + Pris = lists:usort([Idx + || {{Idx, PN}, primary} <- FlatPLs, PN == N]), + FBs = lists:usort([Idx + || {{Idx, FN}, fallback} <- FlatPLs, FN == N]) + -- Pris, + {N, length(Pris), length(FBs)} end || N <- UpNodes]. @@ -164,34 +164,34 @@ print_analysis(LoadAnalysis) -> print_analysis(IoDev, LoadAnalysis) -> io:format(IoDev, - " Min Mean/ SD 10th 90th Max DownNodes/" - "Worst\n", - []), + " Min Mean/ SD 10th 90th Max DownNodes/" + "Worst\n", + []), print_analysis1(IoDev, LoadAnalysis). %% @private print_analysis1(_IoDev, []) -> ok; print_analysis1(IoDev, - [#failure{down = Down, load = Load, fbmin = FBMin, - fbmean = FBMean, fbstddev = FBStdDev, fb10 = FB10, - fb90 = FB90, fbmax = FBMax} - | Rest]) -> + [#failure{down = Down, load = Load, fbmin = FBMin, + fbmean = FBMean, fbstddev = FBStdDev, fb10 = FB10, + fb90 = FB90, fbmax = FBMax} + | Rest]) -> %% Find the 3 worst FBmax Worst = [{N, NumFB} - || #load{node = N, num_fb = NumFB} - <- lists:sublist(lists:reverse(lists:keysort(#load.num_fb, - Load)), - 3)], + || #load{node = N, num_fb = NumFB} + <- lists:sublist(lists:reverse(lists:keysort(#load.num_fb, + Load)), + 3)], io:format(IoDev, - "~4b ~4b/~4b ~4b ~4b ~4b ~w/~w\n", - [FBMin, - toint(FBMean), - toint(FBStdDev), - toint(FB10), - toint(FB90), - FBMax, - Down, - Worst]), + "~4b ~4b/~4b ~4b ~4b ~4b ~w/~w\n", + [FBMin, + toint(FBMean), + toint(FBStdDev), + toint(FB10), + toint(FB90), + FBMax, + Down, + Worst]), print_analysis1(IoDev, Rest). %% @private round to nearest int @@ -201,20 +201,20 @@ toint(X) -> X. 
%% Order failures by number of nodes down ascending, then fbmax, then down list sort_by_down_fbmax(Failures) -> Cmp = fun (#failure{down = DownA, fbmax = FBMaxA}, - #failure{down = DownB, fbmax = FBMaxB}) -> - %% length(DownA) =< length(DownB) andalso - %% FBMaxA >= FBMaxB andalso - %% DownA =< DownB - case {length(DownA), length(DownB)} of - {DownALen, DownBLen} when DownALen < DownBLen -> true; - {DownALen, DownBLen} when DownALen > DownBLen -> false; - _ -> - if FBMaxA > FBMaxB -> true; - FBMaxA < FBMaxB -> false; - true -> DownA >= DownB - end - end - end, + #failure{down = DownB, fbmax = FBMaxB}) -> + %% length(DownA) =< length(DownB) andalso + %% FBMaxA >= FBMaxB andalso + %% DownA =< DownB + case {length(DownA), length(DownB)} of + {DownALen, DownBLen} when DownALen < DownBLen -> true; + {DownALen, DownBLen} when DownALen > DownBLen -> false; + _ -> + if FBMaxA > FBMaxB -> true; + FBMaxA < FBMaxB -> false; + true -> DownA >= DownB + end + end + end, lists:sort(Cmp, Failures). %% ------------------------------------------------------------------- @@ -274,12 +274,12 @@ adjacency_matrix(Owners) -> M = lists:usort(Owners), Tid = ets:new(am, [private, duplicate_bag]), try adjacency_matrix_populate(Tid, - M, - Owners, - Owners ++ Owners), - adjacency_matrix_result(Tid, ets:first(Tid), []) + M, + Owners, + Owners ++ Owners), + adjacency_matrix_result(Tid, ets:first(Tid), []) after - ets:delete(Tid) + ets:delete(Tid) end. %% @private extract the adjacency matrix from the duplicate bag @@ -289,41 +289,41 @@ adjacency_matrix_result(Tid, NodePair, Acc) -> ALs = ets:lookup(Tid, NodePair), Ds = [D || {_, D} <- ALs], adjacency_matrix_result(Tid, - ets:next(Tid, NodePair), - [{NodePair, Ds} | Acc]). + ets:next(Tid, NodePair), + [{NodePair, Ds} | Acc]). adjacency_matrix_populate(_Tid, _M, [], _OwnersCycle) -> ok; adjacency_matrix_populate(Tid, M, [Node | Owners], - [Node | OwnersCycle]) -> + [Node | OwnersCycle]) -> adjacency_matrix_add_dist(Tid, - Node, - M -- [Node], - OwnersCycle, - 0), + Node, + M -- [Node], + OwnersCycle, + 0), adjacency_matrix_populate(Tid, M, Owners, OwnersCycle). %% @private Compute the distance from node to the next of M nodes adjacency_matrix_add_dist(_Tid, _Node, _M, [], _) -> ok; adjacency_matrix_add_dist(_Tid, _Node, [], _OwnersCycle, - _) -> + _) -> ok; adjacency_matrix_add_dist(Tid, Node, M, - [OtherNode | OwnersCycle], Distance) -> + [OtherNode | OwnersCycle], Distance) -> case lists:member(OtherNode, M) of - true -> % haven't seen this node yet, add distance - ets:insert(Tid, {{Node, OtherNode}, Distance}), - adjacency_matrix_add_dist(Tid, - Node, - M -- [OtherNode], - OwnersCycle, - Distance + 1); - _ -> % already passed OtherNode - adjacency_matrix_add_dist(Tid, - Node, - M, - OwnersCycle, - Distance + 1) + true -> % haven't seen this node yet, add distance + ets:insert(Tid, {{Node, OtherNode}, Distance}), + adjacency_matrix_add_dist(Tid, + Node, + M -- [OtherNode], + OwnersCycle, + Distance + 1); + _ -> % already passed OtherNode + adjacency_matrix_add_dist(Tid, + Node, + M, + OwnersCycle, + Distance + 1) end. 
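
For intuition: the entries inserted above record, for each ordered node pair,
how many steps separate an occurrence of the first node from the next
occurrence of the second when walking the ownership list as a cycle. A small
check one could run against this module (node atoms are placeholders):

    %% For [n1, n2, n3, n1, n2, n3], each n1 position sees n2 after 0
    %% further steps and n3 after 1, so the result contains entries such
    %% as {{n1, n2}, [0, 0]} and {{n1, n3}, [1, 1]}. Repeated small
    %% distances for a pair indicate poor placement diversity.
    demo_adjacency() -> adjacency_matrix([n1, n2, n3, n1, n2, n3]).
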
%% Make adjacency summary by working out counts of each distance @@ -337,11 +337,11 @@ count_distances([]) -> []; count_distances(Ds) -> MaxD = lists:max(Ds), PosCounts = lists:foldl(fun (D, Acc) -> - orddict:update_counter(D, 1, Acc) - end, - orddict:from_list([{D, 0} - || D <- lists:seq(0, MaxD)]), - Ds), + orddict:update_counter(D, 1, Acc) + end, + orddict:from_list([{D, 0} + || D <- lists:seq(0, MaxD)]), + Ds), %% PosCounts orddict must be initialized to make sure no distances %% are missing in the list comprehension [Count || {_Pos, Count} <- PosCounts]. @@ -350,10 +350,10 @@ count_distances(Ds) -> adjacency_matrix_from_al(AL) -> %% Make a count by distance of N1,N2 dict:to_list(lists:foldl(fun ({NPair, D}, Acc) -> - dict:append_list(NPair, [D], Acc) - end, - dict:new(), - AL)). + dict:append_list(NPair, [D], Acc) + end, + dict:new(), + AL)). %% Create a pair of node names and a list of distances adjacency_list(Owners) -> @@ -362,46 +362,46 @@ adjacency_list(Owners) -> adjacency_list(_M, [], _OwnersCycle, Acc) -> Acc; adjacency_list(M, [Node | Owners], [Node | OwnersCycle], - Acc) -> + Acc) -> adjacency_list(M, - Owners, - OwnersCycle, - distances(Node, M -- [Node], OwnersCycle, 0, Acc)). + Owners, + OwnersCycle, + distances(Node, M -- [Node], OwnersCycle, 0, Acc)). %% Compute the distance from node to the next of M nodes distances(_Node, _M, [], _, Distances) -> Distances; distances(_Node, [], _OwnersCycle, _, Distances) -> Distances; distances(Node, M, [OtherNode | OwnersCycle], Distance, - Distances) -> + Distances) -> case lists:member(OtherNode, M) of - true -> % haven't seen this node yet, add distance - distances(Node, - M -- [OtherNode], - OwnersCycle, - Distance + 1, - [{{Node, OtherNode}, Distance} | Distances]); - _ -> % already passed OtherNode - distances(Node, M, OwnersCycle, Distance + 1, Distances) + true -> % haven't seen this node yet, add distance + distances(Node, + M -- [OtherNode], + OwnersCycle, + Distance + 1, + [{{Node, OtherNode}, Distance} | Distances]); + _ -> % already passed OtherNode + distances(Node, M, OwnersCycle, Distance + 1, Distances) end. %% For each pair, get the count of distances < NVal score_am([], _NVal) -> undefined; score_am(AM, NVal) -> Cs = lists:flatten([begin - [C || {D, C} <- count(Ds, NVal), D < NVal] - end - || {_Pair, Ds} <- AM]), + [C || {D, C} <- count(Ds, NVal), D < NVal] + end + || {_Pair, Ds} <- AM]), rms(Cs). count(L, NVal) -> Acc0 = orddict:from_list([{D, 0} - || D <- lists:seq(0, NVal - 1)]), + || D <- lists:seq(0, NVal - 1)]), lists:foldl(fun (E, A) -> - orddict:update_counter(E, 1, A) - end, - Acc0, - L). + orddict:update_counter(E, 1, A) + end, + Acc0, + L). rms([]) -> throw(empty_list); rms(L) -> @@ -418,25 +418,25 @@ make_ring(Nodes) -> Idxs = [I || {I, _} <- riak_core_ring:all_owners(R0)], NewOwners = lists:zip(Idxs, Nodes), R1 = lists:foldl(fun (N, R) -> - riak_core_ring:add_member(hd(Nodes), R, N) - end, - R0, - Nodes), + riak_core_ring:add_member(hd(Nodes), R, N) + end, + R0, + Nodes), lists:foldl(fun ({I, N}, R) -> - riak_core_ring:transfer_node(I, N, R) - end, - R1, - NewOwners). + riak_core_ring:transfer_node(I, N, R) + end, + R1, + NewOwners). 
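
make_ring/1 above is handy for scoring hand-crafted layouts end to end. A
sketch of that workflow (demo_ring_stats/0 and the node atoms are
illustrative; the owner list must match the ring size, which has to be a
power of two):

    %% Build an 8-partition ring owned diagonally by three nodes and
    %% score it at a target-n of 3 using ring_stats/2 from this module.
    demo_ring_stats() ->
        Nodes = [nA, nB, nC, nA, nB, nC, nA, nB],
        ring_stats(make_ring(Nodes), 3).
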
%% Generate a completion test function that makes sure all required
%% distances are created
gen_complete_diverse(RequiredDs) ->
    fun (Owners, DAM) ->
-	    OwnersLen = length(Owners),
-	    NextPow2 = next_pow2(OwnersLen),
-	    {met_required(Owners, DAM, RequiredDs) andalso
-	       OwnersLen == NextPow2,
-	     NextPow2}
+            OwnersLen = length(Owners),
+            NextPow2 = next_pow2(OwnersLen),
+            {met_required(Owners, DAM, RequiredDs) andalso
+                 OwnersLen == NextPow2,
+             NextPow2}
    end.

%% Generate until a fixed length has been hit
@@ -452,47 +452,47 @@ construct(Complete, M, NVal) ->
 %% Make an empty adjacency matrix for all pairs of members
 empty_adjacency_matrix(M) ->
     lists:foldl(fun (Pair, AM0) ->
-			dict:append_list(Pair, [], AM0)
-		end,
-		dict:new(),
-		[{F, T} || F <- M, T <- M, F /= T]).
+                        dict:append_list(Pair, [], AM0)
+                end,
+                dict:new(),
+                [{F, T} || F <- M, T <- M, F /= T]).

 construct(Complete, M, Owners, DAM, NVal) ->
     %% Work out which pairs do not have the requiredDs
     case Complete(Owners, DAM) of
-      {true, _DesiredLen} -> {ok, Owners, DAM};
-      {false, DesiredLen} ->
-	  %% Easy ones - restrict the eligible list to not include the N-1
-	  %% previous nodes. If within NVal-1 of possibly closing the ring
-	  %% then restrict in that direction as well.
-	  Eligible0 = M -- lists:sublist(Owners, NVal - 1),
-	  Eligible = case DesiredLen - length(Owners) of
-		       Left when Left >= NVal ->
-			   Eligible0; % At least Nval lest, no restriction
-		       Left ->
-			   Eligible0 --
-			     lists:sublist(lists:reverse(Owners),
-					   NVal - Left)
-		     end,
-	  case Eligible of
-	    [] ->
-		%% No eligible nodes - not enough to meet NVal, use any node
-		logger:debug("construct -- unable to construct without "
-			     "violating NVal"),
-		{Owners1, DAM1} = prepend_next_owner(M,
-						     M,
-						     Owners,
-						     DAM,
-						     NVal),
-		construct(Complete, M, Owners1, DAM1, NVal);
-	    _ ->
-		{Owners1, DAM1} = prepend_next_owner(M,
-						     Eligible,
-						     Owners,
-						     DAM,
-						     NVal),
-		construct(Complete, M, Owners1, DAM1, NVal)
-	  end
+        {true, _DesiredLen} -> {ok, Owners, DAM};
+        {false, DesiredLen} ->
+            %% Easy ones - restrict the eligible list to not include the N-1
+            %% previous nodes. If within NVal-1 of possibly closing the ring
+            %% then restrict in that direction as well.
+            Eligible0 = M -- lists:sublist(Owners, NVal - 1),
+            Eligible = case DesiredLen - length(Owners) of
+                           Left when Left >= NVal ->
+                               Eligible0; % At least Nval left, no restriction
+                           Left ->
+                               Eligible0 --
+                                   lists:sublist(lists:reverse(Owners),
+                                                 NVal - Left)
+                       end,
+            case Eligible of
+                [] ->
+                    %% No eligible nodes - not enough to meet NVal, use any node
+                    logger:debug("construct -- unable to construct without "
+                                 "violating NVal"),
+                    {Owners1, DAM1} = prepend_next_owner(M,
+                                                         M,
+                                                         Owners,
+                                                         DAM,
+                                                         NVal),
+                    construct(Complete, M, Owners1, DAM1, NVal);
+                _ ->
+                    {Owners1, DAM1} = prepend_next_owner(M,
+                                                         Eligible,
+                                                         Owners,
+                                                         DAM,
+                                                         NVal),
+                    construct(Complete, M, Owners1, DAM1, NVal)
+            end
 end.

 %% Returns true only when we have met all required distances across all
@@ -500,11 +500,11 @@ construct(Complete, M, Owners, DAM, NVal) ->
 met_required(Owners, DAM, RequiredDs) ->
     FixupDAM = fixup_dam(Owners, DAM),
     case [Pair
-	  || {Pair, Ds} <- dict:to_list(FixupDAM),
-	     RequiredDs -- Ds /= []]
-	of
-      [] -> true;
-      _ -> false
+          || {Pair, Ds} <- dict:to_list(FixupDAM),
+             RequiredDs -- Ds /= []]
+        of
+        [] -> true;
+        _ -> false
     end.

 %% Return next greatest power of 2
@@ -517,44 +517,44 @@ next_pow2(X, R) -> next_pow2(X, R * 2).
%% Take the AM scores and cap by TargetN and find the node that
%% improves the RMS
prepend_next_owner(M, [Node], Owners, DAM,
-		   _TN) -> % only one node, not a lot of decisions to make
+    _TN) -> % only one node, not a lot of decisions to make
    prepend(M, Node, Owners, DAM);
prepend_next_owner(M, Eligible, Owners, DAM, TN) ->
    {_BestScore, Owners2, DAM2} = lists:foldl(fun (Node,
-						   {RunningScore,
-						    _RunningO,
-						    _RunningDAM} =
-						       Acc) ->
-						      {Owners1, DAM1} =
-							  prepend(M,
-								  Node,
-								  Owners,
-								  DAM),
-						      case
-							score_am(dict:to_list(DAM1),
-								 TN)
-							  of
-							BetterScore
-							    when BetterScore <
-								   RunningScore ->
-							    {BetterScore,
-							     Owners1,
-							     DAM1};
-							_ -> Acc
-						      end
-					      end,
-					      {undefined, undefined, undefined},
-					      Eligible),
+                                                  {RunningScore,
+                                                   _RunningO,
+                                                   _RunningDAM} =
+                                                      Acc) ->
+                                                  {Owners1, DAM1} =
+                                                      prepend(M,
+                                                              Node,
+                                                              Owners,
+                                                              DAM),
+                                                  case
+                                                      score_am(dict:to_list(DAM1),
+                                                               TN)
+                                                  of
+                                                      BetterScore
+                                                          when BetterScore <
+                                                                   RunningScore ->
+                                                          {BetterScore,
+                                                           Owners1,
+                                                           DAM1};
+                                                      _ -> Acc
+                                                  end
+                                          end,
+                                          {undefined, undefined, undefined},
+                                          Eligible),
    {Owners2, DAM2}.

%% Prepend N to the front of Owners, and update AM
prepend(M, N, Owners, DAM) ->
    Ds = distances2(M -- [N], Owners),
    DAM2 = lists:foldl(fun ({T, D}, DAM1) ->
-			       dict:append_list({N, T}, [D], DAM1)
-		       end,
-		       DAM,
-		       Ds),
+                               dict:append_list({N, T}, [D], DAM1)
+                       end,
+                       DAM,
+                       Ds),
    {[N | Owners], DAM2}.

%% Calculate the distances to each of the M nodes until
@@ -565,19 +565,19 @@ distances2([], _Owners, _D, Acc) -> Acc;
 distances2(_M, [], _D, Acc) -> Acc;
 distances2(M, [T | Owners], D, Acc) ->
     case lists:member(T, M) of
-      true ->
-	  distances2(M -- [T], Owners, D + 1, [{T, D} | Acc]);
-      false -> distances2(M, Owners, D + 1, Acc)
+        true ->
+            distances2(M -- [T], Owners, D + 1, [{T, D} | Acc]);
+        false -> distances2(M, Owners, D + 1, Acc)
    end.

%% Fix up the dictionary AM adding in entries for the end of the owners list
%% wrapping around to the start.
fixup_dam(Owners, DAM) ->
    fixup_dam(lists:usort(Owners),
-	      lists:reverse(Owners),
-	      Owners,
-	      0,
-	      DAM).
+              lists:reverse(Owners),
+              Owners,
+              0,
+              DAM).

fixup_dam([], _ToFix, _Owners, _D, DAM) -> DAM;
fixup_dam(_M, [], _Owners, _D, DAM) -> DAM;
@@ -585,10 +585,10 @@ fixup_dam(M, [N | ToFix], Owners, D, DAM) ->
     M2 = M -- [N],
     Ds = distances2(M2, Owners, D, []),
     DAM2 = lists:foldl(fun ({T, D0}, DAM1) ->
-			       dict:append_list({N, T}, [D0], DAM1)
-		       end,
-		       DAM,
-		       Ds),
+                               dict:append_list({N, T}, [D0], DAM1)
+                       end,
+                       DAM,
+                       Ds),
     fixup_dam(M2, ToFix, Owners, D + 1, DAM2).

%% -------------------------------------------------------------------
@@ -610,9 +610,9 @@ fac(N) when N > 0 -> N * fac(N - 1).
 perm_gen([E]) -> [[E]];
 perm_gen(L) ->
     lists:append([begin
-		    [[X | Y] || Y <- perm_gen(lists:delete(X, L))]
-		  end
-		  || X <- L]).
+                      [[X | Y] || Y <- perm_gen(lists:delete(X, L))]
+                  end
+                  || X <- L]).

%% Pick all combinations of Depth nodes from the Members list
%% 0 = []
diff --git a/src/riak_core_claimant.erl b/src/riak_core_claimant.erl
index efb3bb7df..c1a698862 100644
--- a/src/riak_core_claimant.erl
+++ b/src/riak_core_claimant.erl
@@ -26,30 +26,30 @@
 -export([start_link/0]).

 -export([leave_member/1,
-	 remove_member/1,
-	 force_replace/2,
-	 replace/2,
-	 resize_ring/1,
-	 abort_resize/0,
-	 plan/0,
-	 commit/0,
-	 clear/0,
-	 ring_changed/2]).
+         remove_member/1,
+         force_replace/2,
+         replace/2,
+         resize_ring/1,
+         abort_resize/0,
+         plan/0,
+         commit/0,
+         clear/0,
+         ring_changed/2]).

 -export([reassign_indices/1]).
% helpers for claim sim

 %% gen_server callbacks
 -export([init/1,
-	 handle_call/3,
-	 handle_cast/2,
-	 handle_info/2,
-	 terminate/2,
-	 code_change/3]).
+         handle_call/3,
+         handle_cast/2,
+         handle_info/2,
+         terminate/2,
+         code_change/3]).

 -type action() :: leave |
-		  remove |
-		  {replace, node()} |
-		  {force_replace, node()}.
+                  remove |
+                  {replace, node()} |
+                  {force_replace, node()}.

 -type riak_core_ring() :: riak_core_ring:riak_core_ring().

@@ -57,25 +57,25 @@
 %% A tuple representing a given cluster transition:
 %%    {Ring, NewRing} where NewRing = f(Ring)
 -type ring_transition() :: {riak_core_ring(),
-			    riak_core_ring()}.
+                            riak_core_ring()}.

 -record(state,
-	{last_ring_id,
-	 %% The set of staged cluster changes
-	 changes :: [{node(), action()}],
-	 %% Ring computed during the last planning stage based on
-	 %% applying a set of staged cluster changes. When commiting
-	 %% changes, the computed ring must match the previous planned
-	 %% ring to be allowed.
-	 next_ring :: riak_core_ring() | undefined,
-	 %% Random number seed passed to remove_node to ensure the
-	 %% current randomized remove algorithm is deterministic
-	 %% between plan and commit phases
-	 seed}).
+    {last_ring_id,
+     %% The set of staged cluster changes
+     changes :: [{node(), action()}],
+     %% Ring computed during the last planning stage based on
+     %% applying a set of staged cluster changes. When committing
+     %% changes, the computed ring must match the previous planned
+     %% ring to be allowed.
+     next_ring :: riak_core_ring() | undefined,
+     %% Random number seed passed to remove_node to ensure the
+     %% current randomized remove algorithm is deterministic
+     %% between plan and commit phases
+     seed}).

 -define(ROUT(S, A),
-	ok).%%-define(ROUT(S,A),?debugFmt(S,A)).
-	    %%-define(ROUT(S,A),io:format(S,A)).
+        ok).%%-define(ROUT(S,A),?debugFmt(S,A)).
+            %%-define(ROUT(S,A),io:format(S,A)).

 %%%===================================================================
 %%% API
@@ -84,9 +84,9 @@
 %% @doc Spawn and register the riak_core_claimant server
 start_link() ->
     gen_server:start_link({local, ?MODULE},
-			  ?MODULE,
-			  [],
-			  []).
+                          ?MODULE,
+                          [],
+                          []).

 %% @doc Determine how the cluster will be affected by the staged changes,
 %%      returning the set of pending changes as well as a list of ring
@@ -94,7 +94,7 @@ start_link() ->
 %%      (eg. the initial transition that applies the staged changes, and
 %%      any additional transitions triggered by later rebalancing).
 -spec plan() -> {error, term()} |
-		{ok, [action()], [ring_transition()]}.
+                {ok, [action()], [ring_transition()]}.

 plan() -> gen_server:call(claimant(), plan, infinity).

@@ -172,9 +172,9 @@ ring_changed(Node, Ring) ->

 reassign_indices(CState) ->
     reassign_indices(CState,
-		     [],
-		     riak_core_rand:rand_seed(),
-		     fun no_log/2).
+                     [],
+                     riak_core_rand:rand_seed(),
+                     fun no_log/2).

 %%%===================================================================
 %%% Internal API helpers
@@ -182,8 +182,8 @@ reassign_indices(CState) ->

 stage(Node, Action) ->
     gen_server:call(claimant(),
-		    {stage, Node, Action},
-		    infinity).
+                    {stage, Node, Action},
+                    infinity).

 claimant() ->
     {ok, Ring} = riak_core_ring_manager:get_my_ring(),
@@ -197,7 +197,7 @@ init([]) ->
     schedule_tick(),
     {ok, #state{changes = [],
-	    seed = riak_core_rand:rand_seed()}}.
+            seed = riak_core_rand:rand_seed()}}.
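
To make the staging lifecycle concrete, a hedged shell sketch of the operator
flow against this server (the node name is a placeholder):

    %% > riak_core_claimant:leave_member('dev2@127.0.0.1').
    %% > {ok, Actions, Transitions} = riak_core_claimant:plan().
    %% > riak_core_claimant:commit().

plan/0 returns the staged actions together with the ring transitions they
imply, and commit/0 only applies them if the ring still matches the planned
one; the seed held in #state{} above is what keeps the plan and commit
computations deterministic.
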
handle_call(clear, _From, State) -> State2 = clear_staged(State), @@ -205,19 +205,19 @@ handle_call(clear, _From, State) -> handle_call({stage, Node, Action}, _From, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), {Reply, State2} = maybe_stage(Node, - Action, - Ring, - State), + Action, + Ring, + State), {reply, Reply, State2}; handle_call(plan, _From, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case riak_core_ring:ring_ready(Ring) of - false -> - Reply = {error, ring_not_ready}, - {reply, Reply, State}; - true -> - {Reply, State2} = generate_plan(Ring, State), - {reply, Reply, State2} + false -> + Reply = {error, ring_not_ready}, + {reply, Reply, State}; + true -> + {Reply, State2} = generate_plan(Ring, State), + {reply, Reply, State2} end; handle_call(commit, _From, State) -> {Reply, State2} = commit_staged(State), @@ -248,24 +248,24 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% @doc Verify that a cluster change request is valid and add it to %% the list of staged changes. maybe_stage(Node, Action, Ring, - State = #state{changes = Changes}) -> + State = #state{changes = Changes}) -> case valid_request(Node, Action, Changes, Ring) of - true -> - Changes2 = orddict:store(Node, Action, Changes), - Changes3 = filter_changes(Changes2, Ring), - State2 = State#state{changes = Changes3}, - {ok, State2}; - Error -> {Error, State} + true -> + Changes2 = orddict:store(Node, Action, Changes), + Changes3 = filter_changes(Changes2, Ring), + State2 = State#state{changes = Changes3}, + {ok, State2}; + Error -> {Error, State} end. %% @private %% @doc Determine how the staged set of cluster changes will affect %% the cluster. See {@link plan/0} for additional details. generate_plan(Ring, - State = #state{changes = Changes}) -> + State = #state{changes = Changes}) -> Changes2 = filter_changes(Changes, Ring), Joining = [{Node, join} - || Node <- riak_core_ring:members(Ring, [joining])], + || Node <- riak_core_ring:members(Ring, [joining])], AllChanges = lists:ukeysort(1, Changes2 ++ Joining), State2 = State#state{changes = Changes2}, generate_plan(AllChanges, Ring, State2). @@ -274,15 +274,15 @@ generate_plan([], _, State) -> %% There are no changes to apply {{ok, [], []}, State}; generate_plan(Changes, Ring, - State = #state{seed = Seed}) -> + State = #state{seed = Seed}) -> case compute_all_next_rings(Changes, Seed, Ring) of - {error, invalid_resize_claim} -> - {{error, invalid_resize_claim}, State}; - {ok, NextRings} -> - {_, NextRing} = hd(NextRings), - State2 = State#state{next_ring = NextRing}, - Reply = {ok, Changes, NextRings}, - {Reply, State2} + {error, invalid_resize_claim} -> + {{error, invalid_resize_claim}, State}; + {ok, NextRings} -> + {_, NextRing} = hd(NextRings), + State2 = State#state{next_ring = NextRing}, + Reply = {ok, Changes, NextRings}, + {Reply, State2} end. %% @private @@ -292,46 +292,46 @@ commit_staged(State = #state{next_ring = undefined}) -> {{error, nothing_planned}, State}; commit_staged(State) -> case maybe_commit_staged(State) of - {ok, _} -> - State2 = State#state{next_ring = undefined, - changes = [], - seed = riak_core_rand:rand_seed()}, - {ok, State2}; - not_changed -> {error, State}; - {not_changed, Reason} -> {{error, Reason}, State} + {ok, _} -> + State2 = State#state{next_ring = undefined, + changes = [], + seed = riak_core_rand:rand_seed()}, + {ok, State2}; + not_changed -> {error, State}; + {not_changed, Reason} -> {{error, Reason}, State} end. 
%% @private maybe_commit_staged(State) -> riak_core_ring_manager:ring_trans(fun maybe_commit_staged/2, - State). + State). %% @private maybe_commit_staged(Ring, - State = #state{changes = Changes, seed = Seed}) -> + State = #state{changes = Changes, seed = Seed}) -> Changes2 = filter_changes(Changes, Ring), case compute_next_ring(Changes2, Seed, Ring) of - {error, invalid_resize_claim} -> - {ignore, invalid_resize_claim}; - {ok, NextRing} -> - maybe_commit_staged(Ring, NextRing, State) + {error, invalid_resize_claim} -> + {ignore, invalid_resize_claim}; + {ok, NextRing} -> + maybe_commit_staged(Ring, NextRing, State) end. %% @private maybe_commit_staged(Ring, NextRing, - #state{next_ring = PlannedRing}) -> + #state{next_ring = PlannedRing}) -> Claimant = riak_core_ring:claimant(Ring), IsReady = riak_core_ring:ring_ready(Ring), IsClaimant = Claimant == node(), IsSamePlan = same_plan(PlannedRing, NextRing), case {IsReady, IsClaimant, IsSamePlan} of - {false, _, _} -> {ignore, ring_not_ready}; - {_, false, _} -> ignore; - {_, _, false} -> {ignore, plan_changed}; - _ -> - NewRing = riak_core_ring:increment_vclock(Claimant, - NextRing), - {new_ring, NewRing} + {false, _, _} -> {ignore, ring_not_ready}; + {_, false, _} -> ignore; + {_, _, false} -> {ignore, plan_changed}; + _ -> + NewRing = riak_core_ring:increment_vclock(Claimant, + NextRing), + {new_ring, NewRing} end. %% @private @@ -343,12 +343,12 @@ maybe_commit_staged(Ring, NextRing, clear_staged(State) -> remove_joining_nodes(), State#state{changes = [], - seed = riak_core_rand:rand_seed()}. + seed = riak_core_rand:rand_seed()}. %% @private remove_joining_nodes() -> riak_core_ring_manager:ring_trans(fun remove_joining_nodes/2, - ok). + ok). %% @private remove_joining_nodes(Ring, _) -> @@ -357,134 +357,134 @@ remove_joining_nodes(Ring, _) -> Joining = riak_core_ring:members(Ring, [joining]), AreJoining = Joining /= [], case IsClaimant and AreJoining of - false -> ignore; - true -> - NewRing = remove_joining_nodes_from_ring(Claimant, - Joining, - Ring), - {new_ring, NewRing} + false -> ignore; + true -> + NewRing = remove_joining_nodes_from_ring(Claimant, + Joining, + Ring), + {new_ring, NewRing} end. %% @private remove_joining_nodes_from_ring(Claimant, Joining, - Ring) -> + Ring) -> NewRing = lists:foldl(fun (Node, RingAcc) -> - riak_core_ring:set_member(Claimant, - RingAcc, - Node, - invalid, - same_vclock) - end, - Ring, - Joining), + riak_core_ring:set_member(Claimant, + RingAcc, + Node, + invalid, + same_vclock) + end, + Ring, + Joining), NewRing2 = riak_core_ring:increment_vclock(Claimant, - NewRing), + NewRing), NewRing2. %% @private valid_request(Node, Action, Changes, Ring) -> case Action of - leave -> valid_leave_request(Node, Ring); - remove -> valid_remove_request(Node, Ring); - {replace, NewNode} -> - valid_replace_request(Node, NewNode, Changes, Ring); - {force_replace, NewNode} -> - valid_force_replace_request(Node, - NewNode, - Changes, - Ring); - {resize, NewRingSize} -> - valid_resize_request(NewRingSize, Changes, Ring); - abort_resize -> valid_resize_abort_request(Ring) + leave -> valid_leave_request(Node, Ring); + remove -> valid_remove_request(Node, Ring); + {replace, NewNode} -> + valid_replace_request(Node, NewNode, Changes, Ring); + {force_replace, NewNode} -> + valid_force_replace_request(Node, + NewNode, + Changes, + Ring); + {resize, NewRingSize} -> + valid_resize_request(NewRingSize, Changes, Ring); + abort_resize -> valid_resize_abort_request(Ring) end. 
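
For reference, the staged-change vocabulary this dispatcher accepts, in the
{Node, Action} shape stored in #state.changes (node names and the ring size
are placeholders):

    %% {'dev2@127.0.0.1', leave}
    %% {'dev2@127.0.0.1', remove}
    %% {'dev2@127.0.0.1', {replace, 'dev9@127.0.0.1'}}
    %% {'dev2@127.0.0.1', {force_replace, 'dev9@127.0.0.1'}}
    %% {'dev2@127.0.0.1', {resize, 128}}
    %% {'dev2@127.0.0.1', abort_resize}
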
%% @private valid_leave_request(Node, Ring) -> case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - {_, valid} -> true; - {_, joining} -> true; - {_, _} -> {error, already_leaving} + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + {_, valid} -> true; + {_, joining} -> true; + {_, _} -> {error, already_leaving} end. %% @private valid_remove_request(Node, Ring) -> IsClaimant = Node == riak_core_ring:claimant(Ring), case {IsClaimant, - riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {true, _, _} -> {error, is_claimant}; - {_, _, invalid} -> {error, not_member}; - {_, [Node], _} -> {error, only_member}; - _ -> true + riak_core_ring:all_members(Ring), + riak_core_ring:member_status(Ring, Node)} + of + {true, _, _} -> {error, is_claimant}; + {_, _, invalid} -> {error, not_member}; + {_, [Node], _} -> {error, only_member}; + _ -> true end. %% @private valid_replace_request(Node, NewNode, Changes, Ring) -> AlreadyReplacement = lists:member(NewNode, - existing_replacements(Changes)), + existing_replacements(Changes)), NewJoining = (riak_core_ring:member_status(Ring, - NewNode) - == joining) - and not orddict:is_key(NewNode, Changes), + NewNode) + == joining) + and not orddict:is_key(NewNode, Changes), case {riak_core_ring:member_status(Ring, Node), - AlreadyReplacement, - NewJoining} - of - {invalid, _, _} -> {error, not_member}; - {leaving, _, _} -> {error, already_leaving}; - {_, true, _} -> {error, already_replacement}; - {_, _, false} -> {error, invalid_replacement}; - _ -> true + AlreadyReplacement, + NewJoining} + of + {invalid, _, _} -> {error, not_member}; + {leaving, _, _} -> {error, already_leaving}; + {_, true, _} -> {error, already_replacement}; + {_, _, false} -> {error, invalid_replacement}; + _ -> true end. %% @private valid_force_replace_request(Node, NewNode, Changes, - Ring) -> + Ring) -> IsClaimant = Node == riak_core_ring:claimant(Ring), AlreadyReplacement = lists:member(NewNode, - existing_replacements(Changes)), + existing_replacements(Changes)), NewJoining = (riak_core_ring:member_status(Ring, - NewNode) - == joining) - and not orddict:is_key(NewNode, Changes), + NewNode) + == joining) + and not orddict:is_key(NewNode, Changes), case {IsClaimant, - riak_core_ring:member_status(Ring, Node), - AlreadyReplacement, - NewJoining} - of - {true, _, _, _} -> {error, is_claimant}; - {_, invalid, _, _} -> {error, not_member}; - {_, _, true, _} -> {error, already_replacement}; - {_, _, _, false} -> {error, invalid_replacement}; - _ -> true + riak_core_ring:member_status(Ring, Node), + AlreadyReplacement, + NewJoining} + of + {true, _, _, _} -> {error, is_claimant}; + {_, invalid, _, _} -> {error, not_member}; + {_, _, true, _} -> {error, already_replacement}; + {_, _, _, false} -> {error, invalid_replacement}; + _ -> true end. 
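
A minimal sketch (demo_stage_check/2 is a hypothetical helper, not in this
patch) of how these validators surface to stage/2 callers: a true result
stages the change, while any {error, Reason} tuple is passed back to the
client unchanged.

    demo_stage_check(Node, Ring) ->
        case valid_leave_request(Node, Ring) of
            true -> staged;
            {error, Reason} -> {rejected, Reason}
        end.
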
%% @private %% restrictions preventing resize along with other operations are temporary valid_resize_request(NewRingSize, [], Ring) -> IsResizing = riak_core_ring:num_partitions(Ring) =/= - NewRingSize, + NewRingSize, NodeCount = length(riak_core_ring:all_members(Ring)), Changes = length(riak_core_ring:pending_changes(Ring)) > - 0, + 0, case {IsResizing, NodeCount, Changes} of - {true, N, false} when N > 1 -> true; - {false, _, _} -> {error, same_size}; - {_, 1, _} -> {error, single_node}; - {_, _, true} -> {error, pending_changes} + {true, N, false} when N > 1 -> true; + {false, _, _} -> {error, same_size}; + {_, 1, _} -> {error, single_node}; + {_, _, true} -> {error, pending_changes} end. valid_resize_abort_request(Ring) -> IsResizing = riak_core_ring:is_resizing(Ring), IsPostResize = riak_core_ring:is_post_resize(Ring), case IsResizing andalso not IsPostResize of - true -> true; - false -> {error, not_resizing} + true -> true; + false -> {error, not_resizing} end. %% @private @@ -493,59 +493,59 @@ valid_resize_abort_request(Ring) -> %% changes that bypass the staging system. filter_changes(Changes, Ring) -> orddict:filter(fun (Node, Change) -> - filter_changes_pred(Node, Change, Changes, Ring) - end, - Changes). + filter_changes_pred(Node, Change, Changes, Ring) + end, + Changes). %% @private filter_changes_pred(Node, {Change, NewNode}, Changes, - Ring) + Ring) when (Change == replace) or (Change == force_replace) -> IsMember = riak_core_ring:member_status(Ring, Node) /= - invalid, + invalid, IsJoining = riak_core_ring:member_status(Ring, NewNode) - == joining, + == joining, NotChanging = not orddict:is_key(NewNode, Changes), IsMember and IsJoining and NotChanging; filter_changes_pred(Node, _, _, Ring) -> IsMember = riak_core_ring:member_status(Ring, Node) /= - invalid, + invalid, IsMember. %% @private existing_replacements(Changes) -> [Node || {_, {Change, Node}} <- Changes, - (Change == replace) or (Change == force_replace)]. + (Change == replace) or (Change == force_replace)]. %% @private %% Determine if two rings have logically equal cluster state same_plan(RingA, RingB) -> riak_core_ring:all_member_status(RingA) == - riak_core_ring:all_member_status(RingB) - andalso - riak_core_ring:all_owners(RingA) == - riak_core_ring:all_owners(RingB) - andalso - riak_core_ring:pending_changes(RingA) == - riak_core_ring:pending_changes(RingB). + riak_core_ring:all_member_status(RingB) + andalso + riak_core_ring:all_owners(RingA) == + riak_core_ring:all_owners(RingB) + andalso + riak_core_ring:pending_changes(RingA) == + riak_core_ring:pending_changes(RingB). schedule_tick() -> Tick = application:get_env(riak_core, - claimant_tick, - 10000), + claimant_tick, + 10000), erlang:send_after(Tick, ?MODULE, tick). tick(State = #state{last_ring_id = LastID}) -> case riak_core_ring_manager:get_ring_id() of - LastID -> - schedule_tick(), - State; - RingID -> - {ok, Ring} = riak_core_ring_manager:get_raw_ring(), - maybe_force_ring_update(Ring), - schedule_tick(), - State#state{last_ring_id = RingID} + LastID -> + schedule_tick(), + State; + RingID -> + {ok, Ring} = riak_core_ring_manager:get_raw_ring(), + maybe_force_ring_update(Ring), + schedule_tick(), + State#state{last_ring_id = RingID} end. maybe_force_ring_update(Ring) -> @@ -554,25 +554,25 @@ maybe_force_ring_update(Ring) -> %% Do not force if we have any joining nodes unless any of them are %% auto-joining nodes. Otherwise, we will force update continuously. 
JoinBlock = are_joining_nodes(Ring) andalso - auto_joining_nodes(Ring) == [], + auto_joining_nodes(Ring) == [], case IsClaimant and IsReady and not JoinBlock of - true -> do_maybe_force_ring_update(Ring); - false -> ok + true -> do_maybe_force_ring_update(Ring); + false -> ok end. do_maybe_force_ring_update(Ring) -> case compute_next_ring([], - riak_core_rand:rand_seed(), - Ring) - of - {ok, NextRing} -> - case same_plan(Ring, NextRing) of - false -> - logger:warning("Forcing update of stalled ring"), - riak_core_ring_manager:force_update(); - true -> ok - end; - _ -> ok + riak_core_rand:rand_seed(), + Ring) + of + {ok, NextRing} -> + case same_plan(Ring, NextRing) of + false -> + logger:warning("Forcing update of stalled ring"), + riak_core_ring_manager:force_update(); + true -> ok + end; + _ -> ok end. %% ========================================================================= @@ -586,31 +586,31 @@ compute_all_next_rings(Changes, Seed, Ring) -> %% @private compute_all_next_rings(Changes, Seed, Ring, Acc) -> case compute_next_ring(Changes, Seed, Ring) of - {error, invalid_resize_claim} = Err -> Err; - {ok, NextRing} -> - Acc2 = [{Ring, NextRing} | Acc], - case not same_plan(Ring, NextRing) of - true -> - FutureRing = riak_core_ring:future_ring(NextRing), - compute_all_next_rings([], Seed, FutureRing, Acc2); - false -> {ok, lists:reverse(Acc2)} - end + {error, invalid_resize_claim} = Err -> Err; + {ok, NextRing} -> + Acc2 = [{Ring, NextRing} | Acc], + case not same_plan(Ring, NextRing) of + true -> + FutureRing = riak_core_ring:future_ring(NextRing), + compute_all_next_rings([], Seed, FutureRing, Acc2); + false -> {ok, lists:reverse(Acc2)} + end end. %% @private compute_next_ring(Changes, Seed, Ring) -> Replacing = [{Node, NewNode} - || {Node, {replace, NewNode}} <- Changes], + || {Node, {replace, NewNode}} <- Changes], Ring2 = apply_changes(Ring, Changes), {_, Ring3} = maybe_handle_joining(node(), Ring2), {_, Ring4} = do_claimant_quiet(node(), - Ring3, - Replacing, - Seed), + Ring3, + Replacing, + Seed), {Valid, Ring5} = maybe_compute_resize(Ring, Ring4), case Valid of - false -> {error, invalid_resize_claim}; - true -> {ok, Ring5} + false -> {error, invalid_resize_claim}; + true -> {ok, Ring5} end. %% @private @@ -618,9 +618,9 @@ maybe_compute_resize(Orig, MbResized) -> OrigSize = riak_core_ring:num_partitions(Orig), NewSize = riak_core_ring:num_partitions(MbResized), case OrigSize =/= NewSize of - false -> {true, MbResized}; - true -> - validate_resized_ring(compute_resize(Orig, MbResized)) + false -> {true, MbResized}; + true -> + validate_resized_ring(compute_resize(Orig, MbResized)) end. %% @private @@ -634,32 +634,32 @@ compute_resize(Orig, Resized) -> %% need to operate on balanced, future ring (apply changes determined by claim) CState0 = riak_core_ring:future_ring(Resized), Type = case riak_core_ring:num_partitions(Orig) < - riak_core_ring:num_partitions(Resized) - of - true -> larger; - false -> smaller - end, + riak_core_ring:num_partitions(Resized) + of + true -> larger; + false -> smaller + end, %% Each index in the original ring must perform several transfers %% to properly resize the ring. The first transfer for each index %% is scheduled here. 
Subsequent transfers are scheduled by vnode
    CState1 = lists:foldl(fun ({Idx, _} = IdxOwner,
-			       CStateAcc) ->
-				  %% indexes being abandoned in a shrinking ring have
-				  %% no next owner
-				  NextOwner = try
-						riak_core_ring:index_owner(CStateAcc,
-									   Idx)
-					      catch
-						error:{badmatch, false} ->
-						    none
-					      end,
-				  schedule_first_resize_transfer(Type,
-								 IdxOwner,
-								 NextOwner,
-								 CStateAcc)
-			  end,
-			  CState0,
-			  riak_core_ring:all_owners(Orig)),
+                               CStateAcc) ->
+                                  %% indexes being abandoned in a shrinking ring have
+                                  %% no next owner
+                                  NextOwner = try
+                                                  riak_core_ring:index_owner(CStateAcc,
+                                                                             Idx)
+                                              catch
+                                                  error:{badmatch, false} ->
+                                                      none
+                                              end,
+                                  schedule_first_resize_transfer(Type,
+                                                                 IdxOwner,
+                                                                 NextOwner,
+                                                                 CStateAcc)
+                          end,
+                          CState0,
+                          riak_core_ring:all_owners(Orig)),
    riak_core_ring:set_pending_resize(CState1, Orig).

%% @private
@@ -667,30 +667,30 @@
 %% the goal of ensuring the transfer will actually have data to send to the
 %% target.
 schedule_first_resize_transfer(smaller,
-			       {Idx, _} = IdxOwner, none, Resized) ->
+    {Idx, _} = IdxOwner, none, Resized) ->
     %% partition no longer exists in shrunk ring, first successor will be
     %% new owner of its data
     Target = hd(riak_core_ring:preflist(<<Idx:160/integer>>,
-					Resized)),
+        Resized)),
     riak_core_ring:schedule_resize_transfer(Resized,
-					    IdxOwner,
-					    Target);
+        IdxOwner,
+        Target);
 schedule_first_resize_transfer(_Type,
-			       {Idx, Owner} = IdxOwner, Owner, Resized) ->
+    {Idx, Owner} = IdxOwner, Owner, Resized) ->
     %% partition is not being moved during expansion, first predecessor will
     %% own at least a portion of its data
     Target = hd(chash:predecessors(Idx - 1,
-				   riak_core_ring:chash(Resized))),
+        riak_core_ring:chash(Resized))),
     riak_core_ring:schedule_resize_transfer(Resized,
-					    IdxOwner,
-					    Target);
+        IdxOwner,
+        Target);
 schedule_first_resize_transfer(_,
-			       {Idx, _Owner} = IdxOwner, NextOwner, Resized) ->
+    {Idx, _Owner} = IdxOwner, NextOwner, Resized) ->
     %% partition is being moved during expansion, schedule transfer to partition
     %% on new owner since it will still own some of its data
     riak_core_ring:schedule_resize_transfer(Resized,
-					    IdxOwner,
-					    {Idx, NextOwner}).
+        IdxOwner,
+        {Idx, NextOwner}).

%% @doc verify that resized ring was properly claimed (no owners are the dummy
%% resized owner) in both the current and future ring
@@ -701,24 +701,24 @@ validate_resized_ring(Ring) ->
     Members = riak_core_ring:all_members(Ring),
     FutureMembers = riak_core_ring:all_members(FutureRing),
     Invalid1 = [{Idx, Owner}
-		|| {Idx, Owner} <- Owners,
-		   not lists:member(Owner, Members)],
+                || {Idx, Owner} <- Owners,
+                   not lists:member(Owner, Members)],
     Invalid2 = [{Idx, Owner}
-		|| {Idx, Owner} <- FutureOwners,
-		   not lists:member(Owner, FutureMembers)],
+                || {Idx, Owner} <- FutureOwners,
+                   not lists:member(Owner, FutureMembers)],
     case Invalid1 ++ Invalid2 of
-      [] -> {true, Ring};
-      _ -> {false, Ring}
+        [] -> {true, Ring};
+        _ -> {false, Ring}
     end.

 %% @private
 apply_changes(Ring, Changes) ->
     NewRing = lists:foldl(fun ({Node, Cmd}, RingAcc2) ->
-				  RingAcc3 = change({Cmd, Node}, RingAcc2),
-				  RingAcc3
-			  end,
-			  Ring,
-			  Changes),
+                                  RingAcc3 = change({Cmd, Node}, RingAcc2),
+                                  RingAcc3
+                          end,
+                          Ring,
+                          Changes),
     NewRing.
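
An illustrative call shape for apply_changes/2 above (demo_apply/1 and the
node names are placeholders): staged entries are applied in order through
change/2, so a join and a leave can be folded into a single planning pass.

    demo_apply(Ring) ->
        %% Hypothetical helper: plan a leave and a join together.
        apply_changes(Ring,
                      [{'dev1@127.0.0.1', leave},
                       {'dev3@127.0.0.1', join}]).
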
%% @private @@ -728,13 +728,13 @@ change({join, Node}, Ring) -> change({leave, Node}, Ring) -> Members = riak_core_ring:all_members(Ring), lists:member(Node, Members) orelse - throw(invalid_member), + throw(invalid_member), Ring2 = riak_core_ring:leave_member(Node, Ring, Node), Ring2; change({remove, Node}, Ring) -> Members = riak_core_ring:all_members(Ring), lists:member(Node, Members) orelse - throw(invalid_member), + throw(invalid_member), Ring2 = riak_core_ring:remove_member(Node, Ring, Node), Ring2; change({{replace, _NewNode}, Node}, Ring) -> @@ -745,14 +745,14 @@ change({{force_replace, NewNode}, Node}, Ring) -> Indices = riak_core_ring:indices(Ring, Node), Reassign = [{Idx, NewNode} || Idx <- Indices], Ring2 = riak_core_ring:add_member(NewNode, - Ring, - NewNode), + Ring, + NewNode), Ring3 = riak_core_ring:change_owners(Ring2, Reassign), Ring4 = riak_core_ring:remove_member(Node, Ring3, Node), case riak_core_ring:is_resizing(Ring4) of - true -> - replace_node_during_resize(Ring4, Node, NewNode); - false -> Ring4 + true -> + replace_node_during_resize(Ring4, Node, NewNode); + false -> Ring4 end; change({{resize, NewRingSize}, _Node}, Ring) -> riak_core_ring:resize(Ring, NewRingSize); @@ -762,8 +762,8 @@ change({abort_resize, _Node}, Ring) -> %%noinspection ErlangUnboundVariable internal_ring_changed(Node, CState) -> {Changed, CState5} = do_claimant(Node, - CState, - fun log/2), + CState, + fun log/2), inform_removed_nodes(Node, CState, CState5), %% Start/stop converge and rebalance delay timers %% (converge delay) @@ -775,173 +775,173 @@ internal_ring_changed(Node, CState) -> %% IsClaimant = riak_core_ring:claimant(CState5) =:= Node, WasPending = [] /= - riak_core_ring:pending_changes(CState), + riak_core_ring:pending_changes(CState), IsPending = [] /= - riak_core_ring:pending_changes(CState5), + riak_core_ring:pending_changes(CState5), %% Outer case statement already checks for ring_ready case {IsClaimant, Changed} of - {true, true} -> - %% STATS - %% riak_core_stat:update(converge_timer_end), - %% STATS - %% riak_core_stat:update(converge_timer_begin); - ok; - {true, false} -> - %% STATS - %% riak_core_stat:update(converge_timer_end); - ok; - _ -> ok + {true, true} -> + %% STATS + %% riak_core_stat:update(converge_timer_end), + %% STATS + %% riak_core_stat:update(converge_timer_begin); + ok; + {true, false} -> + %% STATS + %% riak_core_stat:update(converge_timer_end); + ok; + _ -> ok end, case {IsClaimant, WasPending, IsPending} of - {true, false, true} -> - %% STATS - %% riak_core_stat:update(rebalance_timer_begin); - ok; - {true, true, false} -> - %% STATS - %% riak_core_stat:update(rebalance_timer_end); - ok; - _ -> ok + {true, false, true} -> + %% STATS + %% riak_core_stat:update(rebalance_timer_begin); + ok; + {true, true, false} -> + %% STATS + %% riak_core_stat:update(rebalance_timer_end); + ok; + _ -> ok end, %% Set cluster name if it is undefined case {IsClaimant, riak_core_ring:cluster_name(CState5)} - of - {true, undefined} -> - ClusterName = {Node, riak_core_rand:rand_seed()}, - {_, _} = - riak_core_util:rpc_every_member(riak_core_ring_manager, - set_cluster_name, - [ClusterName], - 1000), - ok; - _ -> - ClusterName = riak_core_ring:cluster_name(CState5), - ok + of + {true, undefined} -> + ClusterName = {Node, riak_core_rand:rand_seed()}, + {_, _} = + riak_core_util:rpc_every_member(riak_core_ring_manager, + set_cluster_name, + [ClusterName], + 1000), + ok; + _ -> + ClusterName = riak_core_ring:cluster_name(CState5), + ok end, case Changed of - true -> - CState6 = 
riak_core_ring:set_cluster_name(CState5, - ClusterName), - riak_core_ring:increment_vclock(Node, CState6); - false -> CState5 + true -> + CState6 = riak_core_ring:set_cluster_name(CState5, + ClusterName), + riak_core_ring:increment_vclock(Node, CState6); + false -> CState5 end. inform_removed_nodes(Node, OldRing, NewRing) -> CName = riak_core_ring:cluster_name(NewRing), Exiting = riak_core_ring:members(OldRing, [exiting]) -- - [Node], + [Node], Invalid = riak_core_ring:members(NewRing, [invalid]), Changed = - ordsets:intersection(ordsets:from_list(Exiting), - ordsets:from_list(Invalid)), + ordsets:intersection(ordsets:from_list(Exiting), + ordsets:from_list(Invalid)), %% Tell exiting node to shutdown. _ = [riak_core_ring_manager:refresh_ring(ExitingNode, - CName) - || ExitingNode <- Changed], + CName) + || ExitingNode <- Changed], ok. do_claimant_quiet(Node, CState, Replacing, Seed) -> do_claimant(Node, - CState, - Replacing, - Seed, - fun no_log/2). + CState, + Replacing, + Seed, + fun no_log/2). do_claimant(Node, CState, Log) -> do_claimant(Node, - CState, - [], - riak_core_rand:rand_seed(), - Log). + CState, + [], + riak_core_rand:rand_seed(), + Log). do_claimant(Node, CState, Replacing, Seed, Log) -> AreJoining = are_joining_nodes(CState), {C1, CState2} = maybe_update_claimant(Node, CState), {C2, CState3} = maybe_handle_auto_joining(Node, - CState2), + CState2), case AreJoining of - true -> - %% Do not rebalance if there are joining nodes - Changed = C1 or C2, - CState5 = CState3; - false -> - {C3, CState4} = maybe_update_ring(Node, - CState3, - Replacing, - Seed, - Log), - {C4, CState5} = maybe_remove_exiting(Node, CState4), - Changed = C1 or C2 or C3 or C4 + true -> + %% Do not rebalance if there are joining nodes + Changed = C1 or C2, + CState5 = CState3; + false -> + {C3, CState4} = maybe_update_ring(Node, + CState3, + Replacing, + Seed, + Log), + {C4, CState5} = maybe_remove_exiting(Node, CState4), + Changed = C1 or C2 or C3 or C4 end, {Changed, CState5}. %% @private maybe_update_claimant(Node, CState) -> Members = riak_core_ring:members(CState, - [valid, leaving]), + [valid, leaving]), Claimant = riak_core_ring:claimant(CState), NextClaimant = hd(Members ++ [undefined]), ClaimantMissing = not lists:member(Claimant, Members), case {ClaimantMissing, NextClaimant} of - {true, Node} -> - %% Become claimant - CState2 = riak_core_ring:set_claimant(CState, Node), - CState3 = - riak_core_ring:increment_ring_version(Claimant, - CState2), - {true, CState3}; - _ -> {false, CState} + {true, Node} -> + %% Become claimant + CState2 = riak_core_ring:set_claimant(CState, Node), + CState3 = + riak_core_ring:increment_ring_version(Claimant, + CState2), + {true, CState3}; + _ -> {false, CState} end. %% @private maybe_update_ring(Node, CState, Replacing, Seed, Log) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - case riak_core_ring:claiming_members(CState) of - [] -> - %% Consider logging an error/warning here or even - %% intentionally crashing. This state makes no logical - %% sense given that it represents a cluster without any - %% active nodes. - {false, CState}; - _ -> - Resizing = riak_core_ring:is_resizing(CState), - {Changed, CState2} = update_ring(Node, - CState, - Replacing, - Seed, - Log, - Resizing), - {Changed, CState2} - end; - _ -> {false, CState} + Node -> + case riak_core_ring:claiming_members(CState) of + [] -> + %% Consider logging an error/warning here or even + %% intentionally crashing. 
This state makes no logical + %% sense given that it represents a cluster without any + %% active nodes. + {false, CState}; + _ -> + Resizing = riak_core_ring:is_resizing(CState), + {Changed, CState2} = update_ring(Node, + CState, + Replacing, + Seed, + Log, + Resizing), + {Changed, CState2} + end; + _ -> {false, CState} end. %% @private maybe_remove_exiting(Node, CState) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - %% Change exiting nodes to invalid, skipping this node. - Exiting = riak_core_ring:members(CState, [exiting]) -- - [Node], - Changed = Exiting /= [], - CState2 = lists:foldl(fun (ENode, CState0) -> - ClearedCS = - riak_core_ring:clear_member_meta(Node, - CState0, - ENode), - riak_core_ring:set_member(Node, - ClearedCS, - ENode, - invalid, - same_vclock) - end, - CState, - Exiting), - {Changed, CState2}; - _ -> {false, CState} + Node -> + %% Change exiting nodes to invalid, skipping this node. + Exiting = riak_core_ring:members(CState, [exiting]) -- + [Node], + Changed = Exiting /= [], + CState2 = lists:foldl(fun (ENode, CState0) -> + ClearedCS = + riak_core_ring:clear_member_meta(Node, + CState0, + ENode), + riak_core_ring:set_member(Node, + ClearedCS, + ENode, + invalid, + same_vclock) + end, + CState, + Exiting), + {Changed, CState2}; + _ -> {false, CState} end. %% @private @@ -955,11 +955,11 @@ auto_joining_nodes(CState) -> %% case application:get_env(riak_core, staged_joins, true) of false -> Joining; true -> [Member || Member <- Joining, - riak_core_ring:get_member_meta(CState, - Member, - '$autojoin') - == - true].%% end. + riak_core_ring:get_member_meta(CState, + Member, + '$autojoin') + == + true].%% end. %% @private maybe_handle_auto_joining(Node, CState) -> @@ -975,102 +975,102 @@ maybe_handle_joining(Node, CState) -> maybe_handle_joining(Node, Joining, CState) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - Changed = Joining /= [], - CState2 = lists:foldl(fun (JNode, CState0) -> - riak_core_ring:set_member(Node, - CState0, - JNode, - valid, - same_vclock) - end, - CState, - Joining), - {Changed, CState2}; - _ -> {false, CState} + Node -> + Changed = Joining /= [], + CState2 = lists:foldl(fun (JNode, CState0) -> + riak_core_ring:set_member(Node, + CState0, + JNode, + valid, + same_vclock) + end, + CState, + Joining), + {Changed, CState2}; + _ -> {false, CState} end. 
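
A hypothetical predicate (not part of this patch) mirroring the filter in
auto_joining_nodes/1 above: a joining member is auto-admitted only when its
'$autojoin' member metadata is true.

    is_auto_joining(CState, Member) ->
        riak_core_ring:get_member_meta(CState, Member, '$autojoin') ==
            true.
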
%% @private
update_ring(CNode, CState, Replacing, Seed, Log,
-	    false) ->
+    false) ->
    Next0 = riak_core_ring:pending_changes(CState),
    ?ROUT("Members: ~p~n",
-	  [riak_core_ring:members(CState,
-				  [joining,
-				   valid,
-				   leaving,
-				   exiting,
-				   invalid])]),
+          [riak_core_ring:members(CState,
+                                  [joining,
+                                   valid,
+                                   leaving,
+                                   exiting,
+                                   invalid])]),
    ?ROUT("Updating ring :: next0 : ~p~n", [Next0]),
    %% Remove tuples from next for removed nodes
    InvalidMembers = riak_core_ring:members(CState,
-					    [invalid]),
+                                            [invalid]),
    Next2 = lists:filter(fun (NInfo) ->
-				 {Owner, NextOwner, _} =
-				     riak_core_ring:next_owner(NInfo),
-				 not lists:member(Owner, InvalidMembers) and
-				   not lists:member(NextOwner, InvalidMembers)
-			 end,
-			 Next0),
+                                 {Owner, NextOwner, _} =
+                                     riak_core_ring:next_owner(NInfo),
+                                 not lists:member(Owner, InvalidMembers) and
+                                     not lists:member(NextOwner, InvalidMembers)
+                         end,
+                         Next0),
    CState2 = riak_core_ring:set_pending_changes(CState,
-						 Next2),
+                                                 Next2),
    %% Transfer ownership after completed handoff
    {RingChanged1, CState3} = transfer_ownership(CState2,
-						 Log),
+                                                 Log),
    ?ROUT("Updating ring :: next1 : ~p~n",
-	  [riak_core_ring:pending_changes(CState3)]),
+          [riak_core_ring:pending_changes(CState3)]),
    %% Reassign leaving/inactive indices
    {RingChanged2, CState4} = reassign_indices(CState3,
-					       Replacing,
-					       Seed,
-					       Log),
+                                               Replacing,
+                                               Seed,
+                                               Log),
    ?ROUT("Updating ring :: next2 : ~p~n",
-	  [riak_core_ring:pending_changes(CState4)]),
+          [riak_core_ring:pending_changes(CState4)]),
    %% Rebalance the ring as necessary. If pending changes exist ring
    %% is not rebalanced
    Next3 = rebalance_ring(CNode, CState4),
    Log(debug,
-	{"Pending ownership transfers: ~b~n",
-	 [length(riak_core_ring:pending_changes(CState4))]}),
+        {"Pending ownership transfers: ~b~n",
+         [length(riak_core_ring:pending_changes(CState4))]}),
    %% Remove transfers to/from down nodes
    Next4 = handle_down_nodes(CState4, Next3),
    NextChanged = Next0 /= Next4,
    Changed = NextChanged or RingChanged1 or RingChanged2,
    case Changed of
-      true ->
-	  OldS = ordsets:from_list([{Idx, O, NO}
-				    || {Idx, O, NO, _, _} <- Next0]),
-	  NewS = ordsets:from_list([{Idx, O, NO}
-				    || {Idx, O, NO, _, _} <- Next4]),
-	  Diff = ordsets:subtract(NewS, OldS),
-	  _ = [Log(next, NChange) || NChange <- Diff],
-	  ?ROUT("Updating ring :: next3 : ~p~n", [Next4]),
-	  CState5 = riak_core_ring:set_pending_changes(CState4,
-						       Next4),
-	  CState6 = riak_core_ring:increment_ring_version(CNode,
-							  CState5),
-	  {true, CState6};
-      false -> {false, CState}
+        true ->
+            OldS = ordsets:from_list([{Idx, O, NO}
+                                      || {Idx, O, NO, _, _} <- Next0]),
+            NewS = ordsets:from_list([{Idx, O, NO}
+                                      || {Idx, O, NO, _, _} <- Next4]),
+            Diff = ordsets:subtract(NewS, OldS),
+            _ = [Log(next, NChange) || NChange <- Diff],
+            ?ROUT("Updating ring :: next3 : ~p~n", [Next4]),
+            CState5 = riak_core_ring:set_pending_changes(CState4,
+                                                         Next4),
+            CState6 = riak_core_ring:increment_ring_version(CNode,
+                                                            CState5),
+            {true, CState6};
+        false -> {false, CState}
    end;
update_ring(CNode, CState, _Replacing, _Seed, _Log,
-	    true) ->
+    true) ->
    {Installed, CState1} =
-	maybe_install_resized_ring(CState),
+        maybe_install_resized_ring(CState),
    {Aborted, CState2} =
-	riak_core_ring:maybe_abort_resize(CState1),
+        riak_core_ring:maybe_abort_resize(CState1),
    Changed = Installed orelse Aborted,
    case Changed of
-      true ->
-	  CState3 = riak_core_ring:increment_ring_version(CNode,
-							  CState2),
-	  {true, CState3};
-      false -> {false, CState}
+        true ->
+            CState3 = riak_core_ring:increment_ring_version(CNode,
+                                                            CState2),
+            {true, CState3};
+        false -> {false, CState}
end.

maybe_install_resized_ring(CState) ->
    case riak_core_ring:is_resize_complete(CState) of
-      true -> {true, riak_core_ring:future_ring(CState)};
-      false -> {false, CState}
+        true -> {true, riak_core_ring:future_ring(CState)};
+        false -> {false, CState}
    end.

%% @private
@@ -1078,34 +1078,34 @@ transfer_ownership(CState, Log) ->
     Next = riak_core_ring:pending_changes(CState),
     %% Remove already completed and transferred changes
     Next2 = lists:filter(fun (NInfo = {Idx, _, _, _, _}) ->
-				 {_, NewOwner, S} =
-				     riak_core_ring:next_owner(NInfo),
-				 not
-				   ((S == complete) and
-				      (riak_core_ring:index_owner(CState,
-								  Idx)
-					 =:= NewOwner))
-			 end,
-			 Next),
+                                 {_, NewOwner, S} =
+                                     riak_core_ring:next_owner(NInfo),
+                                 not
+                                     ((S == complete) and
+                                          (riak_core_ring:index_owner(CState,
+                                                                      Idx)
+                                               =:= NewOwner))
+                         end,
+                         Next),
     CState2 = lists:foldl(fun (NInfo = {Idx, _, _, _, _},
-			       CState0) ->
-				  case riak_core_ring:next_owner(NInfo) of
-				    {_, Node, complete} ->
-					Log(ownership, {Idx, Node, CState0}),
-					riak_core_ring:transfer_node(Idx,
-								     Node,
-								     CState0);
-				    _ -> CState0
-				  end
-			  end,
-			  CState,
-			  Next2),
+                               CState0) ->
+                                  case riak_core_ring:next_owner(NInfo) of
+                                      {_, Node, complete} ->
+                                          Log(ownership, {Idx, Node, CState0}),
+                                          riak_core_ring:transfer_node(Idx,
+                                                                       Node,
+                                                                       CState0);
+                                      _ -> CState0
+                                  end
+                          end,
+                          CState,
+                          Next2),
    NextChanged = Next2 /= Next,
    RingChanged = riak_core_ring:all_owners(CState) /=
-		    riak_core_ring:all_owners(CState2),
+                      riak_core_ring:all_owners(CState2),
    Changed = NextChanged or RingChanged,
    CState3 = riak_core_ring:set_pending_changes(CState2,
-						 Next2),
+                                                 Next2),
    {Changed, CState3}.

%% @private
@@ -1113,35 +1113,35 @@ reassign_indices(CState, Replacing, Seed, Log) ->
    Next = riak_core_ring:pending_changes(CState),
    Invalid = riak_core_ring:members(CState, [invalid]),
    CState2 = lists:foldl(fun (Node, CState0) ->
-				  remove_node(CState0,
-					      Node,
-					      invalid,
-					      Replacing,
-					      Seed,
-					      Log)
-			  end,
-			  CState,
-			  Invalid),
+                                  remove_node(CState0,
+                                              Node,
+                                              invalid,
+                                              Replacing,
+                                              Seed,
+                                              Log)
+                          end,
+                          CState,
+                          Invalid),
    CState3 = case Next of
-		[] ->
-		    Leaving = riak_core_ring:members(CState, [leaving]),
-		    lists:foldl(fun (Node, CState0) ->
-					remove_node(CState0,
-						    Node,
-						    leaving,
-						    Replacing,
-						    Seed,
-						    Log)
-				end,
-				CState2,
-				Leaving);
-		_ -> CState2
-	      end,
+                  [] ->
+                      Leaving = riak_core_ring:members(CState, [leaving]),
+                      lists:foldl(fun (Node, CState0) ->
+                                          remove_node(CState0,
+                                                      Node,
+                                                      leaving,
+                                                      Replacing,
+                                                      Seed,
+                                                      Log)
+                                  end,
+                                  CState2,
+                                  Leaving);
+                  _ -> CState2
+              end,
    Owners1 = riak_core_ring:all_owners(CState),
    Owners2 = riak_core_ring:all_owners(CState3),
    RingChanged = Owners1 /= Owners2,
    NextChanged = Next /=
-		    riak_core_ring:pending_changes(CState3),
+                      riak_core_ring:pending_changes(CState3),
    {RingChanged or NextChanged, CState3}.

%% @private
@@ -1155,34 +1155,34 @@ rebalance_ring(_CNode, [], CState) ->
    Owners2 = riak_core_ring:all_owners(CState2),
    Owners3 = lists:zip(Owners1, Owners2),
    Next = [{Idx, PrevOwner, NewOwner, [], awaiting}
-	    || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3,
-	       PrevOwner /= NewOwner],
+            || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3,
+               PrevOwner /= NewOwner],
    Next;
rebalance_ring(_CNode, Next, _CState) -> Next.
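
The pending-change entries built above share the shape {Index, PrevOwner,
NextOwner, CompletedMods, Status}, with Status starting at awaiting. A
hypothetical formatter (not part of this patch) makes that explicit:

    pending_transfer_summary({Idx, Prev, Next, _Mods, Status}) ->
        {Idx, {Prev, '->', Next}, Status}.
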
%% @private handle_down_nodes(CState, Next) -> LeavingMembers = riak_core_ring:members(CState, - [leaving, invalid]), + [leaving, invalid]), DownMembers = riak_core_ring:members(CState, [down]), Next2 = [begin - OwnerLeaving = lists:member(O, LeavingMembers), - NextDown = lists:member(NO, DownMembers), - case OwnerLeaving and NextDown of - true -> - Active = riak_core_ring:active_members(CState) -- [O], - RNode = - lists:nth(riak_core_rand:uniform(length(Active)), - Active), - {Idx, O, RNode, Mods, Status}; - _ -> T - end - end - || T = {Idx, O, NO, Mods, Status} <- Next], + OwnerLeaving = lists:member(O, LeavingMembers), + NextDown = lists:member(NO, DownMembers), + case OwnerLeaving and NextDown of + true -> + Active = riak_core_ring:active_members(CState) -- [O], + RNode = + lists:nth(riak_core_rand:uniform(length(Active)), + Active), + {Idx, O, RNode, Mods, Status}; + _ -> T + end + end + || T = {Idx, O, NO, Mods, Status} <- Next], Next3 = [T - || T = {_, O, NO, _, _} <- Next2, - not lists:member(O, DownMembers), - not lists:member(NO, DownMembers)], + || T = {_, O, NO, _, _} <- Next2, + not lists:member(O, DownMembers), + not lists:member(NO, DownMembers)], Next3. %% @private @@ -1194,96 +1194,96 @@ reassign_indices_to(Node, NewNode, Ring) -> %% @private remove_node(CState, Node, Status, Replacing, Seed, - Log) -> + Log) -> Indices = riak_core_ring:indices(CState, Node), remove_node(CState, - Node, - Status, - Replacing, - Seed, - Log, - Indices). + Node, + Status, + Replacing, + Seed, + Log, + Indices). %% @private remove_node(CState, _Node, _Status, _Replacing, _Seed, - _Log, []) -> + _Log, []) -> CState; remove_node(CState, Node, Status, Replacing, Seed, Log, - Indices) -> + Indices) -> CStateT1 = riak_core_ring:change_owners(CState, - riak_core_ring:all_next_owners(CState)), + riak_core_ring:all_next_owners(CState)), case orddict:find(Node, Replacing) of - {ok, NewNode} -> - CStateT2 = reassign_indices_to(Node, NewNode, CStateT1); - error -> - CStateT2 = - riak_core_gossip:remove_from_cluster(CStateT1, - Node, - Seed) + {ok, NewNode} -> + CStateT2 = reassign_indices_to(Node, NewNode, CStateT1); + error -> + CStateT2 = + riak_core_gossip:remove_from_cluster(CStateT1, + Node, + Seed) end, Owners1 = riak_core_ring:all_owners(CState), Owners2 = riak_core_ring:all_owners(CStateT2), Owners3 = lists:zip(Owners1, Owners2), RemovedIndices = case Status of - invalid -> Indices; - leaving -> [] - end, + invalid -> Indices; + leaving -> [] + end, Reassign = [{Idx, NewOwner} - || {Idx, NewOwner} <- Owners2, - lists:member(Idx, RemovedIndices)], + || {Idx, NewOwner} <- Owners2, + lists:member(Idx, RemovedIndices)], Next = [{Idx, PrevOwner, NewOwner, [], awaiting} - || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3, - PrevOwner /= NewOwner, - not lists:member(Idx, RemovedIndices)], + || {{Idx, PrevOwner}, {Idx, NewOwner}} <- Owners3, + PrevOwner /= NewOwner, + not lists:member(Idx, RemovedIndices)], _ = [Log(reassign, {Idx, NewOwner, CState}) - || {Idx, NewOwner} <- Reassign], + || {Idx, NewOwner} <- Reassign], %% Unlike rebalance_ring, remove_node can be called when Next is non-empty, %% therefore we need to merge the values. Original Next has priority. Next2 = lists:ukeysort(1, - riak_core_ring:pending_changes(CState) ++ Next), + riak_core_ring:pending_changes(CState) ++ Next), CState2 = riak_core_ring:change_owners(CState, - Reassign), + Reassign), CState3 = riak_core_ring:set_pending_changes(CState2, - Next2), + Next2), CState3. 
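
On the merge above: lists:ukeysort/2 is stable and keeps the first entry per
index, so transfers already pending in the ring win over the newly computed
ones. A worked example (placeholder atoms):

    %% lists:ukeysort(1, [{0, a, b, [], awaiting},
    %%                    {0, a, c, [], awaiting}])
    %%   =:= [{0, a, b, [], awaiting}]
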
 replace_node_during_resize(CState0, Node, NewNode) ->
     PostResize = riak_core_ring:is_post_resize(CState0),
     CState1 = replace_node_during_resize(CState0,
-                                         Node,
-                                         NewNode,
-                                         PostResize),
+                                           Node,
+                                           NewNode,
+                                           PostResize),
     riak_core_ring:increment_ring_version(riak_core_ring:claimant(CState1),
-                                          CState1).
+                                            CState1).
 
 replace_node_during_resize(CState0, Node, NewNode,
-                           false) -> %% ongoing xfers
+                             false) -> %% ongoing xfers
     %% for each of the indices being moved from Node to NewNode, reschedule resize
     %% transfers where the target is owned by Node.
     CState1 =
-        riak_core_ring:reschedule_resize_transfers(CState0,
-                                                   Node,
-                                                   NewNode),
+          riak_core_ring:reschedule_resize_transfers(CState0,
+                                                     Node,
+                                                     NewNode),
     %% since the resized chash is carried directly in state vs. being rebuilt via next
     %% list, perform reassignment
     {ok, FutureCHash} =
-        riak_core_ring:resized_ring(CState1),
+          riak_core_ring:resized_ring(CState1),
     FutureCState = riak_core_ring:set_chash(CState1,
-                                            FutureCHash),
+                                              FutureCHash),
     ReassignedFuture = reassign_indices_to(Node,
-                                           NewNode,
-                                           FutureCState),
+                                             NewNode,
+                                             FutureCState),
     ReassignedCHash =
-        riak_core_ring:chash(ReassignedFuture),
+          riak_core_ring:chash(ReassignedFuture),
     riak_core_ring:set_resized_ring(CState1,
-                                    ReassignedCHash);
+                                      ReassignedCHash);
 replace_node_during_resize(CState, Node, _NewNode,
-                           true) -> %% performing cleanup
+                             true) -> %% performing cleanup
     %% we are simply deleting data at this point, no reason to do that on either node
     NewNext = [{I, N, O, M, S}
-               || {I, N, O, M, S}
-                      <- riak_core_ring:pending_changes(CState),
-                  N =/= Node],
+                 || {I, N, O, M, S}
+                        <- riak_core_ring:pending_changes(CState),
+                    N =/= Node],
     riak_core_ring:set_pending_changes(CState, NewNext).
 
 no_log(_, _) -> ok.
 
@@ -1292,14 +1292,14 @@ log(debug, {Msg, Args}) -> logger:debug(Msg, Args);
 log(ownership, {Idx, NewOwner, CState}) ->
     Owner = riak_core_ring:index_owner(CState, Idx),
     logger:debug("(new-owner) ~b :: ~p -> ~p~n",
-                 [Idx, Owner, NewOwner]);
+                   [Idx, Owner, NewOwner]);
 log(reassign, {Idx, NewOwner, CState}) ->
     Owner = riak_core_ring:index_owner(CState, Idx),
     logger:debug("(reassign) ~b :: ~p -> ~p~n",
-                 [Idx, Owner, NewOwner]);
+                   [Idx, Owner, NewOwner]);
 log(next, {Idx, Owner, NewOwner}) ->
     logger:debug("(pending) ~b :: ~p -> ~p~n",
-                 [Idx, Owner, NewOwner]);
+                   [Idx, Owner, NewOwner]);
 log(_, _) -> ok.
 
 %% ===================================================================
diff --git a/src/riak_core_eventhandler_guard.erl b/src/riak_core_eventhandler_guard.erl
index 015ecab2b..c3b0f5afb 100644
--- a/src/riak_core_eventhandler_guard.erl
+++ b/src/riak_core_eventhandler_guard.erl
@@ -26,11 +26,11 @@
 -export([start_link/3, start_link/4]).
 
 -export([init/1,
-         handle_call/3,
-         handle_cast/2,
-         handle_info/2,
-         terminate/2,
-         code_change/3]).
+           handle_call/3,
+           handle_cast/2,
+           handle_info/2,
+           terminate/2,
+           code_change/3]).
 
 -record(state, {handlermod, handler, exitfun}).
 
@@ -39,16 +39,16 @@ start_link(HandlerMod, Handler, Args) ->
 start_link(HandlerMod, Handler, Args, ExitFun) ->
     gen_server:start_link(?MODULE,
-                          [HandlerMod, Handler, Args, ExitFun],
-                          []).
+                            [HandlerMod, Handler, Args, ExitFun],
+                            []).
 
 init([HandlerMod, Handler, Args, ExitFun]) ->
     ok = gen_event:add_sup_handler(HandlerMod,
-                                   Handler,
-                                   Args),
+                                     Handler,
+                                     Args),
     {ok, #state{handlermod = HandlerMod, handler = Handler,
-                exitfun = ExitFun}}.
+                  exitfun = ExitFun}}.
 
 handle_call(_Request, _From, State) ->
     {reply, ok, State}.
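A hedged usage sketch for the guard above: it registers a gen_event handler via gen_event:add_sup_handler/3, so a crash in the handler arrives as a gen_event_EXIT message (handled in the next hunk) and can trigger a caller-supplied ExitFun. The event manager here is the standard alarm_handler; my_alarm_handler is a hypothetical gen_event callback module:

    start_guarded_alarm_handler() ->
        ExitFun = fun (Handler, Reason) ->
                          logger:warning("guarded handler ~p exited: ~p",
                                         [Handler, Reason])
                  end,
        %% Returns {ok, Pid} like any gen_server start_link.
        riak_core_eventhandler_guard:start_link(alarm_handler,
                                                my_alarm_handler,
                                                [],
                                                ExitFun).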
@@ -56,16 +56,16 @@ handle_call(_Request, _From, State) -> handle_cast(_Msg, State) -> {noreply, State}. handle_info({gen_event_EXIT, _Handler, shutdown}, - State) -> + State) -> {stop, normal, State}; handle_info({gen_event_EXIT, _Handler, normal}, - State) -> + State) -> {stop, normal, State}; handle_info({gen_event_EXIT, Handler, _Reason}, - State = #state{exitfun = undefined}) -> + State = #state{exitfun = undefined}) -> {stop, {gen_event_EXIT, Handler}, State}; handle_info({gen_event_EXIT, Handler, Reason}, - State = #state{exitfun = ExitFun}) -> + State = #state{exitfun = ExitFun}) -> ExitFun(Handler, Reason), {stop, {gen_event_EXIT, Handler}, State}; handle_info(_Info, State) -> {noreply, State}. diff --git a/src/riak_core_eventhandler_sup.erl b/src/riak_core_eventhandler_sup.erl index bb3dfb1c4..e0af9a677 100644 --- a/src/riak_core_eventhandler_sup.erl +++ b/src/riak_core_eventhandler_sup.erl @@ -28,42 +28,42 @@ -export([start_link/0, init/1]). -export([start_guarded_handler/3, - start_guarded_handler/4, - stop_guarded_handler/3]). + start_guarded_handler/4, + stop_guarded_handler/3]). start_guarded_handler(HandlerMod, Handler, Args) -> start_guarded_handler(HandlerMod, - Handler, - Args, - undefined). + Handler, + Args, + undefined). start_guarded_handler(HandlerMod, Handler, Args, - ExitFun) -> + ExitFun) -> case supervisor:start_child(?MODULE, - handler_spec(HandlerMod, - Handler, - Args, - ExitFun)) - of - {ok, _Pid} -> ok; - Other -> Other + handler_spec(HandlerMod, + Handler, + Args, + ExitFun)) + of + {ok, _Pid} -> ok; + Other -> Other end. stop_guarded_handler(HandlerMod, Handler, Args) -> case lists:member(Handler, - gen_event:which_handlers(HandlerMod)) - of - true -> - case gen_event:delete_handler(HandlerMod, Handler, Args) - of - {error, module_not_found} -> {error, module_not_found}; - O -> - Id = {HandlerMod, Handler}, - ok = supervisor:terminate_child(?MODULE, Id), - ok = supervisor:delete_child(?MODULE, Id), - O - end; - false -> {error, module_not_found} + gen_event:which_handlers(HandlerMod)) + of + true -> + case gen_event:delete_handler(HandlerMod, Handler, Args) + of + {error, module_not_found} -> {error, module_not_found}; + O -> + Id = {HandlerMod, Handler}, + ok = supervisor:terminate_child(?MODULE, Id), + ok = supervisor:delete_child(?MODULE, Id), + O + end; + false -> {error, module_not_found} end. handler_spec(HandlerMod, Handler, Args, ExitFun) -> diff --git a/src/riak_core_gossip.erl b/src/riak_core_gossip.erl index 865bcf793..5f517c4fd 100644 --- a/src/riak_core_gossip.erl +++ b/src/riak_core_gossip.erl @@ -36,21 +36,21 @@ -export([start_link/0, stop/0]). -export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -export([distribute_ring/1, - send_ring/1, - send_ring/2, - remove_from_cluster/2, - remove_from_cluster/3, - random_gossip/1, - recursive_gossip/1, - random_recursive_gossip/1, - rejoin/2]). + send_ring/1, + send_ring/2, + remove_from_cluster/2, + remove_from_cluster/3, + random_gossip/1, + recursive_gossip/1, + random_recursive_gossip/1, + rejoin/2]). %% Default gossip rate: allow at most 45 gossip messages every 10 seconds -define(DEFAULT_LIMIT, {45, 10000}). @@ -65,7 +65,7 @@ %% Distribute a ring to all members of that ring. distribute_ring(Ring) -> gen_server:cast({?MODULE, node()}, - {distribute_ring, Ring}). + {distribute_ring, Ring}). %% send_ring/1 - %% Send the current node's ring to some other node. 
@@ -77,13 +77,13 @@ send_ring(ToNode) -> send_ring(node(), ToNode). send_ring(Node, Node) -> ok; send_ring(FromNode, ToNode) -> gen_server:cast({?MODULE, FromNode}, - {send_ring_to, ToNode}). + {send_ring_to, ToNode}). start_link() -> gen_server:start_link({local, ?MODULE}, - ?MODULE, - [], - []). + ?MODULE, + [], + []). stop() -> gen_server:cast(?MODULE, stop). @@ -93,9 +93,9 @@ rejoin(Node, Ring) -> %% @doc Gossip state to a random node in the ring. random_gossip(Ring) -> case riak_core_ring:random_other_active_node(Ring) of - no_node -> % must be single node cluster - ok; - RandomNode -> send_ring(node(), RandomNode) + no_node -> % must be single node cluster + ok; + RandomNode -> send_ring(node(), RandomNode) end. %% @doc Gossip state to a fixed set of nodes determined from a binary @@ -111,7 +111,7 @@ recursive_gossip(Ring, Node) -> Tree = riak_core_util:build_tree(2, Nodes, [cycles]), Children = orddict:fetch(Node, Tree), _ = [send_ring(node(), OtherNode) - || OtherNode <- Children], + || OtherNode <- Children], ok. recursive_gossip(Ring) -> @@ -119,15 +119,15 @@ recursive_gossip(Ring) -> %% and therefore we fallback to random_recursive_gossip as necessary. Active = riak_core_ring:active_members(Ring), case lists:member(node(), Active) of - true -> recursive_gossip(Ring, node()); - false -> random_recursive_gossip(Ring) + true -> recursive_gossip(Ring, node()); + false -> random_recursive_gossip(Ring) end. random_recursive_gossip(Ring) -> Active = riak_core_ring:active_members(Ring), RNode = - lists:nth(riak_core_rand:uniform(length(Active)), - Active), + lists:nth(riak_core_rand:uniform(length(Active)), + Active), recursive_gossip(Ring, RNode). %% =================================================================== @@ -138,8 +138,8 @@ random_recursive_gossip(Ring) -> init(_State) -> schedule_next_reset(), {Tokens, _} = application:get_env(riak_core, - gossip_limit, - ?DEFAULT_LIMIT), + gossip_limit, + ?DEFAULT_LIMIT), State = #state{gossip_tokens = Tokens}, {ok, State}. @@ -147,26 +147,26 @@ handle_call(_, _From, State) -> {reply, ok, State}. 
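recursive_gossip/2 above fans the ring out along a binary tree of members rather than having one node send to everyone. The sketch below only approximates the shape of that tree using list positions; it is not riak_core_util:build_tree/3 (whose [cycles] option also wraps children around the end of the list), just an illustration of the O(log N) fan-out:

    -module(gossip_tree_sketch).

    -export([children/2]).

    %% Member at 0-based position I forwards the ring to positions
    %% 2*I+1 and 2*I+2, so a full broadcast takes ~log2(N) hops.
    children(Node, Nodes) ->
        I = length(lists:takewhile(fun (N) -> N =/= Node end, Nodes)),
        Len = length(Nodes),
        [lists:nth(P + 1, Nodes)
         || P <- [2 * I + 1, 2 * I + 2], P < Len].

    %% children(n1, [n1,n2,n3,n4,n5]) -> [n2,n3]
    %% children(n2, [n1,n2,n3,n4,n5]) -> [n4,n5]
    %% children(n3, [n1,n2,n3,n4,n5]) -> []   (leaf)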
%% @private handle_cast({send_ring_to, _Node}, - State = #state{gossip_tokens = 0}) -> + State = #state{gossip_tokens = 0}) -> %% Out of gossip tokens, ignore the send request {noreply, State}; handle_cast({send_ring_to, Node}, State) -> {ok, RingOut} = riak_core_ring_manager:get_raw_ring(), riak_core_ring:check_tainted(RingOut, - "Error: riak_core_gossip/send_ring_to " - ":: Sending tainted ring over gossip"), + "Error: riak_core_gossip/send_ring_to " + ":: Sending tainted ring over gossip"), gen_server:cast({?MODULE, Node}, - {reconcile_ring, RingOut}), + {reconcile_ring, RingOut}), Tokens = State#state.gossip_tokens - 1, {noreply, State#state{gossip_tokens = Tokens}}; handle_cast({distribute_ring, Ring}, State) -> Nodes = riak_core_ring:active_members(Ring), riak_core_ring:check_tainted(Ring, - "Error: riak_core_gossip/distribute_ring " - ":: Sending tainted ring over gossip"), + "Error: riak_core_gossip/distribute_ring " + ":: Sending tainted ring over gossip"), gen_server:abcast(Nodes, - ?MODULE, - {reconcile_ring, Ring}), + ?MODULE, + {reconcile_ring, Ring}), {noreply, State}; handle_cast({reconcile_ring, OtherRing}, State) -> %% Compare the two rings, see if there is anything that @@ -174,7 +174,7 @@ handle_cast({reconcile_ring, OtherRing}, State) -> %% STATS % riak_core_stat:update(gossip_received), riak_core_ring_manager:ring_trans(fun reconcile/2, - [OtherRing]), + [OtherRing]), {noreply, State}; handle_cast(gossip_ring, State) -> % Gossip the ring to some random other node... @@ -184,18 +184,18 @@ handle_cast(gossip_ring, State) -> handle_cast({rejoin, OtherRing}, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), SameCluster = riak_core_ring:cluster_name(Ring) =:= - riak_core_ring:cluster_name(OtherRing), + riak_core_ring:cluster_name(OtherRing), case SameCluster of - true -> - OtherNode = riak_core_ring:owner_node(OtherRing), - case riak_core:join(node(), OtherNode, true, true) of - ok -> ok; - {error, Reason} -> - logger:error("Could not rejoin cluster: ~p", [Reason]), - ok - end, - {noreply, State}; - false -> {noreply, State} + true -> + OtherNode = riak_core_ring:owner_node(OtherRing), + case riak_core:join(node(), OtherNode, true, true) of + ok -> ok; + {error, Reason} -> + logger:error("Could not rejoin cluster: ~p", [Reason]), + ok + end, + {noreply, State}; + false -> {noreply, State} end; handle_cast(_, State) -> {noreply, State}. @@ -203,8 +203,8 @@ handle_info(reset_tokens, State) -> schedule_next_reset(), gen_server:cast(?MODULE, gossip_ring), {Tokens, _} = application:get_env(riak_core, - gossip_limit, - ?DEFAULT_LIMIT), + gossip_limit, + ?DEFAULT_LIMIT), {noreply, State#state{gossip_tokens = Tokens}}; handle_info(_Info, State) -> {noreply, State}. @@ -220,211 +220,211 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. schedule_next_reset() -> {_, Reset} = application:get_env(riak_core, - gossip_limit, - ?DEFAULT_LIMIT), + gossip_limit, + ?DEFAULT_LIMIT), erlang:send_after(Reset, ?MODULE, reset_tokens). 
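The throttle above is a token bucket: the {45, 10000} default allows at most 45 gossip sends per 10-second window, each send_ring_to spends one token, sends at zero tokens are silently dropped, and the reset_tokens message refills the bucket. A condensed, self-contained sketch of the same pattern (module name and API invented for illustration; the real limit comes from the riak_core gossip_limit env var):

    -module(token_bucket_sketch).

    -behaviour(gen_server).

    -export([start_link/0, try_send/0]).

    -export([init/1, handle_call/3, handle_cast/2, handle_info/2]).

    -define(LIMIT, {45, 10000}). %% at most 45 sends every 10 seconds

    start_link() ->
        gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

    try_send() -> gen_server:call(?MODULE, try_send).

    init([]) ->
        {Tokens, Reset} = ?LIMIT,
        erlang:send_after(Reset, self(), reset_tokens),
        {ok, Tokens}.

    %% Out of tokens: drop the request, like the send_ring_to clause above.
    handle_call(try_send, _From, 0) -> {reply, dropped, 0};
    handle_call(try_send, _From, Tokens) -> {reply, sent, Tokens - 1}.

    handle_cast(_Msg, Tokens) -> {noreply, Tokens}.

    %% Periodic refill, rescheduling itself exactly like schedule_next_reset.
    handle_info(reset_tokens, _Tokens) ->
        {Refill, Reset} = ?LIMIT,
        erlang:send_after(Reset, self(), reset_tokens),
        {noreply, Refill}.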
%%noinspection ErlangUnboundVariable reconcile(Ring0, [OtherRing0]) -> {Ring, OtherRing} = - riak_core_ring:reconcile_names(Ring0, OtherRing0), + riak_core_ring:reconcile_names(Ring0, OtherRing0), Node = node(), OtherNode = riak_core_ring:owner_node(OtherRing), Members = riak_core_ring:reconcile_members(Ring, - OtherRing), + OtherRing), WrongCluster = riak_core_ring:cluster_name(Ring) /= - riak_core_ring:cluster_name(OtherRing), + riak_core_ring:cluster_name(OtherRing), PreStatus = riak_core_ring:member_status(Members, - OtherNode), + OtherNode), IgnoreGossip = WrongCluster or (PreStatus =:= invalid) - or (PreStatus =:= down), + or (PreStatus =:= down), case IgnoreGossip of - true -> - Ring2 = Ring, - Changed = false; - false -> - {Changed, Ring2} = riak_core_ring:reconcile(OtherRing, - Ring) + true -> + Ring2 = Ring, + Changed = false; + false -> + {Changed, Ring2} = riak_core_ring:reconcile(OtherRing, + Ring) end, OtherStatus = riak_core_ring:member_status(Ring2, - OtherNode), + OtherNode), case {WrongCluster, OtherStatus, Changed} of - {true, _, _} -> - %% TODO: Tell other node to stop gossiping to this node. - %% STATS - % riak_core_stat:update(ignored_gossip), - ignore; - {_, down, _} -> - %% Tell other node to rejoin the cluster. - riak_core_gossip:rejoin(OtherNode, Ring2), - ignore; - {_, invalid, _} -> - %% Exiting/Removed node never saw shutdown cast, re-send. - ClusterName = riak_core_ring:cluster_name(Ring), - riak_core_ring_manager:refresh_ring(OtherNode, - ClusterName), - ignore; - {_, _, new_ring} -> - Ring3 = riak_core_ring:ring_changed(Node, Ring2), - %% STATS - % riak_core_stat:update(rings_reconciled), - log_membership_changes(Ring, Ring3), - {reconciled_ring, Ring3}; - {_, _, _} -> ignore + {true, _, _} -> + %% TODO: Tell other node to stop gossiping to this node. + %% STATS + % riak_core_stat:update(ignored_gossip), + ignore; + {_, down, _} -> + %% Tell other node to rejoin the cluster. + riak_core_gossip:rejoin(OtherNode, Ring2), + ignore; + {_, invalid, _} -> + %% Exiting/Removed node never saw shutdown cast, re-send. + ClusterName = riak_core_ring:cluster_name(Ring), + riak_core_ring_manager:refresh_ring(OtherNode, + ClusterName), + ignore; + {_, _, new_ring} -> + Ring3 = riak_core_ring:ring_changed(Node, Ring2), + %% STATS + % riak_core_stat:update(rings_reconciled), + log_membership_changes(Ring, Ring3), + {reconciled_ring, Ring3}; + {_, _, _} -> ignore end. log_membership_changes(OldRing, NewRing) -> OldStatus = riak_core_ring:all_member_status(OldRing), NewStatus = riak_core_ring:all_member_status(NewRing), do_log_membership_changes(lists:sort(OldStatus), - lists:sort(NewStatus)). + lists:sort(NewStatus)). 
do_log_membership_changes([], []) -> ok; do_log_membership_changes([{Node, Status} | Old], - [{Node, Status} | New]) -> + [{Node, Status} | New]) -> %% No change do_log_membership_changes(Old, New); do_log_membership_changes([{Node, Status1} | Old], - [{Node, Status2} | New]) -> + [{Node, Status2} | New]) -> %% State changed, did not join or leave log_node_changed(Node, Status1, Status2), do_log_membership_changes(Old, New); do_log_membership_changes([{OldNode, _OldStatus} | _] = - Old, - [{NewNode, NewStatus} | New]) + Old, + [{NewNode, NewStatus} | New]) when NewNode < OldNode -> %% Node added log_node_added(NewNode, NewStatus), do_log_membership_changes(Old, New); do_log_membership_changes([{OldNode, OldStatus} | Old], - [{NewNode, _NewStatus} | _] = New) + [{NewNode, _NewStatus} | _] = New) when OldNode < NewNode -> %% Node removed log_node_removed(OldNode, OldStatus), do_log_membership_changes(Old, New); do_log_membership_changes([{OldNode, OldStatus} | Old], - []) -> + []) -> %% Trailing nodes were removed log_node_removed(OldNode, OldStatus), do_log_membership_changes(Old, []); do_log_membership_changes([], - [{NewNode, NewStatus} | New]) -> + [{NewNode, NewStatus} | New]) -> %% Trailing nodes were added log_node_added(NewNode, NewStatus), do_log_membership_changes([], New). log_node_changed(Node, Old, New) -> logger:info("'~s' changed from '~s' to '~s'~n", - [Node, Old, New]). + [Node, Old, New]). log_node_added(Node, New) -> logger:info("'~s' joined cluster with status '~s'~n", - [Node, New]). + [Node, New]). log_node_removed(Node, Old) -> logger:info("'~s' removed from cluster (previously: " - "'~s')~n", - [Node, Old]). + "'~s')~n", + [Node, Old]). remove_from_cluster(Ring, ExitingNode) -> remove_from_cluster(Ring, - ExitingNode, - riak_core_rand:rand_seed()). + ExitingNode, + riak_core_rand:rand_seed()). remove_from_cluster(Ring, ExitingNode, Seed) -> % Get a list of indices owned by the ExitingNode... AllOwners = riak_core_ring:all_owners(Ring), % Transfer indexes to other nodes... ExitRing = case attempt_simple_transfer(Seed, - Ring, - AllOwners, - ExitingNode) - of - {ok, NR} -> NR; - target_n_fail -> - %% re-diagonalize - %% first hand off all claims to *any* one else, - %% just so rebalance doesn't include exiting node - Members = riak_core_ring:claiming_members(Ring), - Other = hd(lists:delete(ExitingNode, Members)), - TempRing = lists:foldl(fun ({I, N}, R) - when N == ExitingNode -> - riak_core_ring:transfer_node(I, - Other, - R); - (_, R) -> R - end, - Ring, - AllOwners), - riak_core_claim:claim_rebalance_n(TempRing, Other) - end, + Ring, + AllOwners, + ExitingNode) + of + {ok, NR} -> NR; + target_n_fail -> + %% re-diagonalize + %% first hand off all claims to *any* one else, + %% just so rebalance doesn't include exiting node + Members = riak_core_ring:claiming_members(Ring), + Other = hd(lists:delete(ExitingNode, Members)), + TempRing = lists:foldl(fun ({I, N}, R) + when N == ExitingNode -> + riak_core_ring:transfer_node(I, + Other, + R); + (_, R) -> R + end, + Ring, + AllOwners), + riak_core_claim:claim_rebalance_n(TempRing, Other) + end, ExitRing. attempt_simple_transfer(Seed, Ring, Owners, - ExitingNode) -> + ExitingNode) -> TargetN = application:get_env(riak_core, - target_n_val, - undefined), + target_n_val, + undefined), attempt_simple_transfer(Seed, - Ring, - Owners, - TargetN, - ExitingNode, - 0, - [{O, -TargetN} - || O <- riak_core_ring:claiming_members(Ring), - O /= ExitingNode]). 
+ Ring, + Owners, + TargetN, + ExitingNode, + 0, + [{O, -TargetN} + || O <- riak_core_ring:claiming_members(Ring), + O /= ExitingNode]). attempt_simple_transfer(Seed, Ring, [{P, Exit} | Rest], - TargetN, Exit, Idx, Last) -> + TargetN, Exit, Idx, Last) -> %% handoff case [N || {N, I} <- Last, Idx - I >= TargetN] of - [] -> target_n_fail; - Candidates -> - %% these nodes don't violate target_n in the reverse direction - StepsToNext = fun (Node) -> - length(lists:takewhile(fun ({_, Owner}) -> - Node /= Owner - end, - Rest)) - end, - case lists:filter(fun (N) -> - Next = StepsToNext(N), - Next + 1 >= TargetN orelse - Next == length(Rest) - end, - Candidates) - of - [] -> target_n_fail; - Qualifiers -> - %% these nodes don't violate target_n forward - {Rand, Seed2} = - riak_core_rand:uniform_s(length(Qualifiers), Seed), - Chosen = lists:nth(Rand, Qualifiers), - %% choose one, and do the rest of the ring - attempt_simple_transfer(Seed2, - riak_core_ring:transfer_node(P, - Chosen, - Ring), - Rest, - TargetN, - Exit, - Idx + 1, - lists:keyreplace(Chosen, - 1, - Last, - {Chosen, Idx})) - end + [] -> target_n_fail; + Candidates -> + %% these nodes don't violate target_n in the reverse direction + StepsToNext = fun (Node) -> + length(lists:takewhile(fun ({_, Owner}) -> + Node /= Owner + end, + Rest)) + end, + case lists:filter(fun (N) -> + Next = StepsToNext(N), + Next + 1 >= TargetN orelse + Next == length(Rest) + end, + Candidates) + of + [] -> target_n_fail; + Qualifiers -> + %% these nodes don't violate target_n forward + {Rand, Seed2} = + riak_core_rand:uniform_s(length(Qualifiers), Seed), + Chosen = lists:nth(Rand, Qualifiers), + %% choose one, and do the rest of the ring + attempt_simple_transfer(Seed2, + riak_core_ring:transfer_node(P, + Chosen, + Ring), + Rest, + TargetN, + Exit, + Idx + 1, + lists:keyreplace(Chosen, + 1, + Last, + {Chosen, Idx})) + end end; attempt_simple_transfer(Seed, Ring, [{_, N} | Rest], - TargetN, Exit, Idx, Last) -> + TargetN, Exit, Idx, Last) -> %% just keep track of seeing this node attempt_simple_transfer(Seed, - Ring, - Rest, - TargetN, - Exit, - Idx + 1, - lists:keyreplace(N, 1, Last, {N, Idx})); + Ring, + Rest, + TargetN, + Exit, + Idx + 1, + lists:keyreplace(N, 1, Last, {N, Idx})); attempt_simple_transfer(_, Ring, [], _, _, _, _) -> {ok, Ring}. diff --git a/src/riak_core_handoff_listener.erl b/src/riak_core_handoff_listener.erl index 7f391707e..bfbe0f991 100644 --- a/src/riak_core_handoff_listener.erl +++ b/src/riak_core_handoff_listener.erl @@ -29,30 +29,30 @@ -export([start_link/0]). -export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -export([get_handoff_ip/0, - sock_opts/0, - new_connection/2]). + sock_opts/0, + new_connection/2]). -record(state, - {ipaddr :: string(), portnum :: integer()}). + {ipaddr :: string(), portnum :: integer()}). start_link() -> PortNum = application:get_env(riak_core, - handoff_port, - undefined), + handoff_port, + undefined), IpAddr = application:get_env(riak_core, - handoff_ip, - undefined), + handoff_ip, + undefined), gen_nb_server:start_link(?MODULE, - IpAddr, - PortNum, - [IpAddr, PortNum]). + IpAddr, + PortNum, + [IpAddr, PortNum]). get_handoff_ip() -> gen_server:call(?MODULE, handoff_ip, infinity). @@ -65,10 +65,10 @@ sock_opts() -> [binary, {packet, 4}, {reuseaddr, true}, {backlog, 64}]. 
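In the attempt_simple_transfer walk above, Last maps each candidate node to the most recent ring position it owns, seeded with -TargetN so nodes not yet seen always qualify. The backward check Idx - I >= TargetN rejects any node whose last replica sits closer than TargetN positions behind the partition being reassigned, and StepsToNext applies the mirror-image test in the forward direction. A worked illustration of the backward check with invented values:

    %% TargetN = 3, current ring position Idx = 7:
    %%   Last = [{n2, 5}, {n3, 2}, {n4, -3}]
    %%   n2: 7 - 5    = 2  <  3 -> rejected (replicas would be too close)
    %%   n3: 7 - 2    = 5  >= 3 -> candidate
    %%   n4: 7 - (-3) = 10 >= 3 -> candidate (seed value, never seen yet)
    backward_candidates(Idx, TargetN, Last) ->
        [N || {N, I} <- Last, Idx - I >= TargetN].
    %% backward_candidates(7, 3, [{n2,5},{n3,2},{n4,-3}]) -> [n3, n4]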
handle_call(handoff_ip, _From, - State = #state{ipaddr = I}) -> + State = #state{ipaddr = I}) -> {reply, {ok, I}, State}; handle_call(handoff_port, _From, - State = #state{portnum = P}) -> + State = #state{portnum = P}) -> {reply, {ok, P}, State}. handle_cast(_Msg, State) -> {noreply, State}. @@ -81,13 +81,13 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. new_connection(Socket, State) -> case riak_core_handoff_manager:add_inbound() of - {ok, Pid} -> - ok = gen_tcp:controlling_process(Socket, Pid), - ok = riak_core_handoff_receiver:set_socket(Pid, Socket), - {ok, State}; - {error, _Reason} -> - %% STATS - %% riak_core_stat:update(rejected_handoffs), - gen_tcp:close(Socket), - {ok, State} + {ok, Pid} -> + ok = gen_tcp:controlling_process(Socket, Pid), + ok = riak_core_handoff_receiver:set_socket(Pid, Socket), + {ok, State}; + {error, _Reason} -> + %% STATS + %% riak_core_stat:update(rejected_handoffs), + gen_tcp:close(Socket), + {ok, State} end. diff --git a/src/riak_core_handoff_listener_sup.erl b/src/riak_core_handoff_listener_sup.erl index f68fa4f35..ab398b591 100644 --- a/src/riak_core_handoff_listener_sup.erl +++ b/src/riak_core_handoff_listener_sup.erl @@ -26,12 +26,12 @@ -export([start_link/0, init/1]). -define(CHILD(I, Type), - {I, - {I, start_link, []}, - permanent, - brutal_kill, - Type, - [I]}). + {I, + {I, start_link, []}, + permanent, + brutal_kill, + Type, + [I]}). %% begins the supervisor, init/1 will be called start_link() -> diff --git a/src/riak_core_handoff_manager.erl b/src/riak_core_handoff_manager.erl index 6c0ac838b..3f78c0027 100644 --- a/src/riak_core_handoff_manager.erl +++ b/src/riak_core_handoff_manager.erl @@ -19,33 +19,33 @@ %% gen_server api -export([start_link/0, - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). %% exclusion api -export([add_exclusion/2, - get_exclusions/1, - remove_exclusion/2]). + get_exclusions/1, + remove_exclusion/2]). %% handoff api -export([add_outbound/6, - add_outbound/7, - add_inbound/0, - xfer/3, - kill_xfer/3, - status/0, - status/1, - status_update/2, - set_concurrency/1, - get_concurrency/0, - set_recv_data/2, - kill_handoffs/0, - kill_handoffs_in_direction/1, - handoff_change_enabled_setting/2]). + add_outbound/7, + add_inbound/0, + xfer/3, + kill_xfer/3, + status/0, + status/1, + status_update/2, + set_concurrency/1, + get_concurrency/0, + set_recv_data/2, + kill_handoffs/0, + kill_handoffs_in_direction/1, + handoff_change_enabled_setting/2]). -include("riak_core_handoff.hrl"). @@ -58,17 +58,17 @@ -endif. -record(state, - {excl, handoffs = [] :: [handoff_status()]}). + {excl, handoffs = [] :: [handoff_status()]}). %% this can be overridden with riak_core handoff_concurrency -define(HANDOFF_CONCURRENCY, 2). -define(HO_EQ(HOA, HOB), - HOA#handoff_status.mod_src_tgt == - HOB#handoff_status.mod_src_tgt - andalso - HOA#handoff_status.timestamp == - HOB#handoff_status.timestamp). + HOA#handoff_status.mod_src_tgt == + HOB#handoff_status.mod_src_tgt + andalso + HOA#handoff_status.timestamp == + HOB#handoff_status.timestamp). %%%=================================================================== %%% API @@ -76,74 +76,74 @@ start_link() -> gen_server:start_link({local, ?MODULE}, - ?MODULE, - [], - []). + ?MODULE, + [], + []). init([]) -> {ok, #state{excl = sets:new(), handoffs = []}}. 
add_outbound(HOType, Module, Idx, Node, VnodePid, - Opts) -> + Opts) -> add_outbound(HOType, - Module, - Idx, - Idx, - Node, - VnodePid, - Opts). + Module, + Idx, + Idx, + Node, + VnodePid, + Opts). add_outbound(HOType, Module, SrcIdx, TargetIdx, Node, - VnodePid, Opts) -> + VnodePid, Opts) -> case application:get_env(riak_core, - disable_outbound_handoff) - of - {ok, true} -> {error, max_concurrency}; - _ -> - gen_server:call(?MODULE, - {add_outbound, - HOType, - Module, - SrcIdx, - TargetIdx, - Node, - VnodePid, - Opts}, - infinity) + disable_outbound_handoff) + of + {ok, true} -> {error, max_concurrency}; + _ -> + gen_server:call(?MODULE, + {add_outbound, + HOType, + Module, + SrcIdx, + TargetIdx, + Node, + VnodePid, + Opts}, + infinity) end. add_inbound() -> case application:get_env(riak_core, - disable_inbound_handoff) - of - {ok, true} -> {error, max_concurrency}; - _ -> gen_server:call(?MODULE, {add_inbound}, infinity) + disable_inbound_handoff) + of + {ok, true} -> {error, max_concurrency}; + _ -> gen_server:call(?MODULE, {add_inbound}, infinity) end. %% @doc Initiate a transfer from `SrcPartition' to `TargetPartition' %% for the given `Module' using the `FilterModFun' filter. -spec xfer({index(), node()}, mod_partition(), - {module(), atom()}) -> ok. + {module(), atom()}) -> ok. xfer({SrcPartition, SrcOwner}, {Module, TargetPartition}, FilterModFun) -> %% NOTE: This will not work with old nodes ReqOrigin = node(), gen_server:cast({?MODULE, SrcOwner}, - {send_handoff, - repair, - Module, - {SrcPartition, TargetPartition}, - ReqOrigin, - FilterModFun}). + {send_handoff, + repair, + Module, + {SrcPartition, TargetPartition}, + ReqOrigin, + FilterModFun}). %% @doc Associate `Data' with the inbound handoff `Recv'. -spec set_recv_data(pid(), proplists:proplist()) -> ok. set_recv_data(Recv, Data) -> gen_server:call(?MODULE, - {set_recv_data, Recv, Data}, - infinity). + {set_recv_data, Recv, Data}, + infinity). status() -> status(none). @@ -156,12 +156,12 @@ status(Filter) -> status_update(ModSrcTgt, Stats) -> gen_server:cast(?MODULE, - {status_update, ModSrcTgt, Stats}). + {status_update, ModSrcTgt, Stats}). set_concurrency(Limit) -> gen_server:call(?MODULE, - {set_concurrency, Limit}, - infinity). + {set_concurrency, Limit}, + infinity). get_concurrency() -> gen_server:call(?MODULE, get_concurrency, infinity). @@ -171,152 +171,152 @@ get_concurrency() -> kill_xfer(SrcNode, ModSrcTarget, Reason) -> gen_server:cast({?MODULE, SrcNode}, - {kill_xfer, ModSrcTarget, Reason}). + {kill_xfer, ModSrcTarget, Reason}). kill_handoffs() -> set_concurrency(0). -spec kill_handoffs_in_direction(inbound | - outbound) -> ok. + outbound) -> ok. kill_handoffs_in_direction(Direction) -> gen_server:call(?MODULE, - {kill_in_direction, Direction}, - infinity). + {kill_in_direction, Direction}, + infinity). add_exclusion(Module, Index) -> gen_server:cast(?MODULE, - {add_exclusion, {Module, Index}}). + {add_exclusion, {Module, Index}}). remove_exclusion(Module, Index) -> gen_server:cast(?MODULE, - {del_exclusion, {Module, Index}}). + {del_exclusion, {Module, Index}}). get_exclusions(Module) -> gen_server:call(?MODULE, - {get_exclusions, Module}, - infinity). + {get_exclusions, Module}, + infinity). 
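A hedged sketch of driving the outbound API above from a vnode process; my_app_vnode and partition 0 are placeholders (the module's own test below uses riak_kv_vnode the same way), and the return shapes mirror the {ok, Sender} and {error, max_concurrency} results produced by the handle_call clauses that follow:

    start_ownership_handoff(TargetNode) ->
        case riak_core_handoff_manager:add_outbound(ownership,
                                                    my_app_vnode, %% hypothetical
                                                    0,            %% partition
                                                    TargetNode,
                                                    self(),       %% vnode pid
                                                    [])
            of
            {ok, SenderPid} ->
                {started, SenderPid};
            {error, max_concurrency} ->
                %% over the handoff_concurrency limit, or outbound handoff
                %% disabled via disable_outbound_handoff; back off and retry
                retry_later
        end.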
%%%=================================================================== %%% Callbacks %%%=================================================================== handle_call({get_exclusions, Module}, _From, - State = #state{excl = Excl}) -> + State = #state{excl = Excl}) -> Reply = [I - || {M, I} <- sets:to_list(Excl), M =:= Module], + || {M, I} <- sets:to_list(Excl), M =:= Module], {reply, {ok, Reply}, State}; handle_call({add_outbound, - Type, - Mod, - SrcIdx, - TargetIdx, - Node, - Pid, - Opts}, - _From, State = #state{handoffs = HS}) -> + Type, + Mod, + SrcIdx, + TargetIdx, + Node, + Pid, + Opts}, + _From, State = #state{handoffs = HS}) -> case send_handoff(Type, - {Mod, SrcIdx, TargetIdx}, - Node, - Pid, - HS, - Opts) - of - {ok, - Handoff = #handoff_status{transport_pid = Sender}} -> - HS2 = HS ++ [Handoff], - {reply, {ok, Sender}, State#state{handoffs = HS2}}; - {false, - _ExistingHandoff = #handoff_status{transport_pid = - Sender}} -> - {reply, {ok, Sender}, State}; - Error -> {reply, Error, State} + {Mod, SrcIdx, TargetIdx}, + Node, + Pid, + HS, + Opts) + of + {ok, + Handoff = #handoff_status{transport_pid = Sender}} -> + HS2 = HS ++ [Handoff], + {reply, {ok, Sender}, State#state{handoffs = HS2}}; + {false, + _ExistingHandoff = #handoff_status{transport_pid = + Sender}} -> + {reply, {ok, Sender}, State}; + Error -> {reply, Error, State} end; handle_call({add_inbound}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> case receive_handoff() of - {ok, - Handoff = #handoff_status{transport_pid = Receiver}} -> - HS2 = HS ++ [Handoff], - {reply, {ok, Receiver}, State#state{handoffs = HS2}}; - Error -> {reply, Error, State} + {ok, + Handoff = #handoff_status{transport_pid = Receiver}} -> + HS2 = HS ++ [Handoff], + {reply, {ok, Receiver}, State#state{handoffs = HS2}}; + Error -> {reply, Error, State} end; handle_call({set_recv_data, Recv, Data}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> case lists:keyfind(Recv, - #handoff_status.transport_pid, - HS) - of - false -> - throw({error, - "set_recv_data called for non-existing " - "receiver", - Recv, - Data}); - #handoff_status{} = H -> - H2 = H#handoff_status{mod_src_tgt = - proplists:get_value(mod_src_tgt, Data), - vnode_pid = - proplists:get_value(vnode_pid, Data)}, - HS2 = lists:keyreplace(Recv, - #handoff_status.transport_pid, - HS, - H2), - {reply, ok, State#state{handoffs = HS2}} + #handoff_status.transport_pid, + HS) + of + false -> + throw({error, + "set_recv_data called for non-existing " + "receiver", + Recv, + Data}); + #handoff_status{} = H -> + H2 = H#handoff_status{mod_src_tgt = + proplists:get_value(mod_src_tgt, Data), + vnode_pid = + proplists:get_value(vnode_pid, Data)}, + HS2 = lists:keyreplace(Recv, + #handoff_status.transport_pid, + HS, + H2), + {reply, ok, State#state{handoffs = HS2}} end; handle_call({xfer_status, Xfer}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> TP = Xfer#handoff_status.transport_pid, case lists:keyfind(TP, - #handoff_status.transport_pid, - HS) - of - false -> {reply, not_found, State}; - _ -> {reply, in_progress, State} + #handoff_status.transport_pid, + HS) + of + false -> {reply, not_found, State}; + _ -> {reply, in_progress, State} end; handle_call({status, Filter}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> Status = lists:filter(filter(Filter), - [build_status(HO) || HO <- HS]), + [build_status(HO) || HO <- HS]), {reply, Status, State}; handle_call({set_concurrency, 
Limit}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> application:set_env(riak_core, - handoff_concurrency, - Limit), + handoff_concurrency, + Limit), case Limit < erlang:length(HS) of - true -> - %% Note: we don't update the state with the handoffs that we're - %% keeping because we'll still get the 'DOWN' messages with - %% a reason of 'max_concurrency' and we want to be able to do - %% something with that if necessary. - {_Keep, Discard} = lists:split(Limit, HS), - _ = [erlang:exit(Pid, max_concurrency) - || #handoff_status{transport_pid = Pid} <- Discard], - {reply, ok, State}; - false -> {reply, ok, State} + true -> + %% Note: we don't update the state with the handoffs that we're + %% keeping because we'll still get the 'DOWN' messages with + %% a reason of 'max_concurrency' and we want to be able to do + %% something with that if necessary. + {_Keep, Discard} = lists:split(Limit, HS), + _ = [erlang:exit(Pid, max_concurrency) + || #handoff_status{transport_pid = Pid} <- Discard], + {reply, ok, State}; + false -> {reply, ok, State} end; handle_call(get_concurrency, _From, State) -> Concurrency = get_concurrency_limit(), {reply, Concurrency, State}; handle_call({kill_in_direction, Direction}, _From, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> %% TODO (atb): Refactor this to comply with max_concurrency logspam PR's exit codes %% NB. As-is this handles worker termination the same way as set_concurrency; %% no state update is performed here, we let the worker DOWNs mark them %% as dead rather than trimming here. Kill = [H - || H = #handoff_status{direction = D} <- HS, - D =:= Direction], + || H = #handoff_status{direction = D} <- HS, + D =:= Direction], _ = [erlang:exit(Pid, max_concurrency) - || #handoff_status{transport_pid = Pid} <- Kill], + || #handoff_status{transport_pid = Pid} <- Kill], {reply, ok, State}. handle_cast({del_exclusion, {Mod, Idx}}, - State = #state{excl = Excl}) -> + State = #state{excl = Excl}) -> Excl2 = sets:del_element({Mod, Idx}, Excl), {noreply, State#state{excl = Excl2}}; handle_cast({add_exclusion, {Mod, Idx}}, - State = #state{excl = Excl}) -> + State = #state{excl = Excl}) -> %% Note: This function used to trigger a ring event after adding an %% exclusion to ensure that an exiting node would eventually shutdown %% after all vnodes had finished handoff. 
This behavior is now handled @@ -324,49 +324,49 @@ handle_cast({add_exclusion, {Mod, Idx}}, Excl2 = sets:add_element({Mod, Idx}, Excl), {noreply, State#state{excl = Excl2}}; handle_cast({status_update, ModSrcTgt, StatsUpdate}, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> case lists:keyfind(ModSrcTgt, - #handoff_status.mod_src_tgt, - HS) - of - false -> - logger:error("status_update for non-existing handoff ~p", - [ModSrcTgt]), - {noreply, State}; - HO -> - Stats2 = update_stats(StatsUpdate, - HO#handoff_status.stats), - HO2 = HO#handoff_status{stats = Stats2}, - HS2 = lists:keyreplace(ModSrcTgt, - #handoff_status.mod_src_tgt, - HS, - HO2), - {noreply, State#state{handoffs = HS2}} + #handoff_status.mod_src_tgt, + HS) + of + false -> + logger:error("status_update for non-existing handoff ~p", + [ModSrcTgt]), + {noreply, State}; + HO -> + Stats2 = update_stats(StatsUpdate, + HO#handoff_status.stats), + HO2 = HO#handoff_status{stats = Stats2}, + HS2 = lists:keyreplace(ModSrcTgt, + #handoff_status.mod_src_tgt, + HS, + HO2), + {noreply, State#state{handoffs = HS2}} end; handle_cast({send_handoff, - Type, - Mod, - {Src, Target}, - ReqOrigin, - {FilterMod, FilterFun} = FMF}, - State = #state{handoffs = HS}) -> + Type, + Mod, + {Src, Target}, + ReqOrigin, + {FilterMod, FilterFun} = FMF}, + State = #state{handoffs = HS}) -> Filter = FilterMod:FilterFun(Target), %% TODO: make a record? {ok, VNode} = riak_core_vnode_manager:get_vnode_pid(Src, - Mod), + Mod), case send_handoff(Type, - {Mod, Src, Target}, - ReqOrigin, - VNode, - HS, - {Filter, FMF}, - ReqOrigin, - []) - of - {ok, Handoff} -> - HS2 = HS ++ [Handoff], - {noreply, State#state{handoffs = HS2}}; - _ -> {noreply, State} + {Mod, Src, Target}, + ReqOrigin, + VNode, + HS, + {Filter, FMF}, + ReqOrigin, + []) + of + {ok, Handoff} -> + HS2 = HS ++ [Handoff], + {noreply, State#state{handoffs = HS2}}; + _ -> {noreply, State} end; handle_cast({kill_xfer, ModSrcTarget, Reason}, State) -> HS = State#state.handoffs, @@ -374,77 +374,77 @@ handle_cast({kill_xfer, ModSrcTarget, Reason}, State) -> {noreply, State#state{handoffs = HS2}}. handle_info({'DOWN', Ref, process, _Pid, Reason}, - State = #state{handoffs = HS}) -> + State = #state{handoffs = HS}) -> case lists:keytake(Ref, - #handoff_status.transport_mon, - HS) - of - {value, - #handoff_status{mod_src_tgt = {M, S, I}, - direction = Dir, vnode_pid = Vnode, vnode_mon = VnodeM, - req_origin = Origin}, - NewHS} -> - WarnVnode = case Reason of - %% if the reason the handoff process died was anything other - %% than 'normal' we should log the reason why as an error - normal -> false; - X - when X == max_concurrency orelse - element(1, X) == shutdown andalso - element(2, X) == max_concurrency -> - logger:info("An ~w handoff of partition ~w ~w was " - "terminated\n " - " for reason: ~w~n", - [Dir, M, I, Reason]), - true; - _ -> - logger:error("An ~w handoff of partition ~w ~w was " - "terminated\n " - " for reason: ~w~n", - [Dir, M, I, Reason]), - true - end, - %% if we have the vnode process pid, tell the vnode why the - %% handoff stopped so it can clean up its state - case WarnVnode andalso is_pid(Vnode) of - true -> - riak_core_vnode:handoff_error(Vnode, 'DOWN', Reason); - _ -> - case Origin of - none -> ok; - _ -> - %% Use proplist instead so it's more - %% flexible in future, or does - %% capabilities nullify that? 
- Msg = {M, S, I}, - riak_core_vnode_manager:xfer_complete(Origin, Msg) - end, - ok - end, - %% No monitor on vnode for receiver - if VnodeM /= undefined -> demonitor(VnodeM); - true -> ok - end, - %% removed the handoff from the list of active handoffs - {noreply, State#state{handoffs = NewHS}}; - false -> - case lists:keytake(Ref, #handoff_status.vnode_mon, HS) - of - {value, - #handoff_status{mod_src_tgt = {M, _, I}, - direction = Dir, transport_pid = Trans, - transport_mon = TransM}, - NewHS} -> - %% In this case the vnode died and the handoff - %% sender must be killed. - logger:error("An ~w handoff of partition ~w ~w was " - "terminated because the vnode died", - [Dir, M, I]), - demonitor(TransM), - exit(Trans, vnode_died), - {noreply, State#state{handoffs = NewHS}}; - _ -> {noreply, State} - end + #handoff_status.transport_mon, + HS) + of + {value, + #handoff_status{mod_src_tgt = {M, S, I}, + direction = Dir, vnode_pid = Vnode, vnode_mon = VnodeM, + req_origin = Origin}, + NewHS} -> + WarnVnode = case Reason of + %% if the reason the handoff process died was anything other + %% than 'normal' we should log the reason why as an error + normal -> false; + X + when X == max_concurrency orelse + element(1, X) == shutdown andalso + element(2, X) == max_concurrency -> + logger:info("An ~w handoff of partition ~w ~w was " + "terminated\n " + " for reason: ~w~n", + [Dir, M, I, Reason]), + true; + _ -> + logger:error("An ~w handoff of partition ~w ~w was " + "terminated\n " + " for reason: ~w~n", + [Dir, M, I, Reason]), + true + end, + %% if we have the vnode process pid, tell the vnode why the + %% handoff stopped so it can clean up its state + case WarnVnode andalso is_pid(Vnode) of + true -> + riak_core_vnode:handoff_error(Vnode, 'DOWN', Reason); + _ -> + case Origin of + none -> ok; + _ -> + %% Use proplist instead so it's more + %% flexible in future, or does + %% capabilities nullify that? + Msg = {M, S, I}, + riak_core_vnode_manager:xfer_complete(Origin, Msg) + end, + ok + end, + %% No monitor on vnode for receiver + if VnodeM /= undefined -> demonitor(VnodeM); + true -> ok + end, + %% removed the handoff from the list of active handoffs + {noreply, State#state{handoffs = NewHS}}; + false -> + case lists:keytake(Ref, #handoff_status.vnode_mon, HS) + of + {value, + #handoff_status{mod_src_tgt = {M, _, I}, + direction = Dir, transport_pid = Trans, + transport_mon = TransM}, + NewHS} -> + %% In this case the vnode died and the handoff + %% sender must be killed. + logger:error("An ~w handoff of partition ~w ~w was " + "terminated because the vnode died", + [Dir, M, I]), + demonitor(TransM), + exit(Trans, vnode_died), + {noreply, State#state{handoffs = NewHS}}; + _ -> {noreply, State} + end end. terminate(_Reason, _State) -> ok. @@ -457,10 +457,10 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. build_status(HO) -> #handoff_status{mod_src_tgt = {Mod, SrcP, TargetP}, - src_node = SrcNode, target_node = TargetNode, - direction = Dir, status = Status, timestamp = StartTS, - transport_pid = TPid, type = Type} = - HO, + src_node = SrcNode, target_node = TargetNode, + direction = Dir, status = Status, timestamp = StartTS, + transport_pid = TPid, type = Type} = + HO, {status_v2, [{mod, Mod}, {src_partition, SrcP}, @@ -475,24 +475,24 @@ build_status(HO) -> {type, Type}]}. 
calc_stats(#handoff_status{stats = Stats, - timestamp = StartTS, size = Size}) -> + timestamp = StartTS, size = Size}) -> case dict:find(last_update, Stats) of - error -> no_stats; - {ok, LastUpdate} -> - Objs = dict:fetch(objs, Stats), - Bytes = dict:fetch(bytes, Stats), - CalcSize = get_size(Size), - Done = calc_pct_done(Objs, Bytes, CalcSize), - ElapsedS = timer:now_diff(LastUpdate, StartTS) / - 1000000, - ObjsS = round(Objs / ElapsedS), - BytesS = round(Bytes / ElapsedS), - [{objs_total, Objs}, - {objs_per_s, ObjsS}, - {bytes_per_s, BytesS}, - {last_update, LastUpdate}, - {size, CalcSize}, - {pct_done_decimal, Done}] + error -> no_stats; + {ok, LastUpdate} -> + Objs = dict:fetch(objs, Stats), + Bytes = dict:fetch(bytes, Stats), + CalcSize = get_size(Size), + Done = calc_pct_done(Objs, Bytes, CalcSize), + ElapsedS = timer:now_diff(LastUpdate, StartTS) / + 1000000, + ObjsS = round(Objs / ElapsedS), + BytesS = round(Bytes / ElapsedS), + [{objs_total, Objs}, + {objs_per_s, ObjsS}, + {bytes_per_s, BytesS}, + {last_update, LastUpdate}, + {size, CalcSize}, + {pct_done_decimal, Done}] end. get_size({F, dynamic}) -> F(); @@ -505,84 +505,84 @@ calc_pct_done(_, Bytes, {Size, bytes}) -> Bytes / Size. filter(none) -> fun (_) -> true end; filter({Key, Value} = _Filter) -> fun ({status_v2, Status}) -> - case proplists:get_value(Key, Status) of - Value -> true; - _ -> false - end + case proplists:get_value(Key, Status) of + Value -> true; + _ -> false + end end. resize_transfer_filter(Ring, Mod, Src, Target) -> fun (K) -> - {_, Hashed} = Mod:object_info(K), - riak_core_ring:is_future_index(Hashed, - Src, - Target, - Ring) + {_, Hashed} = Mod:object_info(K), + riak_core_ring:is_future_index(Hashed, + Src, + Target, + Ring) end. resize_transfer_notsent_fun(Ring, Mod, Src) -> Shrinking = riak_core_ring:num_partitions(Ring) > - riak_core_ring:future_num_partitions(Ring), + riak_core_ring:future_num_partitions(Ring), case Shrinking of - false -> NValMap = DefaultN = undefined; - true -> - NValMap = Mod:nval_map(Ring), - DefaultN = riak_core_bucket:default_object_nval() + false -> NValMap = DefaultN = undefined; + true -> + NValMap = Mod:nval_map(Ring), + DefaultN = riak_core_bucket:default_object_nval() end, fun (Key, Acc) -> - record_seen_index(Ring, - Shrinking, - NValMap, - DefaultN, - Mod, - Src, - Key, - Acc) + record_seen_index(Ring, + Shrinking, + NValMap, + DefaultN, + Mod, + Src, + Key, + Acc) end. record_seen_index(Ring, Shrinking, NValMap, DefaultN, - Mod, Src, Key, Seen) -> + Mod, Src, Key, Seen) -> {Bucket, Hashed} = Mod:object_info(Key), CheckNVal = case Shrinking of - false -> undefined; - true -> proplists:get_value(Bucket, NValMap, DefaultN) - end, + false -> undefined; + true -> proplists:get_value(Bucket, NValMap, DefaultN) + end, case riak_core_ring:future_index(Hashed, - Src, - CheckNVal, - Ring) - of - undefined -> Seen; - FutureIndex -> ordsets:add_element(FutureIndex, Seen) + Src, + CheckNVal, + Ring) + of + undefined -> Seen; + FutureIndex -> ordsets:add_element(FutureIndex, Seen) end. get_concurrency_limit() -> application:get_env(riak_core, - handoff_concurrency, - ?HANDOFF_CONCURRENCY). + handoff_concurrency, + ?HANDOFF_CONCURRENCY). 
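To make the arithmetic in calc_stats/1 above concrete, a worked example with invented numbers: 500 objects and 4,194,304 bytes sent so far, a declared size of {16777216, bytes}, and 10 seconds between StartTS and last_update:

    %% pct_done_decimal = 4194304 / 16777216    = 0.25   (bytes clause shown above)
    %% objs_per_s       = round(500 / 10.0)     = 50
    %% bytes_per_s      = round(4194304 / 10.0) = 419430
    %% A {F, dynamic} size would be resolved by calling F() first, per get_size/1.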
%% true if handoff_concurrency (inbound + outbound) hasn't yet been reached handoff_concurrency_limit_reached() -> Receivers = - supervisor:count_children(riak_core_handoff_receiver_sup), + supervisor:count_children(riak_core_handoff_receiver_sup), Senders = - supervisor:count_children(riak_core_handoff_sender_sup), + supervisor:count_children(riak_core_handoff_sender_sup), ActiveReceivers = proplists:get_value(active, - Receivers), + Receivers), ActiveSenders = proplists:get_value(active, Senders), get_concurrency_limit() =< - ActiveReceivers + ActiveSenders. + ActiveReceivers + ActiveSenders. send_handoff(HOType, ModSrcTarget, Node, Pid, HS, - Opts) -> + Opts) -> send_handoff(HOType, - ModSrcTarget, - Node, - Pid, - HS, - {none, none}, - none, - Opts). + ModSrcTarget, + Node, + Pid, + HS, + {none, none}, + none, + Opts). %% @private %% @@ -592,119 +592,119 @@ send_handoff(HOType, ModSrcTarget, Node, Pid, HS, %% `Origin' is the node this request originated from so a reply %% can't be sent on completion. -spec send_handoff(ho_type(), - {module(), index(), index()}, node(), pid(), list(), - {predicate() | none, {module(), atom()} | none}, node(), - [{atom(), term()}]) -> {ok, handoff_status()} | - {error, max_concurrency} | - {false, handoff_status()}. + {module(), index(), index()}, node(), pid(), list(), + {predicate() | none, {module(), atom()} | none}, node(), + [{atom(), term()}]) -> {ok, handoff_status()} | + {error, max_concurrency} | + {false, handoff_status()}. send_handoff(HOType, {Mod, Src, Target}, Node, Vnode, - HS, {Filter, FilterModFun}, Origin, Opts) -> + HS, {Filter, FilterModFun}, Origin, Opts) -> case handoff_concurrency_limit_reached() of - true -> {error, max_concurrency}; - false -> - ShouldHandoff = case lists:keyfind({Mod, Src, Target}, - #handoff_status.mod_src_tgt, - HS) - of - false -> true; - Handoff = #handoff_status{target_node = Node, - vnode_pid = Vnode} -> - {false, Handoff}; - #handoff_status{transport_pid = Sender} -> - %% found a running handoff with a different vnode - %% source or a different target node, kill the current - %% one and the new one will start up - erlang:exit(Sender, - resubmit_handoff_change), - true - end, - case ShouldHandoff of - true -> - VnodeM = monitor(process, Vnode), - %% start the sender process - BaseOpts = [{src_partition, Src}, - {target_partition, Target}], - case HOType of - repair -> - HOFilter = Filter, - HOAcc0 = undefined, - HONotSentFun = undefined; - resize -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - HOFilter = resize_transfer_filter(Ring, - Mod, - Src, - Target), - HOAcc0 = ordsets:new(), - HONotSentFun = resize_transfer_notsent_fun(Ring, - Mod, - Src); - _ -> - HOFilter = none, - HOAcc0 = undefined, - HONotSentFun = undefined - end, - HOOpts = [{filter, HOFilter}, - {notsent_acc0, HOAcc0}, - {notsent_fun, HONotSentFun} - | BaseOpts], - {ok, Pid} = - riak_core_handoff_sender_sup:start_sender(HOType, - Mod, - Node, - Vnode, - HOOpts), - PidM = monitor(process, Pid), - Size = validate_size(proplists:get_value(size, Opts)), - %% successfully started up a new sender handoff - {ok, - #handoff_status{transport_pid = Pid, - transport_mon = PidM, direction = outbound, - timestamp = os:timestamp(), - src_node = node(), target_node = Node, - mod_src_tgt = {Mod, Src, Target}, - vnode_pid = Vnode, vnode_mon = VnodeM, - status = [], stats = dict:new(), - type = HOType, req_origin = Origin, - filter_mod_fun = FilterModFun, - size = Size}}; - %% handoff already going, just return it - AlreadyExists = {false, 
_CurrentHandoff} -> - AlreadyExists - end + true -> {error, max_concurrency}; + false -> + ShouldHandoff = case lists:keyfind({Mod, Src, Target}, + #handoff_status.mod_src_tgt, + HS) + of + false -> true; + Handoff = #handoff_status{target_node = Node, + vnode_pid = Vnode} -> + {false, Handoff}; + #handoff_status{transport_pid = Sender} -> + %% found a running handoff with a different vnode + %% source or a different target node, kill the current + %% one and the new one will start up + erlang:exit(Sender, + resubmit_handoff_change), + true + end, + case ShouldHandoff of + true -> + VnodeM = monitor(process, Vnode), + %% start the sender process + BaseOpts = [{src_partition, Src}, + {target_partition, Target}], + case HOType of + repair -> + HOFilter = Filter, + HOAcc0 = undefined, + HONotSentFun = undefined; + resize -> + {ok, Ring} = riak_core_ring_manager:get_my_ring(), + HOFilter = resize_transfer_filter(Ring, + Mod, + Src, + Target), + HOAcc0 = ordsets:new(), + HONotSentFun = resize_transfer_notsent_fun(Ring, + Mod, + Src); + _ -> + HOFilter = none, + HOAcc0 = undefined, + HONotSentFun = undefined + end, + HOOpts = [{filter, HOFilter}, + {notsent_acc0, HOAcc0}, + {notsent_fun, HONotSentFun} + | BaseOpts], + {ok, Pid} = + riak_core_handoff_sender_sup:start_sender(HOType, + Mod, + Node, + Vnode, + HOOpts), + PidM = monitor(process, Pid), + Size = validate_size(proplists:get_value(size, Opts)), + %% successfully started up a new sender handoff + {ok, + #handoff_status{transport_pid = Pid, + transport_mon = PidM, direction = outbound, + timestamp = os:timestamp(), + src_node = node(), target_node = Node, + mod_src_tgt = {Mod, Src, Target}, + vnode_pid = Vnode, vnode_mon = VnodeM, + status = [], stats = dict:new(), + type = HOType, req_origin = Origin, + filter_mod_fun = FilterModFun, + size = Size}}; + %% handoff already going, just return it + AlreadyExists = {false, _CurrentHandoff} -> + AlreadyExists + end end. %% spawn a receiver process receive_handoff() -> case handoff_concurrency_limit_reached() of - true -> {error, max_concurrency}; - false -> - {ok, Pid} = - riak_core_handoff_receiver_sup:start_receiver(), - PidM = monitor(process, Pid), - %% successfully started up a new receiver - {ok, - #handoff_status{transport_pid = Pid, - transport_mon = PidM, direction = inbound, - timestamp = os:timestamp(), - mod_src_tgt = {undefined, undefined, undefined}, - src_node = undefined, target_node = undefined, - status = [], stats = dict:new(), - req_origin = none}} + true -> {error, max_concurrency}; + false -> + {ok, Pid} = + riak_core_handoff_receiver_sup:start_receiver(), + PidM = monitor(process, Pid), + %% successfully started up a new receiver + {ok, + #handoff_status{transport_pid = Pid, + transport_mon = PidM, direction = inbound, + timestamp = os:timestamp(), + mod_src_tgt = {undefined, undefined, undefined}, + src_node = undefined, target_node = undefined, + status = [], stats = dict:new(), + req_origin = none}} end. update_stats(StatsUpdate, Stats) -> #ho_stats{last_update = LU, objs = Objs, - bytes = Bytes} = - StatsUpdate, + bytes = Bytes} = + StatsUpdate, Stats2 = dict:update_counter(objs, Objs, Stats), Stats3 = dict:update_counter(bytes, Bytes, Stats2), dict:store(last_update, LU, Stats3). validate_size(Size = {N, U}) when is_number(N) andalso - N > 0 andalso (U =:= bytes orelse U =:= objects) -> + N > 0 andalso (U =:= bytes orelse U =:= objects) -> Size; validate_size(Size = {F, dynamic}) when is_function(F) -> @@ -718,65 +718,65 @@ validate_size(_) -> undefined. 
%% can have two simultaneous inbound xfers. kill_xfer_i(ModSrcTarget, Reason, HS) -> case lists:keytake(ModSrcTarget, - #handoff_status.mod_src_tgt, - HS) - of - false -> HS; - {value, Xfer, HS2} -> - #handoff_status{mod_src_tgt = - {Mod, SrcPartition, TargetPartition}, - type = Type, target_node = TargetNode, - src_node = SrcNode, transport_pid = TP} = - Xfer, - Msg = "~p transfer of ~p from ~p ~p to ~p ~p " - "killed for reason ~p", - case Type of - undefined -> ok; - _ -> - logger:info(Msg, - [Type, - Mod, - SrcNode, - SrcPartition, - TargetNode, - TargetPartition, - Reason]) - end, - exit(TP, {kill_xfer, Reason}), - kill_xfer_i(ModSrcTarget, Reason, HS2) + #handoff_status.mod_src_tgt, + HS) + of + false -> HS; + {value, Xfer, HS2} -> + #handoff_status{mod_src_tgt = + {Mod, SrcPartition, TargetPartition}, + type = Type, target_node = TargetNode, + src_node = SrcNode, transport_pid = TP} = + Xfer, + Msg = "~p transfer of ~p from ~p ~p to ~p ~p " + "killed for reason ~p", + case Type of + undefined -> ok; + _ -> + logger:info(Msg, + [Type, + Mod, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition, + Reason]) + end, + exit(TP, {kill_xfer, Reason}), + kill_xfer_i(ModSrcTarget, Reason, HS2) end. handoff_change_enabled_setting(EnOrDis, Direction) -> SetFun = case EnOrDis of - enable -> fun handoff_enable/1; - disable -> fun handoff_disable/1 - end, + enable -> fun handoff_enable/1; + disable -> fun handoff_disable/1 + end, case Direction of - inbound -> SetFun(inbound); - outbound -> SetFun(outbound); - both -> - SetFun(inbound), - SetFun(outbound) + inbound -> SetFun(inbound); + outbound -> SetFun(outbound); + both -> + SetFun(inbound), + SetFun(outbound) end. handoff_enable(inbound) -> application:set_env(riak_core, - disable_inbound_handoff, - false); + disable_inbound_handoff, + false); handoff_enable(outbound) -> application:set_env(riak_core, - disable_outbound_handoff, - false). + disable_outbound_handoff, + false). handoff_disable(inbound) -> application:set_env(riak_core, - disable_inbound_handoff, - true), + disable_inbound_handoff, + true), kill_handoffs_in_direction(inbound); handoff_disable(outbound) -> application:set_env(riak_core, - disable_outbound_handoff, - true), + disable_outbound_handoff, + true), kill_handoffs_in_direction(outbound). %%%=================================================================== @@ -790,15 +790,15 @@ handoff_test_() -> {setup, %% called when the tests start and complete... 
fun () -> - {ok, ManPid} = start_link(), - {ok, RSupPid} = - riak_core_handoff_receiver_sup:start_link(), - {ok, SSupPid} = - riak_core_handoff_sender_sup:start_link(), - [ManPid, RSupPid, SSupPid] + {ok, ManPid} = start_link(), + {ok, RSupPid} = + riak_core_handoff_receiver_sup:start_link(), + {ok, SSupPid} = + riak_core_handoff_sender_sup:start_link(), + [ManPid, RSupPid, SSupPid] end, fun (PidList) -> - lists:foreach(fun (Pid) -> exit(Pid, kill) end, PidList) + lists:foreach(fun (Pid) -> exit(Pid, kill) end, PidList) end, %% actual list of test [?_test((simple_handoff())), @@ -810,12 +810,12 @@ simple_handoff() -> ?assertEqual(ok, (set_concurrency(0))), ?assertEqual({error, max_concurrency}, (add_inbound())), ?assertEqual({error, max_concurrency}, - (add_outbound(ownership, - riak_kv_vnode, - 0, - node(), - self(), - []))), + (add_outbound(ownership, + riak_kv_vnode, + 0, + node(), + self(), + []))), %% allow for a single handoff ?assertEqual(ok, (set_concurrency(1))), %% done @@ -834,14 +834,14 @@ config_disable() -> ?assertEqual(1, (length(status()))), Ref = monitor(process, Pid), CatchDownFun = fun () -> - receive - {'DOWN', Ref, process, Pid, max_concurrency} -> - ok; - Other -> {error, unexpected_message, Other} - after 1000 -> - {error, timeout_waiting_for_down_msg} - end - end, + receive + {'DOWN', Ref, process, Pid, max_concurrency} -> + ok; + Other -> {error, unexpected_message, Other} + after 1000 -> + {error, timeout_waiting_for_down_msg} + end + end, ?assertEqual(ok, (handoff_disable(inbound))), ?assertEqual(ok, (CatchDownFun())), %% We use wait_until because it's possible that the handoff manager process @@ -862,11 +862,11 @@ config_disable() -> wait_until(Fun, Retry, Delay) when Retry > 0 -> Res = Fun(), case Res of - true -> ok; - _ when Retry == 1 -> {fail, Res}; - _ -> - timer:sleep(Delay), - wait_until(Fun, Retry - 1, Delay) + true -> ok; + _ when Retry == 1 -> {fail, Res}; + _ -> + timer:sleep(Delay), + wait_until(Fun, Retry - 1, Delay) end. -endif. diff --git a/src/riak_core_handoff_receiver.erl b/src/riak_core_handoff_receiver.erl index d25f6857b..31d36402d 100644 --- a/src/riak_core_handoff_receiver.erl +++ b/src/riak_core_handoff_receiver.erl @@ -27,25 +27,25 @@ -behaviour(gen_server). -export([start_link/0, - set_socket/2, - supports_batching/0]). + set_socket/2, + supports_batching/0]). -export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, - {sock :: port() | undefined, - peer :: term(), - recv_timeout_len :: non_neg_integer(), - vnode_timeout_len :: non_neg_integer(), - partition :: non_neg_integer() | undefined, - vnode_mod = riak_kv_vnode :: module(), - vnode :: pid() | undefined, - count = 0 :: non_neg_integer()}). + {sock :: port() | undefined, + peer :: term(), + recv_timeout_len :: non_neg_integer(), + vnode_timeout_len :: non_neg_integer(), + partition :: non_neg_integer() | undefined, + vnode_mod = riak_kv_vnode :: module(), + vnode :: pid() | undefined, + count = 0 :: non_neg_integer()}). %% set the TCP receive timeout to five minutes to be conservative. -define(RECV_TIMEOUT, 300000). @@ -63,13 +63,13 @@ supports_batching() -> true. init([]) -> {ok, #state{recv_timeout_len = - application:get_env(riak_core, - handoff_receive_timeout, - ?RECV_TIMEOUT), - vnode_timeout_len = - application:get_env(riak_core, - handoff_receive_vnode_timeout, - ?VNODE_TIMEOUT)}}. 
+ application:get_env(riak_core, + handoff_receive_timeout, + ?RECV_TIMEOUT), + vnode_timeout_len = + application:get_env(riak_core, + handoff_receive_vnode_timeout, + ?VNODE_TIMEOUT)}}. handle_call({set_socket, Socket0}, _From, State) -> SockOpts = [{active, once}, {packet, 4}, {header, 1}], @@ -79,113 +79,113 @@ handle_call({set_socket, Socket0}, _From, State) -> {reply, ok, State#state{sock = Socket, peer = Peer}}. handle_info({tcp_closed, _Socket}, - State = #state{partition = Partition, count = Count, - peer = Peer}) -> + State = #state{partition = Partition, count = Count, + peer = Peer}) -> logger:info("Handoff receiver for partition ~p exited " - "after processing ~p objects from ~p", - [Partition, Count, Peer]), + "after processing ~p objects from ~p", + [Partition, Count, Peer]), {stop, normal, State}; handle_info({tcp_error, _Socket, Reason}, - State = #state{partition = Partition, count = Count, - peer = Peer}) -> + State = #state{partition = Partition, count = Count, + peer = Peer}) -> logger:info("Handoff receiver for partition ~p exited " - "after processing ~p objects from ~p: " - "TCP error ~p", - [Partition, Count, Peer, Reason]), + "after processing ~p objects from ~p: " + "TCP error ~p", + [Partition, Count, Peer, Reason]), {stop, normal, State}; handle_info({tcp, Socket, Data}, State) -> [MsgType | MsgData] = Data, case catch process_message(MsgType, MsgData, State) of - {'EXIT', Reason} -> - logger:error("Handoff receiver for partition ~p exited " - "abnormally after processing ~p objects " - "from ~p: ~p", - [State#state.partition, - State#state.count, - State#state.peer, - Reason]), - {stop, normal, State}; - NewState when is_record(NewState, state) -> - InetMod = inet, - InetMod:setopts(Socket, [{active, once}]), - {noreply, NewState, State#state.recv_timeout_len} + {'EXIT', Reason} -> + logger:error("Handoff receiver for partition ~p exited " + "abnormally after processing ~p objects " + "from ~p: ~p", + [State#state.partition, + State#state.count, + State#state.peer, + Reason]), + {stop, normal, State}; + NewState when is_record(NewState, state) -> + InetMod = inet, + InetMod:setopts(Socket, [{active, once}]), + {noreply, NewState, State#state.recv_timeout_len} end; handle_info(timeout, State) -> logger:error("Handoff receiver for partition ~p timed " - "out after processing ~p objects from " - "~p.", - [State#state.partition, - State#state.count, - State#state.peer]), + "out after processing ~p objects from " + "~p.", + [State#state.partition, + State#state.count, + State#state.peer]), {stop, normal, State}. 
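Reviewer note: idle detection in the receive loop above piggybacks on the gen_server timeout rather than a dedicated timer. Restated as a comment-only sketch of the idiom (no new behavior):

%% After each message the socket is re-armed for exactly one more packet:
%%     inet:setopts(Socket, [{active, once}]),
%%     {noreply, NewState, State#state.recv_timeout_len}
%% If no TCP message arrives within recv_timeout_len milliseconds, the
%% gen_server delivers `timeout' to handle_info/2, which logs and stops.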
process_message(?PT_MSG_INIT, MsgData,
-                State = #state{vnode_mod = VNodeMod, peer = Peer}) ->
+                State = #state{vnode_mod = VNodeMod, peer = Peer}) ->
    <<Partition:160/integer>> = MsgData,
    logger:info("Receiving handoff data for partition "
-                "~p:~p from ~p",
-                [VNodeMod, Partition, Peer]),
+                "~p:~p from ~p",
+                [VNodeMod, Partition, Peer]),
    {ok, VNode} =
-        riak_core_vnode_master:get_vnode_pid(Partition,
-                                             VNodeMod),
+        riak_core_vnode_master:get_vnode_pid(Partition,
+                                             VNodeMod),
    Data = [{mod_src_tgt, {VNodeMod, undefined, Partition}},
-            {vnode_pid, VNode}],
+            {vnode_pid, VNode}],
    riak_core_handoff_manager:set_recv_data(self(), Data),
    State#state{partition = Partition, vnode = VNode};
process_message(?PT_MSG_BATCH, MsgData, State) ->
    lists:foldl(fun (Obj, StateAcc) ->
-                        process_message(?PT_MSG_OBJ, Obj, StateAcc)
-                end,
-                State,
-                binary_to_term(MsgData));
+                        process_message(?PT_MSG_OBJ, Obj, StateAcc)
+                end,
+                State,
+                binary_to_term(MsgData));
process_message(?PT_MSG_OBJ, MsgData,
-                State = #state{vnode = VNode, count = Count,
-                               vnode_timeout_len = VNodeTimeout}) ->
+                State = #state{vnode = VNode, count = Count,
+                               vnode_timeout_len = VNodeTimeout}) ->
    try riak_core_vnode:handoff_data(VNode,
-                                     MsgData,
-                                     VNodeTimeout)
+                                     MsgData,
+                                     VNodeTimeout)
    of
-        ok -> State#state{count = Count + 1};
-        E = {error, _} -> exit(E)
+        ok -> State#state{count = Count + 1};
+        E = {error, _} -> exit(E)
    catch
-        exit:{timeout, _} ->
-            exit({error,
-                  {vnode_timeout,
-                   VNodeTimeout,
-                   size(MsgData),
-                   binary:part(MsgData, {0, min(size(MsgData), 128)})}})
+        exit:{timeout, _} ->
+            exit({error,
+                  {vnode_timeout,
+                   VNodeTimeout,
+                   size(MsgData),
+                   binary:part(MsgData, {0, min(size(MsgData), 128)})}})
    end;
process_message(?PT_MSG_OLDSYNC, MsgData,
-                State = #state{sock = Socket}) ->
+                State = #state{sock = Socket}) ->
    gen_tcp:send(Socket, <<(?PT_MSG_OLDSYNC):8, "sync">>),
    <<VNodeModBin/binary>> = MsgData,
    VNodeMod = binary_to_atom(VNodeModBin, utf8),
    State#state{vnode_mod = VNodeMod};
process_message(?PT_MSG_SYNC, _MsgData,
-                State = #state{sock = Socket}) ->
+                State = #state{sock = Socket}) ->
    gen_tcp:send(Socket, <<(?PT_MSG_SYNC):8, "sync">>),
    State;
process_message(?PT_MSG_VERIFY_NODE, ExpectedName,
-                State = #state{sock = Socket, peer = Peer}) ->
+                State = #state{sock = Socket, peer = Peer}) ->
    case binary_to_term(ExpectedName) of
-        _Node when _Node =:= node() ->
-            gen_tcp:send(Socket, <<(?PT_MSG_VERIFY_NODE):8>>),
-            State;
-        Node ->
-            logger:error("Handoff from ~p expects us to be ~s "
-                         "but we are ~s.",
-                         [Peer, Node, node()]),
-            exit({error, {wrong_node, Node}})
+        _Node when _Node =:= node() ->
+            gen_tcp:send(Socket, <<(?PT_MSG_VERIFY_NODE):8>>),
+            State;
+        Node ->
+            logger:error("Handoff from ~p expects us to be ~s "
+                         "but we are ~s.",
+                         [Peer, Node, node()]),
+            exit({error, {wrong_node, Node}})
    end;
process_message(?PT_MSG_CONFIGURE, MsgData, State) ->
    ConfProps = binary_to_term(MsgData),
    State#state{vnode_mod =
-                    proplists:get_value(vnode_mod, ConfProps),
-                partition = proplists:get_value(partition, ConfProps)};
+                    proplists:get_value(vnode_mod, ConfProps),
+                partition = proplists:get_value(partition, ConfProps)};
process_message(_, _MsgData,
-                State = #state{sock = Socket}) ->
+                State = #state{sock = Socket}) ->
    gen_tcp:send(Socket,
-                 <<(?PT_MSG_UNKNOWN):8, "unknown_msg">>),
+                 <<(?PT_MSG_UNKNOWN):8, "unknown_msg">>),
    State.

handle_cast(_Msg, State) -> {noreply, State}.

@@ -196,8 +196,8 @@
code_change(_OldVsn, State, _Extra) -> {ok, State}.
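Taken together, the process_message/3 clauses above are the receiver's entire wire protocol. Because set_socket/2 opens the socket with [{active, once}, {packet, 4}, {header, 1}], every frame arrives length-delimited and its first payload byte is split off as the message-type list head ([MsgType | MsgData]). A minimal sketch of how a peer frames one object message (send_obj/2 is illustrative, not part of this patch; ?PT_MSG_OBJ comes from riak_core_handoff.hrl):

%% Illustrative: {packet, 4} adds/strips the length prefix automatically,
%% so the sender only prepends the one-byte type tag, exactly as the
%% sender module later in this patch does.
send_obj(Socket, BinObj) ->
    gen_tcp:send(Socket, <<(?PT_MSG_OBJ):8, BinObj/binary>>).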
safe_peername(Skt, Mod) -> case Mod:peername(Skt) of - {ok, {Host, Port}} -> {inet_parse:ntoa(Host), Port}; - _ -> - {unknown, - unknown} % Real info is {Addr, Port} + {ok, {Host, Port}} -> {inet_parse:ntoa(Host), Port}; + _ -> + {unknown, + unknown} % Real info is {Addr, Port} end. diff --git a/src/riak_core_handoff_receiver_sup.erl b/src/riak_core_handoff_receiver_sup.erl index 1f682c6ef..cabc86e07 100644 --- a/src/riak_core_handoff_receiver_sup.erl +++ b/src/riak_core_handoff_receiver_sup.erl @@ -29,12 +29,12 @@ -export([start_receiver/0]). -define(CHILD(I, Type), - {I, - {I, start_link, []}, - temporary, - brutal_kill, - Type, - [I]}). + {I, + {I, start_link, []}, + temporary, + brutal_kill, + Type, + [I]}). %% begins the supervisor, init/1 will be called start_link() -> diff --git a/src/riak_core_handoff_sender.erl b/src/riak_core_handoff_sender.erl index df67035b9..406d1a5c0 100644 --- a/src/riak_core_handoff_sender.erl +++ b/src/riak_core_handoff_sender.erl @@ -38,50 +38,50 @@ -define(STATUS_INTERVAL, 2). -define(LOG_INFO(Str, Args), - logger:info("~p transfer of ~p from ~p ~p to ~p ~p " - "failed " - ++ Str, - [Type, - Module, - SrcNode, - SrcPartition, - TargetNode, - TargetPartition] - ++ Args)). + logger:info("~p transfer of ~p from ~p ~p to ~p ~p " + "failed " + ++ Str, + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition] + ++ Args)). -define(LOG_FAIL(Str, Args), - logger:error("~p transfer of ~p from ~p ~p to ~p ~p " - "failed " - ++ Str, - [Type, - Module, - SrcNode, - SrcPartition, - TargetNode, - TargetPartition] - ++ Args)). + logger:error("~p transfer of ~p from ~p ~p to ~p ~p " + "failed " + ++ Str, + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition] + ++ Args)). %% Accumulator for the visit item HOF -record(ho_acc, - {ack :: non_neg_integer(), - error :: ok | {error, any()}, - filter :: function(), - module :: module(), - parent :: pid(), - socket :: any(), - src_target :: {non_neg_integer(), non_neg_integer()}, - stats :: #ho_stats{}, - total_objects :: non_neg_integer(), - total_bytes :: non_neg_integer(), - use_batching :: boolean(), - item_queue :: [binary()], - item_queue_length :: non_neg_integer(), - item_queue_byte_size :: non_neg_integer(), - acksync_threshold :: non_neg_integer(), - acksync_timer :: timer:tref() | undefined, - type :: ho_type(), - notsent_acc :: term(), - notsent_fun :: function() | undefined}). + {ack :: non_neg_integer(), + error :: ok | {error, any()}, + filter :: function(), + module :: module(), + parent :: pid(), + socket :: any(), + src_target :: {non_neg_integer(), non_neg_integer()}, + stats :: #ho_stats{}, + total_objects :: non_neg_integer(), + total_bytes :: non_neg_integer(), + use_batching :: boolean(), + item_queue :: [binary()], + item_queue_length :: non_neg_integer(), + item_queue_byte_size :: non_neg_integer(), + acksync_threshold :: non_neg_integer(), + acksync_timer :: timer:tref() | undefined, + type :: ho_type(), + notsent_acc :: term(), + notsent_fun :: function() | undefined}). %%%=================================================================== %%% API @@ -89,8 +89,8 @@ start_link(TargetNode, Module, {Type, Opts}, Vnode) -> Pid = spawn_link(fun () -> - start_fold(TargetNode, Module, {Type, Opts}, Vnode) - end), + start_fold(TargetNode, Module, {Type, Opts}, Vnode) + end), {ok, Pid}. 
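start_fold_/8 below drives the outbound side of that protocol. As a reading aid, the wire sequence it implements (summarized from the hunks that follow; no new behavior) is:

%% 1. gen_tcp:connect/4 to the target's handoff ip/port (15s timeout)
%% 2. ?PT_MSG_VERIFY_NODE    - peer confirms it is the node we expect
%% 3. ?PT_MSG_OLDSYNC + Mod  - peer acks "sync"; a closed socket here means
%%                             the receiver enforced handoff_concurrency
%% 4. ?PT_MSG_INIT           - announces the target partition (160-bit index)
%% 5. ?PT_MSG_OBJ / ?PT_MSG_BATCH - object stream produced by the vnode fold
%% 6. ?PT_MSG_SYNC           - final ack; only then is handoff_complete sent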
%%%=================================================================== @@ -98,27 +98,27 @@ start_link(TargetNode, Module, {Type, Opts}, Vnode) -> %%%=================================================================== start_fold_(TargetNode, Module, Type, Opts, ParentPid, - SrcNode, SrcPartition, TargetPartition) -> + SrcNode, SrcPartition, TargetPartition) -> %% Give workers one more chance to abort or get a lock or whatever. FoldOpts = maybe_call_handoff_started(Module, - SrcPartition), + SrcPartition), Filter = get_filter(Opts), [_Name, Host] = string:tokens(atom_to_list(TargetNode), - "@"), + "@"), {ok, Port} = get_handoff_port(TargetNode), TNHandoffIP = case get_handoff_ip(TargetNode) of - error -> Host; - {ok, "0.0.0.0"} -> Host; - {ok, Other} -> Other - end, + error -> Host; + {ok, "0.0.0.0"} -> Host; + {ok, Other} -> Other + end, SockOpts = [binary, - {packet, 4}, - {header, 1}, - {active, false}], + {packet, 4}, + {header, 1}, + {active, false}], {ok, Socket} = gen_tcp:connect(TNHandoffIP, - Port, - SockOpts, - 15000), + Port, + SockOpts, + 15000), RecvTimeout = get_handoff_receive_timeout(), %% We want to ensure that the node we think we are talking to %% really is the node we expect. @@ -128,16 +128,16 @@ start_fold_(TargetNode, Module, Type, Opts, ParentPid, %% print an error and keep going with our fingers crossed. TargetBin = term_to_binary(TargetNode), VerifyNodeMsg = <<(?PT_MSG_VERIFY_NODE):8, - TargetBin/binary>>, + TargetBin/binary>>, ok = gen_tcp:send(Socket, VerifyNodeMsg), case gen_tcp:recv(Socket, 0, RecvTimeout) of - {ok, [?PT_MSG_VERIFY_NODE | _]} -> ok; - {ok, [?PT_MSG_UNKNOWN | _]} -> - logger:warning("Could not verify identity of peer ~s.", - [TargetNode]), - ok; - {error, timeout} -> exit({shutdown, timeout}); - {error, closed} -> exit({shutdown, wrong_node}) + {ok, [?PT_MSG_VERIFY_NODE | _]} -> ok; + {ok, [?PT_MSG_UNKNOWN | _]} -> + logger:warning("Could not verify identity of peer ~s.", + [TargetNode]), + ok; + {error, timeout} -> exit({shutdown, timeout}); + {error, closed} -> exit({shutdown, wrong_node}) end, %% Piggyback the sync command from previous releases to send %% the vnode type across. If talking to older nodes they'll @@ -145,13 +145,13 @@ start_fold_(TargetNode, Module, Type, Opts, ParentPid, %% After 0.12.0 the calls can be switched to use PT_MSG_SYNC %% and PT_MSG_CONFIGURE VMaster = list_to_atom(atom_to_list(Module) ++ - "_master"), + "_master"), ModBin = atom_to_binary(Module, utf8), Msg = <<(?PT_MSG_OLDSYNC):8, ModBin/binary>>, ok = gen_tcp:send(Socket, Msg), AckSyncThreshold = application:get_env(riak_core, - handoff_acksync_threshold, - 25), + handoff_acksync_threshold, + 25), %% Now that handoff_concurrency applies to both outbound and %% inbound conns there is a chance that the receiver may %% decide to reject the senders attempt to start a handoff. @@ -160,301 +160,301 @@ start_fold_(TargetNode, Module, Type, Opts, ParentPid, %% socket at this point is a rejection by the receiver to %% enforce handoff_concurrency. 
case gen_tcp:recv(Socket, 0, RecvTimeout) of - {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> ok; - {error, timeout} -> exit({shutdown, timeout}); - {error, closed} -> exit({shutdown, max_concurrency}) + {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> ok; + {error, timeout} -> exit({shutdown, timeout}); + {error, closed} -> exit({shutdown, max_concurrency}) end, RemoteSupportsBatching = - remote_supports_batching(TargetNode), + remote_supports_batching(TargetNode), logger:info("Starting ~p transfer of ~p from ~p ~p " - "to ~p ~p", - [Type, - Module, - SrcNode, - SrcPartition, - TargetNode, - TargetPartition]), + "to ~p ~p", + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition]), M = <<(?PT_MSG_INIT):8, TargetPartition:160/integer>>, ok = gen_tcp:send(Socket, M), StartFoldTime = os:timestamp(), Stats = #ho_stats{interval_end = - future_now(get_status_interval())}, + future_now(get_status_interval())}, UnsentAcc0 = get_notsent_acc0(Opts), UnsentFun = get_notsent_fun(Opts), Req = riak_core_util:make_fold_req(fun visit_item/3, - #ho_acc{ack = 0, error = ok, - filter = Filter, module = Module, - parent = ParentPid, - socket = Socket, - src_target = - {SrcPartition, - TargetPartition}, - stats = Stats, total_bytes = 0, - total_objects = 0, - use_batching = - RemoteSupportsBatching, - item_queue = [], - item_queue_length = 0, - item_queue_byte_size = 0, - acksync_threshold = - AckSyncThreshold, - type = Type, - notsent_acc = UnsentAcc0, - notsent_fun = UnsentFun}, - false, - FoldOpts), + #ho_acc{ack = 0, error = ok, + filter = Filter, module = Module, + parent = ParentPid, + socket = Socket, + src_target = + {SrcPartition, + TargetPartition}, + stats = Stats, total_bytes = 0, + total_objects = 0, + use_batching = + RemoteSupportsBatching, + item_queue = [], + item_queue_length = 0, + item_queue_byte_size = 0, + acksync_threshold = + AckSyncThreshold, + type = Type, + notsent_acc = UnsentAcc0, + notsent_fun = UnsentFun}, + false, + FoldOpts), %% IFF the vnode is using an async worker to perform the fold %% then sync_command will return error on vnode crash, %% otherwise it will wait forever but vnode crash will be %% caught by handoff manager. I know, this is confusing, a %% new handoff system will be written soon enough. 
AccRecord0 = case - riak_core_vnode_master:sync_command({SrcPartition, - SrcNode}, - Req, - VMaster, - infinity) - of - #ho_acc{} = Ret -> Ret; - Ret -> - logger:error("[handoff] Bad handoff record: ~p", - [Ret]), - Ret - end, + riak_core_vnode_master:sync_command({SrcPartition, + SrcNode}, + Req, + VMaster, + infinity) + of + #ho_acc{} = Ret -> Ret; + Ret -> + logger:error("[handoff] Bad handoff record: ~p", + [Ret]), + Ret + end, %% Send any straggler entries remaining in the buffer: AccRecord = send_objects(AccRecord0#ho_acc.item_queue, - AccRecord0), + AccRecord0), if AccRecord == {error, vnode_shutdown} -> - ?LOG_INFO("because the local vnode was shutdown", []), - throw({be_quiet, - error, - local_vnode_shutdown_requested}); + ?LOG_INFO("because the local vnode was shutdown", []), + throw({be_quiet, + error, + local_vnode_shutdown_requested}); true -> - ok % If not #ho_acc, get badmatch below + ok % If not #ho_acc, get badmatch below end, #ho_acc{error = ErrStatus, module = Module, - parent = ParentPid, total_objects = TotalObjects, - total_bytes = TotalBytes, stats = FinalStats, - acksync_timer = TRef, notsent_acc = NotSentAcc} = - AccRecord, + parent = ParentPid, total_objects = TotalObjects, + total_bytes = TotalBytes, stats = FinalStats, + acksync_timer = TRef, notsent_acc = NotSentAcc} = + AccRecord, _ = timer:cancel(TRef), case ErrStatus of - ok -> - %% One last sync to make sure the message has been received. - %% post-0.14 vnodes switch to handoff to forwarding immediately - %% so handoff_complete can only be sent once all of the data is - %% written. handle_handoff_data is a sync call, so once - %% we receive the sync the remote side will be up to date. - logger:debug("~p ~p Sending final sync", - [SrcPartition, Module]), - ok = gen_tcp:send(Socket, <<(?PT_MSG_SYNC):8>>), - case gen_tcp:recv(Socket, 0, RecvTimeout) of - {ok, [?PT_MSG_SYNC | <<"sync">>]} -> - logger:debug("~p ~p Final sync received", - [SrcPartition, Module]); - {error, timeout} -> exit({shutdown, timeout}) - end, - FoldTimeDiff = end_fold_time(StartFoldTime), - ThroughputBytes = TotalBytes / FoldTimeDiff, - ok = - logger:info("~p transfer of ~p from ~p ~p to ~p ~p " - "completed: sent ~p bytes in ~p of ~p " - "objects in ~p seconds (~p/second)", - [Type, - Module, - SrcNode, - SrcPartition, - TargetNode, - TargetPartition, - TotalBytes, - FinalStats#ho_stats.objs, - TotalObjects, - FoldTimeDiff, - ThroughputBytes]), - case Type of - repair -> ok; - resize -> - riak_core_vnode:resize_transfer_complete(ParentPid, - NotSentAcc); - _ -> riak_core_vnode:handoff_complete(ParentPid) - end; - {error, ErrReason} -> - if ErrReason == timeout -> exit({shutdown, timeout}); - true -> exit({shutdown, {error, ErrReason}}) - end + ok -> + %% One last sync to make sure the message has been received. + %% post-0.14 vnodes switch to handoff to forwarding immediately + %% so handoff_complete can only be sent once all of the data is + %% written. handle_handoff_data is a sync call, so once + %% we receive the sync the remote side will be up to date. 
+ logger:debug("~p ~p Sending final sync", + [SrcPartition, Module]), + ok = gen_tcp:send(Socket, <<(?PT_MSG_SYNC):8>>), + case gen_tcp:recv(Socket, 0, RecvTimeout) of + {ok, [?PT_MSG_SYNC | <<"sync">>]} -> + logger:debug("~p ~p Final sync received", + [SrcPartition, Module]); + {error, timeout} -> exit({shutdown, timeout}) + end, + FoldTimeDiff = end_fold_time(StartFoldTime), + ThroughputBytes = TotalBytes / FoldTimeDiff, + ok = + logger:info("~p transfer of ~p from ~p ~p to ~p ~p " + "completed: sent ~p bytes in ~p of ~p " + "objects in ~p seconds (~p/second)", + [Type, + Module, + SrcNode, + SrcPartition, + TargetNode, + TargetPartition, + TotalBytes, + FinalStats#ho_stats.objs, + TotalObjects, + FoldTimeDiff, + ThroughputBytes]), + case Type of + repair -> ok; + resize -> + riak_core_vnode:resize_transfer_complete(ParentPid, + NotSentAcc); + _ -> riak_core_vnode:handoff_complete(ParentPid) + end; + {error, ErrReason} -> + if ErrReason == timeout -> exit({shutdown, timeout}); + true -> exit({shutdown, {error, ErrReason}}) + end end. start_fold(TargetNode, Module, {Type, Opts}, - ParentPid) -> + ParentPid) -> SrcNode = node(), SrcPartition = get_src_partition(Opts), TargetPartition = get_target_partition(Opts), try start_fold_(TargetNode, - Module, - Type, - Opts, - ParentPid, - SrcNode, - SrcPartition, - TargetPartition) + Module, + Type, + Opts, + ParentPid, + SrcNode, + SrcPartition, + TargetPartition) catch - exit:{shutdown, max_concurrency} -> - %% Need to fwd the error so the handoff mgr knows - exit({shutdown, max_concurrency}); - exit:{shutdown, timeout} -> - %% A receive timeout during handoff - %% STATS - %% riak_core_stat:update(handoff_timeouts), - ?LOG_FAIL("because of TCP recv timeout", []), - exit({shutdown, timeout}); - exit:{shutdown, {error, Reason}} -> - ?LOG_FAIL("because of ~p", [Reason]), - riak_core_vnode:handoff_error(ParentPid, - fold_error, - Reason), - exit({shutdown, {error, Reason}}); - {be_quiet, Err, Reason} -> - riak_core_vnode:handoff_error(ParentPid, Err, Reason); - Err:Reason:Stacktrace -> - ?LOG_FAIL("because of ~p:~p ~p", - [Err, Reason, Stacktrace]), - riak_core_vnode:handoff_error(ParentPid, Err, Reason) + exit:{shutdown, max_concurrency} -> + %% Need to fwd the error so the handoff mgr knows + exit({shutdown, max_concurrency}); + exit:{shutdown, timeout} -> + %% A receive timeout during handoff + %% STATS + %% riak_core_stat:update(handoff_timeouts), + ?LOG_FAIL("because of TCP recv timeout", []), + exit({shutdown, timeout}); + exit:{shutdown, {error, Reason}} -> + ?LOG_FAIL("because of ~p", [Reason]), + riak_core_vnode:handoff_error(ParentPid, + fold_error, + Reason), + exit({shutdown, {error, Reason}}); + {be_quiet, Err, Reason} -> + riak_core_vnode:handoff_error(ParentPid, Err, Reason); + Err:Reason:Stacktrace -> + ?LOG_FAIL("because of ~p:~p ~p", + [Err, Reason, Stacktrace]), + riak_core_vnode:handoff_error(ParentPid, Err, Reason) end. start_visit_item_timer() -> Ival = case application:get_env(riak_core, - handoff_receive_timeout, - undefined) - of - TO when is_integer(TO) -> erlang:max(1000, TO div 3); - _ -> 60 * 1000 - end, + handoff_receive_timeout, + undefined) + of + TO when is_integer(TO) -> erlang:max(1000, TO div 3); + _ -> 60 * 1000 + end, timer:send_interval(Ival, tick_send_sync). visit_item(K, V, - Acc0 = #ho_acc{acksync_threshold = AccSyncThreshold}) -> + Acc0 = #ho_acc{acksync_threshold = AccSyncThreshold}) -> %% Eventually, a vnode worker proc will be doing this fold, but we don't %% know the pid of that proc ahead of time. 
So we have to start the %% timer some time after the fold has started execution on that proc %% ... like now, perhaps. Acc = case get(is_visit_item_timer_set) of - undefined -> - put(is_visit_item_timer_set, true), - {ok, TRef} = start_visit_item_timer(), - Acc0#ho_acc{acksync_timer = TRef}; - _ -> Acc0 - end, + undefined -> + put(is_visit_item_timer_set, true), + {ok, TRef} = start_visit_item_timer(), + Acc0#ho_acc{acksync_timer = TRef}; + _ -> Acc0 + end, receive - tick_send_sync -> - visit_item2(K, V, Acc#ho_acc{ack = AccSyncThreshold}) - after 0 -> visit_item2(K, V, Acc) + tick_send_sync -> + visit_item2(K, V, Acc#ho_acc{ack = AccSyncThreshold}) + after 0 -> visit_item2(K, V, Acc) end. %% When a tcp error occurs, the ErrStatus argument is set to {error, Reason}. %% Since we can't abort the fold, this clause is just a no-op. visit_item2(_K, _V, - Acc = #ho_acc{error = {error, _Reason}}) -> + Acc = #ho_acc{error = {error, _Reason}}) -> %% When a TCP error occurs, #ho_acc.error is set to {error, Reason}. throw(Acc); visit_item2(K, V, - Acc = #ho_acc{ack = _AccSyncThreshold, - acksync_threshold = _AccSyncThreshold}) -> + Acc = #ho_acc{ack = _AccSyncThreshold, + acksync_threshold = _AccSyncThreshold}) -> #ho_acc{module = Module, socket = Sock, - src_target = {SrcPartition, TargetPartition}, - stats = Stats} = - Acc, + src_target = {SrcPartition, TargetPartition}, + stats = Stats} = + Acc, RecvTimeout = get_handoff_receive_timeout(), M = <<(?PT_MSG_OLDSYNC):8, "sync">>, NumBytes = byte_size(M), Stats2 = incr_bytes(Stats, NumBytes), Stats3 = maybe_send_status({Module, - SrcPartition, - TargetPartition}, - Stats2), + SrcPartition, + TargetPartition}, + Stats2), case gen_tcp:send(Sock, M) of - ok -> - case gen_tcp:recv(Sock, 0, RecvTimeout) of - {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> - Acc2 = Acc#ho_acc{ack = 0, error = ok, stats = Stats3}, - visit_item2(K, V, Acc2); - {error, Reason} -> - Acc#ho_acc{ack = 0, error = {error, Reason}, - stats = Stats3} - end; - {error, Reason} -> - Acc#ho_acc{ack = 0, error = {error, Reason}, - stats = Stats3} + ok -> + case gen_tcp:recv(Sock, 0, RecvTimeout) of + {ok, [?PT_MSG_OLDSYNC | <<"sync">>]} -> + Acc2 = Acc#ho_acc{ack = 0, error = ok, stats = Stats3}, + visit_item2(K, V, Acc2); + {error, Reason} -> + Acc#ho_acc{ack = 0, error = {error, Reason}, + stats = Stats3} + end; + {error, Reason} -> + Acc#ho_acc{ack = 0, error = {error, Reason}, + stats = Stats3} end; visit_item2(K, V, Acc) -> #ho_acc{filter = Filter, module = Module, - total_objects = TotalObjects, - use_batching = UseBatching, item_queue = ItemQueue, - item_queue_length = ItemQueueLength, - item_queue_byte_size = ItemQueueByteSize, - notsent_fun = NotSentFun, notsent_acc = NotSentAcc} = - Acc, + total_objects = TotalObjects, + use_batching = UseBatching, item_queue = ItemQueue, + item_queue_length = ItemQueueLength, + item_queue_byte_size = ItemQueueByteSize, + notsent_fun = NotSentFun, notsent_acc = NotSentAcc} = + Acc, case Filter(K) of - true -> - case Module:encode_handoff_item(K, V) of - corrupted -> - {Bucket, Key} = K, - logger:warning("Unreadable object ~p/~p discarded", - [Bucket, Key]), - Acc; - BinObj -> - case UseBatching of - true -> - ItemQueue2 = [BinObj | ItemQueue], - ItemQueueLength2 = ItemQueueLength + 1, - ItemQueueByteSize2 = ItemQueueByteSize + - byte_size(BinObj), - Acc2 = Acc#ho_acc{item_queue_length = - ItemQueueLength2, - item_queue_byte_size = - ItemQueueByteSize2}, - %% Unit size is bytes: - HandoffBatchThreshold = - application:get_env(riak_core, - 
handoff_batch_threshold, - 1024 * 1024), - case ItemQueueByteSize2 =< HandoffBatchThreshold of - true -> Acc2#ho_acc{item_queue = ItemQueue2}; - false -> send_objects(ItemQueue2, Acc2) - end; - _ -> - #ho_acc{ack = Ack, socket = Sock, - src_target = - {SrcPartition, TargetPartition}, - stats = Stats, total_objects = TotalObjects, - total_bytes = TotalBytes} = - Acc, - M = <<(?PT_MSG_OBJ):8, BinObj/binary>>, - NumBytes = byte_size(M), - Stats2 = incr_bytes(incr_objs(Stats), NumBytes), - Stats3 = maybe_send_status({Module, - SrcPartition, - TargetPartition}, - Stats2), - case gen_tcp:send(Sock, M) of - ok -> - Acc#ho_acc{ack = Ack + 1, error = ok, - stats = Stats3, - total_bytes = - TotalBytes + NumBytes, - total_objects = - TotalObjects + 1}; - {error, Reason} -> - Acc#ho_acc{error = {error, Reason}, - stats = Stats3} - end - end - end; - false -> - NewNotSentAcc = handle_not_sent_item(NotSentFun, - NotSentAcc, - K), - Acc#ho_acc{error = ok, total_objects = TotalObjects + 1, - notsent_acc = NewNotSentAcc} + true -> + case Module:encode_handoff_item(K, V) of + corrupted -> + {Bucket, Key} = K, + logger:warning("Unreadable object ~p/~p discarded", + [Bucket, Key]), + Acc; + BinObj -> + case UseBatching of + true -> + ItemQueue2 = [BinObj | ItemQueue], + ItemQueueLength2 = ItemQueueLength + 1, + ItemQueueByteSize2 = ItemQueueByteSize + + byte_size(BinObj), + Acc2 = Acc#ho_acc{item_queue_length = + ItemQueueLength2, + item_queue_byte_size = + ItemQueueByteSize2}, + %% Unit size is bytes: + HandoffBatchThreshold = + application:get_env(riak_core, + handoff_batch_threshold, + 1024 * 1024), + case ItemQueueByteSize2 =< HandoffBatchThreshold of + true -> Acc2#ho_acc{item_queue = ItemQueue2}; + false -> send_objects(ItemQueue2, Acc2) + end; + _ -> + #ho_acc{ack = Ack, socket = Sock, + src_target = + {SrcPartition, TargetPartition}, + stats = Stats, total_objects = TotalObjects, + total_bytes = TotalBytes} = + Acc, + M = <<(?PT_MSG_OBJ):8, BinObj/binary>>, + NumBytes = byte_size(M), + Stats2 = incr_bytes(incr_objs(Stats), NumBytes), + Stats3 = maybe_send_status({Module, + SrcPartition, + TargetPartition}, + Stats2), + case gen_tcp:send(Sock, M) of + ok -> + Acc#ho_acc{ack = Ack + 1, error = ok, + stats = Stats3, + total_bytes = + TotalBytes + NumBytes, + total_objects = + TotalObjects + 1}; + {error, Reason} -> + Acc#ho_acc{error = {error, Reason}, + stats = Stats3} + end + end + end; + false -> + NewNotSentAcc = handle_not_sent_item(NotSentFun, + NotSentAcc, + K), + Acc#ho_acc{error = ok, total_objects = TotalObjects + 1, + notsent_acc = NewNotSentAcc} end. 
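Note that the flush condition in visit_item2/3 above compares the queue size after appending the new object, so a single object larger than handoff_batch_threshold still ships in a batch of its own. The decision in isolation (should_flush/2 is an illustrative helper, not part of the patch):

%% Illustrative: default threshold is 1 MiB, matching the code above.
should_flush(QueueBytes, BinObj) ->
    Threshold = application:get_env(riak_core,
                                    handoff_batch_threshold,
                                    1024 * 1024),
    QueueBytes + byte_size(BinObj) > Threshold.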
handle_not_sent_item(undefined, _, _) -> undefined; @@ -466,50 +466,50 @@ send_objects([], Acc) -> Acc; send_objects(ItemsReverseList, Acc) -> Items = lists:reverse(ItemsReverseList), #ho_acc{ack = Ack, module = Module, socket = Sock, - src_target = {SrcPartition, TargetPartition}, - stats = Stats, total_objects = TotalObjects, - total_bytes = TotalBytes, - item_queue_length = NObjects} = - Acc, + src_target = {SrcPartition, TargetPartition}, + stats = Stats, total_objects = TotalObjects, + total_bytes = TotalBytes, + item_queue_length = NObjects} = + Acc, ObjectList = term_to_binary(Items), M = <<(?PT_MSG_BATCH):8, ObjectList/binary>>, NumBytes = byte_size(M), Stats2 = incr_bytes(incr_objs(Stats, NObjects), - NumBytes), + NumBytes), Stats3 = maybe_send_status({Module, - SrcPartition, - TargetPartition}, - Stats2), + SrcPartition, + TargetPartition}, + Stats2), case gen_tcp:send(Sock, M) of - ok -> - Acc#ho_acc{ack = Ack + 1, error = ok, stats = Stats3, - total_objects = TotalObjects + NObjects, - total_bytes = TotalBytes + NumBytes, item_queue = [], - item_queue_length = 0, item_queue_byte_size = 0}; - {error, Reason} -> - Acc#ho_acc{error = {error, Reason}, stats = Stats3} + ok -> + Acc#ho_acc{ack = Ack + 1, error = ok, stats = Stats3, + total_objects = TotalObjects + NObjects, + total_bytes = TotalBytes + NumBytes, item_queue = [], + item_queue_length = 0, item_queue_byte_size = 0}; + {error, Reason} -> + Acc#ho_acc{error = {error, Reason}, stats = Stats3} end. get_handoff_ip(Node) when is_atom(Node) -> case riak_core_util:safe_rpc(Node, - riak_core_handoff_listener, - get_handoff_ip, - [], - infinity) - of - {badrpc, _} -> error; - Res -> Res + riak_core_handoff_listener, + get_handoff_ip, + [], + infinity) + of + {badrpc, _} -> error; + Res -> Res end. get_handoff_port(Node) when is_atom(Node) -> gen_server:call({riak_core_handoff_listener, Node}, - handoff_port, - infinity). + handoff_port, + infinity). get_handoff_receive_timeout() -> application:get_env(riak_core, - handoff_timeout, - ?TCP_TIMEOUT). + handoff_timeout, + ?TCP_TIMEOUT). end_fold_time(StartFoldTime) -> EndFoldTime = os:timestamp(), @@ -536,10 +536,10 @@ is_elapsed(TS) -> os:timestamp() >= TS. %% %% @doc Increment `Stats' byte count by `NumBytes'. -spec incr_bytes(ho_stats(), - non_neg_integer()) -> NewStats :: ho_stats(). + non_neg_integer()) -> NewStats :: ho_stats(). incr_bytes(Stats = #ho_stats{bytes = Bytes}, - NumBytes) -> + NumBytes) -> Stats#ho_stats{bytes = Bytes + NumBytes}. incr_objs(Stats) -> incr_objs(Stats, 1). @@ -548,7 +548,7 @@ incr_objs(Stats) -> incr_objs(Stats, 1). %% %% @doc Increment `Stats' object count by NObjs: -spec incr_objs(ho_stats(), - non_neg_integer()) -> NewStats :: ho_stats(). + non_neg_integer()) -> NewStats :: ho_stats(). incr_objs(Stats = #ho_stats{objs = Objs}, NObjs) -> Stats#ho_stats{objs = Objs + NObjs}. @@ -559,25 +559,25 @@ incr_objs(Stats = #ho_stats{objs = Objs}, NObjs) -> %% for `ModSrcTgt' to the manager and return a new stats record %% `NetStats'. -spec maybe_send_status({module(), non_neg_integer(), - non_neg_integer()}, - ho_stats()) -> NewStats :: ho_stats(). + non_neg_integer()}, + ho_stats()) -> NewStats :: ho_stats(). 
maybe_send_status(ModSrcTgt, - Stats = #ho_stats{interval_end = IntervalEnd}) -> + Stats = #ho_stats{interval_end = IntervalEnd}) -> case is_elapsed(IntervalEnd) of - true -> - Stats2 = Stats#ho_stats{last_update = os:timestamp()}, - riak_core_handoff_manager:status_update(ModSrcTgt, - Stats2), - #ho_stats{interval_end = - future_now(get_status_interval())}; - false -> Stats + true -> + Stats2 = Stats#ho_stats{last_update = os:timestamp()}, + riak_core_handoff_manager:status_update(ModSrcTgt, + Stats2), + #ho_stats{interval_end = + future_now(get_status_interval())}; + false -> Stats end. get_status_interval() -> application:get_env(riak_core, - handoff_status_interval, - ?STATUS_INTERVAL). + handoff_status_interval, + ?STATUS_INTERVAL). get_src_partition(Opts) -> proplists:get_value(src_partition, Opts). @@ -590,16 +590,16 @@ get_notsent_acc0(Opts) -> get_notsent_fun(Opts) -> case proplists:get_value(notsent_fun, Opts) of - none -> fun (_, _) -> undefined end; - Fun -> Fun + none -> fun (_, _) -> undefined end; + Fun -> Fun end. -spec get_filter(proplists:proplist()) -> predicate(). get_filter(Opts) -> case proplists:get_value(filter, Opts) of - none -> fun (_) -> true end; - Filter -> Filter + none -> fun (_) -> true end; + Filter -> Filter end. %% @private @@ -609,18 +609,18 @@ get_filter(Opts) -> remote_supports_batching(Node) -> case catch rpc:call(Node, - riak_core_handoff_receiver, - supports_batching, - []) - of - true -> - logger:debug("remote node supports batching, enabling"), - true; - _ -> - %% whatever the problem here, just revert to the old behavior - %% which shouldn't matter too much for any single handoff - logger:debug("remote node doesn't support batching"), - false + riak_core_handoff_receiver, + supports_batching, + []) + of + true -> + logger:debug("remote node supports batching, enabling"), + true; + _ -> + %% whatever the problem here, just revert to the old behavior + %% which shouldn't matter too much for any single handoff + logger:debug("remote node doesn't support batching"), + false end. %% @private @@ -634,19 +634,19 @@ remote_supports_batching(Node) -> %% the process. maybe_call_handoff_started(Module, SrcPartition) -> case lists:member({handoff_started, 2}, - Module:module_info(exports)) - of - true -> - WorkerPid = self(), - case Module:handoff_started(SrcPartition, WorkerPid) of - {ok, FoldOpts} -> FoldOpts; - {error, max_concurrency} -> - %% Handoff of that partition is busy or can't proceed. Stopping with - %% max_concurrency will cause this partition to be retried again later. - exit({shutdown, max_concurrency}); - {error, Error} -> exit({shutdown, Error}) - end; - false -> - %% optional callback not implemented, so we carry on, w/ no addition fold options - [] + Module:module_info(exports)) + of + true -> + WorkerPid = self(), + case Module:handoff_started(SrcPartition, WorkerPid) of + {ok, FoldOpts} -> FoldOpts; + {error, max_concurrency} -> + %% Handoff of that partition is busy or can't proceed. Stopping with + %% max_concurrency will cause this partition to be retried again later. + exit({shutdown, max_concurrency}); + {error, Error} -> exit({shutdown, Error}) + end; + false -> + %% optional callback not implemented, so we carry on, w/ no addition fold options + [] end. diff --git a/src/riak_core_handoff_sender_sup.erl b/src/riak_core_handoff_sender_sup.erl index 2def80331..3b0544e67 100644 --- a/src/riak_core_handoff_sender_sup.erl +++ b/src/riak_core_handoff_sender_sup.erl @@ -31,12 +31,12 @@ -include("riak_core_handoff.hrl"). 
-define(CHILD(I, Type), - {I, - {I, start_link, []}, - temporary, - brutal_kill, - Type, - [I]}). + {I, + {I, start_link, []}, + temporary, + brutal_kill, + Type, + [I]}). %%%=================================================================== %%% API @@ -60,11 +60,11 @@ start_link() -> %% * unsent_acc0 - optional. The intial accumulator value passed to unsent_fun %% for the first unsent key -spec start_sender(ho_type(), atom(), term(), pid(), - [{atom(), term()}]) -> {ok, pid()}. + [{atom(), term()}]) -> {ok, pid()}. start_sender(Type, Module, TargetNode, VNode, Opts) -> supervisor:start_child(?MODULE, - [TargetNode, Module, {Type, Opts}, VNode]). + [TargetNode, Module, {Type, Opts}, VNode]). %%%=================================================================== %%% Callbacks diff --git a/src/riak_core_handoff_sup.erl b/src/riak_core_handoff_sup.erl index 4493f220e..a8d9f4627 100644 --- a/src/riak_core_handoff_sup.erl +++ b/src/riak_core_handoff_sup.erl @@ -26,12 +26,12 @@ -export([start_link/0, init/1]). -define(CHILD(I, Type), - {I, - {I, start_link, []}, - permanent, - brutal_kill, - Type, - [I]}). + {I, + {I, start_link, []}, + permanent, + brutal_kill, + Type, + [I]}). %% begins the supervisor, init/1 will be called start_link() -> diff --git a/src/riak_core_node_watcher.erl b/src/riak_core_node_watcher.erl index 46b999453..05ff058fd 100644 --- a/src/riak_core_node_watcher.erl +++ b/src/riak_core_node_watcher.erl @@ -27,19 +27,19 @@ %% API -export([start_link/0, - service_up/2, - service_up/3, - service_up/4, - check_health/1, - suspend_health_checks/0, - resume_health_checks/0, - service_down/1, - service_down/2, - node_up/0, - node_down/0, - services/0, - services/1, - nodes/1]). + service_up/2, + service_up/3, + service_up/4, + check_health/1, + suspend_health_checks/0, + resume_health_checks/0, + service_down/1, + service_down/2, + node_up/0, + node_down/0, + services/0, + services/1, + nodes/1]). %% TEST API -ifdef(TEST). @@ -56,36 +56,36 @@ %% gen_server callbacks -export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, - {status = up, - services = [], - health_checks = [], - healths_enabled = true, - peers = [], - avsn = 0, - bcast_tref, - bcast_mod = {gen_server, abcast}}). + {status = up, + services = [], + health_checks = [], + healths_enabled = true, + peers = [], + avsn = 0, + bcast_tref, + bcast_mod = {gen_server, abcast}}). -record(health_check, - {state = waiting :: waiting | checking | suspend, - callback :: {atom(), atom(), [any()]}, - service_pid :: pid(), - checking_pid :: pid() | undefined, - health_failures = 0 :: non_neg_integer(), - callback_failures = 0 :: non_neg_integer(), - interval_tref, - %% how many milliseconds to wait after a check has - %% finished before starting a new one - check_interval = ?DEFAULT_HEALTH_CHECK_INTERVAL :: - timeout(), - max_callback_failures = 3, - max_health_failures = 1}). + {state = waiting :: waiting | checking | suspend, + callback :: {atom(), atom(), [any()]}, + service_pid :: pid(), + checking_pid :: pid() | undefined, + health_failures = 0 :: non_neg_integer(), + callback_failures = 0 :: non_neg_integer(), + interval_tref, + %% how many milliseconds to wait after a check has + %% finished before starting a new one + check_interval = ?DEFAULT_HEALTH_CHECK_INTERVAL :: + timeout(), + max_callback_failures = 3, + max_health_failures = 1}). 
%% ===================================================================
%% Public API
%% ===================================================================

start_link() ->
    gen_server:start_link({local, ?MODULE},
-                          ?MODULE,
-                          [],
-                          []).
+                          ?MODULE,
+                          [],
+                          []).

service_up(Id, Pid) ->
    gen_server:call(?MODULE,
-                    {service_up, Id, Pid},
-                    infinity).
+                    {service_up, Id, Pid},
+                    infinity).

%% @doc {@link service_up/4} with default options.
%% @see service_up/4
-spec service_up(Id :: atom(), Pid :: pid(),
-                 MFA :: mfa()) -> ok.
+                 MFA :: mfa()) -> ok.

service_up(Id, Pid, MFA) -> service_up(Id, Pid, MFA, []).

-type hc_check_interval_opt() :: {check_interval,
-                                  timeout()}.
+                                  timeout()}.

-type hc_max_callback_fails_opt() :: {max_callback_failures,
-                                      non_neg_integer()}.
+                                      non_neg_integer()}.

-type hc_max_health_fails_opt() :: {max_health_failures,
-                                    non_neg_integer()}.
+                                    non_neg_integer()}.

-type health_opt() :: hc_check_interval_opt() |
-                      hc_max_callback_fails_opt() |
-                      hc_max_health_fails_opt().
+                      hc_max_callback_fails_opt() |
+                      hc_max_health_fails_opt().

-type health_opts() :: [health_opt()].

@@ -144,17 +144,17 @@ service_up(Id, Pid, MFA) ->
%% any other, using {@link service_down/1}.
%% @see service_up/2
-spec service_up(Id :: atom(), Pid :: pid(),
-                 Callback :: mfa(), Options :: health_opts()) -> ok.
+                 Callback :: mfa(), Options :: health_opts()) -> ok.

service_up(Id, Pid, {Module, Function, Args},
-           Options) ->
+           Options) ->
    gen_server:call(?MODULE,
-                    {service_up,
-                     Id,
-                     Pid,
-                     {Module, Function, Args},
-                     Options},
-                    infinity).
+                    {service_up,
+                     Id,
+                     Pid,
+                     {Module, Function, Args},
+                     Options},
+                    infinity).

%% @doc Force a health check for the given service. If the service does
%% not have a health check associated with it, this is ignored. Resets the
@@ -177,8 +177,8 @@ service_down(Id) ->
service_down(Id, true) ->
    gen_server:call(?MODULE,
-                    {service_down, Id, health_check},
-                    infinitiy);
+                    {service_down, Id, health_check},
+                    infinity);
service_down(Id, false) -> service_down(Id).

node_up() ->
@@ -192,8 +192,8 @@ services() ->
services(Node) ->
    case check_node_valid(Node) of
-        true -> internal_get_services(Node);
-        _ -> invalid_node
+        true -> internal_get_services(Node);
+        _ -> invalid_node
    end.

nodes(Service) -> internal_get_nodes(Service).

@@ -208,8 +208,8 @@ avsn() ->
    gen_server:call(?MODULE, get_avsn, infinity).

set_broadcast_module(Module, Fn) ->
    gen_server:call(?MODULE,
-                    {set_bcast_mod, Module, Fn},
-                    infinity).
+                    {set_bcast_mod, Module, Fn},
+                    infinity).

-endif.

@@ -228,11 +228,11 @@ init([]) ->
    ok = net_kernel:monitor_nodes(true),
    %% Setup ETS table to track node status
    (?MODULE) = ets:new(?MODULE,
-                        [protected, {read_concurrency, true}, named_table]),
+                        [protected, {read_concurrency, true}, named_table]),
    {ok, schedule_broadcast(#state{})}.

handle_call({set_bcast_mod, Module, Fn}, _From,
-            State) ->
+            State) ->
    %% Call available for swapping out how broadcasts are generated
    {reply, ok, State#state{bcast_mod = {Module, Fn}}};
handle_call(get_avsn, _From, State) ->
@@ -243,44 +243,44 @@ handle_call({service_up, Id, Pid}, _From, State) ->
    S3 = add_service(Id, Pid, S2),
    {reply, ok, S3};
handle_call({service_up, Id, Pid, MFA, Options}, From,
-            State) ->
+            State) ->
    %% update the active set of services if needed.
{reply, _, State1} = handle_call({service_up, Id, Pid}, - From, - State), + From, + State), State2 = remove_health_check(Id, State1), case application:get_env(riak_core, - enable_health_checks, - true) - of - true -> - %% install the health check - CheckInterval = proplists:get_value(check_interval, - Options, - ?DEFAULT_HEALTH_CHECK_INTERVAL), - IntervalTref = case CheckInterval of - infinity -> undefined; - N -> - erlang:send_after(N, - self(), - {check_health, Id}) - end, - CheckRec = #health_check{callback = MFA, - check_interval = CheckInterval, - service_pid = Pid, - max_health_failures = - proplists:get_value(max_health_failures, - Options, - 1), - max_callback_failures = - proplists:get_value(max_callback_failures, - Options, - 3), - interval_tref = IntervalTref}, - Healths = orddict:store(Id, - CheckRec, - State2#state.health_checks); - false -> Healths = State2#state.health_checks + enable_health_checks, + true) + of + true -> + %% install the health check + CheckInterval = proplists:get_value(check_interval, + Options, + ?DEFAULT_HEALTH_CHECK_INTERVAL), + IntervalTref = case CheckInterval of + infinity -> undefined; + N -> + erlang:send_after(N, + self(), + {check_health, Id}) + end, + CheckRec = #health_check{callback = MFA, + check_interval = CheckInterval, + service_pid = Pid, + max_health_failures = + proplists:get_value(max_health_failures, + Options, + 1), + max_callback_failures = + proplists:get_value(max_callback_failures, + Options, + 3), + interval_tref = IntervalTref}, + Healths = orddict:store(Id, + CheckRec, + State2#state.health_checks); + false -> Healths = State2#state.health_checks end, {reply, ok, State2#state{health_checks = Healths}}; handle_call({service_down, Id}, _From, State) -> @@ -291,64 +291,64 @@ handle_call({service_down, Id}, _From, State) -> handle_call({node_status, Status}, _From, State) -> Transition = {State#state.status, Status}, S2 = case Transition of - {up, down} -> %% up -> down - case State#state.healths_enabled of - true -> - Healths = all_health_fsms(suspend, - State#state.health_checks); - false -> Healths = State#state.health_checks - end, - local_delete(State#state{status = down, - health_checks = Healths}); - {down, up} -> %% down -> up - case State#state.healths_enabled of - true -> - Healths = all_health_fsms(resume, - State#state.health_checks); - false -> Healths = State#state.health_checks - end, - local_update(State#state{status = up, - health_checks = Healths}); - {Status, Status} -> %% noop - State - end, + {up, down} -> %% up -> down + case State#state.healths_enabled of + true -> + Healths = all_health_fsms(suspend, + State#state.health_checks); + false -> Healths = State#state.health_checks + end, + local_delete(State#state{status = down, + health_checks = Healths}); + {down, up} -> %% down -> up + case State#state.healths_enabled of + true -> + Healths = all_health_fsms(resume, + State#state.health_checks); + false -> Healths = State#state.health_checks + end, + local_update(State#state{status = up, + health_checks = Healths}); + {Status, Status} -> %% noop + State + end, {reply, ok, update_avsn(S2)}; handle_call(services, _From, State) -> Res = [Service - || {{by_service, Service}, Nds} - <- ets:tab2list(?MODULE), - Nds /= []], + || {{by_service, Service}, Nds} + <- ets:tab2list(?MODULE), + Nds /= []], {reply, lists:sort(Res), State}; handle_call(suspend_healths, _From, - State = #state{healths_enabled = false}) -> + State = #state{healths_enabled = false}) -> {reply, already_disabled, State}; 
handle_call(suspend_healths, _From, - State = #state{healths_enabled = true}) -> + State = #state{healths_enabled = true}) -> logger:info("suspending all health checks"), Healths = all_health_fsms(suspend, - State#state.health_checks), + State#state.health_checks), {reply, ok, update_avsn(State#state{health_checks = Healths, - healths_enabled = false})}; + healths_enabled = false})}; handle_call(resume_healths, _From, - State = #state{healths_enabled = true}) -> + State = #state{healths_enabled = true}) -> {reply, already_enabled, State}; handle_call(resume_healths, _From, - State = #state{healths_enabled = false}) -> + State = #state{healths_enabled = false}) -> logger:info("resuming all health checks"), Healths = all_health_fsms(resume, - State#state.health_checks), + State#state.health_checks), {reply, ok, update_avsn(State#state{health_checks = Healths, - healths_enabled = true})}. + healths_enabled = true})}. handle_cast({ring_update, R}, State) -> %% Ring has changed; determine what peers are new to us %% and broadcast out current status to those peers. Peers0 = - ordsets:from_list(riak_core_ring:all_members(R)), + ordsets:from_list(riak_core_ring:all_members(R)), Peers = ordsets:del_element(node(), Peers0), S2 = peers_update(Peers, State), {noreply, update_avsn(S2)}; @@ -361,8 +361,8 @@ handle_cast({down, Node}, State) -> handle_cast({health_check_result, Pid, R}, State) -> Service = erlang:erase(Pid), State2 = handle_check_msg({result, Pid, R}, - Service, - State), + Service, + State), {noreply, State2}. handle_info({nodeup, _Node}, State) -> @@ -375,19 +375,19 @@ handle_info({'DOWN', Mref, _, _Pid, _Info}, State) -> %% A sub-system monitored process has terminated. Identify %% the sub-system in question and notify our peers. case erlang:get(Mref) of - undefined -> - %% No entry found for this monitor; ignore the message - {noreply, update_avsn(State)}; - Id -> - %% Remove the id<->mref entries in the pdict - delete_service_mref(Id), - %% remove any health checks in place - S2 = remove_health_check(Id, State), - %% Update our list of active services and ETS table - Services = ordsets:del_element(Id, - State#state.services), - S3 = local_update(S2#state{services = Services}), - {noreply, update_avsn(S3)} + undefined -> + %% No entry found for this monitor; ignore the message + {noreply, update_avsn(State)}; + Id -> + %% Remove the id<->mref entries in the pdict + delete_service_mref(Id), + %% remove any health checks in place + S2 = remove_health_check(Id, State), + %% Update our list of active services and ETS table + Services = ordsets:del_element(Id, + State#state.services), + S3 = local_update(S2#state{services = Services}), + {noreply, update_avsn(S3)} end; handle_info({'EXIT', Pid, _Cause} = Msg, State) -> Service = erlang:erase(Pid), @@ -407,7 +407,7 @@ handle_info(broadcast, State) -> terminate(_Reason, State) -> %% Let our peers know that we are shutting down broadcast(State#state.peers, - State#state{status = down}). + State#state{status = down}). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -426,23 +426,23 @@ update_avsn(State) -> watch_for_ring_events() -> Self = self(), Fn = fun (R) -> gen_server:cast(Self, {ring_update, R}) - end, + end, riak_core_ring_events:add_sup_callback(Fn). delete_service_mref(Id) -> %% Cleanup the monitor if one exists case erlang:get(Id) of - undefined -> ok; - Mref -> - erlang:erase(Mref), - erlang:erase(Id), - erlang:demonitor(Mref) + undefined -> ok; + Mref -> + erlang:erase(Mref), + erlang:erase(Id), + erlang:demonitor(Mref) end. 
broadcast(Nodes, State) -> case State#state.status of - up -> Msg = {up, node(), State#state.services}; - down -> Msg = {down, node()} + up -> Msg = {up, node(), State#state.services}; + down -> Msg = {down, node()} end, {Mod, Fn} = State#state.bcast_mod, Mod:Fn(Nodes, ?MODULE, Msg), @@ -450,13 +450,13 @@ broadcast(Nodes, State) -> schedule_broadcast(State) -> case State#state.bcast_tref of - undefined -> ok; - OldTref -> - _ = erlang:cancel_timer(OldTref), - ok + undefined -> ok; + OldTref -> + _ = erlang:cancel_timer(OldTref), + ok end, {ok, Interval} = application:get_env(riak_core, - gossip_interval), + gossip_interval), Tref = erlang:send_after(Interval, self(), broadcast), State#state{bcast_tref = Tref}. @@ -467,37 +467,37 @@ is_node_up(Node) -> ets:member(?MODULE, Node). node_up(Node, Services, State) -> case is_peer(Node, State) of - true -> - %% Before we alter the ETS table, see if this node was previously - %% down. In that situation, we'll go ahead and broadcast out. - S2 = case is_node_up(Node) of - false -> broadcast([Node], State); - true -> State - end, - case node_update(Node, Services) of - [] -> ok; - AffectedServices -> - riak_core_node_watcher_events:service_update(AffectedServices) - end, - S2; - false -> State + true -> + %% Before we alter the ETS table, see if this node was previously + %% down. In that situation, we'll go ahead and broadcast out. + S2 = case is_node_up(Node) of + false -> broadcast([Node], State); + true -> State + end, + case node_update(Node, Services) of + [] -> ok; + AffectedServices -> + riak_core_node_watcher_events:service_update(AffectedServices) + end, + S2; + false -> State end. node_down(Node, State) -> case is_peer(Node, State) of - true -> - case node_delete(Node) of - [] -> ok; - AffectedServices -> - riak_core_node_watcher_events:service_update(AffectedServices) - end; - false -> ok + true -> + case node_delete(Node) of + [] -> ok; + AffectedServices -> + riak_core_node_watcher_events:service_update(AffectedServices) + end; + false -> ok end. node_delete(Node) -> Services = internal_get_services(Node), _ = [internal_delete(Node, Service) - || Service <- Services], + || Service <- Services], ets:delete(?MODULE, Node), Services. @@ -507,7 +507,7 @@ node_update(Node, Services) -> Now = riak_core_util:moment(), NewStatus = ordsets:from_list(Services), OldStatus = - ordsets:from_list(internal_get_services(Node)), + ordsets:from_list(internal_get_services(Node)), Added = ordsets:subtract(NewStatus, OldStatus), Deleted = ordsets:subtract(OldStatus, NewStatus), %% Update ets table with changes; make sure to touch unchanged @@ -525,23 +525,23 @@ local_update(#state{status = down} = State) -> local_update(State) -> %% Update our local ETS table case node_update(node(), State#state.services) of - [] -> - %% No material changes; no local notification necessary - ok; - AffectedServices -> - %% Generate a local notification about the affected services and - %% also broadcast our status - riak_core_node_watcher_events:service_update(AffectedServices) + [] -> + %% No material changes; no local notification necessary + ok; + AffectedServices -> + %% Generate a local notification about the affected services and + %% also broadcast our status + riak_core_node_watcher_events:service_update(AffectedServices) end, broadcast(State#state.peers, State). 
local_delete(State) -> case node_delete(node()) of - [] -> - %% No services changed; no local notification required - ok; - AffectedServices -> - riak_core_node_watcher_events:service_update(AffectedServices) + [] -> + %% No services changed; no local notification required + ok; + AffectedServices -> + riak_core_node_watcher_events:service_update(AffectedServices) end, broadcast(State#state.peers, State). @@ -552,17 +552,17 @@ peers_update(NewPeers, State) -> %% For peers that have been deleted, remove their entries from %% the ETS table; we no longer care about their status Services0 = lists:foldl(fun (Node, Acc) -> - S = node_delete(Node), - S ++ Acc - end, - [], - Deleted), + S = node_delete(Node), + S ++ Acc + end, + [], + Deleted), Services = ordsets:from_list(Services0), %% Notify local parties if any services are affected by this change case Services of - [] -> ok; - _ -> - riak_core_node_watcher_events:service_update(Services) + [] -> ok; + _ -> + riak_core_node_watcher_events:service_update(Services) end, %% Broadcast our current status to new peers broadcast(Added, State#state{peers = NewPeers}). @@ -570,36 +570,36 @@ peers_update(NewPeers, State) -> internal_delete(Node, Service) -> Svcs = internal_get_services(Node), ets:insert(?MODULE, - {{by_node, Node}, Svcs -- [Service]}), + {{by_node, Node}, Svcs -- [Service]}), Nds = internal_get_nodes(Service), ets:insert(?MODULE, - {{by_service, Service}, Nds -- [Node]}). + {{by_service, Service}, Nds -- [Node]}). internal_insert(Node, Service) -> %% Remove Service & node before adding: avoid accidental duplicates Svcs = internal_get_services(Node) -- [Service], ets:insert(?MODULE, - {{by_node, Node}, [Service | Svcs]}), + {{by_node, Node}, [Service | Svcs]}), Nds = internal_get_nodes(Service) -- [Node], ets:insert(?MODULE, - {{by_service, Service}, [Node | Nds]}). + {{by_service, Service}, [Node | Nds]}). internal_get_services(Node) -> case ets:lookup(?MODULE, {by_node, Node}) of - [{{by_node, Node}, Ss}] -> Ss; - [] -> [] + [{{by_node, Node}, Ss}] -> Ss; + [] -> [] end. internal_get_nodes(Service) -> case ets:lookup(?MODULE, {by_service, Service}) of - [{{by_service, Service}, Ns}] -> Ns; - [] -> [] + [{{by_service, Service}, Ns}] -> Ns; + [] -> [] end. add_service(ServiceId, Pid, State) -> %% Update the set of active services locally Services = ordsets:add_element(ServiceId, - State#state.services), + State#state.services), S2 = State#state{services = Services}, %% Remove any existing mrefs for this service delete_service_mref(ServiceId), @@ -614,7 +614,7 @@ add_service(ServiceId, Pid, State) -> drop_service(ServiceId, State) -> %% Update the set of active services locally Services = ordsets:del_element(ServiceId, - State#state.services), + State#state.services), S2 = State#state{services = Services}, %% Remove any existing mrefs for this service delete_service_mref(ServiceId), @@ -624,50 +624,50 @@ drop_service(ServiceId, State) -> handle_check_msg(_Msg, undefined, State) -> State; handle_check_msg(_Msg, _ServiceId, - #state{status = down} = State) -> + #state{status = down} = State) -> %% most likely a late message State; handle_check_msg(Msg, ServiceId, State) -> case orddict:find(ServiceId, State#state.health_checks) - of - error -> State; - {ok, Check} -> - CheckReturn = health_fsm(Msg, ServiceId, Check), - handle_check_return(CheckReturn, ServiceId, State) + of + error -> State; + {ok, Check} -> + CheckReturn = health_fsm(Msg, ServiceId, Check), + handle_check_return(CheckReturn, ServiceId, State) end. 
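The internal_* helpers above maintain one ETS table indexed in both directions: {{by_node, Node}, Services} records what a node advertises, and {{by_service, Service}, Nodes} records who provides a service; internal_insert/2 strips the element from each list before re-adding it to avoid duplicates. Since the table is protected with read_concurrency, the exported lookups run in the caller. A sketch (lookup_example/0 and the riak_kv service id are illustrative):

%% Illustrative: the same table, read from both index directions.
lookup_example() ->
    Nodes = riak_core_node_watcher:nodes(riak_kv),       % {{by_service, _}, _}
    Services = riak_core_node_watcher:services(node()),  % {{by_node, _}, _}
    {Nodes, Services}.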
handle_check_return({remove, _Check}, ServiceId, - State) -> + State) -> Healths = orddict:erase(ServiceId, - State#state.health_checks), + State#state.health_checks), State#state{health_checks = Healths}; handle_check_return({ok, Check}, ServiceId, State) -> Healths = orddict:store(ServiceId, - Check, - State#state.health_checks), + Check, + State#state.health_checks), State#state{health_checks = Healths}; handle_check_return({up, Check}, ServiceId, State) -> #health_check{service_pid = Pid} = Check, Healths = orddict:store(ServiceId, - Check, - State#state.health_checks), + Check, + State#state.health_checks), S2 = State#state{health_checks = Healths}, add_service(ServiceId, Pid, S2); handle_check_return({down, Check}, ServiceId, State) -> Healths = orddict:store(ServiceId, - Check, - State#state.health_checks), + Check, + State#state.health_checks), S2 = State#state{health_checks = Healths}, drop_service(ServiceId, S2). remove_health_check(ServiceId, State) -> #state{health_checks = Healths} = State, Healths2 = case orddict:find(ServiceId, Healths) of - error -> Healths; - {ok, Check} -> - {_, _} = health_fsm(remove, ServiceId, Check), - orddict:erase(ServiceId, Healths) - end, + error -> Healths; + {ok, Check} -> + {_, _} = health_fsm(remove, ServiceId, Check), + orddict:erase(ServiceId, Healths) + end, State#state{health_checks = Healths2}. %% health checks are an fsm to make mental modeling easier. @@ -682,18 +682,18 @@ remove_health_check(ServiceId, State) -> %% health check finished health_fsm(Msg, Service, - #health_check{state = StateName} = Check) -> + #health_check{state = StateName} = Check) -> {Reply, NextState, Check2} = health_fsm(StateName, - Msg, - Service, - Check), + Msg, + Service, + Check), Check3 = Check2#health_check{state = NextState}, {Reply, Check3}. 
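For review, the transitions implemented by the clauses that follow can be summarized as (derived from the code; no new behavior):

%%   suspend  --resume-------->  waiting   (next check scheduled)
%%   waiting  --check_health-->  checking  (callback process spawned)
%%   checking --{result, _}--->  waiting   (next check scheduled)
%%   checking --'EXIT'-------->  waiting, or suspend once callback crashes
%%                               reach max_callback_failures
%%   waiting  --suspend------->  suspend   (interval timer cancelled)
%%   *        --remove-------->  check dropped from the orddict
%% Anything else falls through and leaves the state unchanged.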
%% suspend state health_fsm(suspend, resume, Service, InCheck) -> #health_check{health_failures = N, check_interval = V} = - InCheck, + InCheck, Tref = next_health_tref(N, V, Service), OutCheck = InCheck#health_check{interval_tref = Tref}, {ok, waiting, OutCheck}; @@ -711,64 +711,64 @@ health_fsm(checking, check_health, _Service, InCheck) -> health_fsm(checking, remove, _Service, InCheck) -> {remove, checking, InCheck}; health_fsm(checking, {result, Pid, Cause}, Service, - #health_check{checking_pid = Pid} = InCheck) -> + #health_check{checking_pid = Pid} = InCheck) -> %% handle result from checking pid #health_check{health_failures = HPFails, - max_health_failures = HPMaxFails} = - InCheck, + max_health_failures = HPMaxFails} = + InCheck, {Reply, HPFails1} = handle_fsm_exit(Cause, - HPFails, - HPMaxFails), + HPFails, + HPMaxFails), Tref = next_health_tref(HPFails1, - InCheck#health_check.check_interval, - Service), + InCheck#health_check.check_interval, + Service), OutCheck = InCheck#health_check{checking_pid = - undefined, - health_failures = HPFails1, - callback_failures = 0, - interval_tref = Tref}, + undefined, + health_failures = HPFails1, + callback_failures = 0, + interval_tref = Tref}, {Reply, waiting, OutCheck}; health_fsm(checking, {'EXIT', Pid, Cause}, Service, - #health_check{checking_pid = Pid} = InCheck) + #health_check{checking_pid = Pid} = InCheck) when Cause =/= normal -> logger:error("health check process for ~p error'ed: " - " ~p", - [Service, Cause]), + " ~p", + [Service, Cause]), Fails = InCheck#health_check.callback_failures + 1, if Fails == - InCheck#health_check.max_callback_failures -> - logger:error("health check callback for ~p failed " - "too many times, disabling.", - [Service]), - {down, - suspend, - InCheck#health_check{checking_pid = undefined, - callback_failures = Fails}}; + InCheck#health_check.max_callback_failures -> + logger:error("health check callback for ~p failed " + "too many times, disabling.", + [Service]), + {down, + suspend, + InCheck#health_check{checking_pid = undefined, + callback_failures = Fails}}; Fails < InCheck#health_check.max_callback_failures -> - #health_check{health_failures = N, - check_interval = Inter} = - InCheck, - Tref = next_health_tref(N, Inter, Service), - OutCheck = InCheck#health_check{checking_pid = - undefined, - callback_failures = Fails, - interval_tref = Tref}, - {ok, waiting, OutCheck}; + #health_check{health_failures = N, + check_interval = Inter} = + InCheck, + Tref = next_health_tref(N, Inter, Service), + OutCheck = InCheck#health_check{checking_pid = + undefined, + callback_failures = Fails, + interval_tref = Tref}, + {ok, waiting, OutCheck}; true -> - %% likely a late message, or a faker - {ok, - suspend, - InCheck#health_check{checking_pid = undefined, - callback_failures = Fails}} + %% likely a late message, or a faker + {ok, + suspend, + InCheck#health_check{checking_pid = undefined, + callback_failures = Fails}} end; %% message handling when in a waiting state health_fsm(waiting, suspend, _Service, InCheck) -> case InCheck#health_check.interval_tref of - undefined -> ok; - _ -> - _ = - erlang:cancel_timer(InCheck#health_check.interval_tref), - ok + undefined -> ok; + _ -> + _ = + erlang:cancel_timer(InCheck#health_check.interval_tref), + ok end, {ok, suspend, @@ -778,13 +778,13 @@ health_fsm(waiting, check_health, Service, InCheck) -> {ok, checking, InCheck1}; health_fsm(waiting, remove, _Service, InCheck) -> case InCheck#health_check.interval_tref of - undefined -> ok; - Tref -> - _ = 
erlang:cancel_timer(Tref), - ok + undefined -> ok; + Tref -> + _ = erlang:cancel_timer(Tref), + ok end, OutCheck = InCheck#health_check{interval_tref = - undefined}, + undefined}, {remove, waiting, OutCheck}; %% fallthrough handling health_fsm(StateName, _Msg, _Service, Health) -> @@ -807,44 +807,44 @@ handle_fsm_exit(false, HPFails, __) -> {ok, HPFails + 1}. start_health_check(Service, - #health_check{checking_pid = undefined} = CheckRec) -> + #health_check{checking_pid = undefined} = CheckRec) -> {Mod, Func, Args} = CheckRec#health_check.callback, Pid = CheckRec#health_check.service_pid, case CheckRec#health_check.interval_tref of - undefined -> ok; - Tref -> - _ = erlang:cancel_timer(Tref), - ok + undefined -> ok; + Tref -> + _ = erlang:cancel_timer(Tref), + ok end, CheckingPid = proc_lib:spawn_link(fun () -> - case erlang:apply(Mod, - Func, - [Pid | Args]) - of - R - when R =:= true orelse - R =:= false -> - health_check_result(self(), - R); - Else -> exit(Else) - end - end), + case erlang:apply(Mod, + Func, + [Pid | Args]) + of + R + when R =:= true orelse + R =:= false -> + health_check_result(self(), + R); + Else -> exit(Else) + end + end), erlang:put(CheckingPid, Service), CheckRec#health_check{state = checking, - checking_pid = CheckingPid, - interval_tref = undefined}; + checking_pid = CheckingPid, + interval_tref = undefined}; start_health_check(_Service, Check) -> Check. health_check_result(CheckPid, Result) -> gen_server:cast(?MODULE, - {health_check_result, CheckPid, Result}). + {health_check_result, CheckPid, Result}). next_health_tref(_, infinity, _) -> undefined; next_health_tref(N, V, Service) -> Time = determine_time(N, V), erlang:send_after(Time, - self(), - {check_health, Service}). + self(), + {check_health, Service}). all_health_fsms(Msg, Healths) -> [begin {ok, C1} = health_fsm(Msg, S, C), {S, C1} end diff --git a/src/riak_core_node_watcher_events.erl b/src/riak_core_node_watcher_events.erl index 70f2c5f14..ce012d02f 100644 --- a/src/riak_core_node_watcher_events.erl +++ b/src/riak_core_node_watcher_events.erl @@ -25,21 +25,21 @@ %% API -export([start_link/0, - add_handler/2, - add_sup_handler/2, - add_guarded_handler/2, - add_callback/1, - add_sup_callback/1, - add_guarded_callback/1, - service_update/1]). + add_handler/2, + add_sup_handler/2, + add_guarded_handler/2, + add_callback/1, + add_sup_callback/1, + add_guarded_callback/1, + service_update/1]). %% gen_event callbacks -export([init/1, - handle_event/2, - handle_call/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_event/2, + handle_call/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, {callback}). @@ -57,23 +57,23 @@ add_sup_handler(Handler, Args) -> add_guarded_handler(Handler, Args) -> riak_core:add_guarded_event_handler(?MODULE, - Handler, - Args). + Handler, + Args). add_callback(Fn) when is_function(Fn) -> gen_event:add_handler(?MODULE, - {?MODULE, make_ref()}, - [Fn]). + {?MODULE, make_ref()}, + [Fn]). add_sup_callback(Fn) when is_function(Fn) -> gen_event:add_sup_handler(?MODULE, - {?MODULE, make_ref()}, - [Fn]). + {?MODULE, make_ref()}, + [Fn]). add_guarded_callback(Fn) when is_function(Fn) -> riak_core:add_guarded_event_handler(?MODULE, - {?MODULE, make_ref()}, - [Fn]). + {?MODULE, make_ref()}, + [Fn]). service_update(Services) -> gen_event:notify(?MODULE, {service_update, Services}). 
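riak_core_node_watcher_events below is a thin gen_event wrapper; each callback is installed under a unique {?MODULE, make_ref()} handler id, so the same fun can be registered more than once. A usage sketch, not part of the patch; it assumes the handler's handle_event/2 passes the raw {service_update, Services} event through to the callback:

    %% Sketch only: log every service_update broadcast.
    attach_service_logger() ->
        riak_core_node_watcher_events:add_sup_callback(
            fun ({service_update, Services}) ->
                    logger:info("services now available: ~p", [Services]);
                (_Other) -> ok
            end).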
diff --git a/src/riak_core_priority_queue.erl b/src/riak_core_priority_queue.erl index 8be373fda..928311e49 100644 --- a/src/riak_core_priority_queue.erl +++ b/src/riak_core_priority_queue.erl @@ -55,16 +55,16 @@ -module(riak_core_priority_queue). -export([new/0, - is_queue/1, - is_empty/1, - len/1, - to_list/1, - in/2, - in/3, - out/1, - out/2, - pout/1, - join/2]). + is_queue/1, + is_empty/1, + len/1, + to_list/1, + in/2, + in/3, + out/1, + out/2, + pout/1, + join/2]). %%---------------------------------------------------------------------------- @@ -73,7 +73,7 @@ -type squeue() :: {queue, [any()], [any()]}. -type pqueue() :: squeue() | - {pqueue, [{priority(), squeue()}]}. + {pqueue, [{priority(), squeue()}]}. %%---------------------------------------------------------------------------- @@ -87,9 +87,9 @@ is_queue({queue, R, F}) when is_list(R), is_list(F) -> true; is_queue({pqueue, Queues}) when is_list(Queues) -> lists:all(fun ({P, Q}) -> - is_integer(P) andalso is_queue(Q) - end, - Queues); + is_integer(P) andalso is_queue(Q) + end, + Queues); is_queue(_) -> false. -spec is_empty(pqueue()) -> boolean(). @@ -130,14 +130,14 @@ in(X, Priority, {pqueue, Queues}) -> P = -Priority, {pqueue, case lists:keysearch(P, 1, Queues) of - {value, {_, Q}} -> - lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); - false -> - lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + {value, {_, Q}} -> + lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); + false -> + lists:keysort(1, [{P, {queue, [X], []}} | Queues]) end}. -spec out(pqueue()) -> {empty | {value, any()}, - pqueue()}. + pqueue()}. out({queue, [], []} = Q) -> {empty, Q}; out({queue, [V], []}) -> {{value, V}, {queue, [], []}}; @@ -151,19 +151,19 @@ out({queue, In, [V | Out]}) when is_list(In) -> out({pqueue, [{P, Q} | Queues]}) -> {R, Q1} = out(Q), NewQ = case is_empty(Q1) of - true -> - case Queues of - [] -> {queue, [], []}; - [{0, OnlyQ}] -> OnlyQ; - [_ | _] -> {pqueue, Queues} - end; - false -> {pqueue, [{P, Q1} | Queues]} - end, + true -> + case Queues of + [] -> {queue, [], []}; + [{0, OnlyQ}] -> OnlyQ; + [_ | _] -> {pqueue, Queues} + end; + false -> {pqueue, [{P, Q1} | Queues]} + end, {R, NewQ}. -spec out(priority(), pqueue()) -> {empty | - {value, any()}, - pqueue()}. + {value, any()}, + pqueue()}. out(_Priority, {queue, [], []} = Q) -> {empty, Q}; out(Priority, {queue, _, _} = Q) when Priority =< 0 -> @@ -175,8 +175,8 @@ out(Priority, {pqueue, [{P, _Q} | _Queues]} = Q) out(_Priority, {pqueue, [_ | _]} = Q) -> {empty, Q}. -spec pout(pqueue()) -> {empty | - {value, any(), priority()}, - pqueue()}. + {value, any(), priority()}, + pqueue()}. pout({queue, [], []} = Q) -> {empty, Q}; pout({queue, _, _} = Q) -> @@ -185,14 +185,14 @@ pout({queue, _, _} = Q) -> pout({pqueue, [{P, Q} | Queues]}) -> {{value, V}, Q1} = out(Q), NewQ = case is_empty(Q1) of - true -> - case Queues of - [] -> {queue, [], []}; - [{0, OnlyQ}] -> OnlyQ; - [_ | _] -> {pqueue, Queues} - end; - false -> {pqueue, [{P, Q1} | Queues]} - end, + true -> + case Queues of + [] -> {queue, [], []}; + [{0, OnlyQ}] -> OnlyQ; + [_ | _] -> {pqueue, Queues} + end; + false -> {pqueue, [{P, Q1} | Queues]} + end, {{value, V, -P}, NewQ}. -spec join(pqueue(), pqueue()) -> pqueue(). 
@@ -203,23 +203,23 @@ join({queue, AIn, AOut}, {queue, BIn, BOut}) -> {queue, BIn, AOut ++ lists:reverse(AIn, BOut)}; join(A = {queue, _, _}, {pqueue, BPQ}) -> {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, - BPQ), + BPQ), Post1 = case Post of - [] -> [{0, A}]; - [{0, ZeroQueue} | Rest] -> - [{0, join(A, ZeroQueue)} | Rest]; - _ -> [{0, A} | Post] - end, + [] -> [{0, A}]; + [{0, ZeroQueue} | Rest] -> + [{0, join(A, ZeroQueue)} | Rest]; + _ -> [{0, A} | Post] + end, {pqueue, Pre ++ Post1}; join({pqueue, APQ}, B = {queue, _, _}) -> {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, - APQ), + APQ), Post1 = case Post of - [] -> [{0, B}]; - [{0, ZeroQueue} | Rest] -> - [{0, join(ZeroQueue, B)} | Rest]; - _ -> [{0, B} | Post] - end, + [] -> [{0, B}]; + [{0, ZeroQueue} | Rest] -> + [{0, join(ZeroQueue, B)} | Rest]; + _ -> [{0, B} | Post] + end, {pqueue, Pre ++ Post1}; join({pqueue, APQ}, {pqueue, BPQ}) -> {pqueue, merge(APQ, BPQ, [])}. @@ -251,29 +251,29 @@ simple_case(Order) -> ?assertEqual(0, ((?MODULE):len(Queue))), ?assertEqual([], ((?MODULE):to_list(Queue))), case Order of - forward -> - Queue2 = (?MODULE):in(low, Queue), - Queue3 = (?MODULE):in(mid, 500, Queue2), - Queue4 = (?MODULE):in(high, 1000, Queue3); - reverse -> - Queue2 = (?MODULE):in(high, 1000, Queue), - Queue3 = (?MODULE):in(mid, 500, Queue2), - Queue4 = (?MODULE):in(low, Queue3); - mixed -> - Queue2 = (?MODULE):in(high, 1000, Queue), - Queue3 = (?MODULE):in(low, Queue2), - Queue4 = (?MODULE):in(mid, 500, Queue3) + forward -> + Queue2 = (?MODULE):in(low, Queue), + Queue3 = (?MODULE):in(mid, 500, Queue2), + Queue4 = (?MODULE):in(high, 1000, Queue3); + reverse -> + Queue2 = (?MODULE):in(high, 1000, Queue), + Queue3 = (?MODULE):in(mid, 500, Queue2), + Queue4 = (?MODULE):in(low, Queue3); + mixed -> + Queue2 = (?MODULE):in(high, 1000, Queue), + Queue3 = (?MODULE):in(low, Queue2), + Queue4 = (?MODULE):in(mid, 500, Queue3) end, ?assertEqual(false, ((?MODULE):is_empty(Queue4))), ?assertEqual(3, ((?MODULE):len(Queue4))), ?assertMatch({{value, high}, _}, - ((?MODULE):out(Queue4))), + ((?MODULE):out(Queue4))), {{value, high}, Queue5} = (?MODULE):out(Queue4), ?assertMatch({{value, mid}, _}, - ((?MODULE):out(Queue5))), + ((?MODULE):out(Queue5))), {{value, mid}, Queue6} = (?MODULE):out(Queue5), ?assertMatch({{value, low}, _}, - ((?MODULE):out(Queue6))), + ((?MODULE):out(Queue6))), {{value, low}, Queue7} = (?MODULE):out(Queue6), ?assertEqual(0, ((?MODULE):len(Queue7))), ?assertEqual(true, ((?MODULE):is_queue(Queue2))), @@ -293,12 +293,12 @@ merge_case() -> QueueB4 = (?MODULE):in(6, QueueB3), Merged1 = (?MODULE):join(QueueA4, QueueB4), ?assertEqual([{0, 1}, - {0, 3}, - {0, 5}, - {0, 2}, - {0, 4}, - {0, 6}], - ((?MODULE):to_list(Merged1))), + {0, 3}, + {0, 5}, + {0, 2}, + {0, 4}, + {0, 6}], + ((?MODULE):to_list(Merged1))), QueueC1 = (?MODULE):new(), QueueC2 = (?MODULE):in(1, 10, QueueC1), QueueC3 = (?MODULE):in(3, 30, QueueC2), @@ -309,12 +309,12 @@ merge_case() -> QueueD4 = (?MODULE):in(6, 60, QueueD3), Merged2 = (?MODULE):join(QueueC4, QueueD4), ?assertEqual([{60, 6}, - {50, 5}, - {40, 4}, - {30, 3}, - {20, 2}, - {10, 1}], - ((?MODULE):to_list(Merged2))), + {50, 5}, + {40, 4}, + {30, 3}, + {20, 2}, + {10, 1}], + ((?MODULE):to_list(Merged2))), ok. 
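The assertions in merge_case/0 above depend on the module's priority encoding: in/3 stores priorities negated so that lists:keysort(1, Queues) keeps the highest priority at the head of the pqueue, while plain in/2 enqueues at priority 0. A small sketch of the resulting out/1 order, not part of the patch:

    %% Sketch only: higher explicit priority dequeues first.
    pq_order_demo() ->
        Q0 = riak_core_priority_queue:new(),
        Q1 = riak_core_priority_queue:in(low, Q0),        %% priority 0
        Q2 = riak_core_priority_queue:in(high, 1000, Q1), %% priority 1000
        {{value, high}, Q3} = riak_core_priority_queue:out(Q2),
        {{value, low}, _Q4} = riak_core_priority_queue:out(Q3),
        ok.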
basic_test() -> diff --git a/src/riak_core_rand.erl b/src/riak_core_rand.erl index f54c8e7b1..03ac0fd47 100644 --- a/src/riak_core_rand.erl +++ b/src/riak_core_rand.erl @@ -5,12 +5,12 @@ %% API -export([uniform/0, - uniform/1, - uniform_s/2, - seed/0, - seed/1, - rand_seed/0, - rand_bytes/1]). + uniform/1, + uniform_s/2, + seed/0, + seed/1, + rand_seed/0, + rand_bytes/1]). %% As the algorithm is not changed in any place we can use the default %% algorithm for all call here. @@ -38,7 +38,7 @@ seed() -> rand:seed(?ALGO). %% rand:seed will return the **new** seed. We can work around this by first %% getting the exported seed then using this instead. -spec seed({integer(), integer(), integer()} | - rand:export_state()) -> rand:export_state() | undefined. + rand:export_state()) -> rand:export_state() | undefined. seed({_, _, _} = Seed) -> Old = rand:export_seed(), diff --git a/src/riak_core_ring.erl b/src/riak_core_ring.erl index 774ef8636..0859f802f 100644 --- a/src/riak_core_ring.erl +++ b/src/riak_core_ring.erl @@ -29,114 +29,114 @@ -module(riak_core_ring). -export([all_members/1, - all_owners/1, - all_preflists/2, - diff_nodes/2, - equal_rings/2, - fresh/0, - fresh/1, - fresh/2, - get_meta/2, - get_buckets/1, - index_owner/2, - my_indices/1, - num_partitions/1, - owner_node/1, - preflist/2, - random_node/1, - random_other_index/1, - random_other_index/2, - random_other_node/1, - reconcile/2, - rename_node/3, - responsible_index/2, - transfer_node/3, - update_meta/3, - remove_meta/2]). + all_owners/1, + all_preflists/2, + diff_nodes/2, + equal_rings/2, + fresh/0, + fresh/1, + fresh/2, + get_meta/2, + get_buckets/1, + index_owner/2, + my_indices/1, + num_partitions/1, + owner_node/1, + preflist/2, + random_node/1, + random_other_index/1, + random_other_index/2, + random_other_node/1, + reconcile/2, + rename_node/3, + responsible_index/2, + transfer_node/3, + update_meta/3, + remove_meta/2]). -export([cluster_name/1, - set_tainted/1, - check_tainted/2, - nearly_equal/2, - claimant/1, - member_status/2, - pretty_print/2, - all_member_status/1, - update_member_meta/5, - clear_member_meta/3, - get_member_meta/3, - add_member/3, - remove_member/3, - leave_member/3, - exit_member/3, - down_member/3, - set_member/4, - set_member/5, - members/2, - set_claimant/2, - increment_vclock/2, - ring_version/1, - increment_ring_version/2, - set_pending_changes/2, - active_members/1, - claiming_members/1, - ready_members/1, - random_other_active_node/1, - down_members/1, - set_owner/2, - indices/2, - future_indices/2, - future_ring/1, - disowning_indices/2, - cancel_transfers/1, - pending_changes/1, - next_owner/1, - next_owner/2, - next_owner/3, - completed_next_owners/2, - all_next_owners/1, - change_owners/2, - handoff_complete/3, - ring_ready/0, - ring_ready/1, - ring_ready_info/1, - ring_changed/2, - set_cluster_name/2, - reconcile_names/2, - reconcile_members/2, - is_primary/2, - chash/1, - set_chash/2, - resize/2, - set_pending_resize/2, - set_pending_resize_abort/1, - maybe_abort_resize/1, - schedule_resize_transfer/3, - awaiting_resize_transfer/3, - resize_transfer_status/4, - resize_transfer_complete/4, - complete_resize_transfers/3, - reschedule_resize_transfers/3, - is_resizing/1, - is_post_resize/1, - is_resize_complete/1, - resized_ring/1, - set_resized_ring/2, - future_index/3, - future_index/4, - future_index/5, - is_future_index/4, - future_owner/2, - future_num_partitions/1, - vnode_type/2, - deletion_complete/3]). 
+ set_tainted/1, + check_tainted/2, + nearly_equal/2, + claimant/1, + member_status/2, + pretty_print/2, + all_member_status/1, + update_member_meta/5, + clear_member_meta/3, + get_member_meta/3, + add_member/3, + remove_member/3, + leave_member/3, + exit_member/3, + down_member/3, + set_member/4, + set_member/5, + members/2, + set_claimant/2, + increment_vclock/2, + ring_version/1, + increment_ring_version/2, + set_pending_changes/2, + active_members/1, + claiming_members/1, + ready_members/1, + random_other_active_node/1, + down_members/1, + set_owner/2, + indices/2, + future_indices/2, + future_ring/1, + disowning_indices/2, + cancel_transfers/1, + pending_changes/1, + next_owner/1, + next_owner/2, + next_owner/3, + completed_next_owners/2, + all_next_owners/1, + change_owners/2, + handoff_complete/3, + ring_ready/0, + ring_ready/1, + ring_ready_info/1, + ring_changed/2, + set_cluster_name/2, + reconcile_names/2, + reconcile_members/2, + is_primary/2, + chash/1, + set_chash/2, + resize/2, + set_pending_resize/2, + set_pending_resize_abort/1, + maybe_abort_resize/1, + schedule_resize_transfer/3, + awaiting_resize_transfer/3, + resize_transfer_status/4, + resize_transfer_complete/4, + complete_resize_transfers/3, + reschedule_resize_transfers/3, + is_resizing/1, + is_post_resize/1, + is_resize_complete/1, + resized_ring/1, + set_resized_ring/2, + future_index/3, + future_index/4, + future_index/5, + is_future_index/4, + future_owner/2, + future_num_partitions/1, + vnode_type/2, + deletion_complete/3]). %% upgrade/1, - %% downgrade/2, + %% downgrade/2, -export_type([riak_core_ring/0, - ring_size/0, - partition_id/0]). + ring_size/0, + partition_id/0]). -ifdef(TEST). @@ -145,44 +145,44 @@ -endif. -record(chstate, - {nodename :: - term(), % the Node responsible for this chstate - vclock :: - vclock:vclock() | - undefined, % for this chstate object, entries are - % {Node, Ctr} - chring :: - chash:chash() | - undefined, % chash ring of {IndexAsInt, Node} mappings - meta :: dict:dict() | undefined, - % dict of cluster-wide other data (primarily - % bucket N-value, etc) - clustername :: {term(), term()} | undefined, - next :: - [{integer(), term(), term(), [module()], - awaiting | complete}], - members :: - [{node(), - {member_status(), vclock:vclock(), - [{atom(), term()}]}}] | - undefined, - claimant :: term(), - seen :: [{term(), vclock:vclock()}] | undefined, - rvsn :: vclock:vclock() | undefined}). + {nodename :: + term(), % the Node responsible for this chstate + vclock :: + vclock:vclock() | + undefined, % for this chstate object, entries are + % {Node, Ctr} + chring :: + chash:chash() | + undefined, % chash ring of {IndexAsInt, Node} mappings + meta :: dict:dict() | undefined, + % dict of cluster-wide other data (primarily + % bucket N-value, etc) + clustername :: {term(), term()} | undefined, + next :: + [{integer(), term(), term(), [module()], + awaiting | complete}], + members :: + [{node(), + {member_status(), vclock:vclock(), + [{atom(), term()}]}}] | + undefined, + claimant :: term(), + seen :: [{term(), vclock:vclock()}] | undefined, + rvsn :: vclock:vclock() | undefined}). -type member_status() :: joining | - valid | - invalid | - leaving | - exiting | - down. + valid | + invalid | + leaving | + exiting | + down. %% type meta_entry(). Record for each entry in #chstate.meta -record(meta_entry, - {value, % The value stored under this entry - lastmod}). 
% The last modified time of this entry,
-                % from calendar:datetime_to_gregorian_seconds(
-                % calendar:universal_time()),
+        {value,      % The value stored under this entry
+        lastmod}).   % The last modified time of this entry,
+                     % from calendar:datetime_to_gregorian_seconds(
+                     % calendar:universal_time()),
 
 %% @type riak_core_ring(). Opaque data type used for partition ownership
 -type riak_core_ring() :: #chstate{}.
@@ -190,11 +190,11 @@
 -type chstate() :: riak_core_ring().
 
 -type pending_change() :: {Owner :: node(),
-                           NextOwner :: node(), awaiting | complete} |
-                          {undefined, undefined, undefined}.
+        NextOwner :: node(), awaiting | complete} |
+        {undefined, undefined, undefined}.
 
 -type resize_transfer() :: {{integer(), term()},
-                            ordsets:ordset(node()), awaiting | complete}.
+        ordsets:ordset(node()), awaiting | complete}.
 
 -type ring_size() :: non_neg_integer().
 
@@ -210,16 +210,16 @@ set_tainted(Ring) ->
 check_tainted(Ring = #chstate{}, Msg) ->
     Exit = application:get_env(riak_core,
-                               exit_when_tainted,
-                               false),
+        exit_when_tainted,
+        false),
     case {get_meta(riak_core_ring_tainted, Ring), Exit} of
-      {{ok, true}, true} ->
-          riak_core:stop(Msg),
-          ok;
-      {{ok, true}, false} ->
-          logger:error(Msg),
-          ok;
-      _ -> ok
+        {{ok, true}, true} ->
+            riak_core:stop(Msg),
+            ok;
+        {{ok, true}, false} ->
+            logger:error(Msg),
+            ok;
+        _ -> ok
     end.
 
 %% @doc Verify that the two rings are identical except that metadata can
@@ -230,18 +230,18 @@ check_tainted(Ring = #chstate{}, Msg) ->
 nearly_equal(RingA, RingB) ->
     TestVC = vclock:descends(RingB#chstate.vclock,
-                             RingA#chstate.vclock),
+        RingA#chstate.vclock),
     RingA2 = RingA#chstate{vclock = undefined,
-                           meta = undefined},
+        meta = undefined},
     RingB2 = RingB#chstate{vclock = undefined,
-                           meta = undefined},
+        meta = undefined},
     TestRing = RingA2 =:= RingB2,
     TestVC and TestRing.
 
 %% @doc Determine if a given Index/Node `IdxNode' combination is a
 %% primary.
 -spec is_primary(chstate(),
-                 {chash:index_as_int(), node()}) -> boolean().
+        {chash:index_as_int(), node()}) -> boolean().
 
 is_primary(Ring, IdxNode) ->
     Owners = all_owners(Ring),
@@ -257,7 +257,7 @@ set_chash(State, CHash) ->
 %% @doc Produce a list of all nodes that are members of the cluster
 -spec all_members(State :: chstate()) -> [Node ::
-                                          term()].
+        term()].
 
 all_members(#chstate{members = Members}) ->
     get_members(Members).
@@ -268,7 +268,7 @@ members(#chstate{members = Members}, Types) ->
 %% @doc Produce a list of all active (not marked as down) cluster members
 active_members(#chstate{members = Members}) ->
     get_members(Members,
-                [joining, valid, leaving, exiting]).
+        [joining, valid, leaving, exiting]).
 
 %% @doc Returns a list of members guaranteed safe for requests
 ready_members(#chstate{members = Members}) ->
@@ -276,21 +276,21 @@ ready_members(#chstate{members = Members}) ->
 %% @doc Provide all ownership information in the form of {Index,Node} pairs.
 -spec all_owners(State :: chstate()) -> [{Index ::
-                                          integer(),
-                                          Node :: term()}].
+        integer(),
+        Node :: term()}].
 
 all_owners(State) ->
     chash:nodes(State#chstate.chring).
 
 %% @doc Provide every preflist in the ring, truncated at N.
 -spec all_preflists(State :: chstate(),
-                    N :: integer()) -> [[{Index :: integer(),
-                                          Node :: term()}]].
+        N :: integer()) -> [[{Index :: integer(),
+        Node :: term()}]].
 
 all_preflists(State, N) ->
     [lists:sublist(preflist(Key, State), N)
     || Key
-           <- [<<(I + 1):160/integer>>
-               || {I, _Owner} <- (?MODULE):all_owners(State)]].
+        <- [<<(I + 1):160/integer>>
+        || {I, _Owner} <- (?MODULE):all_owners(State)]].
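all_preflists/2 above synthesizes one key just past each partition boundary (<<(I + 1):160/integer>>) and truncates each preflist at N. For a single key the entry point is preflist/2; a sketch on a fresh ring, not part of the patch (the bucket/key pair is hypothetical, and chash:key_of/1 is assumed as the 160-bit key constructor):

    %% Sketch only: the first three {Index, Node} pairs that would
    %% hold the object on a fresh 64-partition ring.
    preflist_demo() ->
        Ring = riak_core_ring:fresh(64, node()),
        Key = chash:key_of({<<"bucket">>, <<"key">>}),
        lists:sublist(riak_core_ring:preflist(Key, Ring), 3).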
%% @doc For two rings, return the list of owners that have differing ownership. -spec diff_nodes(chstate(), chstate()) -> [node()]. @@ -298,18 +298,18 @@ all_preflists(State, N) -> diff_nodes(State1, State2) -> AO = lists:zip(all_owners(State1), all_owners(State2)), AllDiff = [[N1, N2] - || {{I, N1}, {I, N2}} <- AO, N1 =/= N2], + || {{I, N1}, {I, N2}} <- AO, N1 =/= N2], lists:usort(lists:flatten(AllDiff)). -spec equal_rings(chstate(), chstate()) -> boolean(). equal_rings(_A = #chstate{chring = RA, meta = MA}, - _B = #chstate{chring = RB, meta = MB}) -> + _B = #chstate{chring = RB, meta = MB}) -> MDA = lists:sort(dict:to_list(MA)), MDB = lists:sort(dict:to_list(MB)), case MDA =:= MDB of - false -> false; - true -> RA =:= RB + false -> false; + true -> RA =:= RB end. %% @doc This is used only when this node is creating a brand new cluster. @@ -325,24 +325,24 @@ fresh() -> fresh(NodeName) -> fresh(application:get_env(riak_core, - ring_creation_size, - undefined), - NodeName). + ring_creation_size, + undefined), + NodeName). %% @doc Equivalent to fresh/1 but allows specification of the ring size. %% Called by fresh/1, and otherwise only intended for testing purposes. -spec fresh(ring_size(), - NodeName :: term()) -> chstate(). + NodeName :: term()) -> chstate(). fresh(RingSize, NodeName) -> VClock = vclock:increment(NodeName, vclock:fresh()), #chstate{nodename = NodeName, - clustername = {NodeName, erlang:timestamp()}, - members = - [{NodeName, {valid, VClock, [{gossip_vsn, 2}]}}], - chring = chash:fresh(RingSize, NodeName), next = [], - claimant = NodeName, seen = [{NodeName, VClock}], - rvsn = VClock, vclock = VClock, meta = dict:new()}. + clustername = {NodeName, erlang:timestamp()}, + members = + [{NodeName, {valid, VClock, [{gossip_vsn, 2}]}}], + chring = chash:fresh(RingSize, NodeName), next = [], + claimant = NodeName, seen = [{NodeName, VClock}], + rvsn = VClock, vclock = VClock, meta = dict:new()}. %% @doc change the size of the ring to `NewRingSize'. If the ring %% is larger than the current ring any new indexes will be owned @@ -351,32 +351,32 @@ fresh(RingSize, NodeName) -> resize(State, NewRingSize) -> NewRing = lists:foldl(fun ({Idx, Owner}, RingAcc) -> - chash:update(Idx, Owner, RingAcc) - end, - chash:fresh(NewRingSize, '$dummyhost@resized'), - all_owners(State)), + chash:update(Idx, Owner, RingAcc) + end, + chash:fresh(NewRingSize, '$dummyhost@resized'), + all_owners(State)), set_chash(State, NewRing). % @doc Return a value from the cluster metadata dict -spec get_meta(Key :: term(), - State :: chstate()) -> {ok, term()} | undefined. + State :: chstate()) -> {ok, term()} | undefined. get_meta(Key, State) -> case dict:find(Key, State#chstate.meta) of - error -> undefined; - {ok, '$removed'} -> undefined; - {ok, M} when M#meta_entry.value =:= '$removed' -> - undefined; - {ok, M} -> {ok, M#meta_entry.value} + error -> undefined; + {ok, '$removed'} -> undefined; + {ok, M} when M#meta_entry.value =:= '$removed' -> + undefined; + {ok, M} -> {ok, M#meta_entry.value} end. -spec get_meta(term(), term(), chstate()) -> {ok, - term()}. + term()}. get_meta(Key, Default, State) -> case get_meta(Key, State) of - undefined -> {ok, Default}; - Res -> Res + undefined -> {ok, Default}; + Res -> Res end. %% @doc return the names of all the custom buckets stored in the ring. @@ -385,15 +385,15 @@ get_meta(Key, Default, State) -> get_buckets(State) -> Keys = dict:fetch_keys(State#chstate.meta), lists:foldl(fun ({bucket, Bucket}, Acc) -> - [Bucket | Acc]; - (_, Acc) -> Acc - end, - [], - Keys). 
+ [Bucket | Acc]; + (_, Acc) -> Acc + end, + [], + Keys). %% @doc Return the node that owns the given index. -spec index_owner(State :: chstate(), - Idx :: chash:index_as_int()) -> Node :: term(). + Idx :: chash:index_as_int()) -> Node :: term(). index_owner(State, Idx) -> {Idx, Owner} = lists:keyfind(Idx, 1, all_owners(State)), @@ -403,23 +403,23 @@ index_owner(State, Idx) -> %% this function will error if the ring is shrinking and Idx no longer exists %% in it -spec future_owner(chstate(), - chash:index_as_int()) -> term(). + chash:index_as_int()) -> term(). future_owner(State, Idx) -> index_owner(future_ring(State), Idx). %% @doc Return all partition indices owned by the node executing this function. -spec my_indices(State :: - chstate()) -> [chash:index_as_int()]. + chstate()) -> [chash:index_as_int()]. my_indices(State) -> [I || {I, Owner} <- (?MODULE):all_owners(State), - Owner =:= node()]. + Owner =:= node()]. %% @doc Return the number of partitions in this Riak ring. -spec num_partitions(State :: - chstate()) -> pos_integer(). + chstate()) -> pos_integer(). num_partitions(State) -> chash:size(State#chstate.chring). @@ -427,10 +427,10 @@ num_partitions(State) -> -spec future_num_partitions(chstate()) -> pos_integer(). future_num_partitions(State = #chstate{chring = - CHRing}) -> + CHRing}) -> case resized_ring(State) of - {ok, C} -> chash:size(C); - undefined -> chash:size(CHRing) + {ok, C} -> chash:size(C); + undefined -> chash:size(CHRing) end. %% @doc Return the node that is responsible for a given chstate. @@ -441,8 +441,8 @@ owner_node(State) -> State#chstate.nodename. %% @doc For a given object key, produce the ordered list of %% {partition,node} pairs that could be responsible for that object. -spec preflist(Key :: binary(), - State :: chstate()) -> [{Index :: chash:index_as_int(), - Node :: term()}]. + State :: chstate()) -> [{Index :: chash:index_as_int(), + Node :: term()}]. preflist(Key, State) -> chash:successors(Key, State#chstate.chring). @@ -457,114 +457,114 @@ random_node(State) -> %% @doc Return a partition index not owned by the node executing this function. %% If this node owns all partitions, return any index. -spec random_other_index(State :: - chstate()) -> chash:index_as_int(). + chstate()) -> chash:index_as_int(). random_other_index(State) -> L = [I - || {I, Owner} <- (?MODULE):all_owners(State), - Owner =/= node()], + || {I, Owner} <- (?MODULE):all_owners(State), + Owner =/= node()], case L of - [] -> hd(my_indices(State)); - _ -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> hd(my_indices(State)); + _ -> lists:nth(riak_core_rand:uniform(length(L)), L) end. -spec random_other_index(State :: chstate(), - Exclude :: [term()]) -> chash:index_as_int() | - no_indices. + Exclude :: [term()]) -> chash:index_as_int() | + no_indices. random_other_index(State, Exclude) when is_list(Exclude) -> L = [I - || {I, Owner} <- (?MODULE):all_owners(State), - Owner =/= node(), not lists:member(I, Exclude)], + || {I, Owner} <- (?MODULE):all_owners(State), + Owner =/= node(), not lists:member(I, Exclude)], case L of - [] -> no_indices; - _ -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> no_indices; + _ -> lists:nth(riak_core_rand:uniform(length(L)), L) end. %% @doc Return a randomly-chosen node from amongst the owners other than this one. -spec random_other_node(State :: chstate()) -> Node :: - term() | no_node. + term() | no_node. 
random_other_node(State) ->
     case lists:delete(node(), all_members(State)) of
-      [] -> no_node;
-      L -> lists:nth(riak_core_rand:uniform(length(L)), L)
+        [] -> no_node;
+        L -> lists:nth(riak_core_rand:uniform(length(L)), L)
     end.
 
 %% @doc Return a randomly-chosen active node other than this one.
 -spec random_other_active_node(State ::
-                                   chstate()) -> Node :: term() | no_node.
+        chstate()) -> Node :: term() | no_node.
 
 random_other_active_node(State) ->
     case lists:delete(node(), active_members(State)) of
-      [] -> no_node;
-      L -> lists:nth(riak_core_rand:uniform(length(L)), L)
+        [] -> no_node;
+        L -> lists:nth(riak_core_rand:uniform(length(L)), L)
     end.
 
 %% @doc Incorporate another node's state into our view of the Riak world.
 -spec reconcile(ExternState :: chstate(),
-                MyState :: chstate()) -> {no_change | new_ring,
-                                          chstate()}.
+        MyState :: chstate()) -> {no_change | new_ring,
+        chstate()}.
 
 reconcile(ExternState, MyState) ->
     check_tainted(ExternState,
-                  "Error: riak_core_ring/reconcile :: reconcilin"
-                  "g tainted external ring"),
+        "Error: riak_core_ring/reconcile :: reconcilin"
+        "g tainted external ring"),
     check_tainted(MyState,
-                  "Error: riak_core_ring/reconcile :: reconcilin"
-                  "g tainted internal ring"),
+        "Error: riak_core_ring/reconcile :: reconcilin"
+        "g tainted internal ring"),
     case internal_reconcile(MyState, ExternState) of
-      {false, State} -> {no_change, State};
-      {true, State} -> {new_ring, State}
+        {false, State} -> {no_change, State};
+        {true, State} -> {new_ring, State}
     end.
 
 %% @doc Rename OldNode to NewNode in a Riak ring.
 -spec rename_node(State :: chstate(), OldNode :: atom(),
-                  NewNode :: atom()) -> chstate().
+        NewNode :: atom()) -> chstate().
 
 rename_node(State = #chstate{chring = Ring,
-                             nodename = ThisNode, members = Members,
-                             claimant = Claimant, seen = Seen},
-            OldNode, NewNode)
+        nodename = ThisNode, members = Members,
+        claimant = Claimant, seen = Seen},
+        OldNode, NewNode)
     when is_atom(OldNode), is_atom(NewNode) ->
     State#chstate{chring =
-                      lists:foldl(fun ({Idx, Owner}, AccIn) ->
-                                          case Owner of
-                                            OldNode ->
-                                                chash:update(Idx,
-                                                             NewNode,
-                                                             AccIn);
-                                            _ -> AccIn
-                                          end
-                                  end,
-                                  Ring,
-                                  riak_core_ring:all_owners(State)),
-                  members =
-                      orddict:from_list(proplists:substitute_aliases([{OldNode,
-                                                                       NewNode}],
-                                                                     Members)),
-                  seen =
-                      orddict:from_list(proplists:substitute_aliases([{OldNode,
-                                                                       NewNode}],
-                                                                     Seen)),
-                  nodename =
-                      case ThisNode of
-                        OldNode -> NewNode;
-                        _ -> ThisNode
-                      end,
-                  claimant =
-                      case Claimant of
-                        OldNode -> NewNode;
-                        _ -> Claimant
-                      end,
-                  vclock =
-                      vclock:increment(NewNode, State#chstate.vclock)}.
+        lists:foldl(fun ({Idx, Owner}, AccIn) ->
+                case Owner of
+                    OldNode ->
+                        chash:update(Idx,
+                            NewNode,
+                            AccIn);
+                    _ -> AccIn
+                end
+            end,
+            Ring,
+            riak_core_ring:all_owners(State)),
+        members =
+            orddict:from_list(proplists:substitute_aliases([{OldNode,
+                NewNode}],
+                Members)),
+        seen =
+            orddict:from_list(proplists:substitute_aliases([{OldNode,
+                NewNode}],
+                Seen)),
+        nodename =
+            case ThisNode of
+                OldNode -> NewNode;
+                _ -> ThisNode
+            end,
+        claimant =
+            case Claimant of
+                OldNode -> NewNode;
+                _ -> Claimant
+            end,
+        vclock =
+            vclock:increment(NewNode, State#chstate.vclock)}.
 
 %% @doc Determine the integer ring index responsible
 %% for a chash key.
 -spec responsible_index(binary(),
-                        chstate()) -> integer().
+        chstate()) -> integer().
 
 responsible_index(ChashKey, #chstate{chring = Ring}) ->
     <<IndexAsInt:160/integer>> = ChashKey,
@@ -579,26 +579,26 @@ responsible_index(ChashKey, #chstate{chring = Ring}) ->
 %% the returned index will always be `OrigIdx'.
If the ring is
 %% resizing the index may be different
 -spec future_index(chash:index(), integer(),
-                   chstate()) -> integer() | undefined.
+        chstate()) -> integer() | undefined.
 
 future_index(CHashKey, OrigIdx, State) ->
     future_index(CHashKey, OrigIdx, undefined, State).
 
 -spec future_index(chash:index(), integer(),
-                   undefined | integer(), chstate()) -> integer() |
-                                                        undefined.
+        undefined | integer(), chstate()) -> integer() |
+        undefined.
 
 future_index(CHashKey, OrigIdx, NValCheck, State) ->
     OrigCount = num_partitions(State),
     NextCount = future_num_partitions(State),
     future_index(CHashKey,
-                 OrigIdx,
-                 NValCheck,
-                 OrigCount,
-                 NextCount).
+        OrigIdx,
+        NValCheck,
+        OrigCount,
+        NextCount).
 
 future_index(CHashKey, OrigIdx, NValCheck, OrigCount,
-             NextCount) ->
+        NextCount) ->
     <<CHashInt:160/integer>> = CHashKey,
     OrigInc = chash:ring_increment(OrigCount),
     NextInc = chash:ring_increment(NextCount),
@@ -609,43 +609,43 @@ future_index(CHashKey, OrigIdx, NValCheck, OrigCount,
     %% Determine position of the source partition in the ring
     %% if OrigIdx is 0 we know the position is OrigCount (number of partitions)
     OrigPos = case OrigIdx of
-                0 -> OrigCount;
-                _ -> OrigIdx div OrigInc
-              end,
+        0 -> OrigCount;
+        _ -> OrigIdx div OrigInc
+        end,
     %% The distance between the key's owner (head of preflist) and the source partition
     %% is the position of the source in the preflist, the distance may be negative
     %% in which case we have wrapped around the ring. distance of zero means the source
     %% is the head of the preflist.
     OrigDist = case OrigPos - OwnerPos of
-                 P when P < 0 -> OrigCount + P;
-                 P -> P
-               end,
+        P when P < 0 -> OrigCount + P;
+        P -> P
+        end,
     %% In the case that the ring is shrinking the future index for a key whose position
     %% in the preflist is >= ring size may be calculated, any transfer is invalid in
     %% this case, return undefined. The position may also be >= an optional N value for
     %% the key, if this is true undefined is also returned
     case check_invalid_future_index(OrigDist,
-                                    NextCount,
-                                    NValCheck)
-        of
-      true -> undefined;
-      false ->
-          %% Determine the partition (head of preflist) that will own the key in the future ring
-          FuturePos = CHashInt div NextInc + 1,
-          NextOwner = FuturePos * NextInc,
-          %% Determine the partition that the key should be transferred to (has same position
-          %% in future preflist as source partition does in current preflist)
-          RingTop = trunc(math:pow(2, 160) - 1),
-          (NextOwner + NextInc * OrigDist) rem RingTop
+            NextCount,
+            NValCheck)
+        of
+        true -> undefined;
+        false ->
+            %% Determine the partition (head of preflist) that will own the key in the future ring
+            FuturePos = CHashInt div NextInc + 1,
+            NextOwner = FuturePos * NextInc,
+            %% Determine the partition that the key should be transferred to (has same position
+            %% in future preflist as source partition does in current preflist)
+            RingTop = trunc(math:pow(2, 160) - 1),
+            (NextOwner + NextInc * OrigDist) rem RingTop
     end.
 
 check_invalid_future_index(OrigDist, NextCount,
-                           NValCheck) ->
+        NValCheck) ->
     OverRingSize = OrigDist >= NextCount,
     OverNVal = case NValCheck of
-                 undefined -> false;
-                 _ -> OrigDist >= NValCheck
-               end,
+        undefined -> false;
+        _ -> OrigDist >= NValCheck
+        end,
     OverRingSize orelse OverNVal.
 
 %% Takes the hashed value for a key and any partition, `OrigIdx',
@@ -653,58 +653,58 @@ check_invalid_future_index(OrigDist, NextCount,
 %% is in the same position in the future preflist for that key.
 %% @see future_index/4
 -spec is_future_index(chash:index(), integer(),
-                      integer(), chstate()) -> boolean().
+ integer(), chstate()) -> boolean(). is_future_index(CHashKey, OrigIdx, TargetIdx, State) -> FutureIndex = future_index(CHashKey, - OrigIdx, - undefined, - State), + OrigIdx, + undefined, + State), FutureIndex =:= TargetIdx. -spec transfer_node(Idx :: integer(), Node :: term(), - MyState :: chstate()) -> chstate(). + MyState :: chstate()) -> chstate(). transfer_node(Idx, Node, MyState) -> case chash:lookup(Idx, MyState#chstate.chring) of - Node -> MyState; - _ -> - Me = MyState#chstate.nodename, - VClock = vclock:increment(Me, MyState#chstate.vclock), - CHRing = chash:update(Idx, - Node, - MyState#chstate.chring), - MyState#chstate{vclock = VClock, chring = CHRing} + Node -> MyState; + _ -> + Me = MyState#chstate.nodename, + VClock = vclock:increment(Me, MyState#chstate.vclock), + CHRing = chash:update(Idx, + Node, + MyState#chstate.chring), + MyState#chstate{vclock = VClock, chring = CHRing} end. % @doc Set a key in the cluster metadata dict -spec update_meta(Key :: term(), Val :: term(), - State :: chstate()) -> chstate(). + State :: chstate()) -> chstate(). update_meta(Key, Val, State) -> Change = case dict:find(Key, State#chstate.meta) of - {ok, OldM} -> Val /= OldM#meta_entry.value; - error -> true - end, + {ok, OldM} -> Val /= OldM#meta_entry.value; + error -> true + end, if Change -> - M = #meta_entry{lastmod = - calendar:datetime_to_gregorian_seconds(calendar:universal_time()), - value = Val}, - VClock = vclock:increment(State#chstate.nodename, - State#chstate.vclock), - State#chstate{vclock = VClock, - meta = dict:store(Key, M, State#chstate.meta)}; + M = #meta_entry{lastmod = + calendar:datetime_to_gregorian_seconds(calendar:universal_time()), + value = Val}, + VClock = vclock:increment(State#chstate.nodename, + State#chstate.vclock), + State#chstate{vclock = VClock, + meta = dict:store(Key, M, State#chstate.meta)}; true -> State end. %% @doc Logical delete of a key in the cluster metadata dict -spec remove_meta(Key :: term(), - State :: chstate()) -> chstate(). + State :: chstate()) -> chstate(). remove_meta(Key, State) -> case dict:find(Key, State#chstate.meta) of - {ok, _} -> update_meta(Key, '$removed', State); - error -> State + {ok, _} -> update_meta(Key, '$removed', State); + error -> State end. %% @doc Return the current claimant. @@ -725,12 +725,12 @@ set_cluster_name(State, Name) -> State#chstate{clustername = Name}. reconcile_names(RingA = #chstate{clustername = NameA}, - RingB = #chstate{clustername = NameB}) -> + RingB = #chstate{clustername = NameB}) -> case (NameA =:= undefined) or (NameB =:= undefined) of - true -> - {RingA#chstate{clustername = undefined}, - RingB#chstate{clustername = undefined}}; - false -> {RingA, RingB} + true -> + {RingA#chstate{clustername = undefined}, + RingB#chstate{clustername = undefined}}; + false -> {RingA, RingB} end. increment_vclock(Node, State) -> @@ -745,75 +745,75 @@ increment_ring_version(Node, State) -> %% @doc Returns the current membership status for a node in the cluster. -spec member_status(chstate() | [node()], - Node :: node()) -> member_status(). + Node :: node()) -> member_status(). member_status(#chstate{members = Members}, Node) -> member_status(Members, Node); member_status(Members, Node) -> case orddict:find(Node, Members) of - {ok, {Status, _, _}} -> Status; - _ -> invalid + {ok, {Status, _, _}} -> Status; + _ -> invalid end. %% @doc Returns the current membership status for all nodes in the cluster. -spec all_member_status(State :: chstate()) -> [{node(), - member_status()}]. + member_status()}]. 
all_member_status(#chstate{members = Members}) -> [{Node, Status} || {Node, {Status, _VC, _}} <- Members, - Status /= invalid]. + Status /= invalid]. get_member_meta(State, Member, Key) -> case orddict:find(Member, State#chstate.members) of - error -> undefined; - {ok, {_, _, Meta}} -> - case orddict:find(Key, Meta) of - error -> undefined; - {ok, Value} -> Value - end + error -> undefined; + {ok, {_, _, Meta}} -> + case orddict:find(Key, Meta) of + error -> undefined; + {ok, Value} -> Value + end end. %% @doc Set a key in the member metadata orddict update_member_meta(Node, State, Member, Key, Val) -> VClock = vclock:increment(Node, State#chstate.vclock), State2 = update_member_meta(Node, - State, - Member, - Key, - Val, - same_vclock), + State, + Member, + Key, + Val, + same_vclock), State2#chstate{vclock = VClock}. update_member_meta(Node, State, Member, Key, Val, - same_vclock) -> + same_vclock) -> Members = State#chstate.members, case orddict:is_key(Member, Members) of - true -> - Members2 = orddict:update(Member, - fun ({Status, VC, MD}) -> - {Status, - vclock:increment(Node, VC), - orddict:store(Key, Val, MD)} - end, - Members), - State#chstate{members = Members2}; - false -> State + true -> + Members2 = orddict:update(Member, + fun ({Status, VC, MD}) -> + {Status, + vclock:increment(Node, VC), + orddict:store(Key, Val, MD)} + end, + Members), + State#chstate{members = Members2}; + false -> State end. clear_member_meta(Node, State, Member) -> Members = State#chstate.members, case orddict:is_key(Member, Members) of - true -> - Members2 = orddict:update(Member, - fun ({Status, VC, _MD}) -> - {Status, - vclock:increment(Node, VC), - orddict:new()} - end, - Members), - State#chstate{members = Members2}; - false -> State + true -> + Members2 = orddict:update(Member, + fun ({Status, VC, _MD}) -> + {Status, + vclock:increment(Node, VC), + orddict:new()} + end, + Members), + State#chstate{members = Members2}; + false -> State end. add_member(PNode, State, Node) -> @@ -835,48 +835,48 @@ down_member(PNode, State, Node) -> set_member(Node, CState, Member, Status) -> VClock = vclock:increment(Node, CState#chstate.vclock), CState2 = set_member(Node, - CState, - Member, - Status, - same_vclock), + CState, + Member, + Status, + same_vclock), CState2#chstate{vclock = VClock}. set_member(Node, CState, Member, Status, same_vclock) -> Members2 = orddict:update(Member, - fun ({_, VC, MD}) -> - {Status, vclock:increment(Node, VC), MD} - end, - {Status, - vclock:increment(Node, vclock:fresh()), - []}, - CState#chstate.members), + fun ({_, VC, MD}) -> + {Status, vclock:increment(Node, VC), MD} + end, + {Status, + vclock:increment(Node, vclock:fresh()), + []}, + CState#chstate.members), CState#chstate{members = Members2}. %% @doc Return a list of all members of the cluster that are eligible to %% claim partitions. -spec claiming_members(State :: chstate()) -> [Node :: - node()]. + node()]. claiming_members(#chstate{members = Members}) -> get_members(Members, [joining, valid, down]). %% @doc Return a list of all members of the cluster that are marked as down. -spec down_members(State :: chstate()) -> [Node :: - node()]. + node()]. down_members(#chstate{members = Members}) -> get_members(Members, [down]). %% @doc Set the node that is responsible for a given chstate. -spec set_owner(State :: chstate(), - Node :: node()) -> chstate(). + Node :: node()) -> chstate(). set_owner(State, Node) -> State#chstate{nodename = Node}. %% @doc Return all partition indices owned by a node. 
-spec indices(State :: chstate(), - Node :: node()) -> [integer()]. + Node :: node()) -> [integer()]. indices(State, Node) -> AllOwners = all_owners(State), @@ -885,13 +885,13 @@ indices(State, Node) -> %% @doc Return all partition indices that will be owned by a node after all %% pending ownership transfers have completed. -spec future_indices(State :: chstate(), - Node :: node()) -> [integer()]. + Node :: node()) -> [integer()]. future_indices(State, Node) -> indices(future_ring(State), Node). -spec all_next_owners(chstate()) -> [{integer(), - term()}]. + term()}]. all_next_owners(CState) -> Next = riak_core_ring:pending_changes(CState), @@ -900,38 +900,38 @@ all_next_owners(CState) -> %% @private change_owners(CState, Reassign) -> lists:foldl(fun ({Idx, NewOwner}, CState0) -> - %% if called for indexes not in the current ring (during resizing) - %% ignore the error - try riak_core_ring:transfer_node(Idx, NewOwner, CState0) - catch - error:{badmatch, _} -> CState0 - end - end, - CState, - Reassign). + %% if called for indexes not in the current ring (during resizing) + %% ignore the error + try riak_core_ring:transfer_node(Idx, NewOwner, CState0) + catch + error:{badmatch, _} -> CState0 + end + end, + CState, + Reassign). %% @doc Return all indices that a node is scheduled to give to another. disowning_indices(State, Node) -> case is_resizing(State) of - false -> - [Idx - || {Idx, Owner, _NextOwner, _Mods, _Status} - <- State#chstate.next, - Owner =:= Node]; - true -> - [Idx - || {Idx, Owner} <- all_owners(State), Owner =:= Node, - disowned_during_resize(State, Idx, Owner)] + false -> + [Idx + || {Idx, Owner, _NextOwner, _Mods, _Status} + <- State#chstate.next, + Owner =:= Node]; + true -> + [Idx + || {Idx, Owner} <- all_owners(State), Owner =:= Node, + disowned_during_resize(State, Idx, Owner)] end. disowned_during_resize(CState, Idx, Owner) -> %% catch error when index doesn't exist, we are disowning it if its going away NextOwner = try future_owner(CState, Idx) catch - _:_ -> undefined - end, + _:_ -> undefined + end, case NextOwner of - Owner -> false; - _ -> true + Owner -> false; + _ -> true end. %% @doc Returns a list of all pending ownership transfers. @@ -945,12 +945,12 @@ set_pending_changes(State, Transfers) -> %% @doc Given a ring, `Resizing', that has been resized (and presumably rebalanced) %% schedule a resize transition for `Orig'. -spec set_pending_resize(chstate(), - chstate()) -> chstate(). + chstate()) -> chstate(). set_pending_resize(Resizing, Orig) -> %% all existing indexes must transfer data when the ring is being resized Next = [{Idx, Owner, '$resize', [], awaiting} - || {Idx, Owner} <- riak_core_ring:all_owners(Orig)], + || {Idx, Owner} <- riak_core_ring:all_owners(Orig)], %% Whether or not the ring is shrinking or expanding, some %% ownership may be shared between the old and new ring. To prevent %% degenerate cases where partitions whose ownership does not @@ -959,42 +959,42 @@ set_pending_resize(Resizing, Orig) -> %% of the next list which is treated as ordered. 
FutureOwners = riak_core_ring:all_owners(Resizing), SortedNext = lists:sort(fun ({Idx, Owner, _, _, _}, - _) -> - %% we only need to check one element because the end result - %% is the same as if we checked both: - %% - %% true, false -> true - %% true, true -> true - %% false, false -> false - %% false, true -> false - lists:member({Idx, Owner}, FutureOwners) - end, - Next), + _) -> + %% we only need to check one element because the end result + %% is the same as if we checked both: + %% + %% true, false -> true + %% true, true -> true + %% false, false -> false + %% false, true -> false + lists:member({Idx, Owner}, FutureOwners) + end, + Next), %% Resizing is assumed to have a modified chring, we need to put back %% the original chring to not install the resized one pre-emptively. The %% resized ring is stored in ring metadata for later use FutureCHash = chash(Resizing), ResetRing = set_chash(Resizing, chash(Orig)), set_resized_ring(set_pending_changes(ResetRing, - SortedNext), - FutureCHash). + SortedNext), + FutureCHash). -spec maybe_abort_resize(chstate()) -> {boolean(), - chstate()}. + chstate()}. maybe_abort_resize(State) -> Resizing = is_resizing(State), PostResize = is_post_resize(State), PendingAbort = is_resize_aborted(State), case PendingAbort andalso - Resizing andalso not PostResize - of - true -> - State1 = State#chstate{next = []}, - State2 = clear_all_resize_transfers(State1), - State3 = remove_meta('$resized_ring_abort', State2), - {true, remove_meta('$resized_ring', State3)}; - false -> {false, State} + Resizing andalso not PostResize + of + true -> + State1 = State#chstate{next = []}, + State2 = clear_all_resize_transfers(State1), + State3 = remove_meta('$resized_ring_abort', State2), + {true, remove_meta('$resized_ring', State3)}; + false -> {false, State} end. -spec set_pending_resize_abort(chstate()) -> chstate(). @@ -1003,133 +1003,133 @@ set_pending_resize_abort(State) -> update_meta('$resized_ring_abort', true, State). -spec schedule_resize_transfer(chstate(), - {integer(), term()}, - integer() | {integer(), term()}) -> chstate(). + {integer(), term()}, + integer() | {integer(), term()}) -> chstate(). schedule_resize_transfer(State, Source, TargetIdx) when is_integer(TargetIdx) -> TargetNode = index_owner(future_ring(State), TargetIdx), schedule_resize_transfer(State, - Source, - {TargetIdx, TargetNode}); + Source, + {TargetIdx, TargetNode}); schedule_resize_transfer(State, Source, Source) -> State; schedule_resize_transfer(State, Source, Target) -> Transfers = resize_transfers(State, Source), %% ignore if we have already scheduled a transfer from source -> target case lists:keymember(Target, 1, Transfers) of - true -> State; - false -> - Transfers1 = lists:keystore(Target, - 1, - Transfers, - {Target, ordsets:new(), awaiting}), - set_resize_transfers(State, Source, Transfers1) + true -> State; + false -> + Transfers1 = lists:keystore(Target, + 1, + Transfers, + {Target, ordsets:new(), awaiting}), + set_resize_transfers(State, Source, Transfers1) end. %% @doc reassign all outbound and inbound resize transfers from `Node' to `NewNode' -spec reschedule_resize_transfers(chstate(), term(), - term()) -> chstate(). + term()) -> chstate(). 
reschedule_resize_transfers(State = #chstate{next = - Next}, - Node, NewNode) -> + Next}, + Node, NewNode) -> {NewNext, NewState} = lists:mapfoldl(fun (Entry, - StateAcc) -> - reschedule_resize_operation(Node, - NewNode, - Entry, - StateAcc) - end, - State, - Next), + StateAcc) -> + reschedule_resize_operation(Node, + NewNode, + Entry, + StateAcc) + end, + State, + Next), NewState#chstate{next = NewNext}. reschedule_resize_operation(N, NewNode, - {Idx, N, '$resize', _Mods, _Status}, State) -> + {Idx, N, '$resize', _Mods, _Status}, State) -> NewEntry = {Idx, - NewNode, - '$resize', - ordsets:new(), - awaiting}, + NewNode, + '$resize', + ordsets:new(), + awaiting}, NewState = reschedule_outbound_resize_transfers(State, - Idx, - N, - NewNode), + Idx, + N, + NewNode), {NewEntry, NewState}; reschedule_resize_operation(Node, NewNode, - {Idx, OtherNode, '$resize', _Mods, _Status} = Entry, - State) -> + {Idx, OtherNode, '$resize', _Mods, _Status} = Entry, + State) -> {Changed, NewState} = - reschedule_inbound_resize_transfers({Idx, OtherNode}, - Node, - NewNode, - State), + reschedule_inbound_resize_transfers({Idx, OtherNode}, + Node, + NewNode, + State), case Changed of - true -> - NewEntry = {Idx, - OtherNode, - '$resize', - ordsets:new(), - awaiting}, - {NewEntry, NewState}; - false -> {Entry, State} + true -> + NewEntry = {Idx, + OtherNode, + '$resize', + ordsets:new(), + awaiting}, + {NewEntry, NewState}; + false -> {Entry, State} end. reschedule_inbound_resize_transfers(Source, Node, - NewNode, State) -> + NewNode, State) -> F = fun (Transfer, Acc) -> - {NewXfer, NewAcc} = - reschedule_inbound_resize_transfer(Transfer, - Node, - NewNode), - {NewXfer, NewAcc orelse Acc} - end, + {NewXfer, NewAcc} = + reschedule_inbound_resize_transfer(Transfer, + Node, + NewNode), + {NewXfer, NewAcc orelse Acc} + end, {ResizeTransfers, Changed} = lists:mapfoldl(F, - false, - resize_transfers(State, - Source)), + false, + resize_transfers(State, + Source)), {Changed, set_resize_transfers(State, Source, ResizeTransfers)}. reschedule_inbound_resize_transfer({{Idx, Target}, - _, - _}, - Target, NewNode) -> + _, + _}, + Target, NewNode) -> {{{Idx, NewNode}, ordsets:new(), awaiting}, true}; reschedule_inbound_resize_transfer(Transfer, _, _) -> {Transfer, false}. reschedule_outbound_resize_transfers(State, Idx, Node, - NewNode) -> + NewNode) -> OldSource = {Idx, Node}, NewSource = {Idx, NewNode}, Transfers = resize_transfers(State, OldSource), F = fun ({I, N}) when N =:= Node -> {I, NewNode}; - (T) -> T - end, + (T) -> T + end, NewTransfers = [{F(Target), ordsets:new(), awaiting} - || {Target, _, _} <- Transfers], + || {Target, _, _} <- Transfers], set_resize_transfers(clear_resize_transfers(OldSource, - State), - NewSource, - NewTransfers). + State), + NewSource, + NewTransfers). %% @doc returns the first awaiting resize_transfer for a {SourceIdx, SourceNode} %% pair. If all transfers for the pair are complete, undefined is returned -spec awaiting_resize_transfer(chstate(), - {integer(), term()}, atom()) -> {integer(), - term()} | - undefined. + {integer(), term()}, atom()) -> {integer(), + term()} | + undefined. 
awaiting_resize_transfer(State, Source, Mod) ->
     ResizeTransfers = resize_transfers(State, Source),
     Awaiting = [{Target, Mods, Status}
-                || {Target, Mods, Status} <- ResizeTransfers,
-                   Status =/= complete, not ordsets:is_element(Mod, Mods)],
+        || {Target, Mods, Status} <- ResizeTransfers,
+        Status =/= complete, not ordsets:is_element(Mod, Mods)],
     case Awaiting of
-      [] -> undefined;
-      [{Target, _, _} | _] -> Target
+        [] -> undefined;
+        [{Target, _, _} | _] -> Target
     end.
 
 %% @doc return the status of a resize_transfer for `Source' (an index-node pair). undefined
@@ -1137,24 +1137,24 @@ awaiting_resize_transfer(State, Source, Mod) ->
 %% is marked as such or `Mod' is contained in the completed modules set. awaiting is
 %% returned otherwise
 -spec resize_transfer_status(chstate(),
-                             {integer(), term()}, {integer(), term()},
-                             atom()) -> awaiting | complete | undefined.
+        {integer(), term()}, {integer(), term()},
+        atom()) -> awaiting | complete | undefined.
 
 resize_transfer_status(State, Source, Target, Mod) ->
     ResizeTransfers = resize_transfers(State, Source),
     IsComplete = case lists:keyfind(Target,
-                                    1,
-                                    ResizeTransfers)
-                     of
-                   false -> undefined;
-                   {Target, _, complete} -> true;
-                   {Target, Mods, awaiting} ->
-                       ordsets:is_element(Mod, Mods)
-                 end,
+            1,
+            ResizeTransfers)
+        of
+        false -> undefined;
+        {Target, _, complete} -> true;
+        {Target, Mods, awaiting} ->
+            ordsets:is_element(Mod, Mods)
+        end,
     case IsComplete of
-      true -> complete;
-      false -> awaiting;
-      undefined -> undefined
+        true -> complete;
+        false -> awaiting;
+        undefined -> undefined
     end.
 
 %% @doc mark a resize_transfer from `Source' to `Target' for `Mod' complete.
@@ -1163,127 +1163,127 @@ resize_transfer_status(State, Source, Target, Mod) ->
 %% for `Source' that need to be started to be scheduled before calling
 %% this function
 -spec resize_transfer_complete(chstate(),
-                               {integer(), term()}, {integer(), term()},
-                               atom()) -> chstate().
+        {integer(), term()}, {integer(), term()},
+        atom()) -> chstate().
resize_transfer_complete(State, {SrcIdx, _} = Source, - Target, Mod) -> + Target, Mod) -> ResizeTransfers = resize_transfers(State, Source), Transfer = lists:keyfind(Target, 1, ResizeTransfers), case Transfer of - {Target, Mods, Status} -> - VNodeMods = ordsets:from_list([VMod - || {_, VMod} - <- riak_core:vnode_modules()]), - Mods2 = ordsets:add_element(Mod, Mods), - Status2 = case {Status, Mods2} of - {complete, _} -> complete; - {awaiting, VNodeMods} -> complete; - _ -> awaiting - end, - ResizeTransfers2 = lists:keyreplace(Target, - 1, - ResizeTransfers, - {Target, Mods2, Status2}), - State1 = set_resize_transfers(State, - Source, - ResizeTransfers2), - AllComplete = lists:all(fun ({_, _, complete}) -> true; - ({_, Ms, awaiting}) -> - ordsets:is_element(Mod, Ms) - end, - ResizeTransfers2), - case AllComplete of - true -> transfer_complete(State1, SrcIdx, Mod); - false -> State1 - end; - _ -> State + {Target, Mods, Status} -> + VNodeMods = ordsets:from_list([VMod + || {_, VMod} + <- riak_core:vnode_modules()]), + Mods2 = ordsets:add_element(Mod, Mods), + Status2 = case {Status, Mods2} of + {complete, _} -> complete; + {awaiting, VNodeMods} -> complete; + _ -> awaiting + end, + ResizeTransfers2 = lists:keyreplace(Target, + 1, + ResizeTransfers, + {Target, Mods2, Status2}), + State1 = set_resize_transfers(State, + Source, + ResizeTransfers2), + AllComplete = lists:all(fun ({_, _, complete}) -> true; + ({_, Ms, awaiting}) -> + ordsets:is_element(Mod, Ms) + end, + ResizeTransfers2), + case AllComplete of + true -> transfer_complete(State1, SrcIdx, Mod); + false -> State1 + end; + _ -> State end. -spec is_resizing(chstate()) -> boolean(). is_resizing(State) -> case resized_ring(State) of - undefined -> false; - {ok, _} -> true + undefined -> false; + {ok, _} -> true end. -spec is_post_resize(chstate()) -> boolean(). is_post_resize(State) -> case get_meta('$resized_ring', State) of - {ok, '$cleanup'} -> true; - _ -> false + {ok, '$cleanup'} -> true; + _ -> false end. -spec is_resize_aborted(chstate()) -> boolean(). is_resize_aborted(State) -> case get_meta('$resized_ring_abort', State) of - {ok, true} -> true; - _ -> false + {ok, true} -> true; + _ -> false end. -spec is_resize_complete(chstate()) -> boolean(). is_resize_complete(#chstate{next = Next}) -> not - lists:any(fun ({_, _, _, _, awaiting}) -> true; - ({_, _, _, _, complete}) -> false - end, - Next). + lists:any(fun ({_, _, _, _, awaiting}) -> true; + ({_, _, _, _, complete}) -> false + end, + Next). -spec complete_resize_transfers(chstate(), - {integer(), term()}, atom()) -> [{integer(), - term()}]. + {integer(), term()}, atom()) -> [{integer(), + term()}]. complete_resize_transfers(State, Source, Mod) -> [Target || {Target, Mods, Status} - <- resize_transfers(State, Source), - Status =:= complete orelse - ordsets:is_element(Mod, Mods)]. + <- resize_transfers(State, Source), + Status =:= complete orelse + ordsets:is_element(Mod, Mods)]. -spec deletion_complete(chstate(), integer(), - atom()) -> chstate(). + atom()) -> chstate(). deletion_complete(State, Idx, Mod) -> transfer_complete(State, Idx, Mod). -spec resize_transfers(chstate(), - {integer(), term()}) -> [resize_transfer()]. + {integer(), term()}) -> [resize_transfer()]. resize_transfers(State, Source) -> {ok, Transfers} = get_meta({resize, Source}, [], State), Transfers. -spec set_resize_transfers(chstate(), - {integer(), term()}, - [resize_transfer()]) -> chstate(). + {integer(), term()}, + [resize_transfer()]) -> chstate(). 
set_resize_transfers(State, Source, Transfers) -> update_meta({resize, Source}, Transfers, State). clear_all_resize_transfers(State) -> lists:foldl(fun clear_resize_transfers/2, - State, - all_owners(State)). + State, + all_owners(State)). clear_resize_transfers(Source, State) -> remove_meta({resize, Source}, State). -spec resized_ring(chstate()) -> {ok, chash:chash()} | - undefined. + undefined. resized_ring(State) -> case get_meta('$resized_ring', State) of - {ok, '$cleanup'} -> {ok, State#chstate.chring}; - {ok, CHRing} -> {ok, CHRing}; - _ -> undefined + {ok, '$cleanup'} -> {ok, State#chstate.chring}; + {ok, CHRing} -> {ok, CHRing}; + _ -> undefined end. -spec set_resized_ring(chstate(), - chash:chash()) -> chstate(). + chash:chash()) -> chstate(). set_resized_ring(State, FutureCHash) -> update_meta('$resized_ring', FutureCHash, State). @@ -1292,40 +1292,40 @@ cleanup_after_resize(State) -> update_meta('$resized_ring', '$cleanup', State). -spec vnode_type(chstate(), integer()) -> primary | - {fallback, term()} | - future_primary | - resized_primary. + {fallback, term()} | + future_primary | + resized_primary. vnode_type(State, Idx) -> vnode_type(State, Idx, node()). vnode_type(State, Idx, Node) -> try index_owner(State, Idx) of - Node -> primary; - Owner -> - case next_owner(State, Idx) of - {_, Node, _} -> future_primary; - _ -> {fallback, Owner} - end + Node -> primary; + Owner -> + case next_owner(State, Idx) of + {_, Node, _} -> future_primary; + _ -> {fallback, Owner} + end catch - error:{badmatch, _} -> - %% idx doesn't exist so must be an index in a resized ring - resized_primary + error:{badmatch, _} -> + %% idx doesn't exist so must be an index in a resized ring + resized_primary end. %% @doc Return details for a pending partition ownership change. -spec next_owner(State :: chstate(), - Idx :: integer()) -> pending_change(). + Idx :: integer()) -> pending_change(). next_owner(State, Idx) -> case lists:keyfind(Idx, 1, State#chstate.next) of - false -> {undefined, undefined, undefined}; - NInfo -> next_owner(NInfo) + false -> {undefined, undefined, undefined}; + NInfo -> next_owner(NInfo) end. %% @doc Return details for a pending partition ownership change. -spec next_owner(State :: chstate(), Idx :: integer(), - Mod :: module()) -> pending_change(). + Mod :: module()) -> pending_change(). next_owner(State, Idx, Mod) -> NInfo = lists:keyfind(Idx, 1, State#chstate.next), @@ -1333,14 +1333,14 @@ next_owner(State, Idx, Mod) -> next_owner_status(NInfo, Mod) -> case NInfo of - false -> {undefined, undefined, undefined}; - {_, Owner, NextOwner, _Transfers, complete} -> - {Owner, NextOwner, complete}; - {_, Owner, NextOwner, Transfers, _Status} -> - case ordsets:is_element(Mod, Transfers) of - true -> {Owner, NextOwner, complete}; - false -> {Owner, NextOwner, awaiting} - end + false -> {undefined, undefined, undefined}; + {_, Owner, NextOwner, _Transfers, complete} -> + {Owner, NextOwner, complete}; + {_, Owner, NextOwner, Transfers, _Status} -> + case ordsets:is_element(Mod, Transfers) of + true -> {Owner, NextOwner, complete}; + false -> {Owner, NextOwner, awaiting} + end end. %% @private @@ -1350,28 +1350,28 @@ next_owner({_, Owner, NextOwner, _Transfers, Status}) -> completed_next_owners(Mod, #chstate{next = Next}) -> [{Idx, O, NO} || NInfo = {Idx, _, _, _, _} <- Next, - {O, NO, complete} <- [next_owner_status(NInfo, Mod)]]. + {O, NO, complete} <- [next_owner_status(NInfo, Mod)]]. %% @doc Returns true if all cluster members have seen the current ring. 
-spec ring_ready(State :: chstate()) -> boolean(). ring_ready(State0) -> check_tainted(State0, - "Error: riak_core_ring/ring_ready called " - "on tainted ring"), + "Error: riak_core_ring/ring_ready called " + "on tainted ring"), Owner = owner_node(State0), State = update_seen(Owner, State0), Seen = State#chstate.seen, Members = get_members(State#chstate.members, - [valid, leaving, exiting]), + [valid, leaving, exiting]), VClock = State#chstate.vclock, R = [begin - case orddict:find(Node, Seen) of - error -> false; - {ok, VC} -> vclock:equal(VClock, VC) - end - end - || Node <- Members], + case orddict:find(Node, Seen) of + error -> false; + {ok, VC} -> vclock:equal(VClock, VC) + end + end + || Node <- Members], Ready = lists:all(fun (X) -> X =:= true end, R), Ready. @@ -1384,33 +1384,33 @@ ring_ready_info(State0) -> State = update_seen(Owner, State0), Seen = State#chstate.seen, Members = get_members(State#chstate.members, - [valid, leaving, exiting]), + [valid, leaving, exiting]), RecentVC = orddict:fold(fun (_, VC, Recent) -> - case vclock:descends(VC, Recent) of - true -> VC; - false -> Recent - end - end, - State#chstate.vclock, - Seen), + case vclock:descends(VC, Recent) of + true -> VC; + false -> Recent + end + end, + State#chstate.vclock, + Seen), Outdated = orddict:filter(fun (Node, VC) -> - not vclock:equal(VC, RecentVC) and - lists:member(Node, Members) - end, - Seen), + not vclock:equal(VC, RecentVC) and + lists:member(Node, Members) + end, + Seen), Outdated. %% @doc Marks a pending transfer as completed. -spec handoff_complete(State :: chstate(), - Idx :: integer(), Mod :: module()) -> chstate(). + Idx :: integer(), Mod :: module()) -> chstate(). handoff_complete(State, Idx, Mod) -> transfer_complete(State, Idx, Mod). ring_changed(Node, State) -> check_tainted(State, - "Error: riak_core_ring/ring_changed called " - "on tainted ring"), + "Error: riak_core_ring/ring_changed called " + "on tainted ring"), internal_ring_changed(Node, State). %% @doc Return the ring that will exist after all pending ownership transfers @@ -1422,53 +1422,53 @@ future_ring(State) -> future_ring(State, false) -> FutureState = change_owners(State, - all_next_owners(State)), + all_next_owners(State)), %% Individual nodes will move themselves from leaving to exiting if they %% have no ring ownership, this is implemented in riak_core_ring_handler. %% Emulate it here to return similar ring. 
Leaving = get_members(FutureState#chstate.members, - [leaving]), + [leaving]), FutureState2 = lists:foldl(fun (Node, StateAcc) -> - case indices(StateAcc, Node) of - [] -> - riak_core_ring:exit_member(Node, - StateAcc, - Node); - _ -> StateAcc - end - end, - FutureState, - Leaving), + case indices(StateAcc, Node) of + [] -> + riak_core_ring:exit_member(Node, + StateAcc, + Node); + _ -> StateAcc + end + end, + FutureState, + Leaving), FutureState2#chstate{next = []}; future_ring(State0 = #chstate{next = OldNext}, true) -> case is_post_resize(State0) of - false -> - {ok, FutureCHash} = resized_ring(State0), - State1 = cleanup_after_resize(State0), - State2 = clear_all_resize_transfers(State1), - Resized = State2#chstate{chring = FutureCHash}, - Next = lists:foldl(fun ({Idx, Owner, '$resize', _, _}, - Acc) -> - DeleteEntry = {Idx, - Owner, - '$delete', - [], - awaiting}, - %% catch error when index doesn't exist in new ring - try index_owner(Resized, Idx) of - Owner -> Acc; - _ -> [DeleteEntry | Acc] - catch - error:{badmatch, _} -> - [DeleteEntry | Acc] - end - end, - [], - OldNext), - Resized#chstate{next = Next}; - true -> - State1 = remove_meta('$resized_ring', State0), - State1#chstate{next = []} + false -> + {ok, FutureCHash} = resized_ring(State0), + State1 = cleanup_after_resize(State0), + State2 = clear_all_resize_transfers(State1), + Resized = State2#chstate{chring = FutureCHash}, + Next = lists:foldl(fun ({Idx, Owner, '$resize', _, _}, + Acc) -> + DeleteEntry = {Idx, + Owner, + '$delete', + [], + awaiting}, + %% catch error when index doesn't exist in new ring + try index_owner(Resized, Idx) of + Owner -> Acc; + _ -> [DeleteEntry | Acc] + catch + error:{badmatch, _} -> + [DeleteEntry | Acc] + end + end, + [], + OldNext), + Resized#chstate{next = Next}; + true -> + State1 = remove_meta('$resized_ring', State0), + State1#chstate{next = []} end. 
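Both clauses of future_ring/2 answer the same question: what will the ring look like once every pending ownership change lands? The non-resize clause applies all scheduled owner changes, emulates the leaving-to-exiting promotion normally done in riak_core_ring_handler, and clears next; the resize clause swaps in the future chash and schedules '$delete' entries for indices that vanish. A minimal sketch of comparing current against future ownership, assuming only the public API used elsewhere in this patch (riak_core_ring:all_owners/1 and riak_core_ring:future_ring/1) and a non-resize transition, where both rings have the same partition count:

pending_owner_changes(Ring) ->
    Future = riak_core_ring:future_ring(Ring),
    %% Same ring size on both sides, so zipping lines the owner
    %% lists up partition by partition.
    [{Idx, Cur, New}
     || {{Idx, Cur}, {Idx2, New}}
            <- lists:zip(riak_core_ring:all_owners(Ring),
                         riak_core_ring:all_owners(Future)),
        Idx =:= Idx2, Cur =/= New].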
pretty_print(Ring, Opts) -> @@ -1476,55 +1476,55 @@ pretty_print(Ring, Opts) -> OptLegend = lists:member(legend, Opts), Out = proplists:get_value(out, Opts, standard_io), TargetN = proplists:get_value(target_n, - Opts, - application:get_env(riak_core, - target_n_val, - undefined)), + Opts, + application:get_env(riak_core, + target_n_val, + undefined)), Owners = riak_core_ring:all_members(Ring), Indices = riak_core_ring:all_owners(Ring), RingSize = length(Indices), Numeric = OptNumeric orelse length(Owners) > 26, case Numeric of - true -> - Ids = [integer_to_list(N) - || N <- lists:seq(1, length(Owners))]; - false -> - Ids = [[Letter] - || Letter <- lists:seq(97, 96 + length(Owners))] + true -> + Ids = [integer_to_list(N) + || N <- lists:seq(1, length(Owners))]; + false -> + Ids = [[Letter] + || Letter <- lists:seq(97, 96 + length(Owners))] end, Names = lists:zip(Owners, Ids), case OptLegend of - true -> - io:format(Out, "~36..=s Nodes ~36..=s~n", ["", ""]), - _ = [begin - NodeIndices = [Idx - || {Idx, Owner} <- Indices, Owner =:= Node], - RingPercent = length(NodeIndices) * 100 / RingSize, - io:format(Out, - "Node ~s: ~w (~5.1f%) ~s~n", - [Name, length(NodeIndices), RingPercent, Node]) - end - || {Node, Name} <- Names], - io:format(Out, "~36..=s Ring ~37..=s~n", ["", ""]); - false -> ok + true -> + io:format(Out, "~36..=s Nodes ~36..=s~n", ["", ""]), + _ = [begin + NodeIndices = [Idx + || {Idx, Owner} <- Indices, Owner =:= Node], + RingPercent = length(NodeIndices) * 100 / RingSize, + io:format(Out, + "Node ~s: ~w (~5.1f%) ~s~n", + [Name, length(NodeIndices), RingPercent, Node]) + end + || {Node, Name} <- Names], + io:format(Out, "~36..=s Ring ~37..=s~n", ["", ""]); + false -> ok end, case Numeric of - true -> - Ownership = [orddict:fetch(Owner, Names) - || {_Idx, Owner} <- Indices], - io:format(Out, "~p~n", [Ownership]); - false -> - lists:foldl(fun ({_, Owner}, N) -> - Name = orddict:fetch(Owner, Names), - case N rem TargetN of - 0 -> io:format(Out, "~s|", [[Name]]); - _ -> io:format(Out, "~s", [[Name]]) - end, - N + 1 - end, - 1, - Indices), - io:format(Out, "~n", []) + true -> + Ownership = [orddict:fetch(Owner, Names) + || {_Idx, Owner} <- Indices], + io:format(Out, "~p~n", [Ownership]); + false -> + lists:foldl(fun ({_, Owner}, N) -> + Name = orddict:fetch(Owner, Names), + case N rem TargetN of + 0 -> io:format(Out, "~s|", [[Name]]); + _ -> io:format(Out, "~s", [[Name]]) + end, + N + 1 + end, + 1, + Indices), + io:format(Out, "~n", []) end. %% @doc Return a ring with all transfers cancelled - for claim sim @@ -1538,27 +1538,27 @@ cancel_transfers(Ring) -> Ring#chstate{next = []}. internal_ring_changed(Node, CState0) -> CState = update_seen(Node, CState0), case ring_ready(CState) of - false -> CState; - true -> riak_core_claimant:ring_changed(Node, CState) + false -> CState; + true -> riak_core_claimant:ring_changed(Node, CState) end. %% @private merge_meta({N1, M1}, {N2, M2}) -> Meta = dict:merge(fun (_, D1, D2) -> - pick_val({N1, D1}, {N2, D2}) - end, - M1, - M2), + pick_val({N1, D1}, {N2, D2}) + end, + M1, + M2), log_meta_merge(M1, M2, Meta), Meta. %% @private pick_val({N1, M1}, {N2, M2}) -> case {M1#meta_entry.lastmod, N1} > - {M2#meta_entry.lastmod, N2} - of - true -> M1; - false -> M2 + {M2#meta_entry.lastmod, N2} + of + true -> M1; + false -> M2 end. %% @private @@ -1573,10 +1573,10 @@ log_meta_merge(M1, M2, Meta) -> %% subsequent log messages will allow us to track ring versions. %% Handle legacy rings as well. 
log_ring_result(#chstate{vclock = V, members = Members, - next = Next}) -> + next = Next}) -> logger:debug("Updated ring vclock: ~p, Members: ~p, " - "Next: ~p", - [V, Members, Next]). + "Next: ~p", + [V, Members, Next]). %% @private internal_reconcile(State, OtherState) -> @@ -1596,52 +1596,52 @@ internal_reconcile(State, OtherState) -> VMerge1 = vclock:merge([VC1, VC2]), VMerge2 = vclock:merge([VC2, VC1]), case {vclock:equal(VMerge1, VMerge2), VMerge1 < VMerge2} - of - {true, _} -> VC3 = VMerge1; - {_, true} -> VC3 = VMerge1; - {_, false} -> VC3 = VMerge2 + of + {true, _} -> VC3 = VMerge1; + {_, true} -> VC3 = VMerge1; + {_, false} -> VC3 = VMerge2 end, Newer = vclock:descends(VC1, VC2), Older = vclock:descends(VC2, VC1), Equal = equal_cstate(State3, OtherState3), case {Equal, Newer, Older} of - {_, true, false} -> - {SeenChanged, State3#chstate{vclock = VC3}}; - {_, false, true} -> - {true, - OtherState3#chstate{nodename = VNode, vclock = VC3}}; - {true, _, _} -> - {SeenChanged, State3#chstate{vclock = VC3}}; - {_, true, true} -> - %% Exceptional condition that should only occur during - %% rolling upgrades and manual setting of the ring. - %% Merge as a divergent case. - State4 = reconcile_divergent(VNode, - State3, - OtherState3), - {true, State4#chstate{nodename = VNode}}; - {_, false, false} -> - %% Unable to reconcile based on vector clock, merge rings. - State4 = reconcile_divergent(VNode, - State3, - OtherState3), - {true, State4#chstate{nodename = VNode}} + {_, true, false} -> + {SeenChanged, State3#chstate{vclock = VC3}}; + {_, false, true} -> + {true, + OtherState3#chstate{nodename = VNode, vclock = VC3}}; + {true, _, _} -> + {SeenChanged, State3#chstate{vclock = VC3}}; + {_, true, true} -> + %% Exceptional condition that should only occur during + %% rolling upgrades and manual setting of the ring. + %% Merge as a divergent case. + State4 = reconcile_divergent(VNode, + State3, + OtherState3), + {true, State4#chstate{nodename = VNode}}; + {_, false, false} -> + %% Unable to reconcile based on vector clock, merge rings. + State4 = reconcile_divergent(VNode, + State3, + OtherState3), + {true, State4#chstate{nodename = VNode}} end. %% @private reconcile_divergent(VNode, StateA, StateB) -> VClock = vclock:increment(VNode, - vclock:merge([StateA#chstate.vclock, - StateB#chstate.vclock])), + vclock:merge([StateA#chstate.vclock, + StateB#chstate.vclock])), Members = reconcile_members(StateA, StateB), Meta = merge_meta({StateA#chstate.nodename, - StateA#chstate.meta}, - {StateB#chstate.nodename, StateB#chstate.meta}), + StateA#chstate.meta}, + {StateB#chstate.nodename, StateB#chstate.meta}), NewState = reconcile_ring(StateA, - StateB, - get_members(Members)), + StateB, + get_members(Members)), NewState1 = NewState#chstate{vclock = VClock, - members = Members, meta = Meta}, + members = Members, meta = Meta}, log_ring_result(NewState1), NewState1. @@ -1650,34 +1650,34 @@ reconcile_divergent(VNode, StateA, StateB) -> %% and falling back to manual merge for divergent cases. 
reconcile_members(StateA, StateB) -> orddict:merge(fun (_K, {Valid1, VC1, Meta1}, - {Valid2, VC2, Meta2}) -> - New1 = vclock:descends(VC1, VC2), - New2 = vclock:descends(VC2, VC1), - MergeVC = vclock:merge([VC1, VC2]), - case {New1, New2} of - {true, false} -> - MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), - {Valid1, MergeVC, MergeMeta}; - {false, true} -> - MergeMeta = lists:ukeysort(1, Meta2 ++ Meta1), - {Valid2, MergeVC, MergeMeta}; - {_, _} -> - MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), - {merge_status(Valid1, Valid2), - MergeVC, - MergeMeta} - end - end, - StateA#chstate.members, - StateB#chstate.members). + {Valid2, VC2, Meta2}) -> + New1 = vclock:descends(VC1, VC2), + New2 = vclock:descends(VC2, VC1), + MergeVC = vclock:merge([VC1, VC2]), + case {New1, New2} of + {true, false} -> + MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), + {Valid1, MergeVC, MergeMeta}; + {false, true} -> + MergeMeta = lists:ukeysort(1, Meta2 ++ Meta1), + {Valid2, MergeVC, MergeMeta}; + {_, _} -> + MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), + {merge_status(Valid1, Valid2), + MergeVC, + MergeMeta} + end + end, + StateA#chstate.members, + StateB#chstate.members). %% @private reconcile_seen(StateA, StateB) -> orddict:merge(fun (_, VC1, VC2) -> - vclock:merge([VC1, VC2]) - end, - StateA#chstate.seen, - StateB#chstate.seen). + vclock:merge([VC1, VC2]) + end, + StateA#chstate.seen, + StateB#chstate.seen). %% @private merge_next_status(complete, _) -> complete; @@ -1689,19 +1689,19 @@ merge_next_status(awaiting, awaiting) -> awaiting. %% the same Idx/Owner pair. reconcile_next(Next1, Next2) -> lists:zipwith(fun ({Idx, - Owner, - Node, - Transfers1, - Status1}, - {Idx, Owner, Node, Transfers2, Status2}) -> - {Idx, - Owner, - Node, - ordsets:union(Transfers1, Transfers2), - merge_next_status(Status1, Status2)} - end, - Next1, - Next2). + Owner, + Node, + Transfers1, + Status1}, + {Idx, Owner, Node, Transfers2, Status2}) -> + {Idx, + Owner, + Node, + ordsets:union(Transfers1, Transfers2), + merge_next_status(Status1, Status2)} + end, + Next1, + Next2). %% @private %% @doc Merge two next lists that may be of different sizes and @@ -1711,101 +1711,101 @@ reconcile_next(Next1, Next2) -> reconcile_divergent_next(BaseNext, OtherNext) -> MergedNext = substitute(1, BaseNext, OtherNext), lists:zipwith(fun ({Idx, - Owner1, - Node1, - Transfers1, - Status1}, - {Idx, Owner2, Node2, Transfers2, Status2}) -> - Same = {Owner1, Node1} =:= {Owner2, Node2}, - case {Same, Status1, Status2} of - {false, _, _} -> - {Idx, Owner1, Node1, Transfers1, Status1}; - _ -> - {Idx, - Owner1, - Node1, - ordsets:union(Transfers1, Transfers2), - merge_next_status(Status1, Status2)} - end - end, - BaseNext, - MergedNext). + Owner1, + Node1, + Transfers1, + Status1}, + {Idx, Owner2, Node2, Transfers2, Status2}) -> + Same = {Owner1, Node1} =:= {Owner2, Node2}, + case {Same, Status1, Status2} of + {false, _, _} -> + {Idx, Owner1, Node1, Transfers1, Status1}; + _ -> + {Idx, + Owner1, + Node1, + ordsets:union(Transfers1, Transfers2), + merge_next_status(Status1, Status2)} + end + end, + BaseNext, + MergedNext). %% @private substitute(Idx, TL1, TL2) -> lists:map(fun (T) -> - Key = element(Idx, T), - case lists:keyfind(Key, Idx, TL2) of - false -> T; - T2 -> T2 - end - end, - TL1). + Key = element(Idx, T), + case lists:keyfind(Key, Idx, TL2) of + false -> T; + T2 -> T2 + end + end, + TL1). 
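Taken together, merge_next_status/2, reconcile_next/2 and reconcile_divergent_next/2 implement a monotone merge: once a transfer is marked complete it stays complete, and the per-module transfer sets are unioned. An illustrative shell transcript (inputs borrowed from reconcile_next_test/0 further down in this patch, not captured from a live node):

1> Next1 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}].
2> Next2 = [{0, nodeA, nodeB, [riak_kv_vnode], complete}].
3> reconcile_next(Next1, Next2).
[{0, nodeA, nodeB, [riak_kv_vnode, riak_pipe_vnode], complete}]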
%% @private reconcile_ring(StateA = #chstate{claimant = Claimant1, - rvsn = VC1, next = Next1}, - StateB = #chstate{claimant = Claimant2, rvsn = VC2, - next = Next2}, - Members) -> + rvsn = VC1, next = Next1}, + StateB = #chstate{claimant = Claimant2, rvsn = VC2, + next = Next2}, + Members) -> %% Try to reconcile based on the ring version (rvsn) vector clock. V1Newer = vclock:descends(VC1, VC2), V2Newer = vclock:descends(VC2, VC1), EqualVC = vclock:equal(VC1, VC2) and - (Claimant1 =:= Claimant2), + (Claimant1 =:= Claimant2), case {EqualVC, V1Newer, V2Newer} of - {true, _, _} -> - Next = reconcile_next(Next1, Next2), - StateA#chstate{next = Next}; - {_, true, false} -> - Next = reconcile_divergent_next(Next1, Next2), - StateA#chstate{next = Next}; - {_, false, true} -> - Next = reconcile_divergent_next(Next2, Next1), - StateB#chstate{next = Next}; - {_, _, _} -> - %% Ring versions were divergent, so fall back to reconciling based - %% on claimant. Under normal operation, divergent ring versions - %% should only occur if there are two different claimants, and one - %% claimant is invalid. For example, when a claimant is removed and - %% a new claimant has just taken over. We therefore chose the ring - %% with the valid claimant. - CValid1 = lists:member(Claimant1, Members), - CValid2 = lists:member(Claimant2, Members), - case {CValid1, CValid2} of - {true, false} -> - Next = reconcile_divergent_next(Next1, Next2), - StateA#chstate{next = Next}; - {false, true} -> - Next = reconcile_divergent_next(Next2, Next1), - StateB#chstate{next = Next}; - {false, false} -> - %% This can occur when removed/down nodes are still - %% up and gossip to each other. We need to pick a - %% claimant to handle this case, although the choice - %% is irrelevant as a correct valid claimant will - %% eventually emerge when the ring converges. - %TODO False-false and true-true are the same. _-_ maybe better not repitition - case Claimant1 < Claimant2 of - true -> - Next = reconcile_divergent_next(Next1, Next2), - StateA#chstate{next = Next}; - false -> - Next = reconcile_divergent_next(Next2, Next1), - StateB#chstate{next = Next} - end; - {true, true} -> - %% This should never happen in normal practice. - %% But, we need to handle it for exceptional cases. - case Claimant1 < Claimant2 of - true -> - Next = reconcile_divergent_next(Next1, Next2), - StateA#chstate{next = Next}; - false -> - Next = reconcile_divergent_next(Next2, Next1), - StateB#chstate{next = Next} - end - end + {true, _, _} -> + Next = reconcile_next(Next1, Next2), + StateA#chstate{next = Next}; + {_, true, false} -> + Next = reconcile_divergent_next(Next1, Next2), + StateA#chstate{next = Next}; + {_, false, true} -> + Next = reconcile_divergent_next(Next2, Next1), + StateB#chstate{next = Next}; + {_, _, _} -> + %% Ring versions were divergent, so fall back to reconciling based + %% on claimant. Under normal operation, divergent ring versions + %% should only occur if there are two different claimants, and one + %% claimant is invalid. For example, when a claimant is removed and + %% a new claimant has just taken over. We therefore chose the ring + %% with the valid claimant. 
+      CValid1 = lists:member(Claimant1, Members),
+      CValid2 = lists:member(Claimant2, Members),
+      case {CValid1, CValid2} of
+        {true, false} ->
+            Next = reconcile_divergent_next(Next1, Next2),
+            StateA#chstate{next = Next};
+        {false, true} ->
+            Next = reconcile_divergent_next(Next2, Next1),
+            StateB#chstate{next = Next};
+        {false, false} ->
+            %% This can occur when removed/down nodes are still
+            %% up and gossip to each other. We need to pick a
+            %% claimant to handle this case, although the choice
+            %% is irrelevant as a correct valid claimant will
+            %% eventually emerge when the ring converges.
+            %% TODO: the false-false and true-true branches are identical; consider factoring out the repetition.
+            case Claimant1 < Claimant2 of
+              true ->
+                  Next = reconcile_divergent_next(Next1, Next2),
+                  StateA#chstate{next = Next};
+              false ->
+                  Next = reconcile_divergent_next(Next2, Next1),
+                  StateB#chstate{next = Next}
+            end;
+        {true, true} ->
+            %% This should never happen in normal practice.
+            %% But we need to handle it for exceptional cases.
+            case Claimant1 < Claimant2 of
+              true ->
+                  Next = reconcile_divergent_next(Next1, Next2),
+                  StateA#chstate{next = Next};
+              false ->
+                  Next = reconcile_divergent_next(Next2, Next1),
+                  StateB#chstate{next = Next}
+            end
+      end
     end.
 
 %% @private
@@ -1825,43 +1825,43 @@ merge_status(_, _) -> invalid.
 
 %% @private
 transfer_complete(CState = #chstate{next = Next,
-                                    vclock = VClock},
-                  Idx, Mod) ->
+                                      vclock = VClock},
+                    Idx, Mod) ->
     {Idx, Owner, NextOwner, Transfers, Status} =
-        lists:keyfind(Idx, 1, Next),
+          lists:keyfind(Idx, 1, Next),
     Transfers2 = ordsets:add_element(Mod, Transfers),
     VNodeMods = ordsets:from_list([VMod
-                                   || {_, VMod} <- riak_core:vnode_modules()]),
+                                     || {_, VMod} <- riak_core:vnode_modules()]),
     Status2 = case {Status, Transfers2} of
-                {complete, _} -> complete;
-                {awaiting, VNodeMods} -> complete;
-                _ -> awaiting
-              end,
+                  {complete, _} -> complete;
+                  {awaiting, VNodeMods} -> complete;
+                  _ -> awaiting
+                end,
     Next2 = lists:keyreplace(Idx,
-                             1,
-                             Next,
-                             {Idx, Owner, NextOwner, Transfers2, Status2}),
+                              1,
+                              Next,
+                              {Idx, Owner, NextOwner, Transfers2, Status2}),
     VClock2 = vclock:increment(Owner, VClock),
     CState#chstate{next = Next2, vclock = VClock2}.
 
 %% @private
 get_members(Members) ->
     get_members(Members,
-                [joining, valid, leaving, exiting, down]).
+                 [joining, valid, leaving, exiting, down]).
 
 %% @private
 get_members(Members, Types) ->
     [Node
      || {Node, {V, _, _}} <- Members,
-        lists:member(V, Types)].
+         lists:member(V, Types)].
 
 %% @private
 update_seen(Node,
-            CState = #chstate{vclock = VClock, seen = Seen}) ->
+             CState = #chstate{vclock = VClock, seen = Seen}) ->
     Seen2 = orddict:update(Node,
-                           fun (SeenVC) -> vclock:merge([SeenVC, VClock]) end,
-                           VClock,
-                           Seen),
+                            fun (SeenVC) -> vclock:merge([SeenVC, VClock]) end,
+                            VClock,
+                            Seen),
     CState#chstate{seen = Seen2}.
 
 %% @private
 equal_cstate(StateA, StateB) ->
     equal_cstate(StateA, StateB, false).
 
 equal_cstate(StateA, StateB, false) ->
     T1 = equal_members(StateA#chstate.members,
-                       StateB#chstate.members),
+                        StateB#chstate.members),
     T2 = vclock:equal(StateA#chstate.rvsn,
-                      StateB#chstate.rvsn),
+                       StateB#chstate.rvsn),
     T3 = equal_seen(StateA, StateB),
     T4 = equal_rings(StateA, StateB),
     %% Clear fields checked manually and test remaining through equality.
     %% Note: We do not consider cluster name in equality.
StateA2 = StateA#chstate{nodename = undefined, - members = undefined, vclock = undefined, - rvsn = undefined, seen = undefined, - chring = undefined, meta = undefined, - clustername = undefined}, + members = undefined, vclock = undefined, + rvsn = undefined, seen = undefined, + chring = undefined, meta = undefined, + clustername = undefined}, StateB2 = StateB#chstate{nodename = undefined, - members = undefined, vclock = undefined, - rvsn = undefined, seen = undefined, - chring = undefined, meta = undefined, - clustername = undefined}, + members = undefined, vclock = undefined, + rvsn = undefined, seen = undefined, + chring = undefined, meta = undefined, + clustername = undefined}, T5 = StateA2 =:= StateB2, T1 andalso T2 andalso T3 andalso T4 andalso T5. %% @private equal_members(M1, M2) -> L = orddict:merge(fun (_, {Status1, VC1, Meta1}, - {Status2, VC2, Meta2}) -> - Status1 =:= Status2 andalso - vclock:equal(VC1, VC2) andalso Meta1 =:= Meta2 - end, - M1, - M2), + {Status2, VC2, Meta2}) -> + Status1 =:= Status2 andalso + vclock:equal(VC1, VC2) andalso Meta1 =:= Meta2 + end, + M1, + M2), {_, R} = lists:unzip(L), lists:all(fun (X) -> X =:= true end, R). @@ -1907,21 +1907,21 @@ equal_seen(StateA, StateB) -> Seen1 = filtered_seen(StateA), Seen2 = filtered_seen(StateB), L = orddict:merge(fun (_, VC1, VC2) -> - vclock:equal(VC1, VC2) - end, - Seen1, - Seen2), + vclock:equal(VC1, VC2) + end, + Seen1, + Seen2), {_, R} = lists:unzip(L), lists:all(fun (X) -> X =:= true end, R). %% @private filtered_seen(State = #chstate{seen = Seen}) -> case get_members(State#chstate.members) of - [] -> Seen; - Members -> - orddict:filter(fun (N, _) -> lists:member(N, Members) - end, - Seen) + [] -> Seen; + Members -> + orddict:filter(fun (N, _) -> lists:member(N, Members) + end, + Seen) end. %% =================================================================== @@ -1961,20 +1961,20 @@ index_test() -> ?assertEqual((node()), (index_owner(Ring0, 0))), ?assertEqual(x, (index_owner(Ring1, 0))), ?assertEqual((lists:sort([x, node()])), - (lists:sort(diff_nodes(Ring0, Ring1)))). + (lists:sort(diff_nodes(Ring0, Ring1)))). reconcile_test() -> Ring0 = fresh(2, node()), Ring1 = transfer_node(0, x, Ring0), %% Only members and seen should have changed {new_ring, Ring2} = reconcile(fresh(2, someone_else), - Ring1), + Ring1), ?assertNot((equal_cstate(Ring1, Ring2, false))), RingB0 = fresh(2, node()), RingB1 = transfer_node(0, x, RingB0), RingB2 = RingB1#chstate{nodename = b}, ?assertMatch({no_change, _}, - (reconcile(Ring1, RingB2))), + (reconcile(Ring1, RingB2))), {no_change, RingB3} = reconcile(Ring1, RingB2), ?assert((equal_cstate(RingB2, RingB3))). @@ -1983,22 +1983,22 @@ metadata_inequality_test() -> Ring1 = update_meta(key, val, Ring0), ?assertNot((equal_rings(Ring0, Ring1))), ?assertEqual((Ring1#chstate.meta), - (merge_meta({node0, Ring0#chstate.meta}, - {node1, Ring1#chstate.meta}))), + (merge_meta({node0, Ring0#chstate.meta}, + {node1, Ring1#chstate.meta}))), timer:sleep(1001), % ensure that lastmod is at least a second later Ring2 = update_meta(key, val2, Ring1), ?assertEqual((get_meta(key, Ring2)), - (get_meta(key, - #chstate{meta = - merge_meta({node1, Ring1#chstate.meta}, - {node2, - Ring2#chstate.meta})}))), + (get_meta(key, + #chstate{meta = + merge_meta({node1, Ring1#chstate.meta}, + {node2, + Ring2#chstate.meta})}))), ?assertEqual((get_meta(key, Ring2)), - (get_meta(key, - #chstate{meta = - merge_meta({node2, Ring2#chstate.meta}, - {node1, - Ring1#chstate.meta})}))). 
+ (get_meta(key, + #chstate{meta = + merge_meta({node2, Ring2#chstate.meta}, + {node1, + Ring1#chstate.meta})}))). metadata_remove_test() -> Ring0 = fresh(2, node()), @@ -2008,17 +2008,17 @@ metadata_remove_test() -> Ring2 = remove_meta(key, Ring1), ?assertEqual(undefined, (get_meta(key, Ring2))), ?assertEqual(undefined, - (get_meta(key, - #chstate{meta = - merge_meta({node1, Ring1#chstate.meta}, - {node2, - Ring2#chstate.meta})}))), + (get_meta(key, + #chstate{meta = + merge_meta({node1, Ring1#chstate.meta}, + {node2, + Ring2#chstate.meta})}))), ?assertEqual(undefined, - (get_meta(key, - #chstate{meta = - merge_meta({node2, Ring2#chstate.meta}, - {node1, - Ring1#chstate.meta})}))). + (get_meta(key, + #chstate{meta = + merge_meta({node2, Ring2#chstate.meta}, + {node1, + Ring1#chstate.meta})}))). rename_test() -> Ring0 = fresh(2, node()), @@ -2030,14 +2030,14 @@ exclusion_test() -> Ring0 = fresh(2, node()), Ring1 = transfer_node(0, x, Ring0), ?assertEqual(0, - (random_other_index(Ring1, - [730750818665451459101842416358141509827966271488]))), + (random_other_index(Ring1, + [730750818665451459101842416358141509827966271488]))), ?assertEqual(no_indices, - (random_other_index(Ring1, [0]))), + (random_other_index(Ring1, [0]))), ?assertEqual([{730750818665451459101842416358141509827966271488, - node()}, - {0, x}], - (preflist(<<1:160/integer>>, Ring1))). + node()}, + {0, x}], + (preflist(<<1:160/integer>>, Ring1))). random_other_node_test() -> Ring0 = fresh(2, node()), @@ -2052,7 +2052,7 @@ membership_test() -> RingA2 = add_member(nodeA, RingA1, nodeB), RingA3 = add_member(nodeA, RingA2, nodeC), ?assertEqual([nodeA, nodeB, nodeC], - (all_members(RingA3))), + (all_members(RingA3))), RingA4 = remove_member(nodeA, RingA3, nodeC), ?assertEqual([nodeA, nodeB], (all_members(RingA4))), %% Node should stay removed @@ -2066,41 +2066,41 @@ membership_test() -> RingB2 = add_member(nodeB, RingA6, nodeC), {_, RingA7} = reconcile(RingB2, RingA6), ?assertEqual([nodeA, nodeB, nodeC], - (all_members(RingA7))), + (all_members(RingA7))), Priority = [{invalid, 1}, - {down, 2}, - {joining, 3}, - {valid, 4}, - {exiting, 5}, - {leaving, 6}], + {down, 2}, + {joining, 3}, + {valid, 4}, + {exiting, 5}, + {leaving, 6}], RingX1 = fresh(nodeA), RingX2 = add_member(nodeA, RingX1, nodeB), RingX3 = add_member(nodeA, RingX2, nodeC), ?assertEqual(joining, (member_status(RingX3, nodeC))), %% Parallel/sibling status changes merge based on priority [begin - RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), - ?assertEqual(StatusA, (member_status(RingT1, nodeC))), - RingT2 = set_member(nodeB, RingX3, nodeC, StatusB), - ?assertEqual(StatusB, (member_status(RingT2, nodeC))), - StatusC = case PriorityA < PriorityB of - true -> StatusA; - false -> StatusB - end, - {_, RingT3} = reconcile(RingT2, RingT1), - ?assertEqual(StatusC, (member_status(RingT3, nodeC))) + RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), + ?assertEqual(StatusA, (member_status(RingT1, nodeC))), + RingT2 = set_member(nodeB, RingX3, nodeC, StatusB), + ?assertEqual(StatusB, (member_status(RingT2, nodeC))), + StatusC = case PriorityA < PriorityB of + true -> StatusA; + false -> StatusB + end, + {_, RingT3} = reconcile(RingT2, RingT1), + ?assertEqual(StatusC, (member_status(RingT3, nodeC))) end || {StatusA, PriorityA} <- Priority, - {StatusB, PriorityB} <- Priority], + {StatusB, PriorityB} <- Priority], %% Related status changes merge to descendant [begin - RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), - ?assertEqual(StatusA, (member_status(RingT1, nodeC))), - 
RingT2 = set_member(nodeB, RingT1, nodeC, StatusB), - ?assertEqual(StatusB, (member_status(RingT2, nodeC))), - RingT3 = set_member(nodeA, RingT1, nodeA, valid), - {_, RingT4} = reconcile(RingT2, RingT3), - ?assertEqual(StatusB, (member_status(RingT4, nodeC))) + RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), + ?assertEqual(StatusA, (member_status(RingT1, nodeC))), + RingT2 = set_member(nodeB, RingT1, nodeC, StatusB), + ?assertEqual(StatusB, (member_status(RingT2, nodeC))), + RingT3 = set_member(nodeA, RingT1, nodeA, valid), + {_, RingT4} = reconcile(RingT2, RingT3), + ?assertEqual(StatusB, (member_status(RingT4, nodeC))) end || {StatusA, _} <- Priority, {StatusB, _} <- Priority], ok. @@ -2113,25 +2113,25 @@ ring_version_test() -> #chstate{rvsn = RVsn, vclock = VClock} = Ring3, RingA1 = transfer_node(0, nodeA, Ring3), RingA2 = RingA1#chstate{vclock = - vclock:increment(nodeA, VClock)}, + vclock:increment(nodeA, VClock)}, RingB1 = transfer_node(0, nodeB, Ring3), RingB2 = RingB1#chstate{vclock = - vclock:increment(nodeB, VClock)}, + vclock:increment(nodeB, VClock)}, %% RingA1 has most recent ring version {_, RingT1} = reconcile(RingA2#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB2), + vclock:increment(nodeA, RVsn)}, + RingB2), ?assertEqual(nodeA, (index_owner(RingT1, 0))), %% RingB1 has most recent ring version {_, RingT2} = reconcile(RingA2, - RingB2#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + RingB2#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeB, (index_owner(RingT2, 0))), %% Divergent ring versions, merge based on claimant {_, RingT3} = reconcile(RingA2#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB2#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + vclock:increment(nodeA, RVsn)}, + RingB2#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeA, (index_owner(RingT3, 0))), %% Divergent ring versions, one valid claimant. Merge on claimant. RingA3 = RingA2#chstate{claimant = nodeA}, @@ -2139,45 +2139,45 @@ ring_version_test() -> RingB3 = RingB2#chstate{claimant = nodeB}, RingB4 = remove_member(nodeB, RingB3, nodeA), {_, RingT4} = reconcile(RingA4#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB3#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + vclock:increment(nodeA, RVsn)}, + RingB3#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeA, (index_owner(RingT4, 0))), {_, RingT5} = reconcile(RingA3#chstate{rvsn = - vclock:increment(nodeA, RVsn)}, - RingB4#chstate{rvsn = - vclock:increment(nodeB, RVsn)}), + vclock:increment(nodeA, RVsn)}, + RingB4#chstate{rvsn = + vclock:increment(nodeB, RVsn)}), ?assertEqual(nodeB, (index_owner(RingT5, 0))). 
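ring_version_test/0 pins down the reconciliation precedence as a three-step rule: a strictly newer rvsn (ring version vector clock) wins outright; with divergent rvsns, the ring whose claimant is still a cluster member beats one whose claimant was removed; and with both or neither claimant valid, the smaller claimant name is used as a deterministic tie-break. RingT5 above is a worked instance of the second step: the rvsns diverge, RingB4 has removed nodeA from membership, so nodeA is an invalid claimant and StateB wins, making nodeB the owner of index 0.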
reconcile_next_test() -> Next1 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {2, nodeA, nodeB, [riak_pipe_vnode], complete}], + {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, + {2, nodeA, nodeB, [riak_pipe_vnode], complete}], Next2 = [{0, nodeA, nodeB, [riak_kv_vnode], complete}, - {1, nodeA, nodeB, [], awaiting}, - {2, nodeA, nodeB, [], awaiting}], + {1, nodeA, nodeB, [], awaiting}, + {2, nodeA, nodeB, [], awaiting}], Next3 = [{0, - nodeA, - nodeB, - [riak_kv_vnode, riak_pipe_vnode], - complete}, - {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {2, nodeA, nodeB, [riak_pipe_vnode], complete}], + nodeA, + nodeB, + [riak_kv_vnode, riak_pipe_vnode], + complete}, + {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, + {2, nodeA, nodeB, [riak_pipe_vnode], complete}], ?assertEqual(Next3, (reconcile_next(Next1, Next2))), Next4 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {1, nodeA, nodeB, [], awaiting}, - {2, nodeA, nodeB, [riak_pipe_vnode], awaiting}], + {1, nodeA, nodeB, [], awaiting}, + {2, nodeA, nodeB, [riak_pipe_vnode], awaiting}], Next5 = [{0, nodeA, nodeC, [riak_kv_vnode], complete}, - {2, nodeA, nodeB, [riak_kv_vnode], complete}], + {2, nodeA, nodeB, [riak_kv_vnode], complete}], Next6 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}, - {1, nodeA, nodeB, [], awaiting}, - {2, - nodeA, - nodeB, - [riak_kv_vnode, riak_pipe_vnode], - complete}], + {1, nodeA, nodeB, [], awaiting}, + {2, + nodeA, + nodeB, + [riak_kv_vnode, riak_pipe_vnode], + complete}], ?assertEqual(Next6, - (reconcile_divergent_next(Next4, Next5))). + (reconcile_divergent_next(Next4, Next5))). resize_test() -> Ring0 = fresh(4, a), @@ -2189,30 +2189,30 @@ resize_test() -> valid_resize(Ring0, Ring1), Ring3 = set_pending_resize(Ring2, Ring0), ?assertEqual((num_partitions(Ring0)), - (num_partitions(Ring3))), + (num_partitions(Ring3))), ?assertEqual((num_partitions(Ring2)), - (future_num_partitions(Ring3))), + (future_num_partitions(Ring3))), ?assertEqual((num_partitions(Ring2)), - (num_partitions(future_ring(Ring3)))), + (num_partitions(future_ring(Ring3)))), Key = <<0:160/integer>>, OrigIdx = element(1, hd(preflist(Key, Ring0))), %% for non-resize transitions index should be the same ?assertEqual(OrigIdx, - (future_index(Key, OrigIdx, undefined, Ring0))), + (future_index(Key, OrigIdx, undefined, Ring0))), ?assertEqual((element(1, hd(preflist(Key, Ring2)))), - (future_index(Key, OrigIdx, undefined, Ring3))). + (future_index(Key, OrigIdx, undefined, Ring3))). resize_xfer_test_() -> {setup, fun () -> - meck:unload(), - meck:new(riak_core, [passthrough]), - meck:expect(riak_core, - vnode_modules, - fun () -> - [{some_app, fake_vnode}, - {other_app, other_vnode}] - end) + meck:unload(), + meck:new(riak_core, [passthrough]), + meck:expect(riak_core, + vnode_modules, + fun () -> + [{some_app, fake_vnode}, + {other_app, other_vnode}] + end) end, fun (_) -> meck:unload() end, fun test_resize_xfers/0}. 
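The setup fun above stubs riak_core:vnode_modules/0 with meck so the resize-transfer logic sees a fixed two-module universe; calling meck:unload() in setup as well as in the teardown fun keeps mocks leaked by earlier tests from interfering with this one.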
@@ -2222,80 +2222,80 @@ test_resize_xfers() -> Ring1 = set_pending_resize(resize(Ring0, 8), Ring0), Source1 = {0, a}, Target1 = - {730750818665451459101842416358141509827966271488, a}, + {730750818665451459101842416358141509827966271488, a}, TargetIdx2 = - 365375409332725729550921208179070754913983135744, + 365375409332725729550921208179070754913983135744, Ring2 = schedule_resize_transfer(Ring1, - Source1, - Target1), + Source1, + Target1), ?assertEqual(Target1, - (awaiting_resize_transfer(Ring2, Source1, fake_vnode))), + (awaiting_resize_transfer(Ring2, Source1, fake_vnode))), ?assertEqual(awaiting, - (resize_transfer_status(Ring2, - Source1, - Target1, - fake_vnode))), + (resize_transfer_status(Ring2, + Source1, + Target1, + fake_vnode))), %% use Target1 since we haven't used it as a source index ?assertEqual(undefined, - (awaiting_resize_transfer(Ring2, Target1, fake_vnode))), + (awaiting_resize_transfer(Ring2, Target1, fake_vnode))), ?assertEqual(undefined, - (resize_transfer_status(Ring2, - Target1, - Source1, - fake_vnode))), + (resize_transfer_status(Ring2, + Target1, + Source1, + fake_vnode))), Ring3 = schedule_resize_transfer(Ring2, - Source1, - TargetIdx2), + Source1, + TargetIdx2), Ring4 = resize_transfer_complete(Ring3, - Source1, - Target1, - fake_vnode), + Source1, + Target1, + fake_vnode), ?assertEqual({TargetIdx2, a}, - (awaiting_resize_transfer(Ring4, Source1, fake_vnode))), + (awaiting_resize_transfer(Ring4, Source1, fake_vnode))), ?assertEqual(awaiting, - (resize_transfer_status(Ring4, - Source1, - {TargetIdx2, a}, - fake_vnode))), + (resize_transfer_status(Ring4, + Source1, + {TargetIdx2, a}, + fake_vnode))), ?assertEqual(complete, - (resize_transfer_status(Ring4, - Source1, - Target1, - fake_vnode))), + (resize_transfer_status(Ring4, + Source1, + Target1, + fake_vnode))), Ring5 = resize_transfer_complete(Ring4, - Source1, - {TargetIdx2, a}, - fake_vnode), + Source1, + {TargetIdx2, a}, + fake_vnode), {_, '$resize', Status1} = next_owner(Ring5, - 0, - fake_vnode), + 0, + fake_vnode), ?assertEqual(complete, Status1), Ring6 = resize_transfer_complete(Ring5, - Source1, - {TargetIdx2, a}, - other_vnode), + Source1, + {TargetIdx2, a}, + other_vnode), Ring7 = resize_transfer_complete(Ring6, - Source1, - Target1, - other_vnode), + Source1, + Target1, + other_vnode), {_, '$resize', Status2} = next_owner(Ring7, - 0, - fake_vnode), + 0, + fake_vnode), ?assertEqual(complete, Status2), {_, '$resize', Status3} = next_owner(Ring7, - 0, - other_vnode), + 0, + other_vnode), ?assertEqual(complete, Status3), {_, '$resize', complete} = next_owner(Ring7, 0). valid_resize(Ring0, Ring1) -> lists:foreach(fun ({Idx, Owner}) -> - case lists:keyfind(Idx, 1, all_owners(Ring0)) of - false -> - ?assertEqual('$dummyhost@resized', Owner); - {Idx, OrigOwner} -> ?assertEqual(OrigOwner, Owner) - end - end, - all_owners(Ring1)). + case lists:keyfind(Idx, 1, all_owners(Ring0)) of + false -> + ?assertEqual('$dummyhost@resized', Owner); + {Idx, OrigOwner} -> ?assertEqual(OrigOwner, Owner) + end + end, + all_owners(Ring1)). -endif. diff --git a/src/riak_core_ring_events.erl b/src/riak_core_ring_events.erl index d5257418e..d2bdfe359 100644 --- a/src/riak_core_ring_events.erl +++ b/src/riak_core_ring_events.erl @@ -25,24 +25,24 @@ %% API -export([start_link/0, - add_handler/2, - add_sup_handler/2, - add_guarded_handler/2, - add_callback/1, - add_sup_callback/1, - add_guarded_callback/1, - ring_update/1, - force_update/0, - ring_sync_update/1, - force_sync_update/0]). 
+ add_handler/2, + add_sup_handler/2, + add_guarded_handler/2, + add_callback/1, + add_sup_callback/1, + add_guarded_callback/1, + ring_update/1, + force_update/0, + ring_sync_update/1, + force_sync_update/0]). %% gen_event callbacks -export([init/1, - handle_event/2, - handle_call/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_event/2, + handle_call/2, + handle_info/2, + terminate/2, + code_change/3]). -record(state, {callback}). @@ -60,23 +60,23 @@ add_sup_handler(Handler, Args) -> add_guarded_handler(Handler, Args) -> riak_core:add_guarded_event_handler(?MODULE, - Handler, - Args). + Handler, + Args). add_callback(Fn) when is_function(Fn) -> gen_event:add_handler(?MODULE, - {?MODULE, make_ref()}, - [Fn]). + {?MODULE, make_ref()}, + [Fn]). add_sup_callback(Fn) when is_function(Fn) -> gen_event:add_sup_handler(?MODULE, - {?MODULE, make_ref()}, - [Fn]). + {?MODULE, make_ref()}, + [Fn]). add_guarded_callback(Fn) when is_function(Fn) -> riak_core:add_guarded_event_handler(?MODULE, - {?MODULE, make_ref()}, - [Fn]). + {?MODULE, make_ref()}, + [Fn]). force_update() -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), diff --git a/src/riak_core_ring_handler.erl b/src/riak_core_ring_handler.erl index 645920ff3..6ca16cfae 100644 --- a/src/riak_core_ring_handler.erl +++ b/src/riak_core_ring_handler.erl @@ -20,11 +20,11 @@ %% gen_event callbacks -export([init/1, - handle_event/2, - handle_call/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_event/2, + handle_call/2, + handle_info/2, + terminate/2, + code_change/3]). -export([ensure_vnodes_started/1]). @@ -59,65 +59,65 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. ensure_vnodes_started(Ring) -> case riak_core:vnode_modules() of - [] -> ok; - AppMods -> - case ensure_vnodes_started(AppMods, Ring, []) of - [] -> - Ready = riak_core_ring:ring_ready(Ring), - FutureIndices = riak_core_ring:future_indices(Ring, - node()), - Status = riak_core_ring:member_status(Ring, node()), - case {Ready, FutureIndices, Status} of - {true, [], leaving} -> - case ready_to_exit(AppMods) of - true -> - exit_ring_trans(), - maybe_shutdown(Ring); - false -> ok - end; - {_, _, invalid} -> - riak_core_ring_manager:refresh_my_ring(); - {_, _, exiting} -> - %% Deliberately do nothing. - ok; - {_, _, _} -> ok - end; - _ -> ok - end + [] -> ok; + AppMods -> + case ensure_vnodes_started(AppMods, Ring, []) of + [] -> + Ready = riak_core_ring:ring_ready(Ring), + FutureIndices = riak_core_ring:future_indices(Ring, + node()), + Status = riak_core_ring:member_status(Ring, node()), + case {Ready, FutureIndices, Status} of + {true, [], leaving} -> + case ready_to_exit(AppMods) of + true -> + exit_ring_trans(), + maybe_shutdown(Ring); + false -> ok + end; + {_, _, invalid} -> + riak_core_ring_manager:refresh_my_ring(); + {_, _, exiting} -> + %% Deliberately do nothing. + ok; + {_, _, _} -> ok + end; + _ -> ok + end end. %% Shutdown if we are the only node in the cluster maybe_shutdown(Ring) -> case riak_core_ring:random_other_node(Ring) of - no_node -> riak_core_ring_manager:refresh_my_ring(); - _ -> ok + no_node -> riak_core_ring_manager:refresh_my_ring(); + _ -> ok end. exit_ring_trans() -> riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:exit_member(node(), - Ring2, - node()), - {new_ring, Ring3} - end, - []). + Ring3 = + riak_core_ring:exit_member(node(), + Ring2, + node()), + {new_ring, Ring3} + end, + []). 
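exit_ring_trans/0 above is a typical use of the ring_trans/2 contract implemented in riak_core_ring_manager later in this patch: the fun receives the current ring and the caller's arguments, and returns {new_ring, R}, {set_only, R}, {reconciled_ring, R}, ignore, or {ignore, Reason}. A minimal sketch of another transition under the same contract (rename_cluster/1 is an invented name, shown for illustration only):

rename_cluster(NewName) ->
    %% {new_ring, R} persists R, gossips it, and returns {ok, R}.
    riak_core_ring_manager:ring_trans(
        fun (Ring, _Args) ->
                {new_ring, riak_core_ring:set_cluster_name(Ring, NewName)}
        end,
        []).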
ready_to_exit([]) -> true; ready_to_exit([{_App, Mod} | AppMods]) -> case erlang:function_exported(Mod, ready_to_exit, 0) - andalso not Mod:ready_to_exit() - of - true -> false; - false -> ready_to_exit(AppMods) + andalso not Mod:ready_to_exit() + of + true -> false; + false -> ready_to_exit(AppMods) end. ensure_vnodes_started([], _Ring, Acc) -> lists:flatten(Acc); ensure_vnodes_started([{App, Mod} | T], Ring, Acc) -> ensure_vnodes_started(T, - Ring, - [ensure_vnodes_started({App, Mod}, Ring) | Acc]). + Ring, + [ensure_vnodes_started({App, Mod}, Ring) | Acc]). ensure_vnodes_started({App, Mod}, Ring) -> Startable = startable_vnodes(Mod, Ring), @@ -127,56 +127,56 @@ ensure_vnodes_started({App, Mod}, Ring) -> %% (needed to support those vnodes). The hack does not fix %% that dependency: internal techdebt todo list #A7 does. spawn_link(fun () -> - %% Use a registered name as a lock to prevent the same - %% vnode module from being started twice. - ModList = atom_to_list(Mod), - RegName = "riak_core_ring_handler_ensure_" ++ ModList, - try erlang:register(list_to_atom(RegName), self()) catch - error:badarg -> exit(normal) - end, - %% Let the app finish starting... - ok = riak_core:wait_for_application(App), - %% Start the vnodes. - HasStartVnodes = lists:member({start_vnodes, 1}, - Mod:module_info(exports)), - case HasStartVnodes of - true -> Mod:start_vnodes(Startable); - false -> [Mod:start_vnode(I) || I <- Startable] - end, - %% Mark the service as up. - SupName = list_to_atom(atom_to_list(App) ++ "_sup"), - SupPid = erlang:whereis(SupName), - case riak_core:health_check(App) of - undefined -> - riak_core_node_watcher:service_up(App, SupPid); - HealthMFA -> - riak_core_node_watcher:service_up(App, - SupPid, - HealthMFA) - end, - exit(normal) - end), + %% Use a registered name as a lock to prevent the same + %% vnode module from being started twice. + ModList = atom_to_list(Mod), + RegName = "riak_core_ring_handler_ensure_" ++ ModList, + try erlang:register(list_to_atom(RegName), self()) catch + error:badarg -> exit(normal) + end, + %% Let the app finish starting... + ok = riak_core:wait_for_application(App), + %% Start the vnodes. + HasStartVnodes = lists:member({start_vnodes, 1}, + Mod:module_info(exports)), + case HasStartVnodes of + true -> Mod:start_vnodes(Startable); + false -> [Mod:start_vnode(I) || I <- Startable] + end, + %% Mark the service as up. + SupName = list_to_atom(atom_to_list(App) ++ "_sup"), + SupPid = erlang:whereis(SupName), + case riak_core:health_check(App) of + undefined -> + riak_core_node_watcher:service_up(App, SupPid); + HealthMFA -> + riak_core_node_watcher:service_up(App, + SupPid, + HealthMFA) + end, + exit(normal) + end), Startable. 
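The try ... erlang:register/2 ... catch sequence above is the usual "registered name as a lock" idiom: only the first process to claim the name proceeds, and every later contender hits badarg and exits quietly. A stripped-down sketch of the pattern (with_named_lock/2 and the example lock name are hypothetical; the fun must run in a throwaway process, since the helper exits when done):

with_named_lock(Name, Fun) ->
    try erlang:register(Name, self()) catch
        error:badarg -> exit(normal)    % lock already held by another process
    end,
    Fun(),
    exit(normal).

%% e.g. spawn_link(fun () -> with_named_lock(my_lock, fun do_once/0) end)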
startable_vnodes(Mod, Ring) -> AllMembers = riak_core_ring:all_members(Ring), case {length(AllMembers), hd(AllMembers) =:= node()} of - {1, true} -> riak_core_ring:my_indices(Ring); - _ -> - {ok, ModExcl} = - riak_core_handoff_manager:get_exclusions(Mod), - Excl = ModExcl -- - riak_core_ring:disowning_indices(Ring, node()), - case riak_core_ring:random_other_index(Ring, Excl) of - no_indices -> - case length(Excl) =:= - riak_core_ring:num_partitions(Ring) - of - true -> []; - false -> riak_core_ring:my_indices(Ring) - end; - RO -> [RO | riak_core_ring:my_indices(Ring)] - end + {1, true} -> riak_core_ring:my_indices(Ring); + _ -> + {ok, ModExcl} = + riak_core_handoff_manager:get_exclusions(Mod), + Excl = ModExcl -- + riak_core_ring:disowning_indices(Ring, node()), + case riak_core_ring:random_other_index(Ring, Excl) of + no_indices -> + case length(Excl) =:= + riak_core_ring:num_partitions(Ring) + of + true -> []; + false -> riak_core_ring:my_indices(Ring) + end; + RO -> [RO | riak_core_ring:my_indices(Ring)] + end end. maybe_start_vnode_proxies(Ring) -> @@ -185,30 +185,30 @@ maybe_start_vnode_proxies(Ring) -> FutureSize = riak_core_ring:future_num_partitions(Ring), Larger = Size < FutureSize, case Larger of - true -> - FutureIdxs = - riak_core_ring:all_owners(riak_core_ring:future_ring(Ring)), - _ = [riak_core_vnode_proxy_sup:start_proxy(Mod, Idx) - || {Idx, _} <- FutureIdxs, Mod <- Mods], - ok; - false -> ok + true -> + FutureIdxs = + riak_core_ring:all_owners(riak_core_ring:future_ring(Ring)), + _ = [riak_core_vnode_proxy_sup:start_proxy(Mod, Idx) + || {Idx, _} <- FutureIdxs, Mod <- Mods], + ok; + false -> ok end. maybe_stop_vnode_proxies(Ring) -> Mods = [M || {_, M} <- riak_core:vnode_modules()], case riak_core_ring:pending_changes(Ring) of - [] -> - Idxs = [{I, M} - || {I, _} <- riak_core_ring:all_owners(Ring), - M <- Mods], - ProxySpecs = - supervisor:which_children(riak_core_vnode_proxy_sup), - Running = [{I, M} - || {{M, I}, _, _, _} <- ProxySpecs, - lists:member(M, Mods)], - ToShutdown = Running -- Idxs, - _ = [riak_core_vnode_proxy_sup:stop_proxy(M, I) - || {I, M} <- ToShutdown], - ok; - _ -> ok + [] -> + Idxs = [{I, M} + || {I, _} <- riak_core_ring:all_owners(Ring), + M <- Mods], + ProxySpecs = + supervisor:which_children(riak_core_vnode_proxy_sup), + Running = [{I, M} + || {{M, I}, _, _, _} <- ProxySpecs, + lists:member(M, Mods)], + ToShutdown = Running -- Idxs, + _ = [riak_core_vnode_proxy_sup:stop_proxy(M, I) + || {I, M} <- ToShutdown], + ok; + _ -> ok end. diff --git a/src/riak_core_ring_manager.erl b/src/riak_core_ring_manager.erl index 05cf4134b..ee970b157 100644 --- a/src/riak_core_ring_manager.erl +++ b/src/riak_core_ring_manager.erl @@ -62,33 +62,33 @@ -behaviour(gen_server). -export([start_link/0, - start_link/1, - get_my_ring/0, - get_raw_ring/0, - get_raw_ring_chashbin/0, - get_chash_bin/0, - get_ring_id/0, - get_bucket_meta/1, - refresh_my_ring/0, - refresh_ring/2, - set_my_ring/1, - write_ringfile/0, - prune_ringfiles/0, - read_ringfile/1, - find_latest_ringfile/0, - force_update/0, - do_write_ringfile/1, - ring_trans/2, - run_fixups/3, - set_cluster_name/1, - is_stable_ring/0]). + start_link/1, + get_my_ring/0, + get_raw_ring/0, + get_raw_ring_chashbin/0, + get_chash_bin/0, + get_ring_id/0, + get_bucket_meta/1, + refresh_my_ring/0, + refresh_ring/2, + set_my_ring/1, + write_ringfile/0, + prune_ringfiles/0, + read_ringfile/1, + find_latest_ringfile/0, + force_update/0, + do_write_ringfile/1, + ring_trans/2, + run_fixups/3, + set_cluster_name/1, + is_stable_ring/0]). 
-export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -ifdef(TEST). @@ -97,12 +97,12 @@ -endif. -record(state, - {mode, raw_ring, ring_changed_time, inactivity_timer}). + {mode, raw_ring, ring_changed_time, inactivity_timer}). -export([setup_ets/1, - cleanup_ets/1, - set_ring_global/1, - promote_ring/0]). + cleanup_ets/1, + set_ring_global/1, + promote_ring/0]). %% For EUnit testing @@ -122,51 +122,51 @@ start_link() -> gen_server:start_link({local, ?MODULE}, - ?MODULE, - [live], - []). + ?MODULE, + [live], + []). %% Testing entry point start_link(test) -> gen_server:start_link({local, ?MODULE}, - ?MODULE, - [test], - []). + ?MODULE, + [test], + []). -spec get_my_ring() -> {ok, - riak_core_ring:riak_core_ring()} | - {error, any()}. + riak_core_ring:riak_core_ring()} | + {error, any()}. get_my_ring() -> Ring = case persistent_term:get(?RING_KEY, undefined) of - ets -> - case ets:lookup(?ETS, ring) of - [{_, RingETS}] -> RingETS; - _ -> undefined - end; - RingMochi -> RingMochi - end, + ets -> + case ets:lookup(?ETS, ring) of + [{_, RingETS}] -> RingETS; + _ -> undefined + end; + RingMochi -> RingMochi + end, case Ring of - Ring when is_tuple(Ring) -> {ok, Ring}; - undefined -> {error, no_ring} + Ring when is_tuple(Ring) -> {ok, Ring}; + undefined -> {error, no_ring} end. get_raw_ring() -> try Ring = ets:lookup_element(?ETS, raw_ring, 2), - {ok, Ring} + {ok, Ring} catch - _:_ -> gen_server:call(?MODULE, get_raw_ring, infinity) + _:_ -> gen_server:call(?MODULE, get_raw_ring, infinity) end. get_raw_ring_chashbin() -> try Ring = ets:lookup_element(?ETS, raw_ring, 2), - {ok, CHBin} = get_chash_bin(), - {ok, Ring, CHBin} + {ok, CHBin} = get_chash_bin(), + {ok, Ring, CHBin} catch - _:_ -> - gen_server:call(?MODULE, - get_raw_ring_chashbin, - infinity) + _:_ -> + gen_server:call(?MODULE, + get_raw_ring_chashbin, + infinity) end. %% @spec refresh_my_ring() -> ok @@ -175,7 +175,7 @@ refresh_my_ring() -> refresh_ring(Node, ClusterName) -> gen_server:cast({?MODULE, Node}, - {refresh_my_ring, ClusterName}). + {refresh_my_ring, ClusterName}). %% @spec set_my_ring(riak_core_ring:riak_core_ring()) -> ok set_my_ring(Ring) -> @@ -183,8 +183,8 @@ set_my_ring(Ring) -> get_ring_id() -> case ets:lookup(?ETS, id) of - [{_, Id}] -> Id; - _ -> {0, 0} + [{_, Id}] -> Id; + _ -> {0, 0} end. %% @doc Return metadata for the given bucket. If a bucket @@ -198,16 +198,16 @@ get_bucket_meta({_Type, _Name} = Bucket) -> riak_core_bucket:get_bucket(Bucket); get_bucket_meta(Bucket) -> case ets:lookup(?ETS, {bucket, Bucket}) of - [] -> undefined; - [{_, undefined}] -> undefined; - [{_, Meta}] -> {ok, Meta} + [] -> undefined; + [{_, undefined}] -> undefined; + [{_, Meta}] -> {ok, Meta} end. %% @doc Return the {@link chashbin} generated from the current ring get_chash_bin() -> case ets:lookup(?ETS, chashbin) of - [{chashbin, CHBin}] -> {ok, CHBin}; - _ -> {error, no_ring} + [{chashbin, CHBin}] -> {ok, CHBin}; + _ -> {error, no_ring} end. %% @spec write_ringfile() -> ok @@ -216,13 +216,13 @@ write_ringfile() -> ring_trans(Fun, Args) -> gen_server:call(?MODULE, - {ring_trans, Fun, Args}, - infinity). + {ring_trans, Fun, Args}, + infinity). set_cluster_name(Name) -> gen_server:call(?MODULE, - {set_cluster_name, Name}, - infinity). + {set_cluster_name, Name}, + infinity). is_stable_ring() -> gen_server:call(?MODULE, is_stable_ring, infinity). 
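Note that get_my_ring/0 can legitimately return {error, no_ring} early in start-up, before the first ring has been installed in persistent_term or ETS, so callers should handle both shapes. A minimal usage sketch (ring_size_or_zero/0 is an invented helper):

ring_size_or_zero() ->
    case riak_core_ring_manager:get_my_ring() of
        {ok, Ring} -> riak_core_ring:num_partitions(Ring);
        {error, no_ring} -> 0
    end.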
@@ -231,122 +231,122 @@ is_stable_ring() -> %% ring in a manner that will trigger reconciliation on gossip. force_update() -> ring_trans(fun (Ring, _) -> - NewRing = riak_core_ring:update_member_meta(node(), - Ring, - node(), - unused, - erlang:timestamp()), - {new_ring, NewRing} - end, - []), + NewRing = riak_core_ring:update_member_meta(node(), + Ring, + node(), + unused, + erlang:timestamp()), + {new_ring, NewRing} + end, + []), ok. do_write_ringfile(Ring) -> case ring_dir() of - "" -> nop; - Dir -> - {{Year, Month, Day}, {Hour, Minute, Second}} = - calendar:universal_time(), - TS = - io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - [Year, Month, Day, Hour, Minute, Second]), - Cluster = application:get_env(riak_core, - cluster_name, - undefined), - FN = Dir ++ "/riak_core_ring." ++ Cluster ++ TS, - do_write_ringfile(Ring, FN) + "" -> nop; + Dir -> + {{Year, Month, Day}, {Hour, Minute, Second}} = + calendar:universal_time(), + TS = + io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + [Year, Month, Day, Hour, Minute, Second]), + Cluster = application:get_env(riak_core, + cluster_name, + undefined), + FN = Dir ++ "/riak_core_ring." ++ Cluster ++ TS, + do_write_ringfile(Ring, FN) end. do_write_ringfile(Ring, FN) -> ok = filelib:ensure_dir(FN), try ok = riak_core_util:replace_file(FN, - term_to_binary(Ring)) + term_to_binary(Ring)) catch - _:Err -> - logger:error("Unable to write ring to \"~s\" - ~p\n", - [FN, Err]), - {error, Err} + _:Err -> + logger:error("Unable to write ring to \"~s\" - ~p\n", + [FN, Err]), + {error, Err} end. %% @spec find_latest_ringfile() -> string() find_latest_ringfile() -> Dir = ring_dir(), case file:list_dir(Dir) of - {ok, Filenames} -> - Cluster = application:get_env(riak_core, - cluster_name, - undefined), - Timestamps = [list_to_integer(TS) - || {"riak_core_ring", C1, TS} - <- [list_to_tuple(string:tokens(FN, ".")) - || FN <- Filenames], - C1 =:= Cluster], - SortedTimestamps = - lists:reverse(lists:sort(Timestamps)), - case SortedTimestamps of - [Latest | _] -> - {ok, - Dir ++ - "/riak_core_ring." ++ - Cluster ++ "." ++ integer_to_list(Latest)}; - _ -> {error, not_found} - end; - {error, Reason} -> {error, Reason} + {ok, Filenames} -> + Cluster = application:get_env(riak_core, + cluster_name, + undefined), + Timestamps = [list_to_integer(TS) + || {"riak_core_ring", C1, TS} + <- [list_to_tuple(string:tokens(FN, ".")) + || FN <- Filenames], + C1 =:= Cluster], + SortedTimestamps = + lists:reverse(lists:sort(Timestamps)), + case SortedTimestamps of + [Latest | _] -> + {ok, + Dir ++ + "/riak_core_ring." ++ + Cluster ++ "." ++ integer_to_list(Latest)}; + _ -> {error, not_found} + end; + {error, Reason} -> {error, Reason} end. %% @spec read_ringfile(string()) -> riak_core_ring:riak_core_ring() | {error, any()} read_ringfile(RingFile) -> case file:read_file(RingFile) of - {ok, Binary} -> binary_to_term(Binary); - {error, Reason} -> {error, Reason} + {ok, Binary} -> binary_to_term(Binary); + {error, Reason} -> {error, Reason} end. 
%% @spec prune_ringfiles() -> ok | {error, Reason} prune_ringfiles() -> case ring_dir() of - "" -> ok; - Dir -> - Cluster = application:get_env(riak_core, - cluster_name, - undefined), - case file:list_dir(Dir) of - {error, enoent} -> ok; - {error, Reason} -> {error, Reason}; - {ok, []} -> ok; - {ok, Filenames} -> - Timestamps = [TS - || {"riak_core_ring", C1, TS} - <- [list_to_tuple(string:tokens(FN, - ".")) - || FN <- Filenames], - C1 =:= Cluster], - if Timestamps /= [] -> - %% there are existing ring files - TSPat = [io_lib:fread("~4d~2d~2d~2d~2d~2d", TS) - || TS <- Timestamps], - TSL = lists:reverse(lists:sort([TS - || {ok, TS, []} - <- TSPat])), - Keep = prune_list(TSL), - KeepTSs = - [lists:flatten(io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - K)) - || K <- Keep], - DelFNs = [Dir ++ "/" ++ FN - || FN <- Filenames, - lists:all(fun (TS) -> - string:str(FN, TS) =:= - 0 - end, - KeepTSs)], - _ = [file:delete(DelFN) || DelFN <- DelFNs], - ok; - true -> - %% directory wasn't empty, but there are no ring - %% files in it - ok - end - end + "" -> ok; + Dir -> + Cluster = application:get_env(riak_core, + cluster_name, + undefined), + case file:list_dir(Dir) of + {error, enoent} -> ok; + {error, Reason} -> {error, Reason}; + {ok, []} -> ok; + {ok, Filenames} -> + Timestamps = [TS + || {"riak_core_ring", C1, TS} + <- [list_to_tuple(string:tokens(FN, + ".")) + || FN <- Filenames], + C1 =:= Cluster], + if Timestamps /= [] -> + %% there are existing ring files + TSPat = [io_lib:fread("~4d~2d~2d~2d~2d~2d", TS) + || TS <- Timestamps], + TSL = lists:reverse(lists:sort([TS + || {ok, TS, []} + <- TSPat])), + Keep = prune_list(TSL), + KeepTSs = + [lists:flatten(io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + K)) + || K <- Keep], + DelFNs = [Dir ++ "/" ++ FN + || FN <- Filenames, + lists:all(fun (TS) -> + string:str(FN, TS) =:= + 0 + end, + KeepTSs)], + _ = [file:delete(DelFN) || DelFN <- DelFNs], + ok; + true -> + %% directory wasn't empty, but there are no ring + %% files in it + ok + end + end end. -ifdef(TEST). @@ -354,7 +354,7 @@ prune_ringfiles() -> %% @private (only used for test instances) stop() -> try gen_server:call(?MODULE, stop) catch - exit:{noproc, _} -> ok + exit:{noproc, _} -> ok end. -endif. @@ -373,28 +373,28 @@ init([Mode]) -> reload_ring(test) -> riak_core_ring:fresh(16, node()); reload_ring(live) -> case riak_core_ring_manager:find_latest_ringfile() of - {ok, RingFile} -> - case riak_core_ring_manager:read_ringfile(RingFile) of - {error, Reason} -> - logger:critical("Failed to read ring file: ~p", - [riak_core_util:posix_error(Reason)]), - throw({error, Reason}); - Ring -> Ring - end; - {error, not_found} -> - logger:warning("No ring file available."), - riak_core_ring:fresh(); - {error, Reason} -> - logger:critical("Failed to load ring file: ~p", - [riak_core_util:posix_error(Reason)]), - throw({error, Reason}) + {ok, RingFile} -> + case riak_core_ring_manager:read_ringfile(RingFile) of + {error, Reason} -> + logger:critical("Failed to read ring file: ~p", + [riak_core_util:posix_error(Reason)]), + throw({error, Reason}); + Ring -> Ring + end; + {error, not_found} -> + logger:warning("No ring file available."), + riak_core_ring:fresh(); + {error, Reason} -> + logger:critical("Failed to load ring file: ~p", + [riak_core_util:posix_error(Reason)]), + throw({error, Reason}) end. 
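do_write_ringfile/1, find_latest_ringfile/0 and prune_ringfiles/0 all agree on one file-name layout: "riak_core_ring." ++ ClusterName ++ "." ++ a 14-digit UTC timestamp. A sketch of producing a matching name, assuming the same format string the writer uses above (the pruner parses it back with io_lib:fread("~4d~2d~2d~2d~2d~2d", ...)):

example_ring_filename(Cluster) ->
    {{Y, Mo, D}, {H, Mi, S}} = calendar:universal_time(),
    lists:flatten(io_lib:format("riak_core_ring.~s.~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B",
                                [Cluster, Y, Mo, D, H, Mi, S])).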
handle_call(get_raw_ring, _From, - #state{raw_ring = Ring} = State) -> + #state{raw_ring = Ring} = State) -> {reply, {ok, Ring}, State}; handle_call(get_raw_ring_chashbin, _From, - #state{raw_ring = Ring} = State) -> + #state{raw_ring = Ring} = State) -> {ok, CHBin} = get_chash_bin(), {reply, {ok, Ring, CHBin}, State}; handle_call({set_my_ring, Ring}, _From, State) -> @@ -411,29 +411,29 @@ handle_call(refresh_my_ring, _From, State) -> riak_core:stop("node removal completed, exiting."), {reply, ok, State2}; handle_call({ring_trans, Fun, Args}, _From, - State = #state{raw_ring = Ring}) -> + State = #state{raw_ring = Ring}) -> case catch Fun(Ring, Args) of - {new_ring, NewRing} -> - State2 = prune_write_notify_ring(NewRing, State), - riak_core_gossip:random_recursive_gossip(NewRing), - {reply, {ok, NewRing}, State2}; - {set_only, NewRing} -> - State2 = prune_write_ring(NewRing, State), - {reply, {ok, NewRing}, State2}; - {reconciled_ring, NewRing} -> - State2 = prune_write_notify_ring(NewRing, State), - riak_core_gossip:recursive_gossip(NewRing), - {reply, {ok, NewRing}, State2}; - ignore -> {reply, not_changed, State}; - {ignore, Reason} -> - {reply, {not_changed, Reason}, State}; - Other -> - logger:error("ring_trans: invalid return value: ~p", - [Other]), - {reply, not_changed, State} + {new_ring, NewRing} -> + State2 = prune_write_notify_ring(NewRing, State), + riak_core_gossip:random_recursive_gossip(NewRing), + {reply, {ok, NewRing}, State2}; + {set_only, NewRing} -> + State2 = prune_write_ring(NewRing, State), + {reply, {ok, NewRing}, State2}; + {reconciled_ring, NewRing} -> + State2 = prune_write_notify_ring(NewRing, State), + riak_core_gossip:recursive_gossip(NewRing), + {reply, {ok, NewRing}, State2}; + ignore -> {reply, not_changed, State}; + {ignore, Reason} -> + {reply, {not_changed, Reason}, State}; + Other -> + logger:error("ring_trans: invalid return value: ~p", + [Other]), + {reply, not_changed, State} end; handle_call({set_cluster_name, Name}, _From, - State = #state{raw_ring = Ring}) -> + State = #state{raw_ring = Ring}) -> NewRing = riak_core_ring:set_cluster_name(Ring, Name), State2 = prune_write_notify_ring(NewRing, State), {reply, ok, State2}; @@ -446,31 +446,31 @@ handle_call(stop, _From, State) -> handle_cast({refresh_my_ring, ClusterName}, State) -> {ok, Ring} = get_my_ring(), case riak_core_ring:cluster_name(Ring) of - ClusterName -> handle_cast(refresh_my_ring, State); - _ -> {noreply, State} + ClusterName -> handle_cast(refresh_my_ring, State); + _ -> {noreply, State} end; handle_cast(refresh_my_ring, State) -> {_, _, State2} = handle_call(refresh_my_ring, - undefined, - State), + undefined, + State), {noreply, State2}; handle_cast(write_ringfile, test) -> {noreply, test}; handle_cast(write_ringfile, - State = #state{raw_ring = Ring}) -> + State = #state{raw_ring = Ring}) -> ok = do_write_ringfile(Ring), {noreply, State}. 
handle_info(inactivity_timeout, State) -> case is_stable_ring(State) of - {true, DeltaMS} -> - logger:debug("Promoting ring after ~p", [DeltaMS]), - promote_ring(), - State2 = State#state{inactivity_timer = undefined}, - {noreply, State2}; - {false, DeltaMS} -> - Remaining = (?PROMOTE_TIMEOUT) - DeltaMS, - State2 = set_timer(Remaining, State), - {noreply, State2} + {true, DeltaMS} -> + logger:debug("Promoting ring after ~p", [DeltaMS]), + promote_ring(), + State2 = State#state{inactivity_timer = undefined}, + {noreply, State2}; + {false, DeltaMS} -> + Remaining = (?PROMOTE_TIMEOUT) - DeltaMS, + State2 = set_timer(Remaining, State), + {noreply, State2} end; handle_info(_Info, State) -> {noreply, State}. @@ -486,71 +486,71 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. ring_dir() -> case application:get_env(riak_core, - ring_state_dir, - undefined) - of - undefined -> - filename:join(application:get_env(riak_core, - platform_data_dir, - "data"), - "ring"); - D -> D + ring_state_dir, + undefined) + of + undefined -> + filename:join(application:get_env(riak_core, + platform_data_dir, + "data"), + "ring"); + D -> D end. prune_list([X | Rest]) -> lists:usort(lists:append([[X], - back(1, X, Rest), - back(2, X, Rest), - back(3, X, Rest), - back(4, X, Rest), - back(5, X, Rest)])). + back(1, X, Rest), + back(2, X, Rest), + back(3, X, Rest), + back(4, X, Rest), + back(5, X, Rest)])). back(_N, _X, []) -> []; back(N, X, [H | T]) -> case lists:nth(N, X) =:= lists:nth(N, H) of - true -> back(N, X, T); - false -> [H] + true -> back(N, X, T); + false -> [H] end. %% @private run_fixups([], _Bucket, BucketProps) -> BucketProps; run_fixups([{App, Fixup} | T], BucketName, - BucketProps) -> + BucketProps) -> BP = try Fixup:fixup(BucketName, BucketProps) of - {ok, NewBucketProps} -> NewBucketProps; - {error, Reason} -> - logger:error("Error while running bucket fixup module " - "~p from application ~p on bucket ~p: " - "~p", - [Fixup, App, BucketName, Reason]), - BucketProps - catch - What:Why -> - logger:error("Crash while running bucket fixup module " - "~p from application ~p on bucket ~p " - ": ~p:~p", - [Fixup, App, BucketName, What, Why]), - BucketProps - end, + {ok, NewBucketProps} -> NewBucketProps; + {error, Reason} -> + logger:error("Error while running bucket fixup module " + "~p from application ~p on bucket ~p: " + "~p", + [Fixup, App, BucketName, Reason]), + BucketProps + catch + What:Why -> + logger:error("Crash while running bucket fixup module " + "~p from application ~p on bucket ~p " + ": ~p:~p", + [Fixup, App, BucketName, What, Why]), + BucketProps + end, run_fixups(T, BucketName, BP). set_ring(Ring, State) -> set_ring_global(Ring), Now = os:timestamp(), State2 = State#state{raw_ring = Ring, - ring_changed_time = Now}, + ring_changed_time = Now}, State3 = maybe_set_timer(?PROMOTE_TIMEOUT, State2), State3. maybe_set_timer(Duration, - State = #state{inactivity_timer = undefined}) -> + State = #state{inactivity_timer = undefined}) -> set_timer(Duration, State); maybe_set_timer(_Duration, State) -> State. set_timer(Duration, State) -> Timer = erlang:send_after(Duration, - self(), - inactivity_timeout), + self(), + inactivity_timeout), State#state{inactivity_timer = Timer}. setup_ets(Mode) -> @@ -558,14 +558,14 @@ setup_ets(Mode) -> %% eunit tests, but is unneeded for normal Riak operation. 
catch ets:delete(?ETS), Access = case Mode of - live -> protected; - test -> public - end, + live -> protected; + test -> public + end, (?ETS) = ets:new(?ETS, - [named_table, Access, {read_concurrency, true}]), + [named_table, Access, {read_concurrency, true}]), Id = reset_ring_id(), ets:insert(?ETS, - [{changes, 0}, {promoted, 0}, {id, Id}]), + [{changes, 0}, {promoted, 0}, {id, Id}]), ok. cleanup_ets(test) -> ets:delete(?ETS). @@ -574,11 +574,11 @@ reset_ring_id() -> %% Maintain ring id epoch using persistent_term to ensure ring id remains %% monotonic even if the riak_core_ring_manager crashes and restarts Epoch = case persistent_term:get(riak_ring_id_epoch, - undefined) - of - undefined -> 0; - Value -> Value - end, + undefined) + of + undefined -> 0; + Value -> Value + end, persistent_term:put(riak_ring_id_epoch, Epoch + 1), {Epoch + 1, 0}. @@ -587,40 +587,40 @@ reset_ring_id() -> %% process. set_ring_global(Ring) -> DefaultProps = case application:get_env(riak_core, - default_bucket_props) - of - {ok, Val} -> Val; - _ -> [] - end, + default_bucket_props) + of + {ok, Val} -> Val; + _ -> [] + end, %% run fixups on the ring before storing it in persistent_term FixedRing = case riak_core:bucket_fixups() of - [] -> Ring; - Fixups -> - Buckets = riak_core_ring:get_buckets(Ring), - lists:foldl(fun (Bucket, AccRing) -> - BucketProps = - riak_core_bucket:get_bucket(Bucket, - Ring), - %% Merge anything in the default properties but not in - %% the bucket's properties. This is to ensure default - %% properties added after the bucket is created are - %% inherited to the bucket. - MergedProps = - riak_core_bucket:merge_props(BucketProps, - DefaultProps), - %% fixup the ring - NewBucketProps = run_fixups(Fixups, - Bucket, - MergedProps), - %% update the bucket in the ring - riak_core_ring:update_meta({bucket, - Bucket}, - NewBucketProps, - AccRing) - end, - Ring, - Buckets) - end, + [] -> Ring; + Fixups -> + Buckets = riak_core_ring:get_buckets(Ring), + lists:foldl(fun (Bucket, AccRing) -> + BucketProps = + riak_core_bucket:get_bucket(Bucket, + Ring), + %% Merge anything in the default properties but not in + %% the bucket's properties. This is to ensure default + %% properties added after the bucket is created are + %% inherited to the bucket. + MergedProps = + riak_core_bucket:merge_props(BucketProps, + DefaultProps), + %% fixup the ring + NewBucketProps = run_fixups(Fixups, + Bucket, + MergedProps), + %% update the bucket in the ring + riak_core_ring:update_meta({bucket, + Bucket}, + NewBucketProps, + AccRing) + end, + Ring, + Buckets) + end, %% Mark ring as tainted to check if it is ever leaked over gossip or %% relied upon for any non-local ring operations. TaintedRing = riak_core_ring:set_tainted(FixedRing), @@ -632,29 +632,29 @@ set_ring_global(Ring) -> %% special meaning in `riak_core_bucket:get_bucket_props/2`. We then %% cleanup these values in a subsequent `ets:match_delete`. 
OldBuckets = ets:select(?ETS, - [{{{bucket, '$1'}, '_'}, [], ['$1']}]), + [{{{bucket, '$1'}, '_'}, [], ['$1']}]), BucketDefaults = [{{bucket, Bucket}, undefined} - || Bucket <- OldBuckets], + || Bucket <- OldBuckets], BucketMeta = [{{bucket, Bucket}, Meta} - || Bucket <- riak_core_ring:get_buckets(TaintedRing), - {ok, Meta} - <- [riak_core_ring:get_meta({bucket, Bucket}, - TaintedRing)]], + || Bucket <- riak_core_ring:get_buckets(TaintedRing), + {ok, Meta} + <- [riak_core_ring:get_meta({bucket, Bucket}, + TaintedRing)]], BucketMeta2 = lists:ukeysort(1, - BucketMeta ++ BucketDefaults), + BucketMeta ++ BucketDefaults), CHBin = - chashbin:create(riak_core_ring:chash(TaintedRing)), + chashbin:create(riak_core_ring:chash(TaintedRing)), {Epoch, Id} = ets:lookup_element(?ETS, id, 2), Actions = [{ring, TaintedRing}, - {raw_ring, Ring}, - {id, {Epoch, Id + 1}}, - {chashbin, CHBin} - | BucketMeta2], + {raw_ring, Ring}, + {id, {Epoch, Id + 1}}, + {chashbin, CHBin} + | BucketMeta2], ets:insert(?ETS, Actions), ets:match_delete(?ETS, {{bucket, '_'}, undefined}), case persistent_term:get(?RING_KEY, undefined) of - ets -> ok; - _ -> persistent_term:put(?RING_KEY, ets) + ets -> ok; + _ -> persistent_term:put(?RING_KEY, ets) end, ok. @@ -670,7 +670,7 @@ prune_write_notify_ring(Ring, State) -> prune_write_ring(Ring, State) -> riak_core_ring:check_tainted(Ring, - "Error: Persisting tainted ring"), + "Error: Persisting tainted ring"), ok = riak_core_ring_manager:prune_ringfiles(), _ = do_write_ringfile(Ring), State2 = set_ring(Ring, State), @@ -678,7 +678,7 @@ prune_write_ring(Ring, State) -> is_stable_ring(#state{ring_changed_time = Then}) -> DeltaUS = erlang:max(0, - timer:now_diff(os:timestamp(), Then)), + timer:now_diff(os:timestamp(), Then)), DeltaMS = DeltaUS div 1000, IsStable = DeltaMS >= (?PROMOTE_TIMEOUT), {IsStable, DeltaMS}. @@ -691,10 +691,10 @@ is_stable_ring(#state{ring_changed_time = Then}) -> back_test() -> X = [1, 2, 3], List1 = [[1, 2, 3], - [4, 2, 3], - [7, 8, 3], - [11, 12, 13], - [1, 2, 3]], + [4, 2, 3], + [7, 8, 3], + [11, 12, 13], + [1, 2, 3]], List2 = [[7, 8, 9], [1, 2, 3]], List3 = [[1, 2, 3]], ?assertEqual([[4, 2, 3]], (back(1, X, List1))), @@ -705,19 +705,19 @@ back_test() -> prune_list_test() -> TSList1 = [[2011, 2, 28, 16, 32, 16], - [2011, 2, 28, 16, 32, 36], - [2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16], - [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 32, 36], + [2011, 2, 28, 16, 30, 27], + [2011, 2, 28, 16, 32, 16], + [2011, 2, 28, 16, 32, 36]], TSList2 = [[2011, 2, 28, 16, 32, 36], - [2011, 2, 28, 16, 31, 16], - [2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16], - [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 31, 16], + [2011, 2, 28, 16, 30, 27], + [2011, 2, 28, 16, 32, 16], + [2011, 2, 28, 16, 32, 36]], PrunedList1 = [[2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16]], + [2011, 2, 28, 16, 32, 16]], PrunedList2 = [[2011, 2, 28, 16, 31, 16], - [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 32, 36]], ?assertEqual(PrunedList1, (prune_list(TSList1))), ?assertEqual(PrunedList2, (prune_list(TSList2))). @@ -728,8 +728,8 @@ set_ring_global_test() -> set_ring_global(Ring), promote_ring(), ?assert((riak_core_ring:nearly_equal(Ring, - persistent_term:get(?RING_KEY, - undefined)))), + persistent_term:get(?RING_KEY, + undefined)))), cleanup_ets(test). 
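Since run_fixups/3 and set_ring_global/1 above define the fixup contract only implicitly, here is a minimal sketch of a conforming bucket fixup module (the module name and the property it forces are hypothetical): it must export fixup/2 and return {ok, NewProps} or {error, Reason}; a crash is caught, logged, and the original props are kept.

-module(my_bucket_fixup).    %% hypothetical
-export([fixup/2]).

fixup(_BucketName, BucketProps) ->
    %% Force an n_val of 3 onto every bucket; purely illustrative.
    {ok, lists:keystore(n_val, 1, BucketProps, {n_val, 3})}.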
set_my_ring_test() -> @@ -744,33 +744,33 @@ set_my_ring_test() -> refresh_my_ring_test() -> {spawn, fun () -> - setup_ets(test), - Core_Settings = [{ring_creation_size, 4}, - {ring_state_dir, "_build/test/tmp"}, - {cluster_name, "test"}], - [begin - put({?MODULE, AppKey}, - application:get_env(riak_core, AppKey, undefined)), - ok = application:set_env(riak_core, AppKey, Val) - end - || {AppKey, Val} <- Core_Settings], - stop_core_processes(), - riak_core_ring_events:start_link(), - riak_core_ring_manager:start_link(test), - riak_core_vnode_sup:start_link(), - riak_core_vnode_master:start_link(riak_core_vnode), - riak_core_test_util:setup_mockring1(), - ?assertEqual(ok, - (riak_core_ring_manager:refresh_my_ring())), - stop_core_processes(), - %% Cleanup the ring file created for this test - {ok, RingFile} = find_latest_ringfile(), - file:delete(RingFile), - [ok = application:set_env(riak_core, - AppKey, - get({?MODULE, AppKey})) - || {AppKey, _Val} <- Core_Settings], - ok + setup_ets(test), + Core_Settings = [{ring_creation_size, 4}, + {ring_state_dir, "_build/test/tmp"}, + {cluster_name, "test"}], + [begin + put({?MODULE, AppKey}, + application:get_env(riak_core, AppKey, undefined)), + ok = application:set_env(riak_core, AppKey, Val) + end + || {AppKey, Val} <- Core_Settings], + stop_core_processes(), + riak_core_ring_events:start_link(), + riak_core_ring_manager:start_link(test), + riak_core_vnode_sup:start_link(), + riak_core_vnode_master:start_link(riak_core_vnode), + riak_core_test_util:setup_mockring1(), + ?assertEqual(ok, + (riak_core_ring_manager:refresh_my_ring())), + stop_core_processes(), + %% Cleanup the ring file created for this test + {ok, RingFile} = find_latest_ringfile(), + file:delete(RingFile), + [ok = application:set_env(riak_core, + AppKey, + get({?MODULE, AppKey})) + || {AppKey, _Val} <- Core_Settings], + ok end}. 
stop_core_processes() -> @@ -792,21 +792,21 @@ do_write_ringfile_test() -> %% Check happy path GenR = fun (Name) -> riak_core_ring:fresh(64, Name) end, ?assertEqual(ok, - (do_write_ringfile(GenR(happy), ?TEST_RINGFILE))), + (do_write_ringfile(GenR(happy), ?TEST_RINGFILE))), %% errors expected error_logger:tty(false), %% Check write fails (create .tmp file with no write perms) ok = file:write_file(?TMP_RINGFILE, - <<"no write for you">>), + <<"no write for you">>), ok = file:change_mode(?TMP_RINGFILE, 8#00444), ?assertMatch({error, _}, - (do_write_ringfile(GenR(tmp_perms), ?TEST_RINGFILE))), + (do_write_ringfile(GenR(tmp_perms), ?TEST_RINGFILE))), ok = file:change_mode(?TMP_RINGFILE, 8#00644), ok = file:delete(?TMP_RINGFILE), %% Check rename fails ok = file:change_mode(?TEST_RINGDIR, 8#00444), ?assertMatch({error, _}, - (do_write_ringfile(GenR(ring_perms), ?TEST_RINGFILE))), + (do_write_ringfile(GenR(ring_perms), ?TEST_RINGFILE))), ok = file:change_mode(?TEST_RINGDIR, 8#00755), error_logger:tty(true), %% Cleanup the ring file created for this test @@ -819,13 +819,13 @@ is_stable_ring_test() -> Within = {A, B - TimeoutSecs div 2, C}, Outside = {A, B - (TimeoutSecs + 1), C}, ?assertMatch({true, _}, - (is_stable_ring(#state{ring_changed_time = - {0, 0, 0}}))), + (is_stable_ring(#state{ring_changed_time = + {0, 0, 0}}))), ?assertMatch({true, _}, - (is_stable_ring(#state{ring_changed_time = Outside}))), + (is_stable_ring(#state{ring_changed_time = Outside}))), ?assertMatch({false, _}, - (is_stable_ring(#state{ring_changed_time = Within}))), + (is_stable_ring(#state{ring_changed_time = Within}))), ?assertMatch({false, _}, - (is_stable_ring(#state{ring_changed_time = Now}))). + (is_stable_ring(#state{ring_changed_time = Now}))). -endif. diff --git a/src/riak_core_ring_util.erl b/src/riak_core_ring_util.erl index 96c68d55b..6138a0d80 100644 --- a/src/riak_core_ring_util.erl +++ b/src/riak_core_ring_util.erl @@ -22,12 +22,12 @@ -module(riak_core_ring_util). -export([assign/2, - check_ring/0, - check_ring/1, - check_ring/2, - hash_to_partition_id/2, - partition_id_to_hash/2, - hash_is_partition_boundary/2]). + check_ring/0, + check_ring/1, + check_ring/2, + hash_to_partition_id/2, + partition_id_to_hash/2, + hash_is_partition_boundary/2]). -ifdef(TEST). @@ -38,11 +38,11 @@ %% @doc Forcibly assign a partition to a specific node assign(Partition, ToNode) -> F = fun (Ring, _) -> - {new_ring, - riak_core_ring:transfer_node(Partition, ToNode, Ring)} - end, + {new_ring, + riak_core_ring:transfer_node(Partition, ToNode, Ring)} + end, {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), + undefined), ok. %% @doc Check the local ring for any preflists that do not satisfy n_val @@ -52,7 +52,7 @@ check_ring() -> check_ring(Ring) -> {ok, Props} = application:get_env(riak_core, - default_bucket_props), + default_bucket_props), {n_val, Nval} = lists:keyfind(n_val, 1, Props), check_ring(Ring, Nval). @@ -60,18 +60,18 @@ check_ring(Ring) -> check_ring(Ring, Nval) -> Preflists = riak_core_ring:all_preflists(Ring, Nval), lists:foldl(fun (PL, Acc) -> - PLNodes = lists:usort([Node || {_, Node} <- PL]), - case length(PLNodes) of - Nval -> Acc; - _ -> ordsets:add_element(PL, Acc) - end - end, - [], - Preflists). + PLNodes = lists:usort([Node || {_, Node} <- PL]), + case length(PLNodes) of + Nval -> Acc; + _ -> ordsets:add_element(PL, Acc) + end + end, + [], + Preflists). -spec hash_to_partition_id(chash:index() | - chash:index_as_int(), - riak_core_ring:ring_size()) -> riak_core_ring:partition_id(). 
+ chash:index_as_int(), + riak_core_ring:ring_size()) -> riak_core_ring:partition_id(). %% @doc Map a key hash (as binary or integer) to a partition ID [0, ring_size) hash_to_partition_id(CHashKey, RingSize) @@ -83,15 +83,15 @@ hash_to_partition_id(CHashInt, RingSize) -> -spec partition_id_to_hash(riak_core_ring:partition_id(), - pos_integer()) -> chash:index_as_int(). + pos_integer()) -> chash:index_as_int(). %% @doc Identify the first key hash (integer form) in a partition ID [0, ring_size) partition_id_to_hash(Id, RingSize) -> Id * chash:ring_increment(RingSize). -spec hash_is_partition_boundary(chash:index() | - chash:index_as_int(), - pos_integer()) -> boolean(). + chash:index_as_int(), + pos_integer()) -> boolean(). %% @doc For user-facing tools, indicate whether a specified hash value %% is a valid "boundary" value (first hash in some partition) @@ -113,55 +113,55 @@ hash_is_partition_boundary(CHashInt, RingSize) -> %% Partition boundaries are reversible. reverse_test() -> IntIndex = riak_core_ring_util:partition_id_to_hash(31, - 32), + 32), HashIndex = <<IntIndex:160>>, ?assertEqual(31, - (riak_core_ring_util:hash_to_partition_id(HashIndex, - 32))), + (riak_core_ring_util:hash_to_partition_id(HashIndex, + 32))), ?assertEqual(0, - (riak_core_ring_util:hash_to_partition_id(<<0:160>>, - 32))). + (riak_core_ring_util:hash_to_partition_id(<<0:160>>, + 32))). %% Index values somewhere in the middle of a partition can be mapped %% to partition IDs. partition_test() -> IntIndex = riak_core_ring_util:partition_id_to_hash(20, - 32) - + chash:ring_increment(32) div 3, + 32) + + chash:ring_increment(32) div 3, HashIndex = <<IntIndex:160>>, ?assertEqual(20, - (riak_core_ring_util:hash_to_partition_id(HashIndex, - 32))). + (riak_core_ring_util:hash_to_partition_id(HashIndex, + 32))). %% Index values divisible by partition size are boundary values, others are not boundary_test() -> BoundaryIndex = - riak_core_ring_util:partition_id_to_hash(15, 32), + riak_core_ring_util:partition_id_to_hash(15, 32), ?assert((riak_core_ring_util:hash_is_partition_boundary(<<BoundaryIndex:160>>, - 32))), + 32))), ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex - + - 32):160>>, - 32))), + + + 32):160>>, + 32))), ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex - - - 32):160>>, - 32))), + - + 32):160>>, + 32))), ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex - + - 1):160>>, - 32))), + + + 1):160>>, + 32))), ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex - - - 1):160>>, - 32))), + - + 1):160>>, + 32))), ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex - + - 2):160>>, - 32))), + + + 2):160>>, + 32))), ?assertNot((riak_core_ring_util:hash_is_partition_boundary(<<(BoundaryIndex - + - 10):160>>, - 32))). + + + 10):160>>, + 32))). -endif. % TEST diff --git a/src/riak_core_send_msg.erl b/src/riak_core_send_msg.erl index d8d904b80..b2bc97086 100644 --- a/src/riak_core_send_msg.erl +++ b/src/riak_core_send_msg.erl @@ -23,9 +23,9 @@ -module(riak_core_send_msg). -export([reply_unreliable/2, - cast_unreliable/2, - send_event_unreliable/2, - bang_unreliable/2]). + cast_unreliable/2, + send_event_unreliable/2, + bang_unreliable/2]). -ifdef(TEST). @@ -36,9 +36,9 @@ -compile({parse_transform, pulse_instrument}). -compile({pulse_replace_module, - [{gen_fsm, pulse_gen_fsm}, - {gen_fsm_compat, pulse_gen_fsm}, - {gen_server, pulse_gen_server}]}). + [{gen_fsm, pulse_gen_fsm}, + {gen_fsm_compat, pulse_gen_fsm}, + {gen_server, pulse_gen_server}]}).
-endif. @@ -53,10 +53,10 @@ cast_unreliable(Dest, Request) -> %% NOTE: We'd peeked inside gen_fsm.erl's guts to see its internals. send_event_unreliable({global, _Name} = GlobalTo, - Event) -> + Event) -> erlang:error({unimplemented_send, GlobalTo, Event}); send_event_unreliable({via, _Mod, _Name} = ViaTo, - Event) -> + Event) -> erlang:error({unimplemented_send, ViaTo, Event}); send_event_unreliable(Name, Event) -> bang_unreliable(Name, {'$gen_event', Event}), diff --git a/src/riak_core_status.erl b/src/riak_core_status.erl index 25d59da85..bb6a7553d 100644 --- a/src/riak_core_status.erl +++ b/src/riak_core_status.erl @@ -22,56 +22,56 @@ -module(riak_core_status). -export([ringready/0, - all_active_transfers/0, - transfers/0, - partitions/2, - ring_status/0]). + all_active_transfers/0, + transfers/0, + partitions/2, + ring_status/0]). -spec ringready() -> {ok, [atom()]} | {error, any()}. ringready() -> case get_rings() of - {[], Rings} -> - {N1, R1} = hd(Rings), - case rings_match(hash_ring(R1), tl(Rings)) of - true -> - Nodes = [N || {N, _} <- Rings], - {ok, Nodes}; - {false, N2} -> {error, {different_owners, N1, N2}} - end; - {Down, _Rings} -> {error, {nodes_down, Down}} + {[], Rings} -> + {N1, R1} = hd(Rings), + case rings_match(hash_ring(R1), tl(Rings)) of + true -> + Nodes = [N || {N, _} <- Rings], + {ok, Nodes}; + {false, N2} -> {error, {different_owners, N1, N2}} + end; + {Down, _Rings} -> {error, {nodes_down, Down}} end. -spec transfers() -> {[atom()], - [{waiting_to_handoff, atom(), integer()} | - {stopped, atom(), integer()}]}. + [{waiting_to_handoff, atom(), integer()} | + {stopped, atom(), integer()}]}. transfers() -> {Down, Rings} = get_rings(), %% Work out which vnodes are running and which partitions they claim F = fun ({N, R}, Acc) -> - {_Pri, Sec, Stopped} = partitions(N, R), - Acc1 = case Sec of - [] -> []; - _ -> [{waiting_to_handoff, N, length(Sec)}] - end, - case Stopped of - [] -> Acc1 ++ Acc; - _ -> Acc1 ++ [{stopped, N, length(Stopped)} | Acc] - end - end, + {_Pri, Sec, Stopped} = partitions(N, R), + Acc1 = case Sec of + [] -> []; + _ -> [{waiting_to_handoff, N, length(Sec)}] + end, + case Stopped of + [] -> Acc1 ++ Acc; + _ -> Acc1 ++ [{stopped, N, length(Stopped)} | Acc] + end + end, {Down, lists:foldl(F, [], Rings)}. %% @doc Produce status for all active transfers in the cluster. -spec all_active_transfers() -> {Xfers :: list(), - Down :: list()}. + Down :: list()}. all_active_transfers() -> {Xfers, Down} = - riak_core_util:rpc_every_member(riak_core_handoff_manager, - status, - [{direction, outbound}], - 5000), + riak_core_util:rpc_every_member(riak_core_handoff_manager, + status, + [{direction, outbound}], + 5000), {Xfers, Down}. ring_status() -> @@ -79,64 +79,64 @@ ring_status() -> %% are running on each node.
{ok, Ring} = riak_core_ring_manager:get_raw_ring(), {AllMods, Down} = - riak_core_util:rpc_every_member_ann(riak_core, - vnode_modules, - [], - 1000), + riak_core_util:rpc_every_member_ann(riak_core, + vnode_modules, + [], + 1000), %% Check if the claimant is running and if it believes the ring is ready Claimant = riak_core_ring:claimant(Ring), case riak_core_util:safe_rpc(Claimant, - riak_core_ring, - ring_ready, - [], - 5000) - of - {badrpc, _} -> - Down2 = lists:usort([Claimant | Down]), - RingReady = undefined; - RingReady -> - Down2 = Down, - RingReady = RingReady + riak_core_ring, + ring_ready, + [], + 5000) + of + {badrpc, _} -> + Down2 = lists:usort([Claimant | Down]), + RingReady = undefined; + RingReady -> + Down2 = Down, + RingReady = RingReady end, %% Get the list of pending ownership changes Changes = riak_core_ring:pending_changes(Ring), %% Group pending changes by (Owner, NextOwner) Merged = lists:foldl(fun ({Idx, - Owner, - NextOwner, - Mods, - Status}, - Acc) -> - orddict:append({Owner, NextOwner}, - {Idx, Mods, Status}, - Acc) - end, - [], - Changes), + Owner, + NextOwner, + Mods, + Status}, + Acc) -> + orddict:append({Owner, NextOwner}, + {Idx, Mods, Status}, + Acc) + end, + [], + Changes), %% For each pending transfer, determine which vnode modules have completed %% handoff and which we are still waiting on. %% Final result is of the form: %% [{Owner, NextOwner}, [{Index, WaitingMods, CompletedMods, Status}]] TransferStatus = orddict:map(fun ({Owner, _}, - Transfers) -> - case orddict:find(Owner, AllMods) of - error -> - [{Idx, down, Mods, Status} - || {Idx, Mods, Status} - <- Transfers]; - {ok, OwnerMods} -> - NodeMods = [Mod - || {_App, Mod} - <- OwnerMods], - [{Idx, - NodeMods -- Mods, - Mods, - Status} - || {Idx, Mods, Status} - <- Transfers] - end - end, - Merged), + Transfers) -> + case orddict:find(Owner, AllMods) of + error -> + [{Idx, down, Mods, Status} + || {Idx, Mods, Status} + <- Transfers]; + {ok, OwnerMods} -> + NodeMods = [Mod + || {_App, Mod} + <- OwnerMods], + [{Idx, + NodeMods -- Mods, + Mods, + Status} + || {Idx, Mods, Status} + <- Transfers] + end + end, + Merged), MarkedDown = riak_core_ring:down_members(Ring), {Claimant, RingReady, @@ -151,13 +151,13 @@ ring_status() -> %% Retrieve the rings for all other nodes by RPC get_rings() -> {RawRings, Down} = - riak_core_util:rpc_every_member(riak_core_ring_manager, - get_my_ring, - [], - 30000), + riak_core_util:rpc_every_member(riak_core_ring_manager, + get_my_ring, + [], + 30000), Rings = - orddict:from_list([{riak_core_ring:owner_node(R), R} - || {ok, R} <- RawRings]), + orddict:from_list([{riak_core_ring:owner_node(R), R} + || {ok, R} <- RawRings]), {lists:sort(Down), Rings}. %% Produce a hash of the 'chash' portion of the ring @@ -168,8 +168,8 @@ hash_ring(R) -> rings_match(_, []) -> true; rings_match(R1hash, [{N2, R2} | Rest]) -> case hash_ring(R2) of - R1hash -> rings_match(R1hash, Rest); - _ -> {false, N2} + R1hash -> rings_match(R1hash, Rest); + _ -> {false, N2} end. 
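For reference, the three result shapes ringready/0 above can produce (node names here are hypothetical):

%% {ok, ['dev1@127.0.0.1', 'dev2@127.0.0.1']}   -- all reachable rings agree
%% {error, {different_owners, N1, N2}}          -- two members' rings diverge
%% {error, {nodes_down, Down}}                  -- some members unreachable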
%% Get a list of active partition numbers - regardless of vnode type @@ -178,18 +178,18 @@ rings_match(R1hash, [{N2, R2} | Rest]) -> active_partitions(Node) -> case riak_core_util:safe_rpc(Node, - riak_core_vnode_manager, - all_vnodes, - [], - 30000) - of - {badrpc, _} -> ordsets:new(); - VNodes -> - lists:foldl(fun ({_, P, _}, Ps) -> - ordsets:add_element(P, Ps) - end, - ordsets:new(), - VNodes) + riak_core_vnode_manager, + all_vnodes, + [], + 30000) + of + {badrpc, _} -> ordsets:new(); + VNodes -> + lists:foldl(fun ({_, P, _}, Ps) -> + ordsets:add_element(P, Ps) + end, + ordsets:new(), + VNodes) end. %% Return a list of active primary partitions, active secondary partitions (to be handed off) @@ -197,7 +197,7 @@ active_partitions(Node) -> partitions(Node, Ring) -> Owners = riak_core_ring:all_owners(Ring), Owned = ordsets:from_list(owned_partitions(Owners, - Node)), + Node)), Active = active_partitions(Node), Stopped = ordsets:subtract(Owned, Active), Secondary = ordsets:subtract(Active, Owned), diff --git a/src/riak_core_sup.erl b/src/riak_core_sup.erl index 880f9819e..a971bab08 100644 --- a/src/riak_core_sup.erl +++ b/src/riak_core_sup.erl @@ -32,15 +32,15 @@ %% Helper macro for declaring children of supervisor -define(CHILD(I, Type, Timeout, Args), - {I, - {I, start_link, Args}, - permanent, - Timeout, - Type, - [I]}). + {I, + {I, start_link, Args}, + permanent, + Timeout, + Type, + [I]}). -define(CHILD(I, Type, Timeout), - ?CHILD(I, Type, Timeout, [])). + ?CHILD(I, Type, Timeout, [])). -define(CHILD(I, Type), ?CHILD(I, Type, 5000)). @@ -57,15 +57,15 @@ start_link() -> init([]) -> Children = lists:flatten([?CHILD(riak_core_vnode_sup, - supervisor, 305000), - ?CHILD(riak_core_eventhandler_sup, supervisor), - ?CHILD(riak_core_handoff_sup, supervisor), - ?CHILD(riak_core_ring_events, worker), - ?CHILD(riak_core_ring_manager, worker), - ?CHILD(riak_core_vnode_proxy_sup, supervisor), - ?CHILD(riak_core_node_watcher_events, worker), - ?CHILD(riak_core_node_watcher, worker), - ?CHILD(riak_core_vnode_manager, worker), - ?CHILD(riak_core_gossip, worker), - ?CHILD(riak_core_claimant, worker)]), + supervisor, 305000), + ?CHILD(riak_core_eventhandler_sup, supervisor), + ?CHILD(riak_core_handoff_sup, supervisor), + ?CHILD(riak_core_ring_events, worker), + ?CHILD(riak_core_ring_manager, worker), + ?CHILD(riak_core_vnode_proxy_sup, supervisor), + ?CHILD(riak_core_node_watcher_events, worker), + ?CHILD(riak_core_node_watcher, worker), + ?CHILD(riak_core_vnode_manager, worker), + ?CHILD(riak_core_gossip, worker), + ?CHILD(riak_core_claimant, worker)]), {ok, {{one_for_one, 10, 10}, Children}}. diff --git a/src/riak_core_test_util.erl b/src/riak_core_test_util.erl index 776b7d234..f56357317 100644 --- a/src/riak_core_test_util.erl +++ b/src/riak_core_test_util.erl @@ -27,11 +27,11 @@ -ifdef(TEST). -export([setup_mockring1/0, - fake_ring/2, - stop_pid/1, - wait_for_pid/1, - stop_pid/2, - unlink_named_process/1]). + fake_ring/2, + stop_pid/1, + wait_for_pid/1, + stop_pid/2, + unlink_named_process/1]). -include_lib("eunit/include/eunit.hrl"). @@ -50,8 +50,8 @@ stop_pid(Pid, ExitType) -> wait_for_pid(Pid) -> Mref = erlang:monitor(process, Pid), receive - {'DOWN', Mref, process, _, _} -> ok - after 5000 -> {error, didnotexit} + {'DOWN', Mref, process, _, _} -> ok + after 5000 -> {error, didnotexit} end. 
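To make the supervisor spec in riak_core_sup above concrete, this is what one ?CHILD expansion yields, derived mechanically from the macro definition (5000 ms is the default shutdown timeout):

%% ?CHILD(riak_core_ring_manager, worker) expands to:
{riak_core_ring_manager,
 {riak_core_ring_manager, start_link, []},
 permanent, 5000, worker,
 [riak_core_ring_manager]}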
unlink_named_process(Name) when is_atom(Name) -> @@ -61,32 +61,32 @@ setup_mockring1() -> % requires a running riak_core_ring_manager, in test-mode is ok Ring0 = riak_core_ring:fresh(16, node()), Ring1 = riak_core_ring:add_member(node(), - Ring0, - othernode@otherhost), + Ring0, + othernode@otherhost), Ring2 = riak_core_ring:add_member(node(), - Ring1, - othernode2@otherhost2), + Ring1, + othernode2@otherhost2), Ring3 = lists:foldl(fun (_, R) -> - riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), - othernode@otherhost, - R) - end, - Ring2, - [1, 2, 3, 4, 5, 6]), + riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), + othernode@otherhost, + R) + end, + Ring2, + [1, 2, 3, 4, 5, 6]), Ring = lists:foldl(fun (_, R) -> - riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), - othernode2@otherhost2, - R) - end, - Ring3, - [1, 2, 3, 4, 5, 6]), + riak_core_ring:transfer_node(hd(riak_core_ring:my_indices(R)), + othernode2@otherhost2, + R) + end, + Ring3, + [1, 2, 3, 4, 5, 6]), riak_core_ring_manager:set_ring_global(Ring). fake_ring(Size, NumNodes) -> ManyNodes = [list_to_atom("dev" ++ - integer_to_list(X) ++ "@127.0.0.1") - || _ <- lists:seq(0, Size div NumNodes), - X <- lists:seq(1, NumNodes)], + integer_to_list(X) ++ "@127.0.0.1") + || _ <- lists:seq(0, Size div NumNodes), + X <- lists:seq(1, NumNodes)], Nodes = lists:sublist(ManyNodes, Size), Inc = chash:ring_increment(Size), Indices = lists:seq(0, (Size - 1) * Inc, Inc), @@ -94,24 +94,24 @@ fake_ring(Size, NumNodes) -> [Node | OtherNodes] = Nodes, Ring = riak_core_ring:fresh(Size, Node), Ring2 = lists:foldl(fun (OtherNode, RingAcc) -> - RingAcc2 = riak_core_ring:add_member(Node, - RingAcc, - OtherNode), - riak_core_ring:set_member(Node, - RingAcc2, - OtherNode, - valid, - same_vclock) - end, - Ring, - OtherNodes), + RingAcc2 = riak_core_ring:add_member(Node, + RingAcc, + OtherNode), + riak_core_ring:set_member(Node, + RingAcc2, + OtherNode, + valid, + same_vclock) + end, + Ring, + OtherNodes), Ring3 = lists:foldl(fun ({Idx, Owner}, RingAcc) -> - riak_core_ring:transfer_node(Idx, - Owner, - RingAcc) - end, - Ring2, - Owners), + riak_core_ring:transfer_node(Idx, + Owner, + RingAcc) + end, + Ring2, + Owners), Ring3. -endif. %TEST. diff --git a/src/riak_core_util.erl b/src/riak_core_util.erl index 81112b4b8..46825b483 100644 --- a/src/riak_core_util.erl +++ b/src/riak_core_util.erl @@ -22,65 +22,65 @@ -module(riak_core_util). 
-export([moment/0, - make_tmp_dir/0, - replace_file/2, - compare_dates/2, - reload_all/1, - integer_to_list/2, - unique_id_62/0, - str_to_node/1, - chash_key/1, - chash_key/2, - chash_std_keyfun/1, - chash_bucketonly_keyfun/1, - mkclientid/1, - start_app_deps/1, - build_tree/3, - orddict_delta/2, - safe_rpc/4, - safe_rpc/5, - rpc_every_member/4, - rpc_every_member_ann/4, - count/2, - keydelete/2, - multi_keydelete/2, - multi_keydelete/3, - compose/1, - compose/2, - pmap/2, - pmap/3, - multi_rpc/4, - multi_rpc/5, - multi_rpc_ann/4, - multi_rpc_ann/5, - multicall_ann/4, - multicall_ann/5, - shuffle/1, - is_arch/1, - format_ip_and_port/2, - peername/2, - sockname/2, - sha/1, - md5/1, - make_fold_req/1, - make_fold_req/2, - make_fold_req/4, - make_newest_fold_req/1, - proxy_spawn/1, - proxy/2, - enable_job_class/1, - enable_job_class/2, - disable_job_class/1, - disable_job_class/2, - job_class_enabled/1, - job_class_enabled/2, - job_class_disabled_message/2, - report_job_request_disposition/6, - responsible_preflists/1, - responsible_preflists/2, - get_index_n/1, - preflist_siblings/1, - posix_error/1]). + make_tmp_dir/0, + replace_file/2, + compare_dates/2, + reload_all/1, + integer_to_list/2, + unique_id_62/0, + str_to_node/1, + chash_key/1, + chash_key/2, + chash_std_keyfun/1, + chash_bucketonly_keyfun/1, + mkclientid/1, + start_app_deps/1, + build_tree/3, + orddict_delta/2, + safe_rpc/4, + safe_rpc/5, + rpc_every_member/4, + rpc_every_member_ann/4, + count/2, + keydelete/2, + multi_keydelete/2, + multi_keydelete/3, + compose/1, + compose/2, + pmap/2, + pmap/3, + multi_rpc/4, + multi_rpc/5, + multi_rpc_ann/4, + multi_rpc_ann/5, + multicall_ann/4, + multicall_ann/5, + shuffle/1, + is_arch/1, + format_ip_and_port/2, + peername/2, + sockname/2, + sha/1, + md5/1, + make_fold_req/1, + make_fold_req/2, + make_fold_req/4, + make_newest_fold_req/1, + proxy_spawn/1, + proxy/2, + enable_job_class/1, + enable_job_class/2, + disable_job_class/1, + disable_job_class/2, + job_class_enabled/1, + job_class_enabled/2, + job_class_disabled_message/2, + report_job_request_disposition/6, + responsible_preflists/1, + responsible_preflists/2, + get_index_n/1, + preflist_siblings/1, + posix_error/1]). -include("riak_core_vnode.hrl"). @@ -96,8 +96,8 @@ -include_lib("eunit/include/eunit.hrl"). -export([counter_loop/1, - incr_counter/1, - decr_counter/1]). + incr_counter/1, + decr_counter/1]). -endif. @@ -121,9 +121,9 @@ posix_error(Error) -> case erl_posix_msg:message(Error) of - "unknown POSIX error" -> - lists:flatten(io_lib:format("~p", [Error])); - Message -> Message + "unknown POSIX error" -> + lists:flatten(io_lib:format("~p", [Error])); + Message -> Message end. %% @spec moment() -> integer() @@ -149,7 +149,7 @@ compare_dates(A, B) when is_list(B) -> rfc1123_to_now(String) when is_list(String) -> GSec = - calendar:datetime_to_gregorian_seconds(httpd_util:convert_request_date(String)), + calendar:datetime_to_gregorian_seconds(httpd_util:convert_request_date(String)), ESec = GSec - (?SEC_TO_EPOCH), Sec = ESec rem 1000000, MSec = ESec div 1000000, @@ -160,13 +160,13 @@ rfc1123_to_now(String) when is_list(String) -> %% to the new directory. 
make_tmp_dir() -> TmpId = io_lib:format("riptemp.~p", - [erlang:phash2({riak_core_rand:uniform(), self()})]), + [erlang:phash2({riak_core_rand:uniform(), self()})]), TempDir = filename:join("/tmp", TmpId), case filelib:is_dir(TempDir) of - true -> make_tmp_dir(); - false -> - ok = file:make_dir(TempDir), - TempDir + true -> make_tmp_dir(); + false -> + ok = file:make_dir(TempDir), + TempDir end. %% @doc Atomically/safely (to some reasonable level of durability) @@ -174,23 +174,23 @@ make_tmp_dir() -> %% slightly: If `FN' cannot be opened, will not error with a %% `badmatch', as before, but will instead return `{error, Reason}' -spec replace_file(string(), iodata()) -> ok | - {error, term()}. + {error, term()}. replace_file(FN, Data) -> TmpFN = FN ++ ".tmp", case file:open(TmpFN, [write, raw]) of - {ok, FH} -> - try ok = file:write(FH, Data), - ok = file:sync(FH), - ok = file:close(FH), - ok = file:rename(TmpFN, FN), - {ok, Contents} = read_file(FN), - true = Contents == iolist_to_binary(Data), - ok - catch - _:Err -> {error, Err} - end; - Err -> Err + {ok, FH} -> + try ok = file:write(FH, Data), + ok = file:sync(FH), + ok = file:close(FH), + ok = file:rename(TmpFN, FN), + {ok, Contents} = read_file(FN), + true = Contents == iolist_to_binary(Data), + ok + catch + _:Err -> {error, Err} + end; + Err -> Err end. %% @doc Similar to {@link file:read_file/1} but uses raw file `I/O' @@ -202,8 +202,8 @@ read_file(FName) -> read_file(FD, Acc) -> case file:read(FD, 4096) of - {ok, Data} -> read_file(FD, [Data | Acc]); - eof -> lists:reverse(Acc) + {ok, Data} -> read_file(FD, [Data | Acc]); + eof -> lists:reverse(Acc) end. %% @spec integer_to_list(Integer :: integer(), Base :: integer()) -> @@ -213,7 +213,7 @@ read_file(FD, Acc) -> integer_to_list(I, 10) -> erlang:integer_to_list(I); integer_to_list(I, Base) when is_integer(I), is_integer(Base), Base >= 2, - Base =< 1 + $Z - $A + 10 + 1 + $z - $a -> + Base =< 1 + $Z - $A + 10 + 1 + $z - $a -> if I < 0 -> [$- | integer_to_list(-I, Base, [])]; true -> integer_to_list(I, Base, []) end; @@ -225,9 +225,9 @@ integer_to_list(I0, Base, R0) -> D = I0 rem Base, I1 = I0 div Base, R1 = if D >= 36 -> [D - 36 + $a | R0]; - D >= 10 -> [D - 10 + $A | R0]; - true -> [D + $0 | R0] - end, + D >= 10 -> [D - 10 + $A | R0]; + true -> [D + $0 | R0] + end, if I1 =:= 0 -> R1; true -> integer_to_list(I1, Base, R1) end. @@ -242,7 +242,7 @@ md5(Bin) -> crypto:hash(md5, Bin). unique_id_62() -> Rand = sha(term_to_binary({make_ref(), - os:timestamp()})), + os:timestamp()})), <<I:160/integer>> = Rand, integer_to_list(I, 62). @@ -253,8 +253,8 @@ unique_id_62() -> %% Module. Return is a list of the results of code:purge/1 %% and code:load_file/1 on each node. -spec reload_all(Module :: atom()) -> [{boolean(), - {module, Module :: atom()} | - {error, term()}}]. + {module, Module :: atom()} | + {error, term()}}]. reload_all(Module) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), @@ -268,15 +268,15 @@ mkclientid(RemoteNode) -> {{Y, Mo, D}, {H, Mi, S}} = erlang:universaltime(), {_, _, NowPart} = os:timestamp(), Id = erlang:phash2([Y, - Mo, - D, - H, - Mi, - S, - node(), - RemoteNode, - NowPart, - self()]), + Mo, + D, + H, + Mi, + S, + node(), + RemoteNode, + NowPart, + self()]), <<Id:32>>. %% @spec chash_key(BKey :: riak_object:bkey()) -> chash:index() @@ -308,21 +308,21 @@ str_to_node(Node) when is_atom(Node) -> str_to_node(atom_to_list(Node)); str_to_node(NodeStr) -> case string:tokens(NodeStr, "@") of - [NodeName] -> - %% Node name only; no host name.
If the local node has a hostname, - %% append it - case node_hostname() of - [] -> list_to_atom(NodeName); - Hostname -> list_to_atom(NodeName ++ "@" ++ Hostname) - end; - _ -> list_to_atom(NodeStr) + [NodeName] -> + %% Node name only; no host name. If the local node has a hostname, + %% append it + case node_hostname() of + [] -> list_to_atom(NodeName); + Hostname -> list_to_atom(NodeName ++ "@" ++ Hostname) + end; + _ -> list_to_atom(NodeStr) end. node_hostname() -> NodeStr = atom_to_list(node()), case string:tokens(NodeStr, "@") of - [_NodeName, Hostname] -> Hostname; - _ -> [] + [_NodeName, Hostname] -> Hostname; + _ -> [] end. %% @spec start_app_deps(App :: atom()) -> ok @@ -336,22 +336,22 @@ start_app_deps(App) -> %% @doc Start the named application if not already started. ensure_started(App) -> case application:start(App) of - ok -> ok; - {error, {already_started, App}} -> ok + ok -> ok; + {error, {already_started, App}} -> ok end. %% @doc Applies `Pred' to each element in `List', and returns a count of how many %% applications returned `true'. -spec count(fun((term()) -> boolean()), - [term()]) -> non_neg_integer(). + [term()]) -> non_neg_integer(). count(Pred, List) -> FoldFun = fun (E, A) -> - case Pred(E) of - false -> A; - true -> A + 1 - end - end, + case Pred(E) of + false -> A; + true -> A + 1 + end + end, lists:foldl(FoldFun, 0, List). %% @doc Returns a copy of `TupleList' where the first occurrence of a tuple whose @@ -374,19 +374,19 @@ multi_keydelete(KeysToDelete, TupleList) -> %% first element compares equal to any key in `KeysToDelete' is deleted, if %% there is such a tuple. -spec multi_keydelete([atom()], non_neg_integer(), - [tuple()]) -> [tuple()]. + [tuple()]) -> [tuple()]. multi_keydelete(KeysToDelete, N, TupleList) -> lists:foldl(fun (Key, Acc) -> - lists:keydelete(Key, N, Acc) - end, - TupleList, - KeysToDelete). + lists:keydelete(Key, N, Acc) + end, + TupleList, + KeysToDelete). %% @doc Function composition: returns a function that is the composition of %% `F' and `G'. -spec compose(F :: fun((B) -> C), - G :: fun((A) -> B)) -> fun((A) -> C). + G :: fun((A) -> B)) -> fun((A) -> C). compose(F, G) when is_function(F, 1), is_function(G, 1) -> @@ -407,45 +407,45 @@ compose(Funs) when is_list(Funs) -> %% @doc Invoke function `F' over each element of list `L' in parallel, %% returning the results in the same order as the input list. -spec pmap(F, L1) -> L2 when F :: fun((A) -> B), - L1 :: [A], L2 :: [B]. + L1 :: [A], L2 :: [B]. pmap(F, L) -> Parent = self(), lists:foldl(fun (X, N) -> - spawn_link(fun () -> Parent ! {pmap, N, F(X)} end), - N + 1 - end, - 0, - L), + spawn_link(fun () -> Parent ! {pmap, N, F(X)} end), + N + 1 + end, + 0, + L), L2 = [receive {pmap, N, R} -> {N, R} end || _ <- L], L3 = lists:keysort(1, L2), [R || {_, R} <- L3]. -record(pmap_acc, - {mapper, - fn, - n_pending = 0, - pending = sets:new(), - n_done = 0, - done = [], - max_concurrent = 1}). + {mapper, + fn, + n_pending = 0, + pending = sets:new(), + n_done = 0, + done = [], + max_concurrent = 1}). %% @doc Parallel map with a cap on the number of concurrent worker processes. %% Note: Worker processes are linked to the parent, so a crash propagates. -spec pmap(Fun :: function(), List :: list(), - MaxP :: integer()) -> list(). + MaxP :: integer()) -> list(). 
pmap(Fun, List, MaxP) when MaxP < 1 -> pmap(Fun, List, 1); pmap(Fun, List, MaxP) when is_function(Fun), is_list(List), - is_integer(MaxP) -> + is_integer(MaxP) -> Mapper = self(), #pmap_acc{pending = Pending, done = Done} = - lists:foldl(fun pmap_worker/2, - #pmap_acc{mapper = Mapper, fn = Fun, - max_concurrent = MaxP}, - List), + lists:foldl(fun pmap_worker/2, + #pmap_acc{mapper = Mapper, fn = Fun, + max_concurrent = MaxP}, + List), All = pmap_collect_rest(Pending, Done), % Restore input order Sorted = lists:keysort(1, All), @@ -454,43 +454,43 @@ pmap(Fun, List, MaxP) %% @doc Fold function for {@link pmap/3} that spawns up to a max number of %% workers to execute the mapping function over the input list. pmap_worker(X, - Acc = #pmap_acc{n_pending = NP, pending = Pending, - n_done = ND, max_concurrent = MaxP, mapper = Mapper, - fn = Fn}) + Acc = #pmap_acc{n_pending = NP, pending = Pending, + n_done = ND, max_concurrent = MaxP, mapper = Mapper, + fn = Fn}) when NP < MaxP -> Worker = spawn_link(fun () -> - R = Fn(X), - Mapper ! {pmap_result, self(), {NP + ND, R}} - end), + R = Fn(X), + Mapper ! {pmap_result, self(), {NP + ND, R}} + end), Acc#pmap_acc{n_pending = NP + 1, - pending = sets:add_element(Worker, Pending)}; + pending = sets:add_element(Worker, Pending)}; pmap_worker(X, - Acc = #pmap_acc{n_pending = NP, pending = Pending, - n_done = ND, done = Done, max_concurrent = MaxP}) + Acc = #pmap_acc{n_pending = NP, pending = Pending, + n_done = ND, done = Done, max_concurrent = MaxP}) when NP == MaxP -> {Result, NewPending} = pmap_collect_one(Pending), pmap_worker(X, - Acc#pmap_acc{n_pending = NP - 1, pending = NewPending, - n_done = ND + 1, done = [Result | Done]}). + Acc#pmap_acc{n_pending = NP - 1, pending = NewPending, + n_done = ND + 1, done = [Result | Done]}). %% @doc Waits for one pending pmap task to finish pmap_collect_one(Pending) -> receive - {pmap_result, Pid, Result} -> - Size = sets:size(Pending), - NewPending = sets:del_element(Pid, Pending), - case sets:size(NewPending) of - Size -> pmap_collect_one(Pending); - _ -> {Result, NewPending} - end + {pmap_result, Pid, Result} -> + Size = sets:size(Pending), + NewPending = sets:del_element(Pid, Pending), + case sets:size(NewPending) of + Size -> pmap_collect_one(Pending); + _ -> {Result, NewPending} + end end. pmap_collect_rest(Pending, Done) -> case sets:size(Pending) of - 0 -> Done; - _ -> - {Result, NewPending} = pmap_collect_one(Pending), - pmap_collect_rest(NewPending, [Result | Done]) + 0 -> Done; + _ -> + {Result, NewPending} = pmap_collect_one(Pending), + pmap_collect_rest(NewPending, [Result | Done]) end. %% @doc Wraps an rpc:call/4 in a try/catch to handle the case where the @@ -498,16 +498,16 @@ pmap_collect_rest(Pending, Done) -> %% the sense that it won't crash the calling process if the rex %% process is down. -spec safe_rpc(Node :: node(), Module :: atom(), - Function :: atom(), Args :: [any()]) -> {badrpc, - any()} | - any(). + Function :: atom(), Args :: [any()]) -> {badrpc, + any()} | + any(). safe_rpc(Node, Module, Function, Args) -> try rpc:call(Node, Module, Function, Args) of - Result -> Result + Result -> Result catch - exit:{noproc, _NoProcDetails} -> - {badrpc, rpc_process_down} + exit:{noproc, _NoProcDetails} -> + {badrpc, rpc_process_down} end. %% @doc Wraps an rpc:call/5 in a try/catch to handle the case where the @@ -515,15 +515,15 @@ safe_rpc(Node, Module, Function, Args) -> %% the sense that it won't crash the calling process if the rex %% process is down. 
-spec safe_rpc(Node :: node(), Module :: atom(), - Function :: atom(), Args :: [any()], - Timeout :: timeout()) -> {badrpc, any()} | any(). + Function :: atom(), Args :: [any()], + Timeout :: timeout()) -> {badrpc, any()} | any(). safe_rpc(Node, Module, Function, Args, Timeout) -> try rpc:call(Node, Module, Function, Args, Timeout) of - Result -> Result + Result -> Result catch - 'EXIT':{noproc, _NoProcDetails} -> - {badrpc, rpc_process_down} + 'EXIT':{noproc, _NoProcDetails} -> + {badrpc, rpc_process_down} end. %% @spec rpc_every_member(atom(), atom(), [term()], integer()|infinity) @@ -542,16 +542,16 @@ rpc_every_member_ann(Module, Function, Args, Timeout) -> {ok, MyRing} = riak_core_ring_manager:get_my_ring(), Nodes = riak_core_ring:all_members(MyRing), {Results, Down} = multicall_ann(Nodes, - Module, - Function, - Args, - Timeout), + Module, + Function, + Args, + Timeout), {Results, Down}. %% @doc Perform an RPC call to a list of nodes in parallel, returning the %% results in the same order as the input list. -spec multi_rpc([node()], module(), atom(), - [any()]) -> [any()]. + [any()]) -> [any()]. multi_rpc(Nodes, Mod, Fun, Args) -> multi_rpc(Nodes, Mod, Fun, Args, infinity). @@ -559,19 +559,19 @@ multi_rpc(Nodes, Mod, Fun, Args) -> %% @doc Perform an RPC call to a list of nodes in parallel, returning the %% results in the same order as the input list. -spec multi_rpc([node()], module(), atom(), [any()], - timeout()) -> [any()]. + timeout()) -> [any()]. multi_rpc(Nodes, Mod, Fun, Args, Timeout) -> pmap(fun (Node) -> - safe_rpc(Node, Mod, Fun, Args, Timeout) - end, - Nodes). + safe_rpc(Node, Mod, Fun, Args, Timeout) + end, + Nodes). %% @doc Perform an RPC call to a list of nodes in parallel, returning the %% results in the same order as the input list. Each result is tagged %% with the corresponding node name. -spec multi_rpc_ann([node()], module(), atom(), - [any()]) -> [{node(), any()}]. + [any()]) -> [{node(), any()}]. multi_rpc_ann(Nodes, Mod, Fun, Args) -> multi_rpc_ann(Nodes, Mod, Fun, Args, infinity). @@ -580,7 +580,7 @@ multi_rpc_ann(Nodes, Mod, Fun, Args) -> %% results in the same order as the input list. Each result is tagged %% with the corresponding node name. -spec multi_rpc_ann([node()], module(), atom(), [any()], - timeout()) -> [{node(), any()}]. + timeout()) -> [{node(), any()}]. multi_rpc_ann(Nodes, Mod, Fun, Args, Timeout) -> Results = multi_rpc(Nodes, Mod, Fun, Args, Timeout), @@ -592,8 +592,8 @@ multi_rpc_ann(Nodes, Mod, Fun, Args, Timeout) -> %% the same order as the input list, and each result is tagged with the %% corresponding node name. -spec multicall_ann([node()], module(), atom(), - [any()]) -> {Results :: [{node(), any()}], - Down :: [node()]}. + [any()]) -> {Results :: [{node(), any()}], + Down :: [node()]}. multicall_ann(Nodes, Mod, Fun, Args) -> multicall_ann(Nodes, Mod, Fun, Args, infinity). @@ -604,16 +604,16 @@ multicall_ann(Nodes, Mod, Fun, Args) -> %% the same order as the input list, and each result is tagged with the %% corresponding node name. -spec multicall_ann([node()], module(), atom(), [any()], - timeout()) -> {Results :: [{node(), any()}], - Down :: [node()]}. + timeout()) -> {Results :: [{node(), any()}], + Down :: [node()]}. multicall_ann(Nodes, Mod, Fun, Args, Timeout) -> L = multi_rpc_ann(Nodes, Mod, Fun, Args, Timeout), {Results, DownAnn} = lists:partition(fun ({_, - Result}) -> - Result /= {badrpc, nodedown} - end, - L), + Result}) -> + Result /= {badrpc, nodedown} + end, + L), {Down, _} = lists:unzip(DownAnn), {Results, Down}. 
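A quick sanity sketch for pmap/3 above: results always come back in input order, regardless of which worker finishes first, with at most MaxP workers in flight (values illustrative):

%% > riak_core_util:pmap(fun (X) -> X * X end, [1, 2, 3, 4], 2).
%% [1, 4, 9, 16]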
@@ -625,45 +625,45 @@ multicall_ann(Nodes, Mod, Fun, Args, Timeout) -> %% have children by giving them backedges to other elements. -spec build_tree(N :: integer(), Nodes :: [term()], - Opts :: [term()]) -> orddict:orddict(). + Opts :: [term()]) -> orddict:orddict(). build_tree(N, Nodes, Opts) -> case lists:member(cycles, Opts) of - true -> - Expand = lists:flatten(lists:duplicate(N + 1, Nodes)); - false -> Expand = Nodes + true -> + Expand = lists:flatten(lists:duplicate(N + 1, Nodes)); + false -> Expand = Nodes end, {Tree, _} = lists:foldl(fun (Elm, {Result, Worklist}) -> - Len = erlang:min(N, length(Worklist)), - {Children, Rest} = lists:split(Len, - Worklist), - NewResult = [{Elm, Children} | Result], - {NewResult, Rest} - end, - {[], tl(Expand)}, - Nodes), + Len = erlang:min(N, length(Worklist)), + {Children, Rest} = lists:split(Len, + Worklist), + NewResult = [{Elm, Children} | Result], + {NewResult, Rest} + end, + {[], tl(Expand)}, + Nodes), orddict:from_list(Tree). orddict_delta(A, B) -> %% Pad both A and B to the same length DummyA = [{Key, '$none'} || {Key, _} <- B], A2 = orddict:merge(fun (_, Value, _) -> Value end, - A, - DummyA), + A, + DummyA), DummyB = [{Key, '$none'} || {Key, _} <- A], B2 = orddict:merge(fun (_, Value, _) -> Value end, - B, - DummyB), + B, + DummyB), %% Merge and filter out equal values Merged = orddict:merge(fun (_, AVal, BVal) -> - {AVal, BVal} - end, - A2, - B2), + {AVal, BVal} + end, + A2, + B2), Diff = orddict:filter(fun (_, {Same, Same}) -> false; - (_, _) -> true - end, - Merged), + (_, _) -> true + end, + Merged), Diff. shuffle(L) -> @@ -693,33 +693,33 @@ format_ip_and_port(Ip, Port) when is_list(Ip) -> lists:flatten(io_lib:format("~s:~p", [Ip, Port])); format_ip_and_port(Ip, Port) when is_tuple(Ip) -> lists:flatten(io_lib:format("~s:~p", - [inet_parse:ntoa(Ip), Port])). + [inet_parse:ntoa(Ip), Port])). peername(Socket, Transport) -> case Transport:peername(Socket) of - {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); - {error, Reason} -> - %% just return a string so JSON doesn't blow up - lists:flatten(io_lib:format("error:~p", [Reason])) + {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); + {error, Reason} -> + %% just return a string so JSON doesn't blow up + lists:flatten(io_lib:format("error:~p", [Reason])) end. sockname(Socket, Transport) -> case Transport:sockname(Socket) of - {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); - {error, Reason} -> - %% just return a string so JSON doesn't blow up - lists:flatten(io_lib:format("error:~p", [Reason])) + {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); + {error, Reason} -> + %% just return a string so JSON doesn't blow up + lists:flatten(io_lib:format("error:~p", [Reason])) end. %% @doc Convert a #riak_core_fold_req_v? record to the cluster's maximum %% supported record version. make_fold_req(#riak_core_fold_req_v1{foldfun = FoldFun, - acc0 = Acc0}) -> + acc0 = Acc0}) -> make_fold_req(FoldFun, Acc0, false, []); make_fold_req(#riak_core_fold_req_v2{foldfun = FoldFun, - acc0 = Acc0, forwardable = Forwardable, - opts = Opts}) -> + acc0 = Acc0, forwardable = Forwardable, + opts = Opts}) -> make_fold_req(FoldFun, Acc0, Forwardable, Opts). make_fold_req(FoldFun, Acc0) -> @@ -732,8 +732,8 @@ make_fold_req(FoldFun, Acc0, Forwardable, Opts) -> %% regardless of cluster support make_newest_fold_req(#riak_core_fold_req_v1{foldfun = - FoldFun, - acc0 = Acc0}) -> + FoldFun, + acc0 = Acc0}) -> make_fold_reqv(v2, FoldFun, Acc0, false, []); make_newest_fold_req(#riak_core_fold_req_v2{} = F) -> F. 
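A worked sketch of orddict_delta/2 above: keys present on only one side are paired with '$none', and keys whose values match on both sides are filtered out:

%% > riak_core_util:orddict_delta([{a, 1}, {b, 2}], [{b, 2}, {c, 3}]).
%% [{a, {1, '$none'}}, {c, {'$none', 3}}]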
@@ -746,32 +746,32 @@ proxy_spawn(Fun) -> MRef = monitor(process, Pid), Pid ! {proxy, MRef}, receive - {proxy_reply, MRef, Result} -> - demonitor(MRef, [flush]), - Result; - {'DOWN', MRef, _, _, Reason} -> {error, Reason} + {proxy_reply, MRef, Result} -> + demonitor(MRef, [flush]), + Result; + {'DOWN', MRef, _, _, Reason} -> {error, Reason} end. %% @private make_fold_reqv(_, FoldFun, Acc0, Forwardable, Opts) when is_function(FoldFun, 3) andalso - (Forwardable == true orelse Forwardable == false) - andalso is_list(Opts) -> + (Forwardable == true orelse Forwardable == false) + andalso is_list(Opts) -> #riak_core_fold_req_v2{foldfun = FoldFun, acc0 = Acc0, - forwardable = Forwardable, opts = Opts}. + forwardable = Forwardable, opts = Opts}. %% @private - used with proxy_spawn proxy(Parent, Fun) -> _ = monitor(process, Parent), receive - {proxy, MRef} -> - Result = Fun(), - Parent ! {proxy_reply, MRef, Result}; - {'DOWN', _, _, _, _} -> ok + {proxy, MRef} -> + Result = Fun(), + Parent ! {proxy_reply, MRef, Result}; + {'DOWN', _, _, _, _} -> ok end. -spec enable_job_class(atom(), atom()) -> ok | - {error, term()}. + {error, term()}. %% @doc Enables the specified Application/Operation job class. %% This is the public API for use via RPC. @@ -779,13 +779,13 @@ proxy(Parent, Fun) -> %% or its complement disable_job_class/2. enable_job_class(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> enable_job_class({Application, Operation}); enable_job_class(Application, Operation) -> {error, {badarg, {Application, Operation}}}. -spec disable_job_class(atom(), atom()) -> ok | - {error, term()}. + {error, term()}. %% @doc Disables the specified Application/Operation job class. %% This is the public API for use via RPC. @@ -793,25 +793,25 @@ enable_job_class(Application, Operation) -> %% or its complement enable_job_class/2. disable_job_class(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> disable_job_class({Application, Operation}); disable_job_class(Application, Operation) -> {error, {badarg, {Application, Operation}}}. -spec job_class_enabled(atom(), atom()) -> boolean() | - {error, term()}. + {error, term()}. %% @doc Reports whether the specified Application/Operation job class is enabled. %% This is the public API for use via RPC. job_class_enabled(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> job_class_enabled({Application, Operation}); job_class_enabled(Application, Operation) -> {error, {badarg, {Application, Operation}}}. -spec enable_job_class(Class :: term()) -> ok | - {error, term()}. + {error, term()}. %% @doc Internal API to enable the specified job class. %% WARNING: @@ -820,25 +820,25 @@ job_class_enabled(Application, Operation) -> %% You are STRONGLY advised to use enable_job_class/2. 
enable_job_class(Class) -> case application:get_env(riak_core, - job_accept_class, - undefined) - of - [_ | _] = EnabledClasses -> - case lists:member(Class, EnabledClasses) of - true -> ok; - _ -> - application:set_env(riak_core, - job_accept_class, - [Class | EnabledClasses]) - end; - _ -> - application:set_env(riak_core, - job_accept_class, - [Class]) + job_accept_class, + undefined) + of + [_ | _] = EnabledClasses -> + case lists:member(Class, EnabledClasses) of + true -> ok; + _ -> + application:set_env(riak_core, + job_accept_class, + [Class | EnabledClasses]) + end; + _ -> + application:set_env(riak_core, + job_accept_class, + [Class]) end. -spec disable_job_class(Class :: term()) -> ok | - {error, term()}. + {error, term()}. %% @doc Internal API to disable the specified job class. %% WARNING: @@ -847,18 +847,18 @@ enable_job_class(Class) -> %% You are STRONGLY advised to use disable_job_class/2. disable_job_class(Class) -> case application:get_env(riak_core, - job_accept_class, - undefined) - of - [_ | _] = EnabledClasses -> - case lists:member(Class, EnabledClasses) of - false -> ok; - _ -> - application:set_env(riak_core, - job_accept_class, - lists:delete(Class, EnabledClasses)) - end; - _ -> ok + job_accept_class, + undefined) + of + [_ | _] = EnabledClasses -> + case lists:member(Class, EnabledClasses) of + false -> ok; + _ -> + application:set_env(riak_core, + job_accept_class, + lists:delete(Class, EnabledClasses)) + end; + _ -> ok end. -spec job_class_enabled(Class :: term()) -> boolean(). @@ -870,26 +870,26 @@ disable_job_class(Class) -> %% You are STRONGLY advised to use job_class_enabled/2. job_class_enabled(Class) -> case application:get_env(riak_core, - job_accept_class, - undefined) - of - undefined -> true; - [] -> false; - [_ | _] = EnabledClasses -> - lists:member(Class, EnabledClasses); - Other -> - % Don't crash if it's not a list - that should never be the case, - % but since the value *can* be manipulated externally be more - % accommodating. If someone mucks it up, nothing's going to be - % allowed, but give them a chance to catch on instead of crashing. - _ = logger:error("riak_core.job_accept_class is not a " - "list: ~p", - [Other]), - false + job_accept_class, + undefined) + of + undefined -> true; + [] -> false; + [_ | _] = EnabledClasses -> + lists:member(Class, EnabledClasses); + Other -> + % Don't crash if it's not a list - that should never be the case, + % but since the value *can* be manipulated externally be more + % accommodating. If someone mucks it up, nothing's going to be + % allowed, but give them a chance to catch on instead of crashing. + _ = logger:error("riak_core.job_accept_class is not a " + "list: ~p", + [Other]), + false end. -spec job_class_disabled_message(ReturnType :: atom(), - Class :: term()) -> binary() | string(). + Class :: term()) -> binary() | string(). %% @doc The error message to be returned to a client for a disabled job class. %% WARNING: @@ -897,16 +897,16 @@ job_class_enabled(Class) -> %% when the Jobs API is live. job_class_disabled_message(binary, Class) -> erlang:list_to_binary(job_class_disabled_message(text, - Class)); + Class)); job_class_disabled_message(text, Class) -> lists:flatten(io_lib:format("Operation '~p' is not enabled", - [Class])). + [Class])). -spec report_job_request_disposition(Accepted :: - boolean(), - Class :: term(), Mod :: module(), - Func :: atom(), Line :: pos_integer(), - Client :: term()) -> ok | {error, term()}. 
+ boolean(), + Class :: term(), Mod :: module(), + Func :: atom(), Line :: pos_integer(), + Client :: term()) -> ok | {error, term()}. %% @doc Report/record the disposition of an async job request. %% @@ -926,17 +926,17 @@ job_class_disabled_message(text, Class) -> %% request was received. %% report_job_request_disposition(true, Class, Mod, Func, - Line, Client) -> + Line, Client) -> logger:debug("Request '~p' accepted from ~p", - [Class, Client], - #{pid => erlang:self(), module => Mod, function => Func, - line => Line}); + [Class, Client], + #{pid => erlang:self(), module => Mod, function => Func, + line => Line}); report_job_request_disposition(false, Class, Mod, Func, - Line, Client) -> + Line, Client) -> logger:warning("Request '~p' disabled from ~p", - [Class, Client], - #{pid => erlang:self(), module => Mod, function => Func, - line => Line}). + [Class, Client], + #{pid => erlang:self(), module => Mod, function => Func, + line => Line}). %% =================================================================== %% Preflist utility functions @@ -963,14 +963,14 @@ preflist_siblings(Index) -> %% @doc See {@link preflist_siblings/1}. -spec preflist_siblings(index(), - riak_core_ring()) -> [index()]. + riak_core_ring()) -> [index()]. preflist_siblings(Index, Ring) -> MaxN = determine_max_n(Ring), preflist_siblings(Index, MaxN, Ring). -spec preflist_siblings(index(), pos_integer(), - riak_core_ring()) -> [index()]. + riak_core_ring()) -> [index()]. preflist_siblings(Index, N, Ring) -> IndexBin = <>, @@ -988,15 +988,15 @@ responsible_preflists(Index) -> responsible_preflists(Index, Ring). -spec responsible_preflists(index(), - riak_core_ring()) -> [index_n()]. + riak_core_ring()) -> [index_n()]. responsible_preflists(Index, Ring) -> AllN = determine_all_n(Ring), responsible_preflists(Index, AllN, Ring). -spec responsible_preflists(index(), - [pos_integer(), ...], - riak_core_ring()) -> [index_n()]. + [pos_integer(), ...], + riak_core_ring()) -> [index_n()]. responsible_preflists(Index, AllN, Ring) -> IndexBin = <>, @@ -1004,12 +1004,12 @@ responsible_preflists(Index, AllN, Ring) -> Indices = [Idx || {Idx, _} <- PL], RevIndices = lists:reverse(Indices), lists:flatmap(fun (N) -> - responsible_preflists_n(RevIndices, N) - end, - AllN). + responsible_preflists_n(RevIndices, N) + end, + AllN). -spec responsible_preflists_n([index()], - pos_integer()) -> [index_n()]. + pos_integer()) -> [index_n()]. responsible_preflists_n(RevIndices, N) -> {Pred, _} = lists:split(N, RevIndices), @@ -1027,17 +1027,17 @@ determine_max_n(Ring) -> determine_all_n(Ring) -> Buckets = riak_core_ring:get_buckets(Ring), BucketProps = [riak_core_bucket:get_bucket(Bucket, Ring) - || Bucket <- Buckets], + || Bucket <- Buckets], Default = application:get_env(riak_core, - default_bucket_props, - undefined), + default_bucket_props, + undefined), DefaultN = proplists:get_value(n_val, Default), AllN = lists:foldl(fun (Props, AllN) -> - N = proplists:get_value(n_val, Props), - ordsets:add_element(N, AllN) - end, - [DefaultN], - BucketProps), + N = proplists:get_value(n_val, Props), + ordsets:add_element(N, AllN) + end, + [DefaultN], + BucketProps), AllN. %% =================================================================== @@ -1052,93 +1052,93 @@ moment_test() -> clientid_uniqueness_test() -> ClientIds = [mkclientid(somenode@somehost) - || _I <- lists:seq(0, 10000)], + || _I <- lists:seq(0, 10000)], length(ClientIds) =:= - length(sets:to_list(sets:from_list(ClientIds))). + length(sets:to_list(sets:from_list(ClientIds))). 
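%% A minimal sketch of driving the job-class switches defined above;
%% `riak_kv'/`list_buckets' is a hypothetical {Application, Operation}
%% pair standing in for whatever class a subsystem actually checks:

job_class_sketch() ->
    ok = enable_job_class(riak_kv, list_buckets),
    true = job_class_enabled({riak_kv, list_buckets}),
    ok = disable_job_class(riak_kv, list_buckets),
    false = job_class_enabled({riak_kv, list_buckets}),
    %% what a refused client would be told
    job_class_disabled_message(text, {riak_kv, list_buckets}).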
build_tree_test() -> Flat = [1, - 11, - 12, - 111, - 112, - 121, - 122, - 1111, - 1112, - 1121, - 1122, - 1211, - 1212, - 1221, - 1222], + 11, + 12, + 111, + 112, + 121, + 122, + 1111, + 1112, + 1121, + 1122, + 1211, + 1212, + 1221, + 1222], %% 2-ary tree decomposition ATree = [{1, [11, 12]}, - {11, [111, 112]}, - {12, [121, 122]}, - {111, [1111, 1112]}, - {112, [1121, 1122]}, - {121, [1211, 1212]}, - {122, [1221, 1222]}, - {1111, []}, - {1112, []}, - {1121, []}, - {1122, []}, - {1211, []}, - {1212, []}, - {1221, []}, - {1222, []}], + {11, [111, 112]}, + {12, [121, 122]}, + {111, [1111, 1112]}, + {112, [1121, 1122]}, + {121, [1211, 1212]}, + {122, [1221, 1222]}, + {1111, []}, + {1112, []}, + {1121, []}, + {1122, []}, + {1211, []}, + {1212, []}, + {1221, []}, + {1222, []}], %% 2-ary tree decomposition with cyclic wrap-around CTree = [{1, [11, 12]}, - {11, [111, 112]}, - {12, [121, 122]}, - {111, [1111, 1112]}, - {112, [1121, 1122]}, - {121, [1211, 1212]}, - {122, [1221, 1222]}, - {1111, [1, 11]}, - {1112, [12, 111]}, - {1121, [112, 121]}, - {1122, [122, 1111]}, - {1211, [1112, 1121]}, - {1212, [1122, 1211]}, - {1221, [1212, 1221]}, - {1222, [1222, 1]}], + {11, [111, 112]}, + {12, [121, 122]}, + {111, [1111, 1112]}, + {112, [1121, 1122]}, + {121, [1211, 1212]}, + {122, [1221, 1222]}, + {1111, [1, 11]}, + {1112, [12, 111]}, + {1121, [112, 121]}, + {1122, [122, 1111]}, + {1211, [1112, 1121]}, + {1212, [1122, 1211]}, + {1221, [1212, 1221]}, + {1222, [1222, 1]}], ?assertEqual(ATree, (build_tree(2, Flat, []))), ?assertEqual(CTree, (build_tree(2, Flat, [cycles]))), ok. counter_loop(N) -> receive - {up, Pid} -> - N2 = N + 1, - Pid ! {counter_value, N2}, - counter_loop(N2); - down -> counter_loop(N - 1); - exit -> exit(normal) + {up, Pid} -> + N2 = N + 1, + Pid ! {counter_value, N2}, + counter_loop(N2); + down -> counter_loop(N - 1); + exit -> exit(normal) end. incr_counter(CounterPid) -> CounterPid ! {up, self()}, receive - {counter_value, N} -> N after 3000 -> ?assert(false) + {counter_value, N} -> N after 3000 -> ?assert(false) end. decr_counter(CounterPid) -> CounterPid ! down. multi_keydelete_test_() -> Languages = [{lisp, 1958}, - {ml, 1973}, - {erlang, 1986}, - {haskell, 1990}, - {ocaml, 1996}, - {clojure, 2007}, - {elixir, 2012}], + {ml, 1973}, + {erlang, 1986}, + {haskell, 1990}, + {ocaml, 1996}, + {clojure, 2007}, + {elixir, 2012}], ?_assertMatch([{lisp, _}, - {ml, _}, - {erlang, _}, - {haskell, _}], - (multi_keydelete([ocaml, clojure, elixir], Languages))). + {ml, _}, + {erlang, _}, + {haskell, _}], + (multi_keydelete([ocaml, clojure, elixir], Languages))). 
compose_test_() -> Upper = fun string:to_upper/1, @@ -1149,90 +1149,90 @@ compose_test_() -> Double = fun (N) when is_integer(N) -> N * 2 end, Square = fun (N) when is_integer(N) -> N * N end, SquareDoubleIncrement = compose([Increment, - Double, - Square]), + Double, + Square]), CompatibleTypes = compose(Increment, - fun (X) when is_list(X) -> list_to_integer(X) - end), + fun (X) when is_list(X) -> list_to_integer(X) + end), IncompatibleTypes = compose(Increment, - fun (X) when is_binary(X) -> binary_to_list(X) - end), + fun (X) when is_binary(X) -> binary_to_list(X) + end), [?_assertEqual("DLROW OLLEH", - (StripReverseUpper("Hello world!"))), + (StripReverseUpper("Hello world!"))), ?_assertEqual((Increment(Double(Square(3)))), - (SquareDoubleIncrement(3))), + (SquareDoubleIncrement(3))), ?_assertMatch(4, (CompatibleTypes("3"))), ?_assertError(function_clause, - (IncompatibleTypes(<<"42">>))), + (IncompatibleTypes(<<"42">>))), ?_assertError(function_clause, - (compose(fun (X, Y) -> {X, Y} end, fun (X) -> X end)))]. + (compose(fun (X, Y) -> {X, Y} end, fun (X) -> X end)))]. pmap_test_() -> Fgood = fun (X) -> 2 * X end, Fbad = fun (3) -> throw(die_on_3); - (X) -> Fgood(X) - end, + (X) -> Fgood(X) + end, Lin = [1, 2, 3, 4], Lout = [2, 4, 6, 8], {setup, fun () -> error_logger:tty(false) end, fun (_) -> error_logger:tty(true) end, [fun () -> - % Test simple map case - ?assertEqual(Lout, (pmap(Fgood, Lin))), - % Verify a crashing process will not stall pmap - Parent = self(), - Pid = spawn(fun () -> - % Caller trapping exits causes stall!! - % TODO: Consider pmapping in a spawned proc - % process_flag(trap_exit, true), - pmap(Fbad, Lin), - ?debugMsg("pmap finished just fine"), - Parent ! no_crash_yo - end), - MonRef = monitor(process, Pid), - receive - {'DOWN', MonRef, _, _, _} -> ok; - no_crash_yo -> ?assert(pmap_did_not_crash_as_expected) - end + % Test simple map case + ?assertEqual(Lout, (pmap(Fgood, Lin))), + % Verify a crashing process will not stall pmap + Parent = self(), + Pid = spawn(fun () -> + % Caller trapping exits causes stall!! + % TODO: Consider pmapping in a spawned proc + % process_flag(trap_exit, true), + pmap(Fbad, Lin), + ?debugMsg("pmap finished just fine"), + Parent ! no_crash_yo + end), + MonRef = monitor(process, Pid), + receive + {'DOWN', MonRef, _, _, _} -> ok; + no_crash_yo -> ?assert(pmap_did_not_crash_as_expected) + end end]}. bounded_pmap_test_() -> Fun1 = fun (X) -> X + 2 end, Tests = fun (CountPid) -> - GFun = fun (Max) -> - fun (X) -> - ?assert((incr_counter(CountPid) =< - Max)), - timer:sleep(1), - decr_counter(CountPid), - Fun1(X) - end - end, - [fun () -> - ?assertEqual((lists:seq(Fun1(1), Fun1(N))), - (pmap(GFun(MaxP), - lists:seq(1, N), - MaxP))) - end - || MaxP <- lists:seq(1, 20), N <- lists:seq(0, 10)] - end, + GFun = fun (Max) -> + fun (X) -> + ?assert((incr_counter(CountPid) =< + Max)), + timer:sleep(1), + decr_counter(CountPid), + Fun1(X) + end + end, + [fun () -> + ?assertEqual((lists:seq(Fun1(1), Fun1(N))), + (pmap(GFun(MaxP), + lists:seq(1, N), + MaxP))) + end + || MaxP <- lists:seq(1, 20), N <- lists:seq(0, 10)] + end, {setup, fun () -> - Pid = spawn_link(?MODULE, counter_loop, [0]), - monitor(process, Pid), - Pid + Pid = spawn_link(?MODULE, counter_loop, [0]), + monitor(process, Pid), + Pid end, fun (Pid) -> - Pid ! exit, - receive - {'DOWN', _Ref, process, Pid, _Info} -> ok - after 3000 -> - ?debugMsg("pmap counter process did not go down " - "in time"), - ?assert(false) - end, - ok + Pid ! 
exit, + receive + {'DOWN', _Ref, process, Pid, _Info} -> ok + after 3000 -> + ?debugMsg("pmap counter process did not go down " + "in time"), + ?assert(false) + end, + ok end, Tests}. @@ -1243,10 +1243,10 @@ proxy_spawn_test() -> ?assertEqual({error, killer_fun}, B), %% Ensure no errant 'DOWN' messages receive - {'DOWN', _, _, _, _} = Msg -> - throw({error, {badmsg, Msg}}); - _ -> ok - after 1000 -> ok + {'DOWN', _, _, _, _} = Msg -> + throw({error, {badmsg, Msg}}); + _ -> ok + after 1000 -> ok end. -ifdef(PROPER). @@ -1256,8 +1256,8 @@ count_test() -> prop_count_correct() -> ?FORALL(List, (list(bool())), - (count(fun (E) -> E end, List) =:= - length([E || E <- List, E]))). + (count(fun (E) -> E end, List) =:= + length([E || E <- List, E]))). -endif. %% EQC diff --git a/src/riak_core_vnode.erl b/src/riak_core_vnode.erl index 3656fb97e..b6616eb74 100644 --- a/src/riak_core_vnode.erl +++ b/src/riak_core_vnode.erl @@ -23,42 +23,42 @@ -include("riak_core_vnode.hrl"). -export([start_link/3, - start_link/4, - wait_for_init/1, - send_command/2, - send_command_after/2]). + start_link/4, + wait_for_init/1, + send_command/2, + send_command_after/2]). -export([init/1, - started/2, - started/3, - active/2, - active/3, - handle_event/3, - handle_sync_event/4, - handle_info/3, - terminate/3, - code_change/4]). + started/2, + started/3, + active/2, + active/3, + handle_event/3, + handle_sync_event/4, + handle_info/3, + terminate/3, + code_change/4]). -export([reply/2, monitor/1]). -export([get_mod_index/1, - get_modstate/1, - set_forwarding/2, - trigger_handoff/2, - trigger_handoff/3, - trigger_delete/1, - core_status/1, - handoff_error/3]). + get_modstate/1, + set_forwarding/2, + trigger_handoff/2, + trigger_handoff/3, + trigger_delete/1, + core_status/1, + handoff_error/3]). -export([cast_finish_handoff/1, - send_an_event/2, - send_req/2, - send_all_proxy_req/2, - cancel_handoff/1, - handoff_complete/1, - resize_transfer_complete/2, - handoff_data/3, - unregistered/1]). + send_an_event/2, + send_req/2, + send_all_proxy_req/2, + cancel_handoff/1, + handoff_complete/1, + resize_transfer_complete/2, + handoff_data/3, + unregistered/1]). -ifdef(TEST). @@ -75,121 +75,121 @@ -compile({parse_transform, pulse_instrument}). -compile({pulse_replace_module, - [{gen_fsm_compat, pulse_gen_fsm}, - {gen_server, pulse_gen_server}]}). + [{gen_fsm_compat, pulse_gen_fsm}, + {gen_server, pulse_gen_server}]}). -endif. -define(NORMAL_REASON(R), - R == normal orelse - R == shutdown orelse - is_tuple(R) andalso element(1, R) == shutdown). + R == normal orelse + R == shutdown orelse + is_tuple(R) andalso element(1, R) == shutdown). -export_type([vnode_opt/0, pool_opt/0]). -type vnode_opt() :: pool_opt(). -type pool_opt() :: {pool, WorkerModule :: module(), - PoolSize :: pos_integer(), WorkerArgs :: [term()]}. + PoolSize :: pos_integer(), WorkerArgs :: [term()]}. -callback init([partition()]) -> {ok, - ModState :: term()} | - {ok, ModState :: term(), [vnode_opt()]} | - {error, Reason :: term()}. + ModState :: term()} | + {ok, ModState :: term(), [vnode_opt()]} | + {error, Reason :: term()}. -callback handle_command(Request :: term(), - Sender :: sender(), ModState :: term()) -> continue | - {reply, - Reply :: - term(), - NewModState :: - term()} | - {noreply, - NewModState :: - term()} | - {async, - Work :: - function(), - From :: - sender(), - NewModState :: - term()} | - {stop, - Reason :: - term(), - NewModState :: - term()}. 
+ Sender :: sender(), ModState :: term()) -> continue | + {reply, + Reply :: + term(), + NewModState :: + term()} | + {noreply, + NewModState :: + term()} | + {async, + Work :: + function(), + From :: + sender(), + NewModState :: + term()} | + {stop, + Reason :: + term(), + NewModState :: + term()}. -callback handle_coverage(Request :: term(), - keyspaces(), Sender :: sender(), - ModState :: term()) -> continue | - {reply, Reply :: term(), - NewModState :: term()} | - {noreply, - NewModState :: term()} | - {async, Work :: function(), - From :: sender(), - NewModState :: term()} | - {stop, Reason :: term(), - NewModState :: term()}. + keyspaces(), Sender :: sender(), + ModState :: term()) -> continue | + {reply, Reply :: term(), + NewModState :: term()} | + {noreply, + NewModState :: term()} | + {async, Work :: function(), + From :: sender(), + NewModState :: term()} | + {stop, Reason :: term(), + NewModState :: term()}. -callback handle_exit(pid(), Reason :: term(), - ModState :: term()) -> {noreply, - NewModState :: term()} | - {stop, Reason :: term(), - NewModState :: term()}. + ModState :: term()) -> {noreply, + NewModState :: term()} | + {stop, Reason :: term(), + NewModState :: term()}. -callback handoff_starting(handoff_dest(), - ModState :: term()) -> {boolean(), - NewModState :: term()}. + ModState :: term()) -> {boolean(), + NewModState :: term()}. -callback handoff_cancelled(ModState :: term()) -> {ok, - NewModState :: term()}. + NewModState :: term()}. -callback handoff_finished(handoff_dest(), - ModState :: term()) -> {ok, NewModState :: term()}. + ModState :: term()) -> {ok, NewModState :: term()}. -callback handle_handoff_command(Request :: term(), - Sender :: sender(), - ModState :: term()) -> {reply, Reply :: term(), - NewModState :: - term()} | - {noreply, - NewModState :: - term()} | - {async, - Work :: function(), - From :: sender(), - NewModState :: - term()} | - {forward, - NewModState :: - term()} | - {drop, - NewModState :: - term()} | - {stop, Reason :: term(), - NewModState :: term()}. + Sender :: sender(), + ModState :: term()) -> {reply, Reply :: term(), + NewModState :: + term()} | + {noreply, + NewModState :: + term()} | + {async, + Work :: function(), + From :: sender(), + NewModState :: + term()} | + {forward, + NewModState :: + term()} | + {drop, + NewModState :: + term()} | + {stop, Reason :: term(), + NewModState :: term()}. -callback handle_handoff_data(binary(), - ModState :: term()) -> {reply, - ok | - {error, Reason :: term()}, - NewModState :: term()}. + ModState :: term()) -> {reply, + ok | + {error, Reason :: term()}, + NewModState :: term()}. -callback encode_handoff_item(Key :: term(), - Value :: term()) -> corrupted | binary(). + Value :: term()) -> corrupted | binary(). -callback is_empty(ModState :: term()) -> {boolean(), - NewModState :: term()} | - {false, Size :: pos_integer(), - NewModState :: term()}. + NewModState :: term()} | + {false, Size :: pos_integer(), + NewModState :: term()}. -callback terminate(Reason :: term(), - ModState :: term()) -> ok. + ModState :: term()) -> ok. -callback delete(ModState :: term()) -> {ok, - NewModState :: term()}. + NewModState :: term()}. %% This commands are not executed inside the VNode, instead they are %% part of the vnode_proxy contract. @@ -213,10 +213,10 @@ %% for people doing that! (it's called overflowing message queue hell and is %% really nasty!) -callback handle_overload_command(Request :: term(), - Sender :: sender(), Idx :: partition()) -> ok. 
+ Sender :: sender(), Idx :: partition()) -> ok. -callback handle_overload_info(Request :: term(), - Idx :: partition()) -> ok. + Idx :: partition()) -> ok. %% handle_exit/3 is an optional behaviour callback that can be implemented. %% It will be called in the case that a process that is linked to the vnode @@ -253,81 +253,81 @@ start_link(Mod, Index, Forward) -> start_link(Mod, Index, 0, Forward). start_link(Mod, Index, InitialInactivityTimeout, - Forward) -> + Forward) -> gen_fsm_compat:start_link(?MODULE, - [Mod, Index, InitialInactivityTimeout, Forward], - []). + [Mod, Index, InitialInactivityTimeout, Forward], + []). %% #1 - State started wait_for_init(Vnode) -> gen_fsm_compat:sync_send_event(Vnode, - wait_for_init, - infinity). + wait_for_init, + infinity). %% #2 - %% Send a command message for the vnode module by Pid - %% typically to do some deferred processing after returning yourself send_command(Pid, Request) -> gen_fsm_compat:send_event(Pid, - #riak_vnode_req_v1{request = Request}). + #riak_vnode_req_v1{request = Request}). %% #3 - handoff_error(Vnode, Err, Reason) -> gen_fsm_compat:send_event(Vnode, - {handoff_error, Err, Reason}). + {handoff_error, Err, Reason}). %% #4 - get_mod_index(VNode) -> gen_fsm_compat:sync_send_all_state_event(VNode, - get_mod_index). + get_mod_index). %% #5 set_forwarding(VNode, ForwardTo) -> gen_fsm_compat:send_all_state_event(VNode, - {set_forwarding, ForwardTo}). + {set_forwarding, ForwardTo}). %% #6 trigger_handoff(VNode, TargetIdx, TargetNode) -> gen_fsm_compat:send_all_state_event(VNode, - {trigger_handoff, - TargetIdx, - TargetNode}). + {trigger_handoff, + TargetIdx, + TargetNode}). %% #7 trigger_handoff(VNode, TargetNode) -> gen_fsm_compat:send_all_state_event(VNode, - {trigger_handoff, TargetNode}). + {trigger_handoff, TargetNode}). %% #8 trigger_delete(VNode) -> gen_fsm_compat:send_all_state_event(VNode, - trigger_delete). + trigger_delete). %% #9 core_status(VNode) -> gen_fsm_compat:sync_send_all_state_event(VNode, - core_status). + core_status). %% #10 %% Sends a command to the FSM that called it after Time %% has passed. -spec send_command_after(integer(), - term()) -> reference(). + term()) -> reference(). send_command_after(Time, Request) -> gen_fsm_compat:send_event_after(Time, - #riak_vnode_req_v1{request = Request}). + #riak_vnode_req_v1{request = Request}). %%%%%%% %new APIs %% #11 - riak_core_vnode_manager - handle_vnode_event cast_finish_handoff(VNode) -> gen_fsm_compat:send_all_state_event(VNode, - finish_handoff). + finish_handoff). %% #12 - riak_core_vnode_manager - handle_vnode_event cancel_handoff(VNode) -> gen_fsm_compat:send_all_state_event(VNode, - cancel_handoff). + cancel_handoff). %% #13 - riak_core_vnode_master - send_an_event send_an_event(VNode, Event) -> @@ -351,13 +351,13 @@ handoff_complete(VNode) -> %% #17 - riak:core_handoff_sender - start_fold_ resize_transfer_complete(VNode, NotSentAcc) -> gen_fsm_compat:send_event(VNode, - {resize_transfer_complete, NotSentAcc}). + {resize_transfer_complete, NotSentAcc}). %% #18 - riak_core_handoff_receiver - process_message handoff_data(VNode, MsgData, VNodeTimeout) -> gen_fsm_compat:sync_send_all_state_event(VNode, - {handoff_data, MsgData}, - VNodeTimeout). + {handoff_data, MsgData}, + VNodeTimeout). 
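%% A minimal module sketch against the callbacks above; the module name
%% `riak_demo_vnode' and its map-based state are assumptions for
%% illustration, not part of riak_core:

-module(riak_demo_vnode).
-behaviour(riak_core_vnode).
-export([init/1, handle_command/3, handle_coverage/4, handle_exit/3,
         handoff_starting/2, handoff_cancelled/1, handoff_finished/2,
         handle_handoff_command/3, handle_handoff_data/2,
         encode_handoff_item/2, is_empty/1, delete/1, terminate/2,
         handle_overload_command/3, handle_overload_info/2]).

init([_Partition]) -> {ok, #{}}.

handle_command({put, K, V}, _Sender, State) ->
    {reply, ok, maps:put(K, V, State)};
handle_command({get, K}, _Sender, State) ->
    {reply, maps:find(K, State), State}.

handle_coverage(_Req, _KeySpaces, _Sender, State) ->
    {noreply, State}.

handle_exit(_Pid, _Reason, State) -> {noreply, State}.

%% hand every key/value off, one binary per item
handoff_starting(_Dest, State) -> {true, State}.
handoff_cancelled(State) -> {ok, State}.
handoff_finished(_Dest, State) -> {ok, State}.
handle_handoff_command(_Req, _Sender, State) -> {forward, State}.
handle_handoff_data(Bin, State) ->
    {K, V} = binary_to_term(Bin),
    {reply, ok, maps:put(K, V, State)}.
encode_handoff_item(K, V) -> term_to_binary({K, V}).

is_empty(State) -> {maps:size(State) == 0, State}.
delete(_State) -> {ok, #{}}.
terminate(_Reason, _State) -> ok.

handle_overload_command(_Req, _Sender, _Idx) -> ok.
handle_overload_info(_Req, _Idx) -> ok.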
%% #19 - riak_core_vnode_proxy - handle_cast unregistered(VNode) -> @@ -376,7 +376,7 @@ reply({fsm, undefined, From}, Reply) -> riak_core_send_msg:send_event_unreliable(From, Reply); reply({fsm, Ref, From}, Reply) -> riak_core_send_msg:send_event_unreliable(From, - {Ref, Reply}); + {Ref, Reply}); reply({server, undefined, From}, Reply) -> riak_core_send_msg:reply_unreliable(From, Reply); reply({server, Ref, From}, Reply) -> @@ -390,7 +390,7 @@ reply(ignore, _Reply) -> ok. %% a monitor on `self()' in order to return a valid (if useless) %% monitor reference. -spec monitor(Sender :: sender()) -> Monitor :: - reference(). + reference(). monitor({fsm, _, From}) -> erlang:monitor(process, From); @@ -406,51 +406,51 @@ monitor(ignore) -> erlang:monitor(process, self()). %% ======== %% ======================== -record(state, - {index :: partition(), - mod :: module(), - modstate :: term(), - forward :: node() | [{integer(), node()}], - handoff_target = none :: none | {integer(), node()}, - handoff_pid :: pid() | undefined, - handoff_type :: - riak_core_handoff_manager:ho_type() | undefined, - pool_pid :: pid() | undefined, - pool_config :: tuple() | undefined, - manager_event_timer :: reference() | undefined, - inactivity_timeout :: non_neg_integer()}). + {index :: partition(), + mod :: module(), + modstate :: term(), + forward :: node() | [{integer(), node()}], + handoff_target = none :: none | {integer(), node()}, + handoff_pid :: pid() | undefined, + handoff_type :: + riak_core_handoff_manager:ho_type() | undefined, + pool_pid :: pid() | undefined, + pool_config :: tuple() | undefined, + manager_event_timer :: reference() | undefined, + inactivity_timeout :: non_neg_integer()}). init([Mod, Index, InitialInactivityTimeout, Forward]) -> process_flag(trap_exit, true), State = #state{index = Index, mod = Mod, - forward = Forward, - inactivity_timeout = InitialInactivityTimeout}, + forward = Forward, + inactivity_timeout = InitialInactivityTimeout}, {ok, started, State, 0}. terminate(Reason, _StateName, - #state{mod = Mod, modstate = ModState, - pool_pid = Pool}) -> + #state{mod = Mod, modstate = ModState, + pool_pid = Pool}) -> %% Shutdown if the pool is still alive and a normal `Reason' is %% given - there could be a race on delivery of the unregistered %% event and successfully shutting down the pool. try case is_pid(Pool) andalso - is_process_alive(Pool) andalso (?NORMAL_REASON(Reason)) - of - true -> - riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); - _ -> ok - end + is_process_alive(Pool) andalso (?NORMAL_REASON(Reason)) + of + true -> + riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); + _ -> ok + end catch - Type:Reason:Stacktrace -> - logger:error("Error while shutting down vnode worker " - "pool ~p:~p trace : ~p", - [Type, Reason, Stacktrace]) + Type:Reason:Stacktrace -> + logger:error("Error while shutting down vnode worker " + "pool ~p:~p trace : ~p", + [Type, Reason, Stacktrace]) after - case ModState of - %% Handoff completed, Mod:delete has been called, now terminate. - {deleted, ModState1} -> - Mod:terminate(Reason, ModState1); - _ -> Mod:terminate(Reason, ModState) - end + case ModState of + %% Handoff completed, Mod:delete has been called, now terminate. + {deleted, ModState1} -> + Mod:terminate(Reason, ModState1); + _ -> Mod:terminate(Reason, ModState) + end end. 
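%% A small sketch of the sender contract that reply/2 and monitor/1
%% above implement; `Sender' arrives as {fsm, Ref, From},
%% {server, Ref, From} or `ignore', and the helper names here are
%% illustrative only:

answer_later_sketch(Sender, Work) ->
    %% watch the caller while working; monitor/1 always yields a ref,
    %% even for `ignore', so demonitor/2 is unconditionally safe
    MRef = riak_core_vnode:monitor(Sender),
    Result = Work(),
    riak_core_vnode:reply(Sender, Result),
    erlang:demonitor(MRef, [flush]),
    ok.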
code_change(_OldVsn, StateName, State, _Extra) -> @@ -465,21 +465,21 @@ code_change(_OldVsn, StateName, State, _Extra) -> %% started %% ======== started(timeout, - State = #state{inactivity_timeout = - InitialInactivityTimeout}) -> + State = #state{inactivity_timeout = + InitialInactivityTimeout}) -> case do_init(State) of - {ok, State2} -> - {next_state, active, State2, InitialInactivityTimeout}; - {error, Reason} -> {stop, Reason} + {ok, State2} -> + {next_state, active, State2, InitialInactivityTimeout}; + {error, Reason} -> {stop, Reason} end. started(wait_for_init, _From, - State = #state{inactivity_timeout = - InitialInactivityTimeout}) -> + State = #state{inactivity_timeout = + InitialInactivityTimeout}) -> case do_init(State) of - {ok, State2} -> - {reply, ok, active, State2, InitialInactivityTimeout}; - {error, Reason} -> {stop, Reason} + {ok, State2} -> + {reply, ok, active, State2, InitialInactivityTimeout}; + {error, Reason} -> {stop, Reason} end. %%active @@ -487,115 +487,115 @@ started(wait_for_init, _From, active(timeout, State = #state{mod = Mod, index = Idx}) -> riak_core_vnode_manager:vnode_event(Mod, - Idx, - self(), - inactive), + Idx, + self(), + inactive), continue(State); active(#riak_coverage_req_v1{keyspaces = KeySpaces, - request = Request, sender = Sender}, + request = Request, sender = Sender}, State) -> %% Coverage request handled in handoff and non-handoff. Will be forwarded if set. vnode_coverage(Sender, Request, KeySpaces, State); active(#riak_vnode_req_v1{sender = Sender, - request = {resize_forward, Request}}, + request = {resize_forward, Request}}, State) -> vnode_command(Sender, Request, State); active(#riak_vnode_req_v1{sender = Sender, - request = Request}, + request = Request}, State = #state{handoff_target = HT}) when HT =:= none -> forward_or_vnode_command(Sender, Request, State); active(#riak_vnode_req_v1{sender = Sender, - request = Request}, + request = Request}, State = #state{handoff_type = resize, - handoff_target = {HOIdx, HONode}, index = Index, - forward = Forward, mod = Mod}) -> + handoff_target = {HOIdx, HONode}, index = Index, + forward = Forward, mod = Mod}) -> RequestHash = Mod:request_hash(Request), case RequestHash of - %% will never have enough information to forward request so only handle locally - undefined -> vnode_command(Sender, Request, State); - _ -> - {ok, R} = riak_core_ring_manager:get_my_ring(), - FutureIndex = riak_core_ring:future_index(RequestHash, - Index, - R), - case FutureIndex of - %% request for portion of keyspace currently being transferred - HOIdx -> - vnode_handoff_command(Sender, - Request, - {HOIdx, HONode}, - State); - %% some portions of keyspace already transferred - _Other when is_list(Forward) -> - vnode_resize_command(Sender, - Request, - FutureIndex, - State); - %% some portions of keyspace not already transferred - _Other -> vnode_command(Sender, Request, State) - end + %% will never have enough information to forward request so only handle locally + undefined -> vnode_command(Sender, Request, State); + _ -> + {ok, R} = riak_core_ring_manager:get_my_ring(), + FutureIndex = riak_core_ring:future_index(RequestHash, + Index, + R), + case FutureIndex of + %% request for portion of keyspace currently being transferred + HOIdx -> + vnode_handoff_command(Sender, + Request, + {HOIdx, HONode}, + State); + %% some portions of keyspace already transferred + _Other when is_list(Forward) -> + vnode_resize_command(Sender, + Request, + FutureIndex, + State); + %% some portions of keyspace not already transferred + 
_Other -> vnode_command(Sender, Request, State) + end end; active(#riak_vnode_req_v1{sender = Sender, - request = Request}, + request = Request}, State) -> vnode_handoff_command(Sender, - Request, - State#state.handoff_target, - State); + Request, + State#state.handoff_target, + State); active(handoff_complete, State) -> State2 = start_manager_event_timer(handoff_complete, - State), + State), continue(State2); active({resize_transfer_complete, SeenIdxs}, State = #state{mod = Mod, modstate = ModState, - handoff_target = Target}) -> + handoff_target = Target}) -> case Target of - none -> continue(State); - _ -> - %% TODO: refactor similarties w/ finish_handoff handle_event - {ok, NewModState} = Mod:handoff_finished(Target, - ModState), - finish_handoff(SeenIdxs, - State#state{modstate = NewModState}) + none -> continue(State); + _ -> + %% TODO: refactor similarties w/ finish_handoff handle_event + {ok, NewModState} = Mod:handoff_finished(Target, + ModState), + finish_handoff(SeenIdxs, + State#state{modstate = NewModState}) end; active({handoff_error, _Err, _Reason}, State) -> State2 = start_manager_event_timer(handoff_error, - State), + State), continue(State2); active({send_manager_event, Event}, State) -> State2 = start_manager_event_timer(Event, State), continue(State2); active({trigger_handoff, TargetNode}, State) -> active({trigger_handoff, State#state.index, TargetNode}, - State); + State); active({trigger_handoff, TargetIdx, TargetNode}, State) -> maybe_handoff(TargetIdx, TargetNode, State); active(trigger_delete, State = #state{mod = Mod, modstate = ModState, - index = Idx}) -> + index = Idx}) -> case mark_delete_complete(Idx, Mod) of - {ok, _NewRing} -> - {ok, NewModState} = Mod:delete(ModState), - logger:debug("~p ~p vnode deleted", [Idx, Mod]); - _ -> NewModState = ModState + {ok, _NewRing} -> + {ok, NewModState} = Mod:delete(ModState), + logger:debug("~p ~p vnode deleted", [Idx, Mod]); + _ -> NewModState = ModState end, maybe_shutdown_pool(State), riak_core_vnode_manager:unregister_vnode(Idx, Mod), continue(State#state{modstate = - {deleted, NewModState}}); + {deleted, NewModState}}); active(unregistered, State = #state{mod = Mod, index = Index}) -> %% Add exclusion so the ring handler will not try to spin this vnode %% up until it receives traffic. riak_core_handoff_manager:add_exclusion(Mod, Index), logger:debug("~p ~p vnode excluded and unregistered.", - [Index, Mod]), + [Index, Mod]), {stop, normal, State#state{handoff_target = none, - handoff_type = undefined, pool_pid = undefined}}. + handoff_type = undefined, pool_pid = undefined}}. active(_Event, _From, State) -> Reply = ok, @@ -608,140 +608,140 @@ active(_Event, _From, State) -> %% handle_event %%%%%%%%%%%%%%%% handle_event({set_forwarding, undefined}, _StateName, - State = #state{modstate = {deleted, _ModState}}) -> + State = #state{modstate = {deleted, _ModState}}) -> %% The vnode must forward requests when in the deleted state, therefore %% ignore requests to stop forwarding. 
continue(State); handle_event({set_forwarding, ForwardTo}, _StateName, - State) -> + State) -> logger:debug("vnode fwd :: ~p/~p :: ~p -> ~p~n", - [State#state.mod, - State#state.index, - State#state.forward, - ForwardTo]), + [State#state.mod, + State#state.index, + State#state.forward, + ForwardTo]), State2 = mod_set_forwarding(ForwardTo, State), continue(State2#state{forward = ForwardTo}); handle_event(finish_handoff, _StateName, - State = #state{modstate = {deleted, _ModState}}) -> + State = #state{modstate = {deleted, _ModState}}) -> stop_manager_event_timer(State), continue(State#state{handoff_target = none}); handle_event(finish_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState, - handoff_target = Target}) -> + State = #state{mod = Mod, modstate = ModState, + handoff_target = Target}) -> stop_manager_event_timer(State), case Target of - none -> continue(State); - _ -> - {ok, NewModState} = Mod:handoff_finished(Target, - ModState), - finish_handoff(State#state{modstate = NewModState}) + none -> continue(State); + _ -> + {ok, NewModState} = Mod:handoff_finished(Target, + ModState), + finish_handoff(State#state{modstate = NewModState}) end; handle_event(cancel_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> %% it would be nice to pass {Err, Reason} to the vnode but the %% API doesn't currently allow for that. stop_manager_event_timer(State), case State#state.handoff_target of - none -> continue(State); - _ -> - {ok, NewModState} = Mod:handoff_cancelled(ModState), - continue(State#state{handoff_target = none, - handoff_type = undefined, - modstate = NewModState}) + none -> continue(State); + _ -> + {ok, NewModState} = Mod:handoff_cancelled(ModState), + continue(State#state{handoff_target = none, + handoff_type = undefined, + modstate = NewModState}) end; handle_event({trigger_handoff, TargetNode}, StateName, - State) -> + State) -> handle_event({trigger_handoff, - State#state.index, - TargetNode}, - StateName, - State); + State#state.index, + TargetNode}, + StateName, + State); handle_event({trigger_handoff, _TargetIdx, _TargetNode}, - _StateName, - State = #state{modstate = {deleted, _ModState}}) -> + _StateName, + State = #state{modstate = {deleted, _ModState}}) -> continue(State); handle_event(R = {trigger_handoff, - _TargetIdx, - _TargetNode}, - _StateName, State) -> + _TargetIdx, + _TargetNode}, + _StateName, State) -> active(R, State); handle_event(trigger_delete, _StateName, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> continue(State); handle_event(trigger_delete, _StateName, State) -> active(trigger_delete, State); handle_event(R = #riak_vnode_req_v1{}, _StateName, - State) -> + State) -> active(R, State); handle_event(R = #riak_coverage_req_v1{}, _StateName, - State) -> + State) -> active(R, State). 
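%% A hedged sketch of the two triggers a caller can send a running
%% vnode; Pid, TargetIdx and TargetNode are placeholders. Note that
%% finish_handoff and cancel_handoff are cast back by the handoff
%% machinery itself rather than called by hand:

trigger_sketch(Pid, TargetIdx, TargetNode) ->
    %% lands in maybe_handoff/3 once routed through handle_event/active
    ok = riak_core_vnode:trigger_handoff(Pid, TargetIdx, TargetNode),
    %% asks an emptied vnode to delete its data and unregister
    ok = riak_core_vnode:trigger_delete(Pid).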
%%handle_sync_event %%%%%%%%%%%%%%%%%%%% handle_sync_event(current_state, _From, StateName, - State) -> + State) -> {reply, {StateName, State}, StateName, State}; handle_sync_event(get_mod_index, _From, StateName, - State = #state{index = Idx, mod = Mod}) -> + State = #state{index = Idx, mod = Mod}) -> {reply, {Mod, Idx}, StateName, State, State#state.inactivity_timeout}; handle_sync_event({handoff_data, _BinObj}, _From, - StateName, - State = #state{modstate = {deleted, _ModState}}) -> + StateName, + State = #state{modstate = {deleted, _ModState}}) -> {reply, {error, vnode_exiting}, StateName, State, State#state.inactivity_timeout}; handle_sync_event({handoff_data, BinObj}, _From, - StateName, - State = #state{mod = Mod, modstate = ModState}) -> + StateName, + State = #state{mod = Mod, modstate = ModState}) -> case Mod:handle_handoff_data(BinObj, ModState) of - {reply, ok, NewModState} -> - {reply, - ok, - StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout}; - {reply, {error, Err}, NewModState} -> - logger:error("~p failed to store handoff obj: ~p", - [Mod, Err]), - {reply, - {error, Err}, - StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout} + {reply, ok, NewModState} -> + {reply, + ok, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout}; + {reply, {error, Err}, NewModState} -> + logger:error("~p failed to store handoff obj: ~p", + [Mod, Err]), + {reply, + {error, Err}, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout} end; handle_sync_event(core_status, _From, StateName, - State = #state{index = Index, mod = Mod, - modstate = ModState, handoff_target = HT, - forward = FN}) -> + State = #state{index = Index, mod = Mod, + modstate = ModState, handoff_target = HT, + forward = FN}) -> Mode = case {FN, HT} of - {undefined, none} -> active; - {undefined, HT} -> handoff; - {FN, none} -> forward; - _ -> undefined - end, + {undefined, none} -> active; + {undefined, HT} -> handoff; + {FN, none} -> forward; + _ -> undefined + end, Status = [{index, Index}, {mod, Mod}] ++ - case FN of - undefined -> []; - _ -> [{forward, FN}] - end - ++ - case HT of - none -> []; - _ -> [{handoff_target, HT}] - end - ++ - case ModState of - {deleted, _} -> [deleted]; - _ -> [] - end, + case FN of + undefined -> []; + _ -> [{forward, FN}] + end + ++ + case HT of + none -> []; + _ -> [{handoff_target, HT}] + end + ++ + case ModState of + {deleted, _} -> [deleted]; + _ -> [] + end, {reply, {Mode, Status}, StateName, @@ -752,81 +752,81 @@ handle_sync_event(core_status, _From, StateName, %%%%%%%%%%%%%% handle_info({'$vnode_proxy_ping', From, Ref, Msgs}, - StateName, State) -> + StateName, State) -> riak_core_vnode_proxy:cast(From, - {vnode_proxy_pong, Ref, Msgs}), + {vnode_proxy_pong, Ref, Msgs}), {next_state, StateName, State, State#state.inactivity_timeout}; handle_info({'EXIT', Pid, Reason}, _StateName, - State = #state{mod = Mod, index = Index, pool_pid = Pid, - pool_config = PoolConfig}) -> + State = #state{mod = Mod, index = Index, pool_pid = Pid, + pool_config = PoolConfig}) -> case Reason of - Reason when Reason == normal; Reason == shutdown -> - continue(State#state{pool_pid = undefined}); - _ -> - logger:error("~p ~p worker pool crashed ~p\n", - [Index, Mod, Reason]), - {pool, WorkerModule, PoolSize, WorkerArgs} = PoolConfig, - logger:debug("starting worker pool ~p with size of " - "~p for vnode ~p.", - [WorkerModule, PoolSize, Index]), - {ok, NewPoolPid} = - 
riak_core_vnode_worker_pool:start_link(WorkerModule, - PoolSize, - Index, - WorkerArgs, - worker_props), - continue(State#state{pool_pid = NewPoolPid}) + Reason when Reason == normal; Reason == shutdown -> + continue(State#state{pool_pid = undefined}); + _ -> + logger:error("~p ~p worker pool crashed ~p\n", + [Index, Mod, Reason]), + {pool, WorkerModule, PoolSize, WorkerArgs} = PoolConfig, + logger:debug("starting worker pool ~p with size of " + "~p for vnode ~p.", + [WorkerModule, PoolSize, Index]), + {ok, NewPoolPid} = + riak_core_vnode_worker_pool:start_link(WorkerModule, + PoolSize, + Index, + WorkerArgs, + worker_props), + continue(State#state{pool_pid = NewPoolPid}) end; handle_info({'DOWN', _Ref, process, _Pid, normal}, - _StateName, State = #state{modstate = {deleted, _}}) -> + _StateName, State = #state{modstate = {deleted, _}}) -> %% these messages are produced by riak_kv_vnode's aae tree %% monitors; they are harmless, so don't yell about them. also %% only dustbin them in the deleted modstate, because pipe vnodes %% need them in other states continue(State); handle_info(Info, _StateName, - State = #state{mod = Mod, modstate = {deleted, _}, - index = Index}) -> + State = #state{mod = Mod, modstate = {deleted, _}, + index = Index}) -> logger:info("~p ~p ignored handle_info ~p - vnode " - "unregistering\n", - [Index, Mod, Info]), + "unregistering\n", + [Index, Mod, Info]), continue(State); handle_info({'EXIT', Pid, Reason}, StateName, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> %% A linked processes has died so use the %% handle_exit callback to allow the vnode %% process to take appropriate action. %% If the function is not implemented default %% to crashing the process. try case Mod:handle_exit(Pid, Reason, ModState) of - {noreply, NewModState} -> - {next_state, - StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout}; - {stop, Reason1, NewModState} -> - {stop, Reason1, State#state{modstate = NewModState}} - end + {noreply, NewModState} -> + {next_state, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout}; + {stop, Reason1, NewModState} -> + {stop, Reason1, State#state{modstate = NewModState}} + end catch - _ErrorType:undef -> {stop, linked_process_crash, State} + _ErrorType:undef -> {stop, linked_process_crash, State} end; handle_info(Info, StateName, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> case erlang:function_exported(Mod, handle_info, 2) of - true -> - {ok, NewModState} = Mod:handle_info(Info, ModState), - {next_state, - StateName, - State#state{modstate = NewModState}, - State#state.inactivity_timeout}; - false -> - {next_state, - StateName, - State, - State#state.inactivity_timeout} + true -> + {ok, NewModState} = Mod:handle_info(Info, ModState), + {next_state, + StateName, + State#state{modstate = NewModState}, + State#state.inactivity_timeout}; + false -> + {next_state, + StateName, + State, + State#state.inactivity_timeout} end. 
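%% The final clause above probes erlang:function_exported(Mod,
%% handle_info, 2), so a vnode module can opt into plain messages by
%% exporting that function; a minimal sketch (the `tick' message and
%% do_periodic_work/1 helper are assumptions):

handle_info(tick, ModState) ->
    %% out-of-band messages sent straight to the vnode process land here
    {ok, do_periodic_work(ModState)};
handle_info(_Other, ModState) ->
    {ok, ModState}.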
%% ======================== @@ -835,41 +835,41 @@ handle_info(Info, StateName, %% ======== %% ======================== do_init(State = #state{index = Index, mod = Mod, - forward = Forward}) -> + forward = Forward}) -> {ModState, Props} = case Mod:init([Index]) of - {ok, MS} -> {MS, []}; - {ok, MS, P} -> {MS, P}; - {error, R} -> {error, R} - end, + {ok, MS} -> {MS, []}; + {ok, MS, P} -> {MS, P}; + {error, R} -> {error, R} + end, case {ModState, Props} of - {error, Reason} -> {error, Reason}; - _ -> - case lists:keyfind(pool, 1, Props) of - {pool, WorkerModule, PoolSize, WorkerArgs} = - PoolConfig -> - logger:debug("starting worker pool ~p with size of " - "~p~n", - [WorkerModule, PoolSize]), - {ok, PoolPid} = - riak_core_vnode_worker_pool:start_link(WorkerModule, - PoolSize, - Index, - WorkerArgs, - worker_props); - _ -> PoolPid = PoolConfig = undefined - end, - riak_core_handoff_manager:remove_exclusion(Mod, Index), - Timeout = application:get_env(riak_core, - vnode_inactivity_timeout, - ?DEFAULT_TIMEOUT), - Timeout2 = Timeout + riak_core_rand:uniform(Timeout), - State2 = State#state{modstate = ModState, - inactivity_timeout = Timeout2, - pool_pid = PoolPid, pool_config = PoolConfig}, - logger:debug("vnode :: ~p/~p :: ~p~n", - [Mod, Index, Forward]), - State3 = mod_set_forwarding(Forward, State2), - {ok, State3} + {error, Reason} -> {error, Reason}; + _ -> + case lists:keyfind(pool, 1, Props) of + {pool, WorkerModule, PoolSize, WorkerArgs} = + PoolConfig -> + logger:debug("starting worker pool ~p with size of " + "~p~n", + [WorkerModule, PoolSize]), + {ok, PoolPid} = + riak_core_vnode_worker_pool:start_link(WorkerModule, + PoolSize, + Index, + WorkerArgs, + worker_props); + _ -> PoolPid = PoolConfig = undefined + end, + riak_core_handoff_manager:remove_exclusion(Mod, Index), + Timeout = application:get_env(riak_core, + vnode_inactivity_timeout, + ?DEFAULT_TIMEOUT), + Timeout2 = Timeout + riak_core_rand:uniform(Timeout), + State2 = State#state{modstate = ModState, + inactivity_timeout = Timeout2, + pool_pid = PoolPid, pool_config = PoolConfig}, + logger:debug("vnode :: ~p/~p :: ~p~n", + [Mod, Index, Forward]), + State3 = mod_set_forwarding(Forward, State2), + {ok, State3} end. continue(State) -> @@ -910,200 +910,200 @@ continue(State, NewModState) -> %% to a partition for which the transfer has already completed, are forwarded. All other %% requests are passed to handle_handoff_command. 
forward_or_vnode_command(Sender, Request, - State = #state{forward = Forward, mod = Mod, - index = Index}) -> + State = #state{forward = Forward, mod = Mod, + index = Index}) -> Resizing = is_list(Forward), RequestHash = case Resizing of - true -> Mod:request_hash(Request); - false -> undefined - end, + true -> Mod:request_hash(Request); + false -> undefined + end, case {Forward, RequestHash} of - %% typical vnode operation, no forwarding set, handle request locally - {undefined, _} -> vnode_command(Sender, Request, State); - %% implicit forwarding after ownership transfer/hinted handoff - {F, _} when not is_list(F) -> - vnode_forward(implicit, - {Index, Forward}, - Sender, - Request, - State), - continue(State); - %% during resize we can't forward a request w/o request hash, always handle locally - {_, undefined} -> vnode_command(Sender, Request, State); - %% possible forwarding during ring resizing - {_, _} -> - {ok, R} = riak_core_ring_manager:get_my_ring(), - FutureIndex = riak_core_ring:future_index(RequestHash, - Index, - R), - vnode_resize_command(Sender, - Request, - FutureIndex, - State) + %% typical vnode operation, no forwarding set, handle request locally + {undefined, _} -> vnode_command(Sender, Request, State); + %% implicit forwarding after ownership transfer/hinted handoff + {F, _} when not is_list(F) -> + vnode_forward(implicit, + {Index, Forward}, + Sender, + Request, + State), + continue(State); + %% during resize we can't forward a request w/o request hash, always handle locally + {_, undefined} -> vnode_command(Sender, Request, State); + %% possible forwarding during ring resizing + {_, _} -> + {ok, R} = riak_core_ring_manager:get_my_ring(), + FutureIndex = riak_core_ring:future_index(RequestHash, + Index, + R), + vnode_resize_command(Sender, + Request, + FutureIndex, + State) end. 
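%% The resize branches above lean on an optional Mod:request_hash/1
%% that places a request on the ring, or returns `undefined' to force
%% local handling; a minimal sketch for a KV-style request (the request
%% shape is assumed):

request_hash({put, Bucket, Key, _Value}) ->
    riak_core_util:chash_key({Bucket, Key});
request_hash(_Other) ->
    %% not enough information to route: handle locally
    undefined.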
vnode_command(_Sender, _Request, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> continue(State); vnode_command(Sender, Request, - State = #state{mod = Mod, modstate = ModState, - pool_pid = Pool}) -> + State = #state{mod = Mod, modstate = ModState, + pool_pid = Pool}) -> case catch Mod:handle_command(Request, Sender, ModState) - of - {'EXIT', ExitReason} -> - reply(Sender, {vnode_error, ExitReason}), - logger:error("~p command failed ~p", [Mod, ExitReason]), - {stop, ExitReason, State#state{modstate = ModState}}; - continue -> continue(State, ModState); - {reply, Reply, NewModState} -> - reply(Sender, Reply), - continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, - Work, - From), - continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, State#state{modstate = NewModState}} + of + {'EXIT', ExitReason} -> + reply(Sender, {vnode_error, ExitReason}), + logger:error("~p command failed ~p", [Mod, ExitReason]), + {stop, ExitReason, State#state{modstate = ModState}}; + continue -> continue(State, ModState); + {reply, Reply, NewModState} -> + reply(Sender, Reply), + continue(State, NewModState); + {noreply, NewModState} -> continue(State, NewModState); + {async, Work, From, NewModState} -> + %% dispatch some work to the vnode worker pool + %% the result is sent back to 'From' + riak_core_vnode_worker_pool:handle_work(Pool, + Work, + From), + continue(State, NewModState); + {stop, Reason, NewModState} -> + {stop, Reason, State#state{modstate = NewModState}} end. vnode_coverage(Sender, Request, KeySpaces, - State = #state{index = Index, mod = Mod, - modstate = ModState, pool_pid = Pool, - forward = Forward}) -> + State = #state{index = Index, mod = Mod, + modstate = ModState, pool_pid = Pool, + forward = Forward}) -> %% Check if we should forward case Forward of - undefined -> - Action = Mod:handle_coverage(Request, - KeySpaces, - Sender, - ModState); - %% handle coverage requests locally during ring resize - Forwards when is_list(Forwards) -> - Action = Mod:handle_coverage(Request, - KeySpaces, - Sender, - ModState); - NextOwner -> - logger:debug("Forwarding coverage ~p -> ~p: ~p~n", - [node(), NextOwner, Index]), - riak_core_vnode_master:coverage(Request, - {Index, NextOwner}, - KeySpaces, - Sender, - riak_core_vnode_master:reg_name(Mod)), - Action = continue + undefined -> + Action = Mod:handle_coverage(Request, + KeySpaces, + Sender, + ModState); + %% handle coverage requests locally during ring resize + Forwards when is_list(Forwards) -> + Action = Mod:handle_coverage(Request, + KeySpaces, + Sender, + ModState); + NextOwner -> + logger:debug("Forwarding coverage ~p -> ~p: ~p~n", + [node(), NextOwner, Index]), + riak_core_vnode_master:coverage(Request, + {Index, NextOwner}, + KeySpaces, + Sender, + riak_core_vnode_master:reg_name(Mod)), + Action = continue end, case Action of - continue -> continue(State, ModState); - {reply, Reply, NewModState} -> - reply(Sender, Reply), - continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, - Work, - From), - continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, 
Reason, State#state{modstate = NewModState}} + continue -> continue(State, ModState); + {reply, Reply, NewModState} -> + reply(Sender, Reply), + continue(State, NewModState); + {noreply, NewModState} -> continue(State, NewModState); + {async, Work, From, NewModState} -> + %% dispatch some work to the vnode worker pool + %% the result is sent back to 'From' + riak_core_vnode_worker_pool:handle_work(Pool, + Work, + From), + continue(State, NewModState); + {stop, Reason, NewModState} -> + {stop, Reason, State#state{modstate = NewModState}} end. vnode_handoff_command(Sender, Request, ForwardTo, - State = #state{mod = Mod, modstate = ModState, - handoff_target = HOTarget, - handoff_type = HOType, pool_pid = Pool}) -> + State = #state{mod = Mod, modstate = ModState, + handoff_target = HOTarget, + handoff_type = HOType, pool_pid = Pool}) -> case Mod:handle_handoff_command(Request, - Sender, - ModState) - of - {reply, Reply, NewModState} -> - reply(Sender, Reply), - continue(State, NewModState); - {noreply, NewModState} -> continue(State, NewModState); - {async, Work, From, NewModState} -> - %% dispatch some work to the vnode worker pool - %% the result is sent back to 'From' - riak_core_vnode_worker_pool:handle_work(Pool, - Work, - From), - continue(State, NewModState); - {forward, NewModState} -> - forward_request(HOType, - Request, - HOTarget, - ForwardTo, - Sender, - State), - continue(State, NewModState); - {forward, NewReq, NewModState} -> - forward_request(HOType, - NewReq, - HOTarget, - ForwardTo, - Sender, - State), - continue(State, NewModState); - {drop, NewModState} -> continue(State, NewModState); - {stop, Reason, NewModState} -> - {stop, Reason, State#state{modstate = NewModState}} + Sender, + ModState) + of + {reply, Reply, NewModState} -> + reply(Sender, Reply), + continue(State, NewModState); + {noreply, NewModState} -> continue(State, NewModState); + {async, Work, From, NewModState} -> + %% dispatch some work to the vnode worker pool + %% the result is sent back to 'From' + riak_core_vnode_worker_pool:handle_work(Pool, + Work, + From), + continue(State, NewModState); + {forward, NewModState} -> + forward_request(HOType, + Request, + HOTarget, + ForwardTo, + Sender, + State), + continue(State, NewModState); + {forward, NewReq, NewModState} -> + forward_request(HOType, + NewReq, + HOTarget, + ForwardTo, + Sender, + State), + continue(State, NewModState); + {drop, NewModState} -> continue(State, NewModState); + {stop, Reason, NewModState} -> + {stop, Reason, State#state{modstate = NewModState}} end. %% @private wrap the request for resize forwards, and use the resize %% target. forward_request(resize, Request, _HOTarget, - ResizeTarget, Sender, State) -> + ResizeTarget, Sender, State) -> %% resize op and transfer ongoing vnode_forward(resize, - ResizeTarget, - Sender, - {resize_forward, Request}, - State); + ResizeTarget, + Sender, + {resize_forward, Request}, + State); forward_request(undefined, Request, _HOTarget, - ResizeTarget, Sender, State) -> + ResizeTarget, Sender, State) -> %% resize op ongoing, no resize transfer ongoing, arrive here %% via forward_or_vnode_command vnode_forward(resize, - ResizeTarget, - Sender, - {resize_forward, Request}, - State); + ResizeTarget, + Sender, + {resize_forward, Request}, + State); forward_request(_, Request, HOTarget, _ResizeTarget, - Sender, State) -> + Sender, State) -> %% normal explicit forwarding during owhership transfer vnode_forward(explicit, - HOTarget, - Sender, - Request, - State). + HOTarget, + Sender, + Request, + State). 
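%% forward_request/6 above gives a request three fates during handoff:
%% answer locally, forward to the target, or drop. A common module-side
%% sketch (request shapes assumed) keeps reads local while writes
%% follow the data to the new owner:

handle_handoff_command({get, _Key} = Req, Sender, State) ->
    handle_command(Req, Sender, State);   %% serve reads from local data
handle_handoff_command({put, _Key, _Val}, _Sender, State) ->
    {forward, State};                     %% new owner takes the write
handle_handoff_command(_Req, _Sender, State) ->
    {drop, State}.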
vnode_forward(Type, ForwardTo, Sender, Request, - State) -> + State) -> logger:debug("Forwarding (~p) {~p,~p} -> ~p~n", - [Type, State#state.index, node(), ForwardTo]), + [Type, State#state.index, node(), ForwardTo]), riak_core_vnode_master:command_unreliable(ForwardTo, - Request, - Sender, - riak_core_vnode_master:reg_name(State#state.mod)). + Request, + Sender, + riak_core_vnode_master:reg_name(State#state.mod)). %% @doc during ring resizing if we have completed a transfer to the index that will %% handle request in future ring we forward to it. Otherwise we delegate %% to the local vnode like other requests during handoff vnode_resize_command(Sender, Request, FutureIndex, - State = #state{forward = Forward}) + State = #state{forward = Forward}) when is_list(Forward) -> case lists:keyfind(FutureIndex, 1, Forward) of - false -> vnode_command(Sender, Request, State); - {FutureIndex, FutureOwner} -> - vnode_handoff_command(Sender, - Request, - {FutureIndex, FutureOwner}, - State) + false -> vnode_command(Sender, Request, State); + {FutureIndex, FutureOwner} -> + vnode_handoff_command(Sender, + Request, + {FutureIndex, FutureOwner}, + State) end. %% This code lives in riak_core_vnode rather than riak_core_vnode_manager @@ -1113,152 +1113,152 @@ vnode_resize_command(Sender, Request, FutureIndex, %% to execute on multiple parallel vnodes because of the synchronization %% afforded by having all ring changes go through the single ring manager. mark_handoff_complete(SrcIdx, Target, SeenIdxs, Mod, - resize) -> + resize) -> Prev = node(), Source = {SrcIdx, Prev}, TransFun = fun (Ring, _) -> - Owner = riak_core_ring:index_owner(Ring, SrcIdx), - Status = riak_core_ring:resize_transfer_status(Ring, - Source, - Target, - Mod), - case {Owner, Status} of - {Prev, awaiting} -> - F = fun (SeenIdx, RingAcc) -> - riak_core_ring:schedule_resize_transfer(RingAcc, - Source, - SeenIdx) - end, - Ring2 = lists:foldl(F, - Ring, - ordsets:to_list(SeenIdxs)), - Ring3 = - riak_core_ring:resize_transfer_complete(Ring2, - Source, - Target, - Mod), - %% local ring optimization (see below) - {set_only, Ring3}; - _ -> ignore - end - end, + Owner = riak_core_ring:index_owner(Ring, SrcIdx), + Status = riak_core_ring:resize_transfer_status(Ring, + Source, + Target, + Mod), + case {Owner, Status} of + {Prev, awaiting} -> + F = fun (SeenIdx, RingAcc) -> + riak_core_ring:schedule_resize_transfer(RingAcc, + Source, + SeenIdx) + end, + Ring2 = lists:foldl(F, + Ring, + ordsets:to_list(SeenIdxs)), + Ring3 = + riak_core_ring:resize_transfer_complete(Ring2, + Source, + Target, + Mod), + %% local ring optimization (see below) + {set_only, Ring3}; + _ -> ignore + end + end, Result = riak_core_ring_manager:ring_trans(TransFun, - []), + []), case Result of - {ok, _NewRing} -> resize; - _ -> continue + {ok, _NewRing} -> resize; + _ -> continue end; mark_handoff_complete(Idx, {Idx, New}, [], Mod, _) -> Prev = node(), Result = riak_core_ring_manager:ring_trans(fun (Ring, - _) -> - Owner = - riak_core_ring:index_owner(Ring, - Idx), - {_, NextOwner, Status} = - riak_core_ring:next_owner(Ring, - Idx, - Mod), - NewStatus = - riak_core_ring:member_status(Ring, - New), - case {Owner, - NextOwner, - NewStatus, - Status} - of - {Prev, - New, - _, - awaiting} -> - Ring2 = - riak_core_ring:handoff_complete(Ring, - Idx, - Mod), - %% Optimization. Only alter the local ring without - %% triggering a gossip, thus implicitly coalescing - %% multiple vnode handoff completion events. 
In the - %% future we should decouple vnode handoff state from - %% the ring structure in order to make gossip independent - %% of ring size. - {set_only, - Ring2}; - _ -> ignore - end - end, - []), + _) -> + Owner = + riak_core_ring:index_owner(Ring, + Idx), + {_, NextOwner, Status} = + riak_core_ring:next_owner(Ring, + Idx, + Mod), + NewStatus = + riak_core_ring:member_status(Ring, + New), + case {Owner, + NextOwner, + NewStatus, + Status} + of + {Prev, + New, + _, + awaiting} -> + Ring2 = + riak_core_ring:handoff_complete(Ring, + Idx, + Mod), + %% Optimization. Only alter the local ring without + %% triggering a gossip, thus implicitly coalescing + %% multiple vnode handoff completion events. In the + %% future we should decouple vnode handoff state from + %% the ring structure in order to make gossip independent + %% of ring size. + {set_only, + Ring2}; + _ -> ignore + end + end, + []), case Result of - {ok, NewRing} -> NewRing = NewRing; - _ -> - {ok, NewRing} = riak_core_ring_manager:get_my_ring() + {ok, NewRing} -> NewRing = NewRing; + _ -> + {ok, NewRing} = riak_core_ring_manager:get_my_ring() end, Owner = riak_core_ring:index_owner(NewRing, Idx), {_, NextOwner, Status} = - riak_core_ring:next_owner(NewRing, Idx, Mod), + riak_core_ring:next_owner(NewRing, Idx, Mod), NewStatus = riak_core_ring:member_status(NewRing, New), case {Owner, NextOwner, NewStatus, Status} of - {_, _, invalid, _} -> - %% Handing off to invalid node, don't give-up data. - continue; - {Prev, New, _, _} -> forward; - {Prev, _, _, _} -> - %% Handoff wasn't to node that is scheduled in next, so no change. - continue; - {_, _, _, _} -> shutdown + {_, _, invalid, _} -> + %% Handing off to invalid node, don't give-up data. + continue; + {Prev, New, _, _} -> forward; + {Prev, _, _, _} -> + %% Handoff wasn't to node that is scheduled in next, so no change. + continue; + {_, _, _, _} -> shutdown end. finish_handoff(State) -> finish_handoff([], State). finish_handoff(SeenIdxs, - State = #state{mod = Mod, modstate = ModState, - index = Idx, handoff_target = Target, - handoff_type = HOType}) -> + State = #state{mod = Mod, modstate = ModState, + index = Idx, handoff_target = Target, + handoff_type = HOType}) -> case mark_handoff_complete(Idx, - Target, - SeenIdxs, - Mod, - HOType) - of - continue -> - continue(State#state{handoff_target = none, - handoff_type = undefined}); - resize -> - CurrentForwarding = resize_forwarding(State), - NewForwarding = [Target | CurrentForwarding], - State2 = mod_set_forwarding(NewForwarding, State), - continue(State2#state{handoff_target = none, - handoff_type = undefined, - forward = NewForwarding}); - Res when Res == forward; Res == shutdown -> - {_, HN} = Target, - %% Have to issue the delete now. Once unregistered the - %% vnode master will spin up a new vnode on demand. - %% Shutdown the async pool beforehand, don't want callbacks - %% running on non-existant data. 
- maybe_shutdown_pool(State), - {ok, NewModState} = Mod:delete(ModState), - logger:debug("~p ~p vnode finished handoff and deleted.", - [Idx, Mod]), - riak_core_vnode_manager:unregister_vnode(Idx, Mod), - logger:debug("vnode hn/fwd :: ~p/~p :: ~p -> ~p~n", - [State#state.mod, - State#state.index, - State#state.forward, - HN]), - State2 = mod_set_forwarding(HN, State), - continue(State2#state{modstate = - {deleted, - NewModState}, % like to fail if used - handoff_target = none, - handoff_type = undefined, forward = HN}) + Target, + SeenIdxs, + Mod, + HOType) + of + continue -> + continue(State#state{handoff_target = none, + handoff_type = undefined}); + resize -> + CurrentForwarding = resize_forwarding(State), + NewForwarding = [Target | CurrentForwarding], + State2 = mod_set_forwarding(NewForwarding, State), + continue(State2#state{handoff_target = none, + handoff_type = undefined, + forward = NewForwarding}); + Res when Res == forward; Res == shutdown -> + {_, HN} = Target, + %% Have to issue the delete now. Once unregistered the + %% vnode master will spin up a new vnode on demand. + %% Shutdown the async pool beforehand, don't want callbacks + %% running on non-existant data. + maybe_shutdown_pool(State), + {ok, NewModState} = Mod:delete(ModState), + logger:debug("~p ~p vnode finished handoff and deleted.", + [Idx, Mod]), + riak_core_vnode_manager:unregister_vnode(Idx, Mod), + logger:debug("vnode hn/fwd :: ~p/~p :: ~p -> ~p~n", + [State#state.mod, + State#state.index, + State#state.forward, + HN]), + State2 = mod_set_forwarding(HN, State), + continue(State2#state{modstate = + {deleted, + NewModState}, % like to fail if used + handoff_target = none, + handoff_type = undefined, forward = HN}) end. maybe_shutdown_pool(#state{pool_pid = Pool}) -> case is_pid(Pool) of - true -> - %% state.pool_pid will be cleaned up by handle_info message. - riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); - _ -> ok + true -> + %% state.pool_pid will be cleaned up by handle_info message. + riak_core_vnode_worker_pool:shutdown_pool(Pool, 60000); + _ -> ok end. resize_forwarding(#state{forward = F}) @@ -1268,126 +1268,126 @@ resize_forwarding(_) -> []. mark_delete_complete(Idx, Mod) -> Result = riak_core_ring_manager:ring_trans(fun (Ring, - _) -> - Type = - riak_core_ring:vnode_type(Ring, - Idx), - {_, Next, Status} = - riak_core_ring:next_owner(Ring, - Idx), - case {Type, Next, Status} - of - {resized_primary, - '$delete', - awaiting} -> - Ring3 = - riak_core_ring:deletion_complete(Ring, - Idx, - Mod), - %% Use local ring optimization like mark_handoff_complete - {set_only, - Ring3}; - {{fallback, _}, - '$delete', - awaiting} -> - Ring3 = - riak_core_ring:deletion_complete(Ring, - Idx, - Mod), - %% Use local ring optimization like mark_handoff_complete - {set_only, - Ring3}; - _ -> ignore - end - end, - []), + _) -> + Type = + riak_core_ring:vnode_type(Ring, + Idx), + {_, Next, Status} = + riak_core_ring:next_owner(Ring, + Idx), + case {Type, Next, Status} + of + {resized_primary, + '$delete', + awaiting} -> + Ring3 = + riak_core_ring:deletion_complete(Ring, + Idx, + Mod), + %% Use local ring optimization like mark_handoff_complete + {set_only, + Ring3}; + {{fallback, _}, + '$delete', + awaiting} -> + Ring3 = + riak_core_ring:deletion_complete(Ring, + Idx, + Mod), + %% Use local ring optimization like mark_handoff_complete + {set_only, + Ring3}; + _ -> ignore + end + end, + []), Result. 
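Both mark_handoff_complete/5 and mark_delete_complete/2 above follow the same riak_core_ring_manager:ring_trans/2 protocol: the transaction fun inspects the current ring and either returns {set_only, NewRing} to update the local ring copy without triggering a gossip, or ignore to leave it untouched. A minimal sketch of that shape, under a hypothetical module name; the riak_core_ring calls are the ones used in the patch:

    -module(ring_trans_sketch).

    -export([mark_complete/2]).

    %% Sketch only: mirrors the {set_only, Ring2} | ignore protocol of
    %% mark_handoff_complete/5 and mark_delete_complete/2 above.
    mark_complete(Idx, Mod) ->
        TransFun = fun (Ring, _Args) ->
                           case riak_core_ring:next_owner(Ring, Idx, Mod) of
                               {_Prev, _Next, awaiting} ->
                                   Ring2 = riak_core_ring:handoff_complete(Ring, Idx, Mod),
                                   %% Update only the local ring copy; skipping the
                                   %% gossip is what lets concurrent completions coalesce.
                                   {set_only, Ring2};
                               _ -> ignore
                           end
                   end,
        riak_core_ring_manager:ring_trans(TransFun, []).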
maybe_handoff(_TargetIdx, _TargetNode, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> %% Modstate has been deleted, waiting for unregistered. No handoff. continue(State); maybe_handoff(TargetIdx, TargetNode, - State = #state{index = Idx, mod = Mod, - modstate = ModState, - handoff_target = CurrentTarget, - handoff_pid = HPid}) -> + State = #state{index = Idx, mod = Mod, + modstate = ModState, + handoff_target = CurrentTarget, + handoff_pid = HPid}) -> Target = {TargetIdx, TargetNode}, ExistingHO = is_pid(HPid) andalso - is_process_alive(HPid), + is_process_alive(HPid), ValidHN = case CurrentTarget of - none -> true; - Target -> not ExistingHO; - _ -> - logger:info("~s/~b: handoff request to ~p before " - "finishing handoff to ~p", - [Mod, Idx, Target, CurrentTarget]), - not ExistingHO - end, + none -> true; + Target -> not ExistingHO; + _ -> + logger:info("~s/~b: handoff request to ~p before " + "finishing handoff to ~p", + [Mod, Idx, Target, CurrentTarget]), + not ExistingHO + end, case ValidHN of - true -> - {ok, R} = riak_core_ring_manager:get_my_ring(), - Resizing = riak_core_ring:is_resizing(R), - Primary = riak_core_ring:is_primary(R, {Idx, node()}), - HOType = case {Resizing, Primary} of - {true, _} -> resize; - {_, true} -> ownership; - {_, false} -> hinted - end, - case Mod:handoff_starting({HOType, Target}, ModState) of - {true, NewModState} -> - start_handoff(HOType, - TargetIdx, - TargetNode, - State#state{modstate = NewModState}); - {false, NewModState} -> continue(State, NewModState) - end; - false -> continue(State) + true -> + {ok, R} = riak_core_ring_manager:get_my_ring(), + Resizing = riak_core_ring:is_resizing(R), + Primary = riak_core_ring:is_primary(R, {Idx, node()}), + HOType = case {Resizing, Primary} of + {true, _} -> resize; + {_, true} -> ownership; + {_, false} -> hinted + end, + case Mod:handoff_starting({HOType, Target}, ModState) of + {true, NewModState} -> + start_handoff(HOType, + TargetIdx, + TargetNode, + State#state{modstate = NewModState}); + {false, NewModState} -> continue(State, NewModState) + end; + false -> continue(State) end. start_handoff(HOType, TargetIdx, TargetNode, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> case Mod:is_empty(ModState) of - {true, NewModState} -> - finish_handoff(State#state{modstate = NewModState, - handoff_type = HOType, - handoff_target = - {TargetIdx, TargetNode}}); - {false, Size, NewModState} -> - State2 = State#state{modstate = NewModState}, - NewState = start_outbound(HOType, - TargetIdx, - TargetNode, - [{size, Size}], - State2), - continue(NewState); - {false, NewModState} -> - State2 = State#state{modstate = NewModState}, - NewState = start_outbound(HOType, - TargetIdx, - TargetNode, - [], - State2), - continue(NewState) + {true, NewModState} -> + finish_handoff(State#state{modstate = NewModState, + handoff_type = HOType, + handoff_target = + {TargetIdx, TargetNode}}); + {false, Size, NewModState} -> + State2 = State#state{modstate = NewModState}, + NewState = start_outbound(HOType, + TargetIdx, + TargetNode, + [{size, Size}], + State2), + continue(NewState); + {false, NewModState} -> + State2 = State#state{modstate = NewModState}, + NewState = start_outbound(HOType, + TargetIdx, + TargetNode, + [], + State2), + continue(NewState) end. 
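The {Resizing, Primary} case in maybe_handoff/3 above is the entire handoff-type decision; pulled out on its own it reads as a small truth table (sketch, hypothetical module name):

    -module(ho_type_sketch).

    -export([ho_type/2]).

    %% resize wins over everything; a primary that is not resizing does an
    %% ownership transfer; a non-primary (fallback) does a hinted handoff.
    -spec ho_type(boolean(), boolean()) -> resize | ownership | hinted.

    ho_type(true, _Primary) -> resize;
    ho_type(false, true) -> ownership;
    ho_type(false, false) -> hinted.

So ho_type_sketch:ho_type(false, false) returns hinted, matching the fallback-vnode path above.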
start_outbound(HOType, TargetIdx, TargetNode, Opts, - State = #state{index = Idx, mod = Mod}) -> + State = #state{index = Idx, mod = Mod}) -> case riak_core_handoff_manager:add_outbound(HOType, - Mod, - Idx, - TargetIdx, - TargetNode, - self(), - Opts) - of - {ok, Pid} -> - State#state{handoff_pid = Pid, handoff_type = HOType, - handoff_target = {TargetIdx, TargetNode}}; - {error, _Reason} -> - {ok, NewModState} = - Mod:handoff_cancelled(State#state.modstate), - State#state{modstate = NewModState} + Mod, + Idx, + TargetIdx, + TargetNode, + self(), + Opts) + of + {ok, Pid} -> + State#state{handoff_pid = Pid, handoff_type = HOType, + handoff_target = {TargetIdx, TargetNode}}; + {error, _Reason} -> + {ok, NewModState} = + Mod:handoff_cancelled(State#state.modstate), + State#state{modstate = NewModState} end. %% Individual vnode processes and the vnode manager are tightly coupled. When @@ -1399,37 +1399,37 @@ start_outbound(HOType, TargetIdx, TargetNode, Opts, %% messages until an appropriate message is received back from the vnode %% manager. The event timer functions below implement this logic. start_manager_event_timer(Event, - State = #state{mod = Mod, index = Idx}) -> + State = #state{mod = Mod, index = Idx}) -> riak_core_vnode_manager:vnode_event(Mod, - Idx, - self(), - Event), + Idx, + self(), + Event), stop_manager_event_timer(State), T2 = gen_fsm_compat:send_event_after(30000, - {send_manager_event, Event}), + {send_manager_event, Event}), State#state{manager_event_timer = T2}. stop_manager_event_timer(#state{manager_event_timer = - undefined}) -> + undefined}) -> ok; stop_manager_event_timer(#state{manager_event_timer = - T}) -> + T}) -> _ = gen_fsm_compat:cancel_timer(T), ok. mod_set_forwarding(_Forward, - State = #state{modstate = {deleted, _}}) -> + State = #state{modstate = {deleted, _}}) -> State; mod_set_forwarding(Forward, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Mod, modstate = ModState}) -> case lists:member({set_vnode_forwarding, 2}, - Mod:module_info(exports)) - of - true -> - NewModState = Mod:set_vnode_forwarding(Forward, - ModState), - State#state{modstate = NewModState}; - false -> State + Mod:module_info(exports)) + of + true -> + NewModState = Mod:set_vnode_forwarding(Forward, + ModState), + State#state{modstate = NewModState}; + false -> State end. %% =================================================================== @@ -1443,8 +1443,8 @@ mod_set_forwarding(Forward, get_modstate(Pid) -> {_StateName, State} = - gen_fsm_compat:sync_send_all_state_event(Pid, - current_state), + gen_fsm_compat:sync_send_all_state_event(Pid, + current_state), {State#state.mod, State#state.modstate}. -ifdef(TEST). @@ -1452,16 +1452,16 @@ get_modstate(Pid) -> %% Start the garbage collection server test_link(Mod, Index) -> gen_fsm_compat:start_link(?MODULE, - [Mod, Index, 0, node()], - []). + [Mod, Index, 0, node()], + []). %% Get the current state of the fsm for testing inspection -spec current_state(pid()) -> {atom(), state()} | - {error, term()}. + {error, term()}. current_state(Pid) -> gen_fsm_compat:sync_send_all_state_event(Pid, - current_state). + current_state). wait_for_process_death(Pid) -> wait_for_process_death(Pid, is_process_alive(Pid)). @@ -1473,17 +1473,17 @@ wait_for_process_death(_Pid, false) -> ok. wait_for_state_update(OriginalStateData, Pid) -> {_, CurrentStateData} = (?MODULE):current_state(Pid), wait_for_state_update(OriginalStateData, - CurrentStateData, - Pid). + CurrentStateData, + Pid). 
wait_for_state_update(OriginalStateData, - OriginalStateData, Pid) -> + OriginalStateData, Pid) -> {_, CurrentStateData} = (?MODULE):current_state(Pid), wait_for_state_update(OriginalStateData, - CurrentStateData, - Pid); + CurrentStateData, + Pid); wait_for_state_update(_OriginalState, _StateData, - _Pid) -> + _Pid) -> ok. %% =================================================================== @@ -1496,16 +1496,16 @@ pool_death_test() -> meck:unload(), meck:new(test_vnode, [non_strict, no_link]), meck:expect(test_vnode, - init, - fun (_) -> {ok, [], [{pool, test_pool_mod, 1, []}]} - end), + init, + fun (_) -> {ok, [], [{pool, test_pool_mod, 1, []}]} + end), meck:expect(test_vnode, - terminate, - fun (_, _) -> normal end), + terminate, + fun (_, _) -> normal end), meck:new(test_pool_mod, [non_strict, no_link]), meck:expect(test_pool_mod, - init_worker, - fun (_, _, _) -> {ok, []} end), + init_worker, + fun (_, _, _) -> {ok, []} end), {ok, Pid} = riak_core_vnode:test_link(test_vnode, 0), {_, StateData1} = riak_core_vnode:current_state(Pid), PoolPid1 = StateData1#state.pool_pid, diff --git a/src/riak_core_vnode_manager.erl b/src/riak_core_vnode_manager.erl index ab40781eb..5db955403 100644 --- a/src/riak_core_vnode_manager.erl +++ b/src/riak_core_vnode_manager.erl @@ -27,28 +27,28 @@ -export([start_link/0, stop/0]). -export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). -export([all_vnodes/0, - all_vnodes/1, - all_vnodes_status/0, - force_handoffs/0, - repair/3, - all_handoffs/0, - repair_status/1, - xfer_complete/2, - kill_repairs/1]). + all_vnodes/1, + all_vnodes_status/0, + force_handoffs/0, + repair/3, + all_handoffs/0, + repair_status/1, + xfer_complete/2, + kill_repairs/1]). -export([all_index_pid/1, - get_vnode_pid/2, - start_vnode/2, - unregister_vnode/2, - unregister_vnode/3, - vnode_event/4]). + get_vnode_pid/2, + start_vnode/2, + unregister_vnode/2, + unregister_vnode/3, + vnode_event/4]). %% Field debugging -export([get_tab/0]). @@ -58,41 +58,41 @@ -record(monrec, {monref, key}). -record(xfer_status, - {status :: pending | complete, - mod_src_target :: {module(), index(), index()}}). + {status :: pending | complete, + mod_src_target :: {module(), index(), index()}}). -type xfer_status() :: #xfer_status{}. -record(repair, - {mod_partition :: mod_partition(), - filter_mod_fun :: {module(), atom()}, - minus_one_xfer :: xfer_status(), - plus_one_xfer :: xfer_status(), - pairs :: [{index(), node()}]}). + {mod_partition :: mod_partition(), + filter_mod_fun :: {module(), atom()}, + minus_one_xfer :: xfer_status(), + plus_one_xfer :: xfer_status(), + pairs :: [{index(), node()}]}). -type repair() :: #repair{}. -type repairs() :: [repair()]. -record(state, - {idxtab, - forwarding :: dict:dict(), - handoff :: dict:dict(), - known_modules :: [term()], - never_started :: [{integer(), term()}], - vnode_start_tokens :: integer(), - last_ring_id :: term(), - repairs :: repairs()}). + {idxtab, + forwarding :: dict:dict(), + handoff :: dict:dict(), + known_modules :: [term()], + never_started :: [{integer(), term()}], + vnode_start_tokens :: integer(), + last_ring_id :: term(), + repairs :: repairs()}). -include("riak_core_handoff.hrl"). -include("riak_core_vnode.hrl"). -define(XFER_EQ(A, ModSrcTgt), - A#xfer_status.mod_src_target == ModSrcTgt). + A#xfer_status.mod_src_target == ModSrcTgt). -define(XFER_COMPLETE(X), - X#xfer_status.status == complete). 
+ X#xfer_status.status == complete). -define(DEFAULT_OWNERSHIP_TRIGGER, 8). @@ -108,9 +108,9 @@ start_link() -> gen_server:start_link({local, ?MODULE}, - ?MODULE, - [], - []). + ?MODULE, + [], + []). stop() -> gen_server:cast(?MODULE, stop). @@ -120,10 +120,10 @@ all_vnodes_status() -> %% @doc Repair the given `ModPartition' pair for `Service' using the %% given `FilterModFun' to filter keys. -spec repair(atom(), {module(), partition()}, - {module(), atom()}) -> {ok, - Pairs :: [{partition(), node()}]} | - {down, Down :: [{partition(), node()}]} | - ownership_change_in_progress. + {module(), atom()}) -> {ok, + Pairs :: [{partition(), node()}]} | + {down, Down :: [{partition(), node()}]} | + ownership_change_in_progress. repair(Service, {_Module, Partition} = ModPartition, FilterModFun) -> @@ -136,14 +136,14 @@ repair(Service, {_Module, Partition} = ModPartition, %% @doc Get the status of the repair process for a given `ModPartition'. -spec repair_status(mod_partition()) -> in_progress | - not_found. + not_found. repair_status({_Module, Partition} = ModPartition) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), Owner = riak_core_ring:index_owner(Ring, Partition), gen_server:call({?MODULE, Owner}, - {repair_status, ModPartition}, - ?LONG_TIMEOUT). + {repair_status, ModPartition}, + ?LONG_TIMEOUT). %% @doc Get all handoffs known by this manager. -spec all_handoffs() -> [known_handoff()]. @@ -158,8 +158,8 @@ all_handoffs() -> xfer_complete(Origin, Xfer) -> gen_server:call({?MODULE, Origin}, - {xfer_complete, Xfer}, - ?LONG_TIMEOUT). + {xfer_complete, Xfer}, + ?LONG_TIMEOUT). kill_repairs(Reason) -> gen_server:cast(?MODULE, {kill_repairs, Reason}). @@ -175,23 +175,23 @@ unregister_vnode(Index, VNodeMod) -> unregister_vnode(Index, Pid, VNodeMod) -> gen_server:cast(?MODULE, - {unregister, Index, VNodeMod, Pid}). + {unregister, Index, VNodeMod, Pid}). start_vnode(Index, VNodeMod) -> gen_server:cast(?MODULE, - {Index, VNodeMod, start_vnode}). + {Index, VNodeMod, start_vnode}). vnode_event(Mod, Idx, Pid, Event) -> gen_server:cast(?MODULE, - {vnode_event, Mod, Idx, Pid, Event}). + {vnode_event, Mod, Idx, Pid, Event}). get_tab() -> gen_server:call(?MODULE, get_tab, infinity). get_vnode_pid(Index, VNodeMod) -> gen_server:call(?MODULE, - {Index, VNodeMod, get_vnode}, - infinity). + {Index, VNodeMod, get_vnode}, + infinity). %% =================================================================== %% ETS-based API: try to determine response by reading protected ETS @@ -201,27 +201,27 @@ get_vnode_pid(Index, VNodeMod) -> all_vnodes() -> case get_all_vnodes() of - [] -> - %% ETS error could produce empty list, call manager to be sure. - gen_server:call(?MODULE, all_vnodes, infinity); - Result -> Result + [] -> + %% ETS error could produce empty list, call manager to be sure. + gen_server:call(?MODULE, all_vnodes, infinity); + Result -> Result end. all_vnodes(Mod) -> case get_all_vnodes(Mod) of - [] -> - %% ETS error could produce empty list, call manager to be sure. - gen_server:call(?MODULE, {all_vnodes, Mod}, infinity); - Result -> Result + [] -> + %% ETS error could produce empty list, call manager to be sure. + gen_server:call(?MODULE, {all_vnodes, Mod}, infinity); + Result -> Result end. all_index_pid(VNodeMod) -> case get_all_index_pid(VNodeMod, ets_error) of - ets_error -> - gen_server:call(?MODULE, - {all_index_pid, VNodeMod}, - infinity); - Result -> Result + ets_error -> + gen_server:call(?MODULE, + {all_index_pid, VNodeMod}, + infinity); + Result -> Result end. 
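all_vnodes/0, all_vnodes/1 and all_index_pid/1 below all use the same read pattern: try the protected ETS table first and fall back to a gen_server call to the manager when the read errors or is ambiguous. A condensed sketch; Tab, Server and Req are parameters here, not the real ?ETS/?MODULE macros:

    -module(ets_first_sketch).

    -export([read/3]).

    %% Sketch of the ETS-first read with manager fallback.
    read(Tab, Server, Req) ->
        Read = try ets:tab2list(Tab) catch _:_ -> ets_error end,
        case Read of
            ets_error -> gen_server:call(Server, Req, infinity);
            %% an empty read could be an ETS race, so ask the manager to be sure
            [] -> gen_server:call(Server, Req, infinity);
            Result -> Result
        end.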
%% =================================================================== @@ -230,20 +230,20 @@ all_index_pid(VNodeMod) -> get_all_index_pid(Mod, Default) -> try [list_to_tuple(L) - || L - <- ets:match(?ETS, {idxrec, '_', '$1', Mod, '$2', '_'})] + || L + <- ets:match(?ETS, {idxrec, '_', '$1', Mod, '$2', '_'})] catch - _:_ -> Default + _:_ -> Default end. get_all_vnodes() -> Mods = [Mod - || {_App, Mod} <- riak_core:vnode_modules()], + || {_App, Mod} <- riak_core:vnode_modules()], get_all_vnodes(Mods). get_all_vnodes(Mods) when is_list(Mods) -> lists:flatmap(fun (Mod) -> get_all_vnodes(Mod) end, - Mods); + Mods); get_all_vnodes(Mod) -> IdxPids = get_all_index_pid(Mod, []), [{Mod, Idx, Pid} || {Idx, Pid} <- IdxPids]. @@ -255,18 +255,18 @@ get_all_vnodes(Mod) -> %% @private init(_State) -> {ok, Ring, CHBin} = - riak_core_ring_manager:get_raw_ring_chashbin(), + riak_core_ring_manager:get_raw_ring_chashbin(), Mods = [Mod || {_, Mod} <- riak_core:vnode_modules()], State = #state{forwarding = dict:new(), - handoff = dict:new(), known_modules = [], - never_started = [], vnode_start_tokens = 0, - repairs = []}, + handoff = dict:new(), known_modules = [], + never_started = [], vnode_start_tokens = 0, + repairs = []}, State2 = find_vnodes(State), AllVNodes = get_all_vnodes(Mods), State3 = update_forwarding(AllVNodes, - Mods, - Ring, - State2), + Mods, + Ring, + State2), State4 = update_handoff(AllVNodes, Ring, CHBin, State3), schedule_management_timer(), {ok, State4}. @@ -277,33 +277,33 @@ find_vnodes(State) -> %% to rebuild our ETS table for routing messages to the appropriate %% vnode. VnodePids = [Pid - || {_, Pid, worker, _} - <- supervisor:which_children(riak_core_vnode_sup), - is_pid(Pid) andalso is_process_alive(Pid)], + || {_, Pid, worker, _} + <- supervisor:which_children(riak_core_vnode_sup), + is_pid(Pid) andalso is_process_alive(Pid)], IdxTable = ets:new(?ETS, - [{keypos, 2}, named_table, protected]), + [{keypos, 2}, named_table, protected]), %% If the vnode manager is being restarted, scan the existing %% vnode children and work out which module and index they are %% responsible for. During startup it is possible that these %% vnodes may be shutting down as we check them if there are %% several types of vnodes active. PidIdxs = lists:flatten([try [{Pid, - riak_core_vnode:get_mod_index(Pid)}] - catch - _:_Err -> [] - end - || Pid <- VnodePids]), + riak_core_vnode:get_mod_index(Pid)}] + catch + _:_Err -> [] + end + || Pid <- VnodePids]), %% Populate the ETS table with processes running this VNodeMod (filtered %% in the list comprehension) F = fun (Pid, Idx, Mod) -> - Mref = erlang:monitor(process, Pid), - #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, - pid = Pid, monref = Mref} - end, + Mref = erlang:monitor(process, Pid), + #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, + pid = Pid, monref = Mref} + end, IdxRecs = [F(Pid, Idx, Mod) - || {Pid, {Mod, Idx}} <- PidIdxs], + || {Pid, {Mod, Idx}} <- PidIdxs], MonRecs = [#monrec{monref = Mref, key = Key} - || #idxrec{key = Key, monref = Mref} <- IdxRecs], + || #idxrec{key = Key, monref = Mref} <- IdxRecs], true = ets:insert_new(IdxTable, IdxRecs ++ MonRecs), State#state{idxtab = IdxTable}. 
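The recovery path in find_vnodes/1 above is worth seeing in isolation: after a manager restart the ETS routing table is gone, so it is rebuilt by interrogating the still-running children of riak_core_vnode_sup; vnodes that die mid-scan are skipped by the try. A trimmed sketch using the same calls:

    -module(vnode_scan_sketch).

    -export([live_mod_indexes/0]).

    %% Returns [{Pid, {Mod, Idx}}] for every vnode child that is alive and
    %% answers get_mod_index/1; children crashing mid-scan are dropped.
    live_mod_indexes() ->
        Pids = [Pid
                || {_Id, Pid, worker, _Mods}
                       <- supervisor:which_children(riak_core_vnode_sup),
                   is_pid(Pid) andalso is_process_alive(Pid)],
        lists:flatten([try [{Pid, riak_core_vnode:get_mod_index(Pid)}]
                       catch _:_ -> []
                       end
                       || Pid <- Pids]).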
@@ -321,40 +321,40 @@ handle_call({all_index_pid, Mod}, _From, State) -> Reply = get_all_index_pid(Mod, []), {reply, Reply, State}; handle_call({Partition, Mod, get_vnode}, _From, - State) -> + State) -> Pid = get_vnode(Partition, Mod, State), {reply, {ok, Pid}, State}; handle_call(get_tab, _From, State) -> {reply, ets:tab2list(State#state.idxtab), State}; handle_call({repair, - Service, - {Mod, Partition} = ModPartition, - FilterModFun}, - _From, #state{repairs = Repairs} = State) -> + Service, + {Mod, Partition} = ModPartition, + FilterModFun}, + _From, #state{repairs = Repairs} = State) -> case get_repair(ModPartition, Repairs) of - none -> - maybe_create_repair(Partition, - Service, - ModPartition, - FilterModFun, - Mod, - Repairs, - State); - Repair -> - Pairs = Repair#repair.pairs, - {reply, {ok, Pairs}, State} + none -> + maybe_create_repair(Partition, + Service, + ModPartition, + FilterModFun, + Mod, + Repairs, + State); + Repair -> + Pairs = Repair#repair.pairs, + {reply, {ok, Pairs}, State} end; handle_call(all_handoffs, _From, - State = #state{repairs = Repairs, handoff = HO}) -> + State = #state{repairs = Repairs, handoff = HO}) -> Handoffs = dict:to_list(HO) ++ - transform_repair_records(Repairs), + transform_repair_records(Repairs), {reply, Handoffs, State}; handle_call({repair_status, ModPartition}, _From, - State) -> + State) -> Repairs = State#state.repairs, case get_repair(ModPartition, Repairs) of - none -> {reply, not_found, State}; - #repair{} -> {reply, in_progress, State} + none -> {reply, not_found, State}; + #repair{} -> {reply, in_progress, State} end; %% NOTE: The `xfer_complete' logic assumes two things: %% @@ -367,36 +367,36 @@ handle_call({xfer_complete, ModSrcTgt}, _From, State) -> {Mod, _, Partition} = ModSrcTgt, ModPartition = {Mod, Partition}, case get_repair(ModPartition, Repairs) of - none -> - logger:error("Received xfer_complete for non-existing " - "repair: ~p", - [ModPartition]), - {reply, ok, State}; - #repair{minus_one_xfer = MOX, plus_one_xfer = POX} = - R -> - R2 = if ?XFER_EQ(MOX, ModSrcTgt) -> - MOX2 = MOX#xfer_status{status = complete}, - R#repair{minus_one_xfer = MOX2}; - ?XFER_EQ(POX, ModSrcTgt) -> - POX2 = POX#xfer_status{status = complete}, - R#repair{plus_one_xfer = POX2}; - true -> - logger:error("Received xfer_complete for non-existing " - "xfer: ~p", - [ModSrcTgt]) - end, - case {?XFER_COMPLETE((R2#repair.minus_one_xfer)), - ?XFER_COMPLETE((R2#repair.plus_one_xfer))} - of - {true, true} -> - {reply, - ok, - State#state{repairs = remove_repair(R2, Repairs)}}; - _ -> - {reply, - ok, - State#state{repairs = replace_repair(R2, Repairs)}} - end + none -> + logger:error("Received xfer_complete for non-existing " + "repair: ~p", + [ModPartition]), + {reply, ok, State}; + #repair{minus_one_xfer = MOX, plus_one_xfer = POX} = + R -> + R2 = if ?XFER_EQ(MOX, ModSrcTgt) -> + MOX2 = MOX#xfer_status{status = complete}, + R#repair{minus_one_xfer = MOX2}; + ?XFER_EQ(POX, ModSrcTgt) -> + POX2 = POX#xfer_status{status = complete}, + R#repair{plus_one_xfer = POX2}; + true -> + logger:error("Received xfer_complete for non-existing " + "xfer: ~p", + [ModSrcTgt]) + end, + case {?XFER_COMPLETE((R2#repair.minus_one_xfer)), + ?XFER_COMPLETE((R2#repair.plus_one_xfer))} + of + {true, true} -> + {reply, + ok, + State#state{repairs = remove_repair(R2, Repairs)}}; + _ -> + {reply, + ok, + State#state{repairs = replace_repair(R2, Repairs)}} + end end; handle_call(_, _From, State) -> {reply, ok, State}. 
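The xfer_complete clause above rests on the invariant that a repair owns exactly two transfers, minus-one and plus-one, and is only dropped from the repair list once both report complete. The bookkeeping reduces to the following sketch, with plain {xfer, Status, ModSrcTgt} tuples standing in for #xfer_status{}:

    -module(repair_done_sketch).

    -export([mark/3, repair_done/2]).

    %% Mark a transfer complete only when its ModSrcTgt matches.
    mark({xfer, _Status, ModSrcTgt}, ModSrcTgt, NewStatus) ->
        {xfer, NewStatus, ModSrcTgt};
    mark(Xfer, _OtherModSrcTgt, _NewStatus) -> Xfer.

    %% A repair is finished when both of its transfers are complete.
    repair_done({xfer, complete, _}, {xfer, complete, _}) -> true;
    repair_done(_MinusOneXfer, _PlusOneXfer) -> false.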
@@ -405,64 +405,64 @@ transform_repair_records(Repairs) -> %% module/node values in the `pairs' field against %% `minus_one_xfer' and `plus_one_xfer' lists:flatten(lists:map(fun (#repair{pairs = - [{M1SrcIdx, Mnode}, - _FixPartition, - {P1SrcIdx, Pnode}], - minus_one_xfer = - #xfer_status{mod_src_target = - {M1Mod, - M1SrcIdx, - _M1DstIdx}}, - plus_one_xfer = - #xfer_status{mod_src_target = - {P1Mod, - P1SrcIdx, - _P1DstIdx}}}) -> - [{{M1Mod, M1SrcIdx}, - {repair, inbound, Mnode}}, - {{P1Mod, P1SrcIdx}, - {repair, inbound, Pnode}}] - end, - Repairs)). + [{M1SrcIdx, Mnode}, + _FixPartition, + {P1SrcIdx, Pnode}], + minus_one_xfer = + #xfer_status{mod_src_target = + {M1Mod, + M1SrcIdx, + _M1DstIdx}}, + plus_one_xfer = + #xfer_status{mod_src_target = + {P1Mod, + P1SrcIdx, + _P1DstIdx}}}) -> + [{{M1Mod, M1SrcIdx}, + {repair, inbound, Mnode}}, + {{P1Mod, P1SrcIdx}, + {repair, inbound, Pnode}}] + end, + Repairs)). maybe_create_repair(Partition, Service, ModPartition, - FilterModFun, Mod, Repairs, State) -> + FilterModFun, Mod, Repairs, State) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), case riak_core_ring:pending_changes(Ring) of - [] -> - UpNodes = riak_core_node_watcher:nodes(Service), - Pairs = repair_pairs(Ring, Partition), - case check_up(Pairs, UpNodes) of - true -> - create_repair(Pairs, - ModPartition, - FilterModFun, - Mod, - Partition, - Repairs, - State); - {false, Down} -> {reply, {down, Down}, State} - end; - _ -> {reply, ownership_change_in_progress, State} + [] -> + UpNodes = riak_core_node_watcher:nodes(Service), + Pairs = repair_pairs(Ring, Partition), + case check_up(Pairs, UpNodes) of + true -> + create_repair(Pairs, + ModPartition, + FilterModFun, + Mod, + Partition, + Repairs, + State); + {false, Down} -> {reply, {down, Down}, State} + end; + _ -> {reply, ownership_change_in_progress, State} end. create_repair(Pairs, ModPartition, FilterModFun, Mod, - Partition, Repairs, State) -> + Partition, Repairs, State) -> {MOP, _} = MinusOne = get_minus_one(Pairs), {POP, _} = PlusOne = get_plus_one(Pairs), riak_core_handoff_manager:xfer(MinusOne, - ModPartition, - FilterModFun), + ModPartition, + FilterModFun), riak_core_handoff_manager:xfer(PlusOne, - ModPartition, - FilterModFun), + ModPartition, + FilterModFun), MOXStatus = #xfer_status{status = pending, - mod_src_target = {Mod, MOP, Partition}}, + mod_src_target = {Mod, MOP, Partition}}, POXStatus = #xfer_status{status = pending, - mod_src_target = {Mod, POP, Partition}}, + mod_src_target = {Mod, POP, Partition}}, Repair = #repair{mod_partition = ModPartition, - filter_mod_fun = FilterModFun, pairs = Pairs, - minus_one_xfer = MOXStatus, plus_one_xfer = POXStatus}, + filter_mod_fun = FilterModFun, pairs = Pairs, + minus_one_xfer = MOXStatus, plus_one_xfer = POXStatus}, Repairs2 = Repairs ++ [Repair], State2 = State#state{repairs = Repairs2}, logger:debug("add repair ~p", [ModPartition]), @@ -473,27 +473,27 @@ handle_cast({Partition, Mod, start_vnode}, State) -> _ = get_vnode(Partition, Mod, State), {noreply, State}; handle_cast({unregister, Index, Mod, Pid}, - #state{idxtab = T} = State) -> + #state{idxtab = T} = State) -> %% Update forwarding state to ensure vnode is not restarted in %% incorrect forwarding state if next request arrives before next %% ring event. 
{ok, Ring} = riak_core_ring_manager:get_my_ring(), State2 = update_forwarding({Mod, Index}, Ring, State), ets:match_delete(T, - {idxrec, {Index, Mod}, Index, Mod, Pid, '_'}), + {idxrec, {Index, Mod}, Index, Mod, Pid, '_'}), _ = unregister_vnode_stats(Mod, Index), riak_core_vnode_proxy:unregister_vnode(Mod, Index, Pid), {noreply, State2}; handle_cast({vnode_event, Mod, Idx, Pid, Event}, - State) -> + State) -> handle_vnode_event(Event, Mod, Idx, Pid, State); handle_cast(force_handoffs, State) -> AllVNodes = get_all_vnodes(), {ok, Ring, CHBin} = - riak_core_ring_manager:get_raw_ring_chashbin(), + riak_core_ring_manager:get_raw_ring_chashbin(), State2 = update_handoff(AllVNodes, Ring, CHBin, State), _ = [maybe_trigger_handoff(Mod, Idx, Pid, State2) - || {Mod, Idx, Pid} <- AllVNodes], + || {Mod, Idx, Pid} <- AllVNodes], {noreply, State2}; handle_cast(maybe_start_vnodes, State) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), @@ -509,7 +509,7 @@ handle_info(management_tick, State0) -> schedule_management_timer(), RingID = riak_core_ring_manager:get_ring_id(), {ok, Ring, CHBin} = - riak_core_ring_manager:get_raw_ring_chashbin(), + riak_core_ring_manager:get_raw_ring_chashbin(), State = maybe_ring_changed(RingID, Ring, CHBin, State0), Mods = [Mod || {_, Mod} <- riak_core:vnode_modules()], AllVNodes = get_all_vnodes(Mods), @@ -517,18 +517,18 @@ handle_info(management_tick, State0) -> Transfers = riak_core_ring:pending_changes(Ring), %% Kill/cancel any repairs during ownership changes State3 = case Transfers of - [] -> State2; - _ -> - Repairs = State#state.repairs, - kill_repairs(Repairs, ownership_change), - trigger_ownership_handoff(Transfers, - Mods, - Ring, - State2), - State2#state{repairs = []} - end, + [] -> State2; + _ -> + Repairs = State#state.repairs, + kill_repairs(Repairs, ownership_change), + trigger_ownership_handoff(Transfers, + Mods, + Ring, + State2), + State2#state{repairs = []} + end, State4 = State3#state{vnode_start_tokens = - ?DEFAULT_VNODE_ROLLING_START}, + ?DEFAULT_VNODE_ROLLING_START}, State5 = maybe_start_vnodes(Ring, State4), Repairs2 = check_repairs(State4#state.repairs), {noreply, State5#state{repairs = Repairs2}}; @@ -541,12 +541,12 @@ handle_vnode_event(inactive, Mod, Idx, Pid, State) -> maybe_trigger_handoff(Mod, Idx, Pid, State), {noreply, State}; handle_vnode_event(handoff_complete, Mod, Idx, Pid, - State) -> + State) -> NewHO = dict:erase({Mod, Idx}, State#state.handoff), riak_core_vnode:cast_finish_handoff(Pid), {noreply, State#state{handoff = NewHO}}; handle_vnode_event(handoff_error, Mod, Idx, Pid, - State) -> + State) -> NewHO = dict:erase({Mod, Idx}, State#state.handoff), riak_core_vnode:cancel_handoff(Pid), {noreply, State#state{handoff = NewHO}}. @@ -562,15 +562,15 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %% =================================================================== maybe_ring_changed(RingID, Ring, CHBin, - State = #state{last_ring_id = LastID}) -> + State = #state{last_ring_id = LastID}) -> case RingID of - LastID -> - maybe_ensure_vnodes_started(Ring), - State; - _ -> - ensure_vnodes_started(Ring), - State2 = ring_changed(Ring, CHBin, State), - State2#state{last_ring_id = RingID} + LastID -> + maybe_ensure_vnodes_started(Ring), + State; + _ -> + ensure_vnodes_started(Ring), + State2 = ring_changed(Ring, CHBin, State), + State2#state{last_ring_id = RingID} end. 
ring_changed(Ring, CHBin, State) -> @@ -578,63 +578,63 @@ ring_changed(Ring, CHBin, State) -> AllVNodes = get_all_vnodes(), Mods = [Mod || {_, Mod} <- riak_core:vnode_modules()], State2 = update_forwarding(AllVNodes, - Mods, - Ring, - State), + Mods, + Ring, + State), %% Update handoff state State3 = update_handoff(AllVNodes, Ring, CHBin, State2), %% Trigger ownership transfers. Transfers = riak_core_ring:pending_changes(Ring), trigger_ownership_handoff(Transfers, - Mods, - Ring, - State3), + Mods, + Ring, + State3), State3. maybe_ensure_vnodes_started(Ring) -> ExitingStates = [leaving, exiting, invalid], Status = riak_core_ring:member_status(Ring, node()), case lists:member(Status, ExitingStates) of - true -> - ensure_vnodes_started(Ring), - ok; - _ -> ok + true -> + ensure_vnodes_started(Ring), + ok; + _ -> ok end. ensure_vnodes_started(Ring) -> spawn(fun () -> - try riak_core_ring_handler:ensure_vnodes_started(Ring) - catch - Type:Reason:Stacktrace -> - logger:error("~p", [{Type, Reason, Stacktrace}]) - end - end). + try riak_core_ring_handler:ensure_vnodes_started(Ring) + catch + Type:Reason:Stacktrace -> + logger:error("~p", [{Type, Reason, Stacktrace}]) + end + end). schedule_management_timer() -> ManagementTick = application:get_env(riak_core, - vnode_management_timer, - 10000), + vnode_management_timer, + 10000), erlang:send_after(ManagementTick, - ?MODULE, - management_tick). + ?MODULE, + management_tick). trigger_ownership_handoff(Transfers, Mods, Ring, - State) -> + State) -> IsResizing = riak_core_ring:is_resizing(Ring), Throttle = limit_ownership_handoff(Transfers, - IsResizing), + IsResizing), Awaiting = [{Mod, Idx} - || {Idx, Node, _, CMods, S} <- Throttle, Mod <- Mods, - S =:= awaiting, Node =:= node(), - not lists:member(Mod, CMods)], + || {Idx, Node, _, CMods, S} <- Throttle, Mod <- Mods, + S =:= awaiting, Node =:= node(), + not lists:member(Mod, CMods)], _ = [maybe_trigger_handoff(Mod, Idx, State) - || {Mod, Idx} <- Awaiting], + || {Mod, Idx} <- Awaiting], ok. limit_ownership_handoff(Transfers, IsResizing) -> Limit = application:get_env(riak_core, - forced_ownership_handoff, - ?DEFAULT_OWNERSHIP_TRIGGER), + forced_ownership_handoff, + ?DEFAULT_OWNERSHIP_TRIGGER), limit_ownership_handoff(Limit, Transfers, IsResizing). limit_ownership_handoff(Limit, Transfers, false) -> @@ -644,28 +644,28 @@ limit_ownership_handoff(Limit, Transfers, true) -> %% since they remain in the list until all are complete. then %% treat transfers as normal Filtered = [Transfer - || {_, _, _, _, Status} = Transfer <- Transfers, - Status =:= awaiting], + || {_, _, _, _, Status} = Transfer <- Transfers, + Status =:= awaiting], limit_ownership_handoff(Limit, Filtered, false). %% @private idx2vnode(Idx, Mod, _State = #state{idxtab = T}) -> case ets:lookup(T, {Idx, Mod}) of - [I] -> I#idxrec.pid; - [] -> no_match + [I] -> I#idxrec.pid; + [] -> no_match end. %% @private delmon(MonRef, _State = #state{idxtab = T}) -> case ets:lookup(T, MonRef) of - [#monrec{key = {Index, Mod} = Key}] -> - _ = unregister_vnode_stats(Mod, Index), - ets:match_delete(T, - {idxrec, Key, '_', '_', '_', MonRef}), - ets:delete(T, MonRef); - [] -> - ets:match_delete(T, - {idxrec, '_', '_', '_', '_', MonRef}) + [#monrec{key = {Index, Mod} = Key}] -> + _ = unregister_vnode_stats(Mod, Index), + ets:match_delete(T, + {idxrec, Key, '_', '_', '_', MonRef}), + ets:delete(T, MonRef); + [] -> + ets:match_delete(T, + {idxrec, '_', '_', '_', '_', MonRef}) end. 
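limit_ownership_handoff/3 above hides a subtlety called out in its comment: during a resize, completed transfers stay in the pending list until the whole resize finishes, so only awaiting entries may be counted against the forced_ownership_handoff limit. A sketch; the false clause taking the first Limit entries is an assumption about the clause elided by the hunk boundary:

    -module(ho_limit_sketch).

    -export([limit/3]).

    %% Transfers are {Idx, Node, NextOwner, CompletedMods, Status} tuples,
    %% as returned by riak_core_ring:pending_changes/1.
    limit(Limit, Transfers, true) ->
        %% resizing: completed entries linger, so count only awaiting ones
        Awaiting = [T
                    || {_, _, _, _, Status} = T <- Transfers,
                       Status =:= awaiting],
        limit(Limit, Awaiting, false);
    limit(Limit, Transfers, false) ->
        lists:sublist(Transfers, Limit).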
%% @private @@ -678,197 +678,197 @@ get_vnode(Idx, Mod, State) when not is_list(Idx) -> Result; get_vnode(IdxList, Mod, State) -> Initial = [case idx2vnode(Idx, Mod, State) of - no_match -> Idx; - Pid -> {Idx, Pid} - end - || Idx <- IdxList], + no_match -> Idx; + Pid -> {Idx, Pid} + end + || Idx <- IdxList], {NotStarted, Started} = - lists:partition(fun erlang:is_integer/1, Initial), + lists:partition(fun erlang:is_integer/1, Initial), StartFun = fun (Idx) -> - ForwardTo = get_forward(Mod, Idx, State), - logger:debug("Will start VNode for partition ~p", - [Idx]), - {ok, Pid} = riak_core_vnode_sup:start_vnode(Mod, - Idx, - ForwardTo), - register_vnode_stats(Mod, Idx, Pid), - logger:debug("Started VNode, waiting for initialization " - "to\n complete " - "~p, ~p ", - [Pid, Idx]), - ok = riak_core_vnode:wait_for_init(Pid), - logger:debug("VNode initialization ready ~p, ~p", - [Pid, Idx]), - {Idx, Pid} - end, + ForwardTo = get_forward(Mod, Idx, State), + logger:debug("Will start VNode for partition ~p", + [Idx]), + {ok, Pid} = riak_core_vnode_sup:start_vnode(Mod, + Idx, + ForwardTo), + register_vnode_stats(Mod, Idx, Pid), + logger:debug("Started VNode, waiting for initialization " + "to\n complete " + "~p, ~p ", + [Pid, Idx]), + ok = riak_core_vnode:wait_for_init(Pid), + logger:debug("VNode initialization ready ~p, ~p", + [Pid, Idx]), + {Idx, Pid} + end, Pairs = Started ++ - riak_core_util:pmap(StartFun, - NotStarted, - ?DEFAULT_VNODE_ROLLING_START), + riak_core_util:pmap(StartFun, + NotStarted, + ?DEFAULT_VNODE_ROLLING_START), %% Return Pids in same order as input PairsDict = dict:from_list(Pairs), _ = [begin - Pid = dict:fetch(Idx, PairsDict), - MonRef = erlang:monitor(process, Pid), - IdxRec = #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, - pid = Pid, monref = MonRef}, - MonRec = #monrec{monref = MonRef, key = {Idx, Mod}}, - add_vnode_rec([IdxRec, MonRec], State) - end - || Idx <- NotStarted], + Pid = dict:fetch(Idx, PairsDict), + MonRef = erlang:monitor(process, Pid), + IdxRec = #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, + pid = Pid, monref = MonRef}, + MonRec = #monrec{monref = MonRef, key = {Idx, Mod}}, + add_vnode_rec([IdxRec, MonRec], State) + end + || Idx <- NotStarted], [dict:fetch(Idx, PairsDict) || Idx <- IdxList]. get_forward(Mod, Idx, #state{forwarding = Fwd}) -> case dict:find({Mod, Idx}, Fwd) of - {ok, ForwardTo} -> ForwardTo; - _ -> undefined + {ok, ForwardTo} -> ForwardTo; + _ -> undefined end. check_forward(Ring, Mod, Index) -> Node = node(), case riak_core_ring:next_owner(Ring, Index, Mod) of - {Node, '$resize', _} -> - Complete = - riak_core_ring:complete_resize_transfers(Ring, - {Index, Node}, - Mod), - {{Mod, Index}, Complete}; - {Node, '$delete', _} -> {{Mod, Index}, undefined}; - {Node, NextOwner, complete} -> - {{Mod, Index}, NextOwner}; - _ -> {{Mod, Index}, undefined} + {Node, '$resize', _} -> + Complete = + riak_core_ring:complete_resize_transfers(Ring, + {Index, Node}, + Mod), + {{Mod, Index}, Complete}; + {Node, '$delete', _} -> {{Mod, Index}, undefined}; + {Node, NextOwner, complete} -> + {{Mod, Index}, NextOwner}; + _ -> {{Mod, Index}, undefined} end. 
check_forward_precomputed(Completed, Mod, Index, Node, - Ring) -> + Ring) -> case dict:find({Mod, Index}, Completed) of - {ok, '$resize'} -> - Complete = - riak_core_ring:complete_resize_transfers(Ring, - {Index, Node}, - Mod), - {{Mod, Index}, Complete}; - {ok, '$delete'} -> {{Mod, Index}, undefined}; - {ok, NextOwner} -> {{Mod, Index}, NextOwner}; - _ -> {{Mod, Index}, undefined} + {ok, '$resize'} -> + Complete = + riak_core_ring:complete_resize_transfers(Ring, + {Index, Node}, + Mod), + {{Mod, Index}, Complete}; + {ok, '$delete'} -> {{Mod, Index}, undefined}; + {ok, NextOwner} -> {{Mod, Index}, NextOwner}; + _ -> {{Mod, Index}, undefined} end. compute_forwarding(Mods, Ring) -> Node = node(), CL = [{{Mod, Idx}, NextOwner} - || Mod <- Mods, - {Idx, Owner, NextOwner} - <- riak_core_ring:completed_next_owners(Mod, Ring), - Owner =:= Node], + || Mod <- Mods, + {Idx, Owner, NextOwner} + <- riak_core_ring:completed_next_owners(Mod, Ring), + Owner =:= Node], Completed = dict:from_list(CL), Forwarding = [check_forward_precomputed(Completed, - Mod, - I, - N, - Ring) - || {I, N} <- riak_core_ring:all_owners(Ring), - Mod <- Mods], + Mod, + I, + N, + Ring) + || {I, N} <- riak_core_ring:all_owners(Ring), + Mod <- Mods], dict:from_list(Forwarding). update_forwarding(AllVNodes, Mods, Ring, - State = #state{forwarding = Forwarding}) -> + State = #state{forwarding = Forwarding}) -> NewForwarding = compute_forwarding(Mods, Ring), %% Inform vnodes that have changed forwarding status VNodes = dict:from_list([{{Mod, Idx}, Pid} - || {Mod, Idx, Pid} <- AllVNodes]), + || {Mod, Idx, Pid} <- AllVNodes]), Diff = dict:filter(fun (K, V) -> - dict:find(K, Forwarding) /= {ok, V} - end, - NewForwarding), + dict:find(K, Forwarding) /= {ok, V} + end, + NewForwarding), dict:fold(fun ({Mod, Idx}, ForwardTo, _) -> - change_forward(VNodes, Mod, Idx, ForwardTo), - ok - end, - ok, - Diff), + change_forward(VNodes, Mod, Idx, ForwardTo), + ok + end, + ok, + Diff), State#state{forwarding = NewForwarding}. update_forwarding({Mod, Idx}, Ring, - State = #state{forwarding = Forwarding}) -> + State = #state{forwarding = Forwarding}) -> {_, ForwardTo} = check_forward(Ring, Mod, Idx), NewForwarding = dict:store({Mod, Idx}, - ForwardTo, - Forwarding), + ForwardTo, + Forwarding), State#state{forwarding = NewForwarding}. change_forward(VNodes, Mod, Idx, ForwardTo) -> case dict:find({Mod, Idx}, VNodes) of - error -> ok; - {ok, Pid} -> - riak_core_vnode:set_forwarding(Pid, ForwardTo), - ok + error -> ok; + {ok, Pid} -> + riak_core_vnode:set_forwarding(Pid, ForwardTo), + ok end. 
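update_forwarding/4 above only notifies vnodes whose forwarding entry actually changed; the diff against the previous dict is the interesting part. Reduced to its core as a sketch:

    -module(fwd_diff_sketch).

    -export([changed/2]).

    %% Keep only {Mod, Idx} entries whose new forwarding value differs
    %% from the old dict, including entries that are new outright.
    changed(OldFwd, NewFwd) ->
        dict:filter(fun (Key, Val) ->
                            dict:find(Key, OldFwd) /= {ok, Val}
                    end,
                    NewFwd).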
update_handoff(AllVNodes, Ring, CHBin, State) -> case riak_core_ring:ring_ready(Ring) of - false -> State; - true -> - NewHO = lists:flatten([case should_handoff(Ring, - CHBin, - Mod, - Idx) - of - false -> []; - {true, primary, TargetNode} -> - [{{Mod, Idx}, - {ownership, - outbound, - TargetNode}}]; - {true, {fallback, _Node}, TargetNode} -> - [{{Mod, Idx}, - {hinted, outbound, TargetNode}}]; - {true, '$resize' = Action} -> - [{{Mod, Idx}, - {resize, outbound, Action}}]; - {true, '$delete' = Action} -> - [{{Mod, Idx}, - {delete, local, Action}}] - end - || {Mod, Idx, _Pid} <- AllVNodes]), - State#state{handoff = dict:from_list(NewHO)} + false -> State; + true -> + NewHO = lists:flatten([case should_handoff(Ring, + CHBin, + Mod, + Idx) + of + false -> []; + {true, primary, TargetNode} -> + [{{Mod, Idx}, + {ownership, + outbound, + TargetNode}}]; + {true, {fallback, _Node}, TargetNode} -> + [{{Mod, Idx}, + {hinted, outbound, TargetNode}}]; + {true, '$resize' = Action} -> + [{{Mod, Idx}, + {resize, outbound, Action}}]; + {true, '$delete' = Action} -> + [{{Mod, Idx}, + {delete, local, Action}}] + end + || {Mod, Idx, _Pid} <- AllVNodes]), + State#state{handoff = dict:from_list(NewHO)} end. should_handoff(Ring, _CHBin, Mod, Idx) -> {_, NextOwner, _} = riak_core_ring:next_owner(Ring, - Idx), + Idx), Type = riak_core_ring:vnode_type(Ring, Idx), Ready = riak_core_ring:ring_ready(Ring), IsResizing = riak_core_ring:is_resizing(Ring), case determine_handoff_target(Type, - NextOwner, - Ready, - IsResizing) - of - undefined -> false; - Action - when Action =:= '$resize' orelse Action =:= '$delete' -> - {true, Action}; - TargetNode -> - case app_for_vnode_module(Mod) of - undefined -> false; - {ok, App} -> - case lists:member(TargetNode, - riak_core_node_watcher:nodes(App)) - of - false -> false; - true -> {true, Type, TargetNode} - end - end + NextOwner, + Ready, + IsResizing) + of + undefined -> false; + Action + when Action =:= '$resize' orelse Action =:= '$delete' -> + {true, Action}; + TargetNode -> + case app_for_vnode_module(Mod) of + undefined -> false; + {ok, App} -> + case lists:member(TargetNode, + riak_core_node_watcher:nodes(App)) + of + false -> false; + true -> {true, Type, TargetNode} + end + end end. determine_handoff_target(Type, NextOwner, RingReady, - IsResize) -> + IsResize) -> Me = node(), determine_handoff_target(Type, - NextOwner, - RingReady, - IsResize, - NextOwner =:= Me). + NextOwner, + RingReady, + IsResize, + NextOwner =:= Me). determine_handoff_target(primary, _, _, _, true) -> %% Never hand off to myself as a primary @@ -877,7 +877,7 @@ determine_handoff_target(primary, undefined, _, _, _) -> %% No ring change indicated for this partition undefined; determine_handoff_target(primary, NextOwner, true, _, - _) -> + _) -> %% Primary, ring is ready, go. This may be a node or a `$resize' %% action NextOwner; @@ -885,23 +885,23 @@ determine_handoff_target(primary, _, false, _, _) -> %% Ring isn't ready, no matter what, don't do a primary handoff undefined; determine_handoff_target({fallback, _Target}, - '$delete' = Action, _, _, _) -> + '$delete' = Action, _, _, _) -> %% partitions moved during resize and scheduled for deletion, indexes %% that exist in both the original and resized ring that were moved appear %% as fallbacks. 
Action; determine_handoff_target(resized_primary, - '$delete' = Action, _, _, _) -> + '$delete' = Action, _, _, _) -> %% partitions that no longer exist after the ring has been resized (shrunk) %% scheduled for deletion Action; determine_handoff_target(resized_primary, _, _, false, - _) -> + _) -> %% partitions that would have existed in a ring whose expansion was aborted %% and are still running need to be cleaned up after and shutdown '$delete'; determine_handoff_target({fallback, For}, undefined, _, - _, _) -> + _, _) -> %% Fallback vnode target is primary (hinted handoff). `For' can %% technically be a `$resize' action but unclear it ever would be For; @@ -909,12 +909,12 @@ determine_handoff_target(_, _, _, _, _) -> undefined. app_for_vnode_module(Mod) when is_atom(Mod) -> case application:get_env(riak_core, vnode_modules) of - {ok, Mods} -> - case lists:keysearch(Mod, 2, Mods) of - {value, {App, Mod}} -> {ok, App}; - false -> undefined - end; - undefined -> undefined + {ok, Mods} -> + case lists:keysearch(Mod, 2, Mods) of + {value, {App, Mod}} -> {ok, App}; + false -> undefined + end; + undefined -> undefined end. maybe_trigger_handoff(Mod, Idx, State) -> @@ -922,93 +922,93 @@ maybe_trigger_handoff(Mod, Idx, State) -> maybe_trigger_handoff(Mod, Idx, Pid, State). maybe_trigger_handoff(Mod, Idx, Pid, - _State = #state{handoff = HO}) -> + _State = #state{handoff = HO}) -> case dict:find({Mod, Idx}, HO) of - {ok, {resize, _Direction, '$resize'}} -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - case riak_core_ring:awaiting_resize_transfer(Ring, - {Idx, node()}, - Mod) - of - undefined -> ok; - {TargetIdx, TargetNode} -> - riak_core_vnode:trigger_handoff(Pid, - TargetIdx, - TargetNode) - end; - {ok, {delete, local, '$delete'}} -> - riak_core_vnode:trigger_delete(Pid); - {ok, {_Type, _Direction, TargetNode}} -> - riak_core_vnode:trigger_handoff(Pid, TargetNode), - ok; - error -> ok + {ok, {resize, _Direction, '$resize'}} -> + {ok, Ring} = riak_core_ring_manager:get_my_ring(), + case riak_core_ring:awaiting_resize_transfer(Ring, + {Idx, node()}, + Mod) + of + undefined -> ok; + {TargetIdx, TargetNode} -> + riak_core_vnode:trigger_handoff(Pid, + TargetIdx, + TargetNode) + end; + {ok, {delete, local, '$delete'}} -> + riak_core_vnode:trigger_delete(Pid); + {ok, {_Type, _Direction, TargetNode}} -> + riak_core_vnode:trigger_handoff(Pid, TargetNode), + ok; + error -> ok end. 
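maybe_trigger_handoff/4 above dispatches purely on the value stored in the handoff dict; it may help to see the three arms side by side. A sketch: trigger_handoff/2,3 and trigger_delete/1 are the vnode API calls used above, and the resize-target lookup via awaiting_resize_transfer/3 is assumed to have been done by the caller:

    -module(trigger_sketch).

    -export([dispatch/3]).

    dispatch(Pid, {resize, _Dir, '$resize'}, {TargetIdx, TargetNode}) ->
        %% resize transfers carry their concrete target separately
        riak_core_vnode:trigger_handoff(Pid, TargetIdx, TargetNode);
    dispatch(Pid, {delete, local, '$delete'}, _Target) ->
        riak_core_vnode:trigger_delete(Pid);
    dispatch(Pid, {_Type, _Dir, TargetNode}, _Target) ->
        riak_core_vnode:trigger_handoff(Pid, TargetNode).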
get_all_vnodes_status(#state{forwarding = Forwarding, - handoff = HO}) -> + handoff = HO}) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), Owners = riak_core_ring:all_owners(Ring), VNodes = get_all_vnodes(), Mods = [Mod - || {_App, Mod} <- riak_core:vnode_modules()], + || {_App, Mod} <- riak_core:vnode_modules()], ThisNode = node(), Types = [case Owner of - ThisNode -> {{Mod, Idx}, {type, primary}}; - _ -> {{Mod, Idx}, {type, secondary}} - end - || {Idx, Owner} <- Owners, Mod <- Mods], + ThisNode -> {{Mod, Idx}, {type, primary}}; + _ -> {{Mod, Idx}, {type, secondary}} + end + || {Idx, Owner} <- Owners, Mod <- Mods], Types2 = lists:keysort(1, Types), Pids = [{{Mod, Idx}, {pid, Pid}} - || {Mod, Idx, Pid} <- VNodes], + || {Mod, Idx, Pid} <- VNodes], Pids2 = lists:keysort(1, Pids), Forwarding1 = lists:sort(dict:to_list(Forwarding)), Forwarding2 = [{MI, {forwarding, Node}} - || {MI, Node} <- Forwarding1, Node /= undefined], + || {MI, Node} <- Forwarding1, Node /= undefined], Handoff1 = lists:sort(dict:to_list(HO)), Handoff2 = [{MI, {should_handoff, Node}} - || {MI, {_Type, _Direction, Node}} <- Handoff1], + || {MI, {_Type, _Direction, Node}} <- Handoff1], MergeFn = fun (_, V1, V2) - when is_list(V1) and is_list(V2) -> - V1 ++ V2; - (_, V1, V2) when is_list(V1) -> V1 ++ [V2]; - (_, V1, V2) -> [V1, V2] - end, + when is_list(V1) and is_list(V2) -> + V1 ++ V2; + (_, V1, V2) when is_list(V1) -> V1 ++ [V2]; + (_, V1, V2) -> [V1, V2] + end, Status = lists:foldl(fun (B, A) -> - orddict:merge(MergeFn, A, B) - end, - Types2, - [Pids2, Forwarding2, Handoff2]), + orddict:merge(MergeFn, A, B) + end, + Types2, + [Pids2, Forwarding2, Handoff2]), Status. update_never_started(Ring, - State = #state{known_modules = KnownMods}) -> + State = #state{known_modules = KnownMods}) -> UnknownMods = [Mod - || {_App, Mod} <- riak_core:vnode_modules(), - not lists:member(Mod, KnownMods)], + || {_App, Mod} <- riak_core:vnode_modules(), + not lists:member(Mod, KnownMods)], case UnknownMods of - [] -> State; - _ -> - Indices = [Idx - || {Idx, _} <- riak_core_ring:all_owners(Ring)], - lists:foldl(fun (Mod, StateAcc) -> - update_never_started(Mod, Indices, StateAcc) - end, - State, - UnknownMods) + [] -> State; + _ -> + Indices = [Idx + || {Idx, _} <- riak_core_ring:all_owners(Ring)], + lists:foldl(fun (Mod, StateAcc) -> + update_never_started(Mod, Indices, StateAcc) + end, + State, + UnknownMods) end. update_never_started(Mod, Indices, State) -> IdxPids = get_all_index_pid(Mod, []), AlreadyStarted = [Idx || {Idx, _Pid} <- IdxPids], NeverStarted = - ordsets:subtract(ordsets:from_list(Indices), - ordsets:from_list(AlreadyStarted)), + ordsets:subtract(ordsets:from_list(Indices), + ordsets:from_list(AlreadyStarted)), NeverStarted2 = [{Idx, Mod} || Idx <- NeverStarted], NeverStarted3 = NeverStarted2 ++ - State#state.never_started, + State#state.never_started, KnownModules = [Mod | State#state.known_modules], State#state{known_modules = KnownModules, - never_started = NeverStarted3}. + never_started = NeverStarted3}. maybe_start_vnodes(Ring, State) -> State2 = update_never_started(Ring, State), @@ -1016,71 +1016,71 @@ maybe_start_vnodes(Ring, State) -> State3. 
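update_never_started/3 above is set arithmetic: all ring indices minus the indices that already have a running vnode for the module. As a standalone sketch:

    -module(never_started_sketch).

    -export([never_started/3]).

    %% Indices is every partition index in the ring; IdxPids comes from
    %% the idxrec table as [{Idx, Pid}].
    never_started(Mod, Indices, IdxPids) ->
        Started = ordsets:from_list([Idx || {Idx, _Pid} <- IdxPids]),
        Missing = ordsets:subtract(ordsets:from_list(Indices), Started),
        [{Idx, Mod} || Idx <- Missing].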
maybe_start_vnodes(State = #state{vnode_start_tokens = - Tokens, - never_started = NeverStarted}) -> + Tokens, + never_started = NeverStarted}) -> case {Tokens, NeverStarted} of - {0, _} -> State; - {_, []} -> State; - {_, [{Idx, Mod} | NeverStarted2]} -> - _ = get_vnode(Idx, Mod, State), - gen_server:cast(?MODULE, maybe_start_vnodes), - State#state{vnode_start_tokens = Tokens - 1, - never_started = NeverStarted2} + {0, _} -> State; + {_, []} -> State; + {_, [{Idx, Mod} | NeverStarted2]} -> + _ = get_vnode(Idx, Mod, State), + gen_server:cast(?MODULE, maybe_start_vnodes), + State#state{vnode_start_tokens = Tokens - 1, + never_started = NeverStarted2} end. -spec check_repairs(repairs()) -> Repairs2 :: repairs(). check_repairs(Repairs) -> Check = fun (R = #repair{minus_one_xfer = MOX, - plus_one_xfer = POX}, - Repairs2) -> - Pairs = R#repair.pairs, - MO = get_minus_one(Pairs), - PO = get_plus_one(Pairs), - MOX2 = maybe_retry(R, MO, MOX), - POX2 = maybe_retry(R, PO, POX), - if (?XFER_COMPLETE(MOX2)) andalso - (?XFER_COMPLETE(POX2)) -> - Repairs2; - true -> - R2 = R#repair{minus_one_xfer = MOX2, - plus_one_xfer = POX2}, - [R2 | Repairs2] - end - end, + plus_one_xfer = POX}, + Repairs2) -> + Pairs = R#repair.pairs, + MO = get_minus_one(Pairs), + PO = get_plus_one(Pairs), + MOX2 = maybe_retry(R, MO, MOX), + POX2 = maybe_retry(R, PO, POX), + if (?XFER_COMPLETE(MOX2)) andalso + (?XFER_COMPLETE(POX2)) -> + Repairs2; + true -> + R2 = R#repair{minus_one_xfer = MOX2, + plus_one_xfer = POX2}, + [R2 | Repairs2] + end + end, lists:reverse(lists:foldl(Check, [], Repairs)). %% TODO: get all this repair, xfer status and Src business figured out. -spec maybe_retry(repair(), tuple(), - xfer_status()) -> Xfer2 :: xfer_status(). + xfer_status()) -> Xfer2 :: xfer_status(). maybe_retry(R, {SrcPartition, _} = Src, Xfer) -> case Xfer#xfer_status.status of - complete -> Xfer; - pending -> - {Mod, _, Partition} = Xfer#xfer_status.mod_src_target, - FilterModFun = R#repair.filter_mod_fun, - riak_core_handoff_manager:xfer(Src, - {Mod, Partition}, - FilterModFun), - #xfer_status{status = pending, - mod_src_target = {Mod, SrcPartition, Partition}} + complete -> Xfer; + pending -> + {Mod, _, Partition} = Xfer#xfer_status.mod_src_target, + FilterModFun = R#repair.filter_mod_fun, + riak_core_handoff_manager:xfer(Src, + {Mod, Partition}, + FilterModFun), + #xfer_status{status = pending, + mod_src_target = {Mod, SrcPartition, Partition}} end. %% @private %% %% @doc Verify that all nodes are up involved in the repair. -spec check_up([{non_neg_integer(), node()}], - [node()]) -> true | - {false, Down :: [{non_neg_integer(), node()}]}. + [node()]) -> true | + {false, Down :: [{non_neg_integer(), node()}]}. check_up(Pairs, UpNodes) -> Down = [Pair - || {_Partition, Owner} = Pair <- Pairs, - not lists:member(Owner, UpNodes)], + || {_Partition, Owner} = Pair <- Pairs, + not lists:member(Owner, UpNodes)], case Down of - [] -> true; - _ -> {false, Down} + [] -> true; + _ -> {false, Down} end. %% @private @@ -1088,17 +1088,17 @@ check_up(Pairs, UpNodes) -> %% @doc Get the three `{Partition, Owner}' pairs involved in a repair %% operation for the given `Ring' and `Partition'. -spec repair_pairs(riak_core_ring:riak_core_ring(), - non_neg_integer()) -> [{Partition :: non_neg_integer(), - Owner :: node()}]. + non_neg_integer()) -> [{Partition :: non_neg_integer(), + Owner :: node()}]. 
repair_pairs(Ring, Partition) ->
    Owner = riak_core_ring:index_owner(Ring, Partition),
    CH = riak_core_ring:chash(Ring),
    [_, Before] =
-	chash:predecessors(<<Partition:160/integer>>, CH, 2),
+        chash:predecessors(<<Partition:160/integer>>, CH, 2),
    [After] = chash:successors(<<Partition:160/integer>>,
-			       CH,
-			       1),
+                               CH,
+                               1),
    [Before, {Partition, Owner}, After].

%% @private
@@ -1106,15 +1106,15 @@ repair_pairs(Ring, Partition) ->
%%
%% @doc Get the corresponding repair entry in `Repairs', if one
%% exists, for the given `ModPartition'.
-spec get_repair(mod_partition(),
-		 repairs()) -> repair() | none.
+                 repairs()) -> repair() | none.

get_repair(ModPartition, Repairs) ->
    case lists:keyfind(ModPartition,
-		       #repair.mod_partition,
-		       Repairs)
-	of
-      false -> none;
-      Val -> Val
+                       #repair.mod_partition,
+                       Repairs)
+        of
+        false -> none;
+        Val -> Val
    end.

%% @private
@@ -1124,8 +1124,8 @@ get_repair(ModPartition, Repairs) ->

remove_repair(Repair, Repairs) ->
    lists:keydelete(Repair#repair.mod_partition,
-		    #repair.mod_partition,
-		    Repairs).
+                    #repair.mod_partition,
+                    Repairs).

%% @private
%%
@@ -1134,16 +1134,16 @@ remove_repair(Repair, Repairs) ->

replace_repair(Repair, Repairs) ->
    lists:keyreplace(Repair#repair.mod_partition,
-		     #repair.mod_partition,
-		     Repairs,
-		     Repair).
+                     #repair.mod_partition,
+                     Repairs,
+                     Repair).

%% @private
%%
%% @doc Get the `{Partition, Owner}' pair that comes before the
%% partition under repair.
-spec get_minus_one([{index(), node()}]) -> {index(),
-					     node()}.
+                                            node()}.

get_minus_one([MinusOne, _, _]) -> MinusOne.

@@ -1152,7 +1152,7 @@ get_minus_one([MinusOne, _, _]) -> MinusOne.
%% @doc Get the `{Partition, Owner}' pair that comes after the
%% partition under repair.
-spec get_plus_one([{index(), node()}]) -> {index(),
-					    node()}.
+                                           node()}.

get_plus_one([_, _, PlusOne]) -> PlusOne.

@@ -1177,15 +1177,15 @@ kill_repair(Repair, Reason) ->
    POModSrcTarget = POX#xfer_status.mod_src_target,
    %% Kill the remote senders
    riak_core_handoff_manager:kill_xfer(MOOwner,
-					MOModSrcTarget,
-					Reason),
+                                        MOModSrcTarget,
+                                        Reason),
    riak_core_handoff_manager:kill_xfer(POOwner,
-					POModSrcTarget,
-					Reason),
+                                        POModSrcTarget,
+                                        Reason),
    %% Kill the local receivers
    riak_core_handoff_manager:kill_xfer(node(),
-					{Mod, undefined, Partition},
-					Reason).
+                                        {Mod, undefined, Partition},
+                                        Reason).

register_vnode_stats(_Mod, _Index, _Pid) ->
    %% STATS

diff --git a/src/riak_core_vnode_master.erl b/src/riak_core_vnode_master.erl
index 9c563123f..cf60460e3 100644
--- a/src/riak_core_vnode_master.erl
+++ b/src/riak_core_vnode_master.erl
@@ -29,30 +29,30 @@
-behaviour(gen_server).

-export([start_link/1,
-	 start_link/2,
-	 start_link/3,
-	 get_vnode_pid/2,
-	 start_vnode/2,
-	 command/3,
-	 command/4,
-	 command_unreliable/3,
-	 command_unreliable/4,
-	 sync_command/3,
-	 sync_command/4,
-	 coverage/5,
-	 command_return_vnode/4,
-	 sync_spawn_command/3,
-	 make_request/3,
-	 make_coverage_request/4,
-	 all_nodes/1,
-	 reg_name/1]).
+         start_link/2,
+         start_link/3,
+         get_vnode_pid/2,
+         start_vnode/2,
+         command/3,
+         command/4,
+         command_unreliable/3,
+         command_unreliable/4,
+         sync_command/3,
+         sync_command/4,
+         coverage/5,
+         command_return_vnode/4,
+         sync_spawn_command/3,
+         make_request/3,
+         make_coverage_request/4,
+         all_nodes/1,
+         reg_name/1]).

-export([init/1,
-	 handle_call/3,
-	 handle_cast/2,
-	 handle_info/2,
-	 terminate/2,
-	 code_change/3]).
+         handle_call/3,
+         handle_cast/2,
+         handle_info/2,
+         terminate/2,
+         code_change/3]).

-record(state, {idxtab, sup_name, vnode_mod, legacy}).
@@ -80,9 +80,9 @@ start_link(VNodeMod, LegacyMod) -> start_link(VNodeMod, LegacyMod, Service) -> RegName = reg_name(VNodeMod), gen_server:start_link({local, RegName}, - ?MODULE, - [Service, VNodeMod, LegacyMod, RegName], - []). + ?MODULE, + [Service, VNodeMod, LegacyMod, RegName], + []). start_vnode(Index, VNodeMod) -> riak_core_vnode_manager:start_vnode(Index, VNodeMod). @@ -100,32 +100,32 @@ command(PrefListOrCmd, Msg, Sender, VMaster) -> command2(PrefListOrCmd, Msg, Sender, VMaster, normal). command_unreliable(PrefListOrCmd, Msg, Sender, - VMaster) -> + VMaster) -> command2(PrefListOrCmd, - Msg, - Sender, - VMaster, - unreliable). + Msg, + Sender, + VMaster, + unreliable). %% Send the command to the preflist given with responses going to Sender command2([], _Msg, _Sender, _VMaster, _How) -> ok; command2([{Index, Pid} | Rest], Msg, Sender, VMaster, - How = normal) + How = normal) when is_pid(Pid) -> Request = make_request(Msg, Sender, Index), riak_core_vnode:send_req(Pid, Request), command2(Rest, Msg, Sender, VMaster, How); command2([{Index, Pid} | Rest], Msg, Sender, VMaster, - How = unreliable) + How = unreliable) when is_pid(Pid) -> riak_core_send_msg:send_event_unreliable(Pid, - make_request(Msg, Sender, Index)), + make_request(Msg, Sender, Index)), command2(Rest, Msg, Sender, VMaster, How); command2([{Index, Node} | Rest], Msg, Sender, VMaster, - How) -> + How) -> proxy_cast({VMaster, Node}, - make_request(Msg, Sender, Index), - How), + make_request(Msg, Sender, Index), + How), command2(Rest, Msg, Sender, VMaster, How); command2(DestTuple, Msg, Sender, VMaster, How) when is_tuple(DestTuple) -> @@ -135,29 +135,29 @@ command2(DestTuple, Msg, Sender, VMaster, How) %% Send a command to a covering set of vnodes coverage(Msg, CoverageVNodes, Keyspaces, - {Type, Ref, From}, VMaster) + {Type, Ref, From}, VMaster) when is_list(CoverageVNodes) -> [proxy_cast({VMaster, Node}, - make_coverage_request(Msg, - Keyspaces, - {Type, {Ref, {Index, Node}}, From}, - Index)) + make_coverage_request(Msg, + Keyspaces, + {Type, {Ref, {Index, Node}}, From}, + Index)) || {Index, Node} <- CoverageVNodes]; coverage(Msg, {Index, Node}, Keyspaces, Sender, - VMaster) -> + VMaster) -> proxy_cast({VMaster, Node}, - make_coverage_request(Msg, Keyspaces, Sender, Index)). + make_coverage_request(Msg, Keyspaces, Sender, Index)). %% Send the command to an individual Index/Node combination, but also %% return the pid for the vnode handling the request, as `{ok, VnodePid}'. command_return_vnode({Index, Node}, Msg, Sender, - VMaster) -> + VMaster) -> Req = make_request(Msg, Sender, Index), Mod = vmaster_to_vmod(VMaster), riak_core_vnode_proxy:command_return_vnode({Mod, - Index, - Node}, - Req). + Index, + Node}, + Req). %% Send a synchronous command to an individual Index/Node combination. %% Will not return until the vnode has returned @@ -169,13 +169,13 @@ sync_command({Index, Node}, Msg, VMaster, Timeout) -> %% the From for handle_call so that the {reply} return gets %% sent here. Request = make_request(Msg, - {server, undefined, undefined}, - Index), + {server, undefined, undefined}, + Index), case gen_server:call({VMaster, Node}, Request, Timeout) - of - {vnode_error, {Error, _Args}} -> error(Error); - {vnode_error, Error} -> error(Error); - Else -> Else + of + {vnode_error, {Error, _Args}} -> error(Error); + {vnode_error, Error} -> error(Error); + Else -> Else end. %% Send a synchronous spawned command to an individual Index/Node combination. 
@@ -183,34 +183,34 @@ sync_command({Index, Node}, Msg, VMaster, Timeout) -> %% continue to handle requests. sync_spawn_command({Index, Node}, Msg, VMaster) -> Request = make_request(Msg, - {server, undefined, undefined}, - Index), + {server, undefined, undefined}, + Index), case gen_server:call({VMaster, Node}, - {spawn, Request}, - infinity) - of - {vnode_error, {Error, _Args}} -> error(Error); - {vnode_error, Error} -> error(Error); - Else -> Else + {spawn, Request}, + infinity) + of + {vnode_error, {Error, _Args}} -> error(Error); + {vnode_error, Error} -> error(Error); + Else -> Else end. %% Make a request record - exported for use by legacy modules -spec make_request(vnode_req(), sender(), - partition()) -> riak_vnode_req_v1(). + partition()) -> riak_vnode_req_v1(). make_request(Request, Sender, Index) -> #riak_vnode_req_v1{index = Index, sender = Sender, - request = Request}. + request = Request}. %% Make a request record - exported for use by legacy modules -spec make_coverage_request(vnode_req(), keyspaces(), - sender(), partition()) -> riak_coverage_req_v1(). + sender(), partition()) -> riak_coverage_req_v1(). make_coverage_request(Request, KeySpaces, Sender, - Index) -> + Index) -> #riak_coverage_req_v1{index = Index, - keyspaces = KeySpaces, sender = Sender, - request = Request}. + keyspaces = KeySpaces, sender = Sender, + request = Request}. %% Request a list of Pids for all vnodes %% @deprecated @@ -226,7 +226,7 @@ init([Service, VNodeMod, LegacyMod, _RegName]) -> gen_server:cast(self(), {wait_for_service, Service}), {ok, #state{idxtab = undefined, vnode_mod = VNodeMod, - legacy = LegacyMod}}. + legacy = LegacyMod}}. proxy_cast(Who, Req) -> proxy_cast(Who, Req, normal). @@ -234,13 +234,13 @@ proxy_cast({VMaster, Node}, Req, How) -> do_proxy_cast({VMaster, Node}, Req, How). do_proxy_cast({VMaster, Node}, - Req = #riak_vnode_req_v1{index = Idx}, How) -> + Req = #riak_vnode_req_v1{index = Idx}, How) -> Mod = vmaster_to_vmod(VMaster), Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx, Node), send_an_event(Proxy, Req, How), ok; do_proxy_cast({VMaster, Node}, - Req = #riak_coverage_req_v1{index = Idx}, How) -> + Req = #riak_coverage_req_v1{index = Idx}, How) -> Mod = vmaster_to_vmod(VMaster), Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx, Node), send_an_event(Proxy, Req, How), @@ -253,69 +253,69 @@ send_an_event(Dest, Event, unreliable) -> handle_cast({wait_for_service, Service}, State) -> case Service of - undefined -> ok; - _ -> - logger:debug("Waiting for service: ~p", [Service]), - riak_core:wait_for_service(Service) + undefined -> ok; + _ -> + logger:debug("Waiting for service: ~p", [Service]), + riak_core:wait_for_service(Service) end, {noreply, State}; handle_cast(Req = #riak_vnode_req_v1{index = Idx}, - State = #state{vnode_mod = Mod}) -> + State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, Req), {noreply, State}; handle_cast(Req = #riak_coverage_req_v1{index = Idx}, - State = #state{vnode_mod = Mod}) -> + State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, Req), {noreply, State}; handle_cast(Other, State = #state{legacy = Legacy}) when Legacy =/= undefined -> case catch Legacy:rewrite_cast(Other) of - {ok, #riak_vnode_req_v1{} = Req} -> - handle_cast(Req, State); - _ -> {noreply, State} + {ok, #riak_vnode_req_v1{} = Req} -> + handle_cast(Req, State); + _ -> {noreply, State} end. 
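Worth noting before the call-in paths below: command/4 and command_unreliable/4 differ only in the How flag that command2/5 and proxy_cast/3 thread through to send_an_event/3. 'normal' delivery goes through riak_core_vnode:send_req/2 (a plain event), while 'unreliable' goes through riak_core_send_msg:send_event_unreliable/2, which -- hedging on its exact semantics, which live outside this patch -- is named for trading delivery guarantees against never blocking the sender. Illustrative calls, with invented names:

    ok = riak_core_vnode_master:command(Preflist, Req, Sender,
                                        my_vnode_master),
    ok = riak_core_vnode_master:command_unreliable(Preflist, Req, Sender,
                                                   my_vnode_master).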
handle_call({return_vnode, - Req = #riak_vnode_req_v1{index = Idx}}, - _From, State = #state{vnode_mod = Mod}) -> + Req = #riak_vnode_req_v1{index = Idx}}, + _From, State = #state{vnode_mod = Mod}) -> {ok, Pid} = - riak_core_vnode_proxy:command_return_vnode({Mod, - Idx, - node()}, - Req), + riak_core_vnode_proxy:command_return_vnode({Mod, + Idx, + node()}, + Req), {reply, {ok, Pid}, State}; handle_call(Req = #riak_vnode_req_v1{index = Idx, - sender = {server, undefined, undefined}}, - From, State = #state{vnode_mod = Mod}) -> + sender = {server, undefined, undefined}}, + From, State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, - Req#riak_vnode_req_v1{sender = - {server, - undefined, - From}}), + Req#riak_vnode_req_v1{sender = + {server, + undefined, + From}}), {noreply, State}; handle_call({spawn, - Req = #riak_vnode_req_v1{index = Idx, - sender = {server, undefined, undefined}}}, - From, State = #state{vnode_mod = Mod}) -> + Req = #riak_vnode_req_v1{index = Idx, + sender = {server, undefined, undefined}}}, + From, State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), Sender = {server, undefined, From}, spawn_link(fun () -> - riak_core_vnode:send_all_proxy_req(Proxy, - Req#riak_vnode_req_v1{sender - = - Sender}) - end), + riak_core_vnode:send_all_proxy_req(Proxy, + Req#riak_vnode_req_v1{sender + = + Sender}) + end), {noreply, State}; handle_call(Other, From, - State = #state{legacy = Legacy}) + State = #state{legacy = Legacy}) when Legacy =/= undefined -> case catch Legacy:rewrite_call(Other, From) of - {ok, #riak_vnode_req_v1{} = Req} -> - handle_call(Req, From, State); - _ -> {noreply, State} + {ok, #riak_vnode_req_v1{} = Req} -> + handle_call(Req, From, State); + _ -> {noreply, State} end. handle_info(_Info, State) -> {noreply, State}. diff --git a/src/riak_core_vnode_proxy.erl b/src/riak_core_vnode_proxy.erl index 76fbdc4bb..e9dff563d 100644 --- a/src/riak_core_vnode_proxy.erl +++ b/src/riak_core_vnode_proxy.erl @@ -19,19 +19,19 @@ -module(riak_core_vnode_proxy). -export([start_link/2, - init/1, - reg_name/2, - reg_name/3, - call/2, - call/3, - cast/2, - unregister_vnode/3, - command_return_vnode/2, - overloaded/1]). + init/1, + reg_name/2, + reg_name/3, + call/2, + call/3, + cast/2, + unregister_vnode/3, + command_return_vnode/2, + overloaded/1]). -export([system_continue/3, - system_terminate/4, - system_code_change/4]). + system_terminate/4, + system_code_change/4]). -include("riak_core_vnode.hrl"). @@ -42,16 +42,16 @@ -endif. -record(state, - {mod :: atom(), - index :: partition(), - vnode_pid :: pid() | undefined, - vnode_mref :: reference() | undefined, - check_mailbox :: non_neg_integer(), - check_threshold :: pos_integer() | undefined, - check_counter :: non_neg_integer(), - check_interval :: pos_integer(), - check_request_interval :: non_neg_integer(), - check_request :: undefined | sent | ignore}). + {mod :: atom(), + index :: partition(), + vnode_pid :: pid() | undefined, + vnode_mref :: reference() | undefined, + check_mailbox :: non_neg_integer(), + check_threshold :: pos_integer() | undefined, + check_counter :: non_neg_integer(), + check_interval :: pos_integer(), + check_request_interval :: non_neg_integer(), + check_request :: undefined | sent | ignore}). -define(DEFAULT_CHECK_INTERVAL, 5000). 
@@ -61,7 +61,7 @@ reg_name(Mod, Index) -> ModBin = atom_to_binary(Mod, latin1), IdxBin = list_to_binary(integer_to_list(Index)), AllBin = <<$p, $r, $o, $x, $y, $_, ModBin/binary, $_, - IdxBin/binary>>, + IdxBin/binary>>, binary_to_atom(AllBin, latin1). reg_name(Mod, Index, Node) -> @@ -70,54 +70,54 @@ reg_name(Mod, Index, Node) -> start_link(Mod, Index) -> RegName = reg_name(Mod, Index), proc_lib:start_link(?MODULE, - init, - [[self(), RegName, Mod, Index]]). + init, + [[self(), RegName, Mod, Index]]). init([Parent, RegName, Mod, Index]) -> erlang:register(RegName, self()), proc_lib:init_ack(Parent, {ok, self()}), Interval = application:get_env(riak_core, - vnode_check_interval, - ?DEFAULT_CHECK_INTERVAL), + vnode_check_interval, + ?DEFAULT_CHECK_INTERVAL), RequestInterval = application:get_env(riak_core, - vnode_check_request_interval, - Interval div 2), + vnode_check_request_interval, + Interval div 2), Threshold = application:get_env(riak_core, - vnode_overload_threshold, - ?DEFAULT_OVERLOAD_THRESHOLD), + vnode_overload_threshold, + ?DEFAULT_OVERLOAD_THRESHOLD), SafeInterval = case Threshold == undefined orelse - Interval < Threshold - of - true -> Interval; - false -> - logger:warning("Setting riak_core/vnode_check_interval " - "to ~b", - [Threshold div 2]), - Threshold div 2 - end, + Interval < Threshold + of + true -> Interval; + false -> + logger:warning("Setting riak_core/vnode_check_interval " + "to ~b", + [Threshold div 2]), + Threshold div 2 + end, SafeRequestInterval = case RequestInterval < - SafeInterval - of - true -> RequestInterval; - false -> - logger:warning("Setting riak_core/vnode_check_request_interva" - "l to ~b", - [SafeInterval div 2]), - SafeInterval div 2 - end, + SafeInterval + of + true -> RequestInterval; + false -> + logger:warning("Setting riak_core/vnode_check_request_interva" + "l to ~b", + [SafeInterval div 2]), + SafeInterval div 2 + end, State = #state{mod = Mod, index = Index, - check_mailbox = 0, check_counter = 0, - check_threshold = Threshold, - check_interval = SafeInterval, - check_request_interval = SafeRequestInterval}, + check_mailbox = 0, check_counter = 0, + check_threshold = Threshold, + check_interval = SafeInterval, + check_request_interval = SafeRequestInterval}, loop(Parent, State). unregister_vnode(Mod, Index, Pid) -> cast(reg_name(Mod, Index), {unregister_vnode, Pid}). -spec command_return_vnode({atom(), non_neg_integer(), - atom()}, - term()) -> {ok, pid()} | {error, term()}. + atom()}, + term()) -> {ok, pid()} | {error, term()}. command_return_vnode({Mod, Index, Node}, Req) -> call(reg_name(Mod, Index, Node), {return_vnode, Req}). @@ -129,14 +129,14 @@ overloaded(Pid) -> call(Pid, overloaded). call(Name, Msg) -> call_reply(catch gen:call(Name, - '$vnode_proxy_call', - Msg)). + '$vnode_proxy_call', + Msg)). call(Name, Msg, Timeout) -> call_reply(catch gen:call(Name, - '$vnode_proxy_call', - Msg, - Timeout)). + '$vnode_proxy_call', + Msg, + Timeout)). -spec call_reply({atom(), term()}) -> term(). @@ -158,28 +158,28 @@ system_code_change(State, _, _, _) -> {ok, State}. 
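The proxy's overload machinery is tuned entirely through the three application-environment keys read in init/1 above, which also clamps both intervals so they stay below the overload threshold. A hedged configuration sketch; the first two values mirror ?DEFAULT_CHECK_INTERVAL and the derived request interval from the code above, and the threshold value is illustrative only:

    %% Sketch: values shown are examples, not recommendations.
    application:set_env(riak_core, vnode_check_interval, 5000),
    application:set_env(riak_core, vnode_check_request_interval, 2500),
    application:set_env(riak_core, vnode_overload_threshold, 10000).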
%% @private loop(Parent, State) -> receive - {'$vnode_proxy_call', From, Msg} -> - {reply, Reply, NewState} = handle_call(Msg, - From, - State), - {_, Reply} = gen:reply(From, Reply), - loop(Parent, NewState); - {'$vnode_proxy_cast', Msg} -> - {noreply, NewState} = handle_cast(Msg, State), - loop(Parent, NewState); - {'DOWN', _Mref, process, _Pid, _} -> - NewState = forget_vnode(State), - loop(Parent, NewState); - {system, From, Msg} -> - sys:handle_system_msg(Msg, - From, - Parent, - ?MODULE, - [], - State); - Msg -> - {noreply, NewState} = handle_proxy(Msg, State), - loop(Parent, NewState) + {'$vnode_proxy_call', From, Msg} -> + {reply, Reply, NewState} = handle_call(Msg, + From, + State), + {_, Reply} = gen:reply(From, Reply), + loop(Parent, NewState); + {'$vnode_proxy_cast', Msg} -> + {noreply, NewState} = handle_cast(Msg, State), + loop(Parent, NewState); + {'DOWN', _Mref, process, _Pid, _} -> + NewState = forget_vnode(State), + loop(Parent, NewState); + {system, From, Msg} -> + sys:handle_system_msg(Msg, + From, + Parent, + ?MODULE, + [], + State); + Msg -> + {noreply, NewState} = handle_proxy(Msg, State), + loop(Parent, NewState) end. %% @private @@ -188,8 +188,8 @@ handle_call({return_vnode, Req}, _From, State) -> riak_core_vnode:send_req(Pid, Req), {reply, {ok, Pid}, NewState}; handle_call(overloaded, _From, - State = #state{check_mailbox = Mailbox, - check_threshold = Threshold}) -> + State = #state{check_mailbox = Mailbox, + check_threshold = Threshold}) -> Result = Mailbox > Threshold, {reply, Result, State}; handle_call(_Msg, _From, State) -> {reply, ok, State}. @@ -203,30 +203,30 @@ handle_cast({unregister_vnode, Pid}, State) -> NewState = forget_vnode(State), {noreply, NewState}; handle_cast({vnode_proxy_pong, Ref, Msgs}, - State = #state{check_request = RequestState, - check_mailbox = Mailbox}) -> + State = #state{check_request = RequestState, + check_mailbox = Mailbox}) -> NewState = case Ref of - RequestState -> - State#state{check_mailbox = Mailbox - Msgs, - check_request = undefined, - check_counter = 0}; - _ -> State - end, + RequestState -> + State#state{check_mailbox = Mailbox - Msgs, + check_request = undefined, + check_counter = 0}; + _ -> State + end, {noreply, NewState}; handle_cast(_Msg, State) -> {noreply, State}. %% @private handle_proxy(Msg, - State = #state{check_threshold = undefined}) -> + State = #state{check_threshold = undefined}) -> {Pid, NewState} = get_vnode_pid(State), Pid ! Msg, {noreply, NewState}; handle_proxy(Msg, - State = #state{check_counter = Counter, - check_mailbox = Mailbox, check_interval = Interval, - check_request_interval = RequestInterval, - check_request = RequestState, - check_threshold = Threshold}) -> + State = #state{check_counter = Counter, + check_mailbox = Mailbox, check_interval = Interval, + check_request_interval = RequestInterval, + check_request = RequestState, + check_threshold = Threshold}) -> %% %% NOTE: This function is intentionally written as it is for performance %% reasons -- the vnode proxy is on the critical path of Riak and @@ -244,97 +244,97 @@ handle_proxy(Msg, %% ensure unnecessary work is not being performed needlessly. %% case State#state.vnode_pid of - undefined -> {Pid, State2} = get_vnode_pid(State); - KnownPid -> - Pid = KnownPid, - State2 = State + undefined -> {Pid, State2} = get_vnode_pid(State); + KnownPid -> + Pid = KnownPid, + State2 = State end, Mailbox2 = case Mailbox =< Threshold of - true -> - Pid ! 
Msg, - Mailbox + 1; - false -> - handle_overload(Msg, State), - Mailbox - end, + true -> + Pid ! Msg, + Mailbox + 1; + false -> + handle_overload(Msg, State), + Mailbox + end, Counter2 = Counter + 1, case Counter2 of - RequestInterval -> - %% Ping the vnode in hopes that we get a pong back before hitting - %% the hard query interval and triggering an expensive process_info - %% call. A successful pong from the vnode means that all messages - %% sent before the ping have already been handled and therefore - %% we can adjust our mailbox estimate accordingly. - case RequestState of - undefined -> - RequestState2 = send_proxy_ping(Pid, Mailbox2); - _ -> RequestState2 = RequestState - end, - Mailbox3 = Mailbox2, - Counter3 = Counter2; - Interval -> - %% Time to directly check the mailbox size. This operation may - %% be extremely expensive. If the vnode is currently active, - %% the proxy will be descheduled until the vnode finishes - %% execution and becomes descheduled itself. - {_, L} = erlang:process_info(Pid, message_queue_len), - Counter3 = 0, - Mailbox3 = L + 1, - %% Send a new proxy ping so that if the new length is above the - %% threshold then the proxy will detect the work is completed, - %% rather than being stuck in overload state until the interval - %% counts are reached. - RequestState2 = send_proxy_ping(Pid, Mailbox3); - _ -> - Mailbox3 = Mailbox2, - Counter3 = Counter2, - RequestState2 = RequestState + RequestInterval -> + %% Ping the vnode in hopes that we get a pong back before hitting + %% the hard query interval and triggering an expensive process_info + %% call. A successful pong from the vnode means that all messages + %% sent before the ping have already been handled and therefore + %% we can adjust our mailbox estimate accordingly. + case RequestState of + undefined -> + RequestState2 = send_proxy_ping(Pid, Mailbox2); + _ -> RequestState2 = RequestState + end, + Mailbox3 = Mailbox2, + Counter3 = Counter2; + Interval -> + %% Time to directly check the mailbox size. This operation may + %% be extremely expensive. If the vnode is currently active, + %% the proxy will be descheduled until the vnode finishes + %% execution and becomes descheduled itself. + {_, L} = erlang:process_info(Pid, message_queue_len), + Counter3 = 0, + Mailbox3 = L + 1, + %% Send a new proxy ping so that if the new length is above the + %% threshold then the proxy will detect the work is completed, + %% rather than being stuck in overload state until the interval + %% counts are reached. + RequestState2 = send_proxy_ping(Pid, Mailbox3); + _ -> + Mailbox3 = Mailbox2, + Counter3 = Counter2, + RequestState2 = RequestState end, {noreply, State2#state{check_counter = Counter3, - check_mailbox = Mailbox3, - check_request = RequestState2}}. + check_mailbox = Mailbox3, + check_request = RequestState2}}. 
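To make the estimate protocol above concrete: send_proxy_ping/2 sends {'$vnode_proxy_ping', ProxyPid, Ref, Estimate}, and handle_cast/2 above expects the vnode to answer with {vnode_proxy_pong, Ref, Estimate} once everything queued before the ping has been handled, at which point the proxy subtracts the acknowledged count from its running mailbox estimate. The real responder lives in riak_core_vnode, outside this hunk; a hypothetical minimal responder would look like:

    %% Hypothetical vnode-side responder (a sketch, not the actual code):
    vnode_loop(State) ->
        receive
            {'$vnode_proxy_ping', Proxy, Ref, Msgs} ->
                %% everything received before the ping is done, so the
                %% proxy may deduct Msgs from its mailbox estimate
                riak_core_vnode_proxy:cast(Proxy,
                                           {vnode_proxy_pong, Ref, Msgs}),
                vnode_loop(State)
        end.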
handle_overload(Msg, - #state{mod = Mod, index = Index}) -> + #state{mod = Mod, index = Index}) -> %% STATS %riak_core_stat:update(dropped_vnode_requests), case Msg of - {'$gen_event', - #riak_vnode_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, - Sender, - Index); - {'$gen_all_state_event', - #riak_vnode_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, - Sender, - Index); - {'$gen_event', - #riak_coverage_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, - Sender, - Index); - _ -> catch Mod:handle_overload_info(Msg, Index) + {'$gen_event', + #riak_vnode_req_v1{sender = Sender, + request = Request}} -> + catch Mod:handle_overload_command(Request, + Sender, + Index); + {'$gen_all_state_event', + #riak_vnode_req_v1{sender = Sender, + request = Request}} -> + catch Mod:handle_overload_command(Request, + Sender, + Index); + {'$gen_event', + #riak_coverage_req_v1{sender = Sender, + request = Request}} -> + catch Mod:handle_overload_command(Request, + Sender, + Index); + _ -> catch Mod:handle_overload_info(Msg, Index) end. %% @private forget_vnode(State) -> State#state{vnode_pid = undefined, - vnode_mref = undefined, check_mailbox = 0, - check_counter = 0, check_request = undefined}. + vnode_mref = undefined, check_mailbox = 0, + check_counter = 0, check_request = undefined}. %% @private get_vnode_pid(State = #state{mod = Mod, index = Index, - vnode_pid = undefined}) -> + vnode_pid = undefined}) -> {ok, Pid} = riak_core_vnode_manager:get_vnode_pid(Index, - Mod), + Mod), Mref = erlang:monitor(process, Pid), NewState = State#state{vnode_pid = Pid, - vnode_mref = Mref}, + vnode_mref = Mref}, {Pid, NewState}; get_vnode_pid(State = #state{vnode_pid = Pid}) -> {Pid, State}. @@ -343,46 +343,46 @@ get_vnode_pid(State = #state{vnode_pid = Pid}) -> send_proxy_ping(Pid, MailboxSizeAfterPing) -> Ref = make_ref(), Pid ! - {'$vnode_proxy_ping', - self(), - Ref, - MailboxSizeAfterPing}, + {'$vnode_proxy_ping', + self(), + Ref, + MailboxSizeAfterPing}, Ref. -ifdef(TEST). update_msg_counter() -> Count = case erlang:get(count) of - undefined -> 0; - Val -> Val - end, + undefined -> 0; + Val -> Val + end, put(count, Count + 1). fake_loop() -> receive - block -> fake_loop_block(); - slow -> fake_loop_slow(); - {get_count, Pid} -> - Pid ! {count, erlang:get(count)}, - fake_loop(); - %% Original tests do not expect replies - the - %% results below expect the pings to be counted - %% towards messages received. If you ever wanted - %% to re-instance, uncomment below. - %% {'$vnode_proxy_ping', ReplyTo, Ref, Msgs} -> - %% ReplyTo ! {Ref, Msgs}, - %% fake_loop(); - _Msg -> - update_msg_counter(), - fake_loop() + block -> fake_loop_block(); + slow -> fake_loop_slow(); + {get_count, Pid} -> + Pid ! {count, erlang:get(count)}, + fake_loop(); + %% Original tests do not expect replies - the + %% results below expect the pings to be counted + %% towards messages received. If you ever wanted + %% to re-instance, uncomment below. + %% {'$vnode_proxy_ping', ReplyTo, Ref, Msgs} -> + %% ReplyTo ! {Ref, Msgs}, + %% fake_loop(); + _Msg -> + update_msg_counter(), + fake_loop() end. fake_loop_slow() -> timer:sleep(100), receive - _Msg -> - update_msg_counter(), - fake_loop_slow() + _Msg -> + update_msg_counter(), + fake_loop_slow() end. fake_loop_block() -> receive unblock -> fake_loop() end. 
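handle_overload/2 earlier in this hunk fans shed requests out to two vnode callbacks, each wrapped in catch, so implementing them is effectively optional. A hedged sketch of a vnode module that answers shed work instead of letting it vanish (the {error, overload} term is an assumption for illustration, not an API contract):

    handle_overload_command(_Request, Sender, _Idx) ->
        riak_core_vnode:reply(Sender, {error, overload}).

    handle_overload_info(_Msg, _Idx) -> ok.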
@@ -392,101 +392,101 @@ overload_test_() ->
 900,
 {foreach,
 fun () ->
- VnodePid = spawn(fun fake_loop/0),
- meck:unload(),
- meck:new(riak_core_vnode_manager, [passthrough]),
- meck:expect(riak_core_vnode_manager,
- get_vnode_pid,
- fun (_Index, fakemod) -> {ok, VnodePid};
- (Index, Mod) -> meck:passthrough([Index, Mod])
- end),
- meck:new(fakemod, [non_strict]),
- meck:expect(fakemod,
- handle_overload_info,
- fun (hello, _Idx) -> ok end),
- {ok, ProxyPid} =
- riak_core_vnode_proxy:start_link(fakemod, 0),
- unlink(ProxyPid),
- {VnodePid, ProxyPid}
+ VnodePid = spawn(fun fake_loop/0),
+ meck:unload(),
+ meck:new(riak_core_vnode_manager, [passthrough]),
+ meck:expect(riak_core_vnode_manager,
+ get_vnode_pid,
+ fun (_Index, fakemod) -> {ok, VnodePid};
+ (Index, Mod) -> meck:passthrough([Index, Mod])
+ end),
+ meck:new(fakemod, [non_strict]),
+ meck:expect(fakemod,
+ handle_overload_info,
+ fun (hello, _Idx) -> ok end),
+ {ok, ProxyPid} =
+ riak_core_vnode_proxy:start_link(fakemod, 0),
+ unlink(ProxyPid),
+ {VnodePid, ProxyPid}
 end,
 fun ({VnodePid, ProxyPid}) ->
- unlink(VnodePid),
- unlink(ProxyPid),
- exit(VnodePid, kill),
- exit(ProxyPid, kill)
+ unlink(VnodePid),
+ unlink(ProxyPid),
+ exit(VnodePid, kill),
+ exit(ProxyPid, kill)
 end,
 [fun ({_VnodePid, ProxyPid}) ->
- {"should not discard in normal operation",
- timeout,
- 60,
- fun () ->
- ToSend = (?DEFAULT_OVERLOAD_THRESHOLD),
- [ProxyPid ! hello || _ <- lists:seq(1, ToSend)],
- %% synchronize on the proxy and the mailbox
- {ok, ok} = gen:call(ProxyPid,
- '$vnode_proxy_call',
- sync,
- infinity),
- ProxyPid ! {get_count, self()},
- receive
- {count, Count} ->
- %% First will hit the request check interval,
- %% then will check message queue every interval
- %% (no new ping will be resubmitted after the first
- %% as the request will already have a reference)
- PingReqs = 1
- + % for first request intarval
- ToSend div
- (?DEFAULT_CHECK_INTERVAL),
- ?assertEqual((ToSend + PingReqs), Count)
- end
- end}
+ {"should not discard in normal operation",
+ timeout,
+ 60,
+ fun () ->
+ ToSend = (?DEFAULT_OVERLOAD_THRESHOLD),
+ [ProxyPid ! hello || _ <- lists:seq(1, ToSend)],
+ %% synchronize on the proxy and the mailbox
+ {ok, ok} = gen:call(ProxyPid,
+ '$vnode_proxy_call',
+ sync,
+ infinity),
+ ProxyPid ! {get_count, self()},
+ receive
+ {count, Count} ->
+ %% First will hit the request check interval,
+ %% then will check message queue every interval
+ %% (no new ping will be resubmitted after the first
+ %% as the request will already have a reference)
+ PingReqs = 1
+ + % for first request interval
+ ToSend div
+ (?DEFAULT_CHECK_INTERVAL),
+ ?assertEqual((ToSend + PingReqs), Count)
+ end
+ end}
 end,
 fun ({VnodePid, ProxyPid}) ->
- {"should discard during overflow",
- timeout,
- 60,
- fun () ->
- VnodePid ! block,
- [ProxyPid ! hello || _ <- lists:seq(1, 50000)],
- %% synchronize on the mailbox - no-op that hits msg catchall
- Reply = gen:call(ProxyPid,
- '$vnode_proxy_call',
- sync,
- infinity),
- ?assertEqual({ok, ok}, Reply),
- VnodePid ! unblock,
- VnodePid ! {get_count, self()},
- receive
- {count, Count} ->
- %% Threshold + 10 unanswered vnode_proxy_ping
- ?assertEqual(((?DEFAULT_OVERLOAD_THRESHOLD) +
- 10),
- Count)
- end
- end}
+ {"should discard during overflow",
+ timeout,
+ 60,
+ fun () ->
+ VnodePid ! block,
+ [ProxyPid ! 
hello || _ <- lists:seq(1, 50000)], + %% synchronize on the mailbox - no-op that hits msg catchall + Reply = gen:call(ProxyPid, + '$vnode_proxy_call', + sync, + infinity), + ?assertEqual({ok, ok}, Reply), + VnodePid ! unblock, + VnodePid ! {get_count, self()}, + receive + {count, Count} -> + %% Threshold + 10 unanswered vnode_proxy_ping + ?assertEqual(((?DEFAULT_OVERLOAD_THRESHOLD) + + 10), + Count) + end + end} end, fun ({VnodePid, ProxyPid}) -> - {"should tolerate slow vnodes", - timeout, - 60, - fun () -> - VnodePid ! slow, - [ProxyPid ! hello || _ <- lists:seq(1, 50000)], - %% synchronize on the mailbox - no-op that hits msg catchall - Reply = gen:call(ProxyPid, - '$vnode_proxy_call', - sync, - infinity), - ?assertEqual({ok, ok}, Reply), - %% check that the outstanding message count is - %% reasonable - {message_queue_len, L} = erlang:process_info(VnodePid, - message_queue_len), - %% Threshold + 2 unanswered vnode_proxy_ping (one - %% for first ping, second after process_info check) - ?assert((L =< (?DEFAULT_OVERLOAD_THRESHOLD) + 2)) - end} + {"should tolerate slow vnodes", + timeout, + 60, + fun () -> + VnodePid ! slow, + [ProxyPid ! hello || _ <- lists:seq(1, 50000)], + %% synchronize on the mailbox - no-op that hits msg catchall + Reply = gen:call(ProxyPid, + '$vnode_proxy_call', + sync, + infinity), + ?assertEqual({ok, ok}, Reply), + %% check that the outstanding message count is + %% reasonable + {message_queue_len, L} = erlang:process_info(VnodePid, + message_queue_len), + %% Threshold + 2 unanswered vnode_proxy_ping (one + %% for first ping, second after process_info check) + ?assert((L =< (?DEFAULT_OVERLOAD_THRESHOLD) + 2)) + end} end]}}. -endif. diff --git a/src/riak_core_vnode_proxy_sup.erl b/src/riak_core_vnode_proxy_sup.erl index c9b0801f4..14ca847a5 100644 --- a/src/riak_core_vnode_proxy_sup.erl +++ b/src/riak_core_vnode_proxy_sup.erl @@ -33,15 +33,15 @@ init([]) -> Indices = get_indices(), VMods = riak_core:vnode_modules(), Proxies = [proxy_ref(Mod, Index) - || {_, Mod} <- VMods, Index <- Indices], + || {_, Mod} <- VMods, Index <- Indices], {ok, {{one_for_one, 5, 10}, Proxies}}. start_proxy(Mod, Index) -> Ref = proxy_ref(Mod, Index), Pid = case supervisor:start_child(?MODULE, Ref) of - {ok, Child} -> Child; - {error, {already_started, Child}} -> Child - end, + {ok, Child} -> Child; + {error, {already_started, Child}} -> Child + end, Pid. stop_proxy(Mod, Index) -> diff --git a/src/riak_core_vnode_sup.erl b/src/riak_core_vnode_sup.erl index f4bfb7527..73f8bbfc4 100644 --- a/src/riak_core_vnode_sup.erl +++ b/src/riak_core_vnode_sup.erl @@ -33,7 +33,7 @@ start_vnode(Mod, Index, ForwardTo) when is_integer(Index) -> supervisor:start_child(?MODULE, - [Mod, Index, ForwardTo]). + [Mod, Index, ForwardTo]). start_link() -> %% This simple_one_for_one supervisor can do a controlled shutdown. @@ -47,8 +47,8 @@ init([]) -> {ok, {{simple_one_for_one, 10, 10}, [{undefined, - {riak_core_vnode, start_link, []}, - temporary, - 300000, - worker, - dynamic}]}}. + {riak_core_vnode, start_link, []}, + temporary, + 300000, + worker, + dynamic}]}}. diff --git a/src/riak_core_vnode_worker.erl b/src/riak_core_vnode_worker.erl index 0a7aa7fa8..852a994d6 100644 --- a/src/riak_core_vnode_worker.erl +++ b/src/riak_core_vnode_worker.erl @@ -24,11 +24,11 @@ % gen_server callbacks -export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). 
% API -export([start_link/1, handle_work/3, handle_work/4]). @@ -36,27 +36,27 @@ -type mod_state() :: term(). -record(state, - {module :: atom(), modstate :: mod_state()}). + {module :: atom(), modstate :: mod_state()}). -callback init_worker(partition(), Args :: term(), - Props :: [{atom(), term()}]) -> {ok, mod_state()}. + Props :: [{atom(), term()}]) -> {ok, mod_state()}. -callback handle_work(Work :: term(), sender(), - mod_state()) -> {reply, Reply :: term(), mod_state()} | - {noreply, mod_state()}. + mod_state()) -> {reply, Reply :: term(), mod_state()} | + {noreply, mod_state()}. start_link(Args) -> WorkerMod = proplists:get_value(worker_callback_mod, - Args), + Args), [VNodeIndex, WorkerArgs, WorkerProps, Caller] = - proplists:get_value(worker_args, Args), + proplists:get_value(worker_args, Args), gen_server:start_link(?MODULE, - [WorkerMod, - VNodeIndex, - WorkerArgs, - WorkerProps, - Caller], - []). + [WorkerMod, + VNodeIndex, + WorkerArgs, + WorkerProps, + Caller], + []). handle_work(Worker, Work, From) -> handle_work(Worker, Work, From, self()). @@ -70,32 +70,32 @@ init([Module, WorkerProps, Caller]) -> {ok, WorkerState} = Module:init_worker(VNodeIndex, - WorkerArgs, - WorkerProps), + WorkerArgs, + WorkerProps), %% let the pool queue manager know there might be a worker to checkout riak_core_vnode_worker_pool:worker_started(Caller), {ok, #state{module = Module, modstate = WorkerState}}. handle_call(Event, _From, State) -> logger:debug("Vnode worker received synchronous event: " - "~p.", - [Event]), + "~p.", + [Event]), {reply, ok, State}. handle_cast({work, Work, WorkFrom, Caller}, - #state{module = Mod, modstate = ModState} = State) -> + #state{module = Mod, modstate = ModState} = State) -> NewModState = case Mod:handle_work(Work, - WorkFrom, - ModState) - of - {reply, Reply, NS} -> - riak_core_vnode:reply(WorkFrom, Reply), - NS; - {noreply, NS} -> NS - end, + WorkFrom, + ModState) + of + {reply, Reply, NS} -> + riak_core_vnode:reply(WorkFrom, Reply), + NS; + {noreply, NS} -> NS + end, %% check the worker back into the pool riak_core_vnode_worker_pool:checkin_worker(Caller, - self()), + self()), {noreply, State#state{modstate = NewModState}}; handle_cast(_Event, State) -> {noreply, State}. diff --git a/src/riak_core_vnode_worker_pool.erl b/src/riak_core_vnode_worker_pool.erl index a59f77262..392bd9ea6 100644 --- a/src/riak_core_vnode_worker_pool.erl +++ b/src/riak_core_vnode_worker_pool.erl @@ -43,18 +43,18 @@ %% API -export([start_link/5, - start_link/6, - stop/2, - shutdown_pool/2, - handle_work/3, - worker_started/1, - checkin_worker/2]). + start_link/6, + stop/2, + shutdown_pool/2, + handle_work/3, + worker_started/1, + checkin_worker/2]). %% gen_statem callbacks -export([init/1, - terminate/3, - code_change/4, - callback_mode/0]). + terminate/3, + code_change/4, + callback_mode/0]). %% gen_statem states -export([ready/3, queue/3, shutdown/3]). @@ -64,24 +64,24 @@ %% ======== start_link(WorkerMod, PoolSize, VNodeIndex, WorkerArgs, - WorkerProps) -> + WorkerProps) -> start_link(WorkerMod, - PoolSize, - VNodeIndex, - WorkerArgs, - WorkerProps, - []). + PoolSize, + VNodeIndex, + WorkerArgs, + WorkerProps, + []). start_link(WorkerMod, PoolSize, VNodeIndex, WorkerArgs, - WorkerProps, Opts) -> + WorkerProps, Opts) -> gen_statem:start_link(?MODULE, - [WorkerMod, - PoolSize, - VNodeIndex, - WorkerArgs, - WorkerProps, - Opts], - []). + [WorkerMod, + PoolSize, + VNodeIndex, + WorkerArgs, + WorkerProps, + Opts], + []). 
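Stepping back to the riak_core_vnode_worker contract: the two -callback specs above fully describe what a worker module must provide. A hedged sketch of a conforming module (the module name and the work term are invented):

    -module(my_fold_worker).
    -behaviour(riak_core_vnode_worker).
    -export([init_worker/3, handle_work/3]).

    init_worker(_VNodeIndex, _WorkerArgs, _WorkerProps) ->
        {ok, no_state}.

    handle_work({fold, Fun, Acc0}, _Sender, ModState) ->
        %% {reply, ...} routes the result back to the caller via
        %% riak_core_vnode:reply/2 in the worker's handle_cast/2 above
        {reply, Fun(Acc0), ModState}.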
% #1 cast handle_work(Pid, Work, From) -> @@ -102,7 +102,7 @@ stop(Pid, Reason) -> % #5 call %% wait for all the workers to finish any current work -spec shutdown_pool(pid(), integer()) -> ok | - {error, vnode_shutdown}. + {error, vnode_shutdown}. shutdown_pool(Pid, Wait) -> gen_statem:call(Pid, {shutdown, Wait}, infinity). @@ -114,11 +114,11 @@ shutdown_pool(Pid, Wait) -> %% ======================== -record(state, - {queue :: queue:queue() | list(), - pool :: pid(), - monitors = [] :: list(), - queue_strategy = fifo :: fifo | filo, - shutdown :: undefined | {pid(), reference()}}). + {queue :: queue:queue() | list(), + pool :: pid(), + monitors = [] :: list(), + queue_strategy = fifo :: fifo | filo, + shutdown :: undefined | {pid(), reference()}}). callback_mode() -> [state_functions, state_enter]. @@ -129,28 +129,28 @@ init([WorkerMod, WorkerProps, Opts]) -> {ok, Pid} = poolboy:start_link([{worker_module, - riak_core_vnode_worker}, - {worker_args, - [VNodeIndex, - WorkerArgs, - WorkerProps, - self()]}, - {worker_callback_mod, WorkerMod}, - {size, PoolSize}, - {max_overflow, 0}]), + riak_core_vnode_worker}, + {worker_args, + [VNodeIndex, + WorkerArgs, + WorkerProps, + self()]}, + {worker_callback_mod, WorkerMod}, + {size, PoolSize}, + {max_overflow, 0}]), DefaultStrategy = application:get_env(riak_core, - queue_worker_strategy, - fifo), + queue_worker_strategy, + fifo), State = case proplists:get_value(strategy, - Opts, - DefaultStrategy) - of - fifo -> - #state{pool = Pid, queue = queue:new(), - queue_strategy = fifo}; - filo -> - #state{pool = Pid, queue = [], queue_strategy = filo} - end, + Opts, + DefaultStrategy) + of + fifo -> + #state{pool = Pid, queue = queue:new(), + queue_strategy = fifo}; + filo -> + #state{pool = Pid, queue = [], queue_strategy = filo} + end, {ok, ready, State}. % #4 call @@ -177,11 +177,11 @@ ready(enter, _, State) -> {keep_state, State}; ready(cast, {work, Work, From} = Msg, #state{pool = Pool, monitors = Monitors} = State) -> case poolboy:checkout(Pool, false) of - full -> {next_state, queue, in(Msg, State)}; - Pid when is_pid(Pid) -> - NewMonitors = monitor_worker(Pid, From, Work, Monitors), - riak_core_vnode_worker:handle_work(Pid, Work, From), - {next_state, ready, State#state{monitors = NewMonitors}} + full -> {next_state, queue, in(Msg, State)}; + Pid when is_pid(Pid) -> + NewMonitors = monitor_worker(Pid, From, Work, Monitors), + riak_core_vnode_worker:handle_work(Pid, Work, From), + {next_state, ready, State#state{monitors = NewMonitors}} end; %% #2 ready(cast, worker_start, State) -> @@ -231,19 +231,19 @@ queue(info, {'DOWN', _Ref, _Type, Pid, Info}, State) -> %% enter shutdown(enter, _, - #state{monitors = Monitors, shutdown = From} = State) -> + #state{monitors = Monitors, shutdown = From} = State) -> discard_queued_work(State), case Monitors of - [] -> {stop_and_reply, shutdown, [{reply, From, ok}]}; - _ -> {keep_state, State#state{queue = new(State)}} + [] -> {stop_and_reply, shutdown, [{reply, From, ok}]}; + _ -> {keep_state, State#state{queue = new(State)}} end; %% force shutdown timeout shutdown(state_timeout, _, - #state{monitors = Monitors, shutdown = FromOrigin}) -> + #state{monitors = Monitors, shutdown = FromOrigin}) -> %% we've waited too long to shutdown, time to force the issue. 
_ = [riak_core_vnode:reply(From, - {error, vnode_shutdown}) - || {_, _, From, _} <- Monitors], + {error, vnode_shutdown}) + || {_, _, From, _} <- Monitors], {stop_and_reply, shutdown, [{reply, FromOrigin, {error, vnode_shutdown}}]}; @@ -256,15 +256,15 @@ shutdown(cast, worker_start, State) -> worker_started(State, shutdown); %% #3 shutdown(cast, {checkin, Pid}, - #state{pool = Pool, monitors = Monitors0, - shutdown = From} = - State) -> + #state{pool = Pool, monitors = Monitors0, + shutdown = From} = + State) -> Monitors = demonitor_worker(Pid, Monitors0), poolboy:checkin(Pool, Pid), case Monitors of - [] -> %% work all done, time to exit! - {stop_and_reply, shutdown, [{reply, From, ok}]}; - _ -> {keep_state, State#state{monitors = Monitors}} + [] -> %% work all done, time to exit! + {stop_and_reply, shutdown, [{reply, From, ok}]}; + _ -> {keep_state, State#state{monitors = Monitors}} end; %% #5 shutdown({call, From}, {shutdown, _Wait}, State) -> @@ -287,33 +287,33 @@ shutdown(info, {'DOWN', _Ref, _, Pid, Info}, State) -> %% worker. Only active workers are tracked monitor_worker(Worker, From, Work, Monitors) -> case lists:keyfind(Worker, 1, Monitors) of - {Worker, Ref, _OldFrom, _OldWork} -> - %% reuse old monitor and just update the from & work - lists:keyreplace(Worker, - 1, - Monitors, - {Worker, Ref, From, Work}); - false -> - Ref = erlang:monitor(process, Worker), - [{Worker, Ref, From, Work} | Monitors] + {Worker, Ref, _OldFrom, _OldWork} -> + %% reuse old monitor and just update the from & work + lists:keyreplace(Worker, + 1, + Monitors, + {Worker, Ref, From, Work}); + false -> + Ref = erlang:monitor(process, Worker), + [{Worker, Ref, From, Work} | Monitors] end. demonitor_worker(Worker, Monitors) -> case lists:keyfind(Worker, 1, Monitors) of - {Worker, Ref, _From, _Work} -> - erlang:demonitor(Ref), - lists:keydelete(Worker, 1, Monitors); - false -> - %% not monitored? - Monitors + {Worker, Ref, _From, _Work} -> + erlang:demonitor(Ref), + lists:keydelete(Worker, 1, Monitors); + false -> + %% not monitored? + Monitors end. discard_queued_work(State) -> case out(State) of - {{value, {work, _Work, From}}, Rem} -> - riak_core_vnode:reply(From, {error, vnode_shutdown}), - discard_queued_work(State#state{queue = Rem}); - {empty, _Empty} -> ok + {{value, {work, _Work, From}}, Rem} -> + riak_core_vnode:reply(From, {error, vnode_shutdown}), + discard_queued_work(State#state{queue = Rem}); + {empty, _Empty} -> ok end. in(Msg, @@ -334,65 +334,65 @@ new(#state{queue_strategy = fifo}) -> queue:new(); new(#state{queue_strategy = filo}) -> []. worker_started(#state{pool = Pool, - monitors = Monitors} = - State, - StateName) -> + monitors = Monitors} = + State, + StateName) -> %% a new worker just started - if we have work pending, try to do it case out(State) of - {{value, {work, Work, From}}, Rem} -> - case poolboy:checkout(Pool, false) of - full -> {next_state, queue, State}; - Pid when is_pid(Pid) -> - NewMonitors = monitor_worker(Pid, From, Work, Monitors), - riak_core_vnode_worker:handle_work(Pid, Work, From), - {next_state, - queue, - State#state{queue = Rem, monitors = NewMonitors}} - end; - {empty, _} -> - {next_state, - %% If we are in state queueing with nothing in the queue, - %% move to the ready state so that the next incoming job - %% checks out the new worker from poolboy. 
- if StateName == queue -> ready; - true -> StateName - end, - State} + {{value, {work, Work, From}}, Rem} -> + case poolboy:checkout(Pool, false) of + full -> {next_state, queue, State}; + Pid when is_pid(Pid) -> + NewMonitors = monitor_worker(Pid, From, Work, Monitors), + riak_core_vnode_worker:handle_work(Pid, Work, From), + {next_state, + queue, + State#state{queue = Rem, monitors = NewMonitors}} + end; + {empty, _} -> + {next_state, + %% If we are in state queueing with nothing in the queue, + %% move to the ready state so that the next incoming job + %% checks out the new worker from poolboy. + if StateName == queue -> ready; + true -> StateName + end, + State} end. checkin(#state{pool = Pool, monitors = Monitors} = - State, - Worker) -> + State, + Worker) -> case out(State) of - {{value, {work, Work, From}}, Rem} -> - %% there is outstanding work to do - instead of checking - %% the worker back in, just hand it more work to do - NewMonitors = monitor_worker(Worker, - From, - Work, - Monitors), - riak_core_vnode_worker:handle_work(Worker, Work, From), - {next_state, - queue, - State#state{queue = Rem, monitors = NewMonitors}}; - {empty, Empty} -> - NewMonitors = demonitor_worker(Worker, Monitors), - poolboy:checkin(Pool, Worker), - {next_state, - ready, - State#state{queue = Empty, monitors = NewMonitors}} + {{value, {work, Work, From}}, Rem} -> + %% there is outstanding work to do - instead of checking + %% the worker back in, just hand it more work to do + NewMonitors = monitor_worker(Worker, + From, + Work, + Monitors), + riak_core_vnode_worker:handle_work(Worker, Work, From), + {next_state, + queue, + State#state{queue = Rem, monitors = NewMonitors}}; + {empty, Empty} -> + NewMonitors = demonitor_worker(Worker, Monitors), + poolboy:checkin(Pool, Worker), + {next_state, + ready, + State#state{queue = Empty, monitors = NewMonitors}} end. exit_worker(#state{monitors = Monitors} = State, Pid, - Info) -> + Info) -> %% remove the listing for the dead worker case lists:keyfind(Pid, 1, Monitors) of - {Pid, _, From, Work} -> - riak_core_vnode:reply(From, - {error, {worker_crash, Info, Work}}), - NewMonitors = lists:keydelete(Pid, 1, Monitors), - %% trigger to do more work will be 'worker_start' message - %% when poolboy replaces this worker (if not a 'checkin' or 'handle_work') - {ok, State#state{monitors = NewMonitors}}; - false -> {ok, State} + {Pid, _, From, Work} -> + riak_core_vnode:reply(From, + {error, {worker_crash, Info, Work}}), + NewMonitors = lists:keydelete(Pid, 1, Monitors), + %% trigger to do more work will be 'worker_start' message + %% when poolboy replaces this worker (if not a 'checkin' or 'handle_work') + {ok, State#state{monitors = NewMonitors}}; + false -> {ok, State} end. diff --git a/src/vclock.erl b/src/vclock.erl index 742064d89..fced93fd4 100644 --- a/src/vclock.erl +++ b/src/vclock.erl @@ -32,22 +32,22 @@ -module(vclock). -export([fresh/0, - fresh/2, - descends/2, - dominates/2, - descends_dot/2, - pure_dot/1, - merge/1, - get_counter/2, - get_timestamp/2, - get_dot/2, - valid_dot/1, - increment/2, - increment/3, - all_nodes/1, - equal/2, - prune/3, - timestamp/0]). + fresh/2, + descends/2, + dominates/2, + descends_dot/2, + pure_dot/1, + merge/1, + get_counter/2, + get_timestamp/2, + get_dot/2, + valid_dot/1, + increment/2, + increment/3, + all_nodes/1, + equal/2, + prune/3, + timestamp/0]). -ifdef(TEST). @@ -56,15 +56,15 @@ -endif. -export_type([vclock/0, - timestamp/0, - vclock_node/0, - dot/0, - pure_dot/0]). 
+ timestamp/0, + vclock_node/0, + dot/0, + pure_dot/0]). -type vclock() :: [dot()]. -type dot() :: {vclock_node(), - {counter(), timestamp()}}. + {counter(), timestamp()}}. -type pure_dot() :: {vclock_node(), counter()}. @@ -87,7 +87,7 @@ fresh(Node, Count) -> [{Node, {Count, timestamp()}}]. %% @doc Return true if Va is a direct descendant of Vb, %% else false -- remember, a vclock is its own descendant! -spec descends(Va :: vclock(), - Vb :: vclock()) -> boolean(). + Vb :: vclock()) -> boolean(). descends(_, []) -> % all vclocks descend from the empty vclock @@ -95,9 +95,9 @@ descends(_, []) -> descends(Va, Vb) -> [{NodeB, {CtrB, _T}} | RestB] = Vb, case lists:keyfind(NodeB, 1, Va) of - false -> false; - {_, {CtrA, _TSA}} -> - CtrA >= CtrB andalso descends(Va, RestB) + false -> false; + {_, {CtrA, _TSA}} -> + CtrA >= CtrB andalso descends(Va, RestB) end. %% @doc does the given `vclock()' descend from the given `dot()'. The @@ -152,7 +152,7 @@ merge([First | Rest]) -> merge([], NClock) -> NClock; merge([AClock | VClocks], NClock) -> merge(VClocks, - merge(lists:keysort(1, AClock), NClock, [])). + merge(lists:keysort(1, AClock), NClock, [])). merge([], [], AccClock) -> lists:reverse(AccClock); merge([], Left, AccClock) -> @@ -165,41 +165,41 @@ merge(V = [{Node1, {Ctr1, TS1} = CT1} = NCT1 | VClock], if Node1 < Node2 -> merge(VClock, N, [NCT1 | AccClock]); Node1 > Node2 -> merge(V, NClock, [NCT2 | AccClock]); true -> - ({_Ctr, _TS} = CT) = if Ctr1 > Ctr2 -> CT1; - Ctr1 < Ctr2 -> CT2; - true -> {Ctr1, erlang:max(TS1, TS2)} - end, - merge(VClock, NClock, [{Node1, CT} | AccClock]) + ({_Ctr, _TS} = CT) = if Ctr1 > Ctr2 -> CT1; + Ctr1 < Ctr2 -> CT2; + true -> {Ctr1, erlang:max(TS1, TS2)} + end, + merge(VClock, NClock, [{Node1, CT} | AccClock]) end. % @doc Get the counter value in VClock set from Node. -spec get_counter(Node :: vclock_node(), - VClock :: vclock()) -> counter(). + VClock :: vclock()) -> counter(). get_counter(Node, VClock) -> case lists:keyfind(Node, 1, VClock) of - {_, {Ctr, _TS}} -> Ctr; - false -> 0 + {_, {Ctr, _TS}} -> Ctr; + false -> 0 end. % @doc Get the timestamp value in a VClock set from Node. -spec get_timestamp(Node :: vclock_node(), - VClock :: vclock()) -> timestamp() | undefined. + VClock :: vclock()) -> timestamp() | undefined. get_timestamp(Node, VClock) -> case lists:keyfind(Node, 1, VClock) of - {_, {_Ctr, TS}} -> TS; - false -> undefined + {_, {_Ctr, TS}} -> TS; + false -> undefined end. % @doc Get the entry `dot()' for `vclock_node()' from `vclock()'. -spec get_dot(Node :: vclock_node(), - VClock :: vclock()) -> {ok, dot()} | undefined. + VClock :: vclock()) -> {ok, dot()} | undefined. get_dot(Node, VClock) -> case lists:keyfind(Node, 1, VClock) of - false -> undefined; - Entry -> {ok, Entry} + false -> undefined; + Entry -> {ok, Entry} end. %% @doc is the given argument a valid dot, or entry? @@ -212,24 +212,24 @@ valid_dot(_) -> false. % @doc Increment VClock at Node. -spec increment(Node :: vclock_node(), - VClock :: vclock()) -> vclock(). + VClock :: vclock()) -> vclock(). increment(Node, VClock) -> increment(Node, timestamp(), VClock). % @doc Increment VClock at Node. -spec increment(Node :: vclock_node(), - IncTs :: timestamp(), VClock :: vclock()) -> vclock(). + IncTs :: timestamp(), VClock :: vclock()) -> vclock(). 
increment(Node, IncTs, VClock) -> {{_Ctr, _TS} = C1, NewV} = case lists:keytake(Node, - 1, - VClock) - of - false -> {{1, IncTs}, VClock}; - {value, {_N, {C, _T}}, ModV} -> - {{C + 1, IncTs}, ModV} - end, + 1, + VClock) + of + false -> {{1, IncTs}, VClock}; + {value, {_N, {C, _T}}, ModV} -> + {{C + 1, IncTs}, ModV} + end, [{Node, C1} | NewV]. % @doc Return the list of all nodes that have ever incremented VClock. @@ -238,11 +238,11 @@ increment(Node, IncTs, VClock) -> all_nodes(VClock) -> [X || {X, {_, _}} <- VClock]. -define(DAYS_FROM_GREGORIAN_BASE_TO_EPOCH, - 1970 * 365 + 478). + 1970 * 365 + 478). -define(SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH, - (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH) * 24 * 60 * - 60). %% == calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}}) + (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH) * 24 * 60 * + 60). %% == calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}}) % @doc Return a timestamp for a vector clock -spec timestamp() -> timestamp(). @@ -252,55 +252,55 @@ timestamp() -> %% but significantly faster. {MegaSeconds, Seconds, _} = os:timestamp(), (?SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH) + - MegaSeconds * 1000000 - + Seconds. + MegaSeconds * 1000000 + + Seconds. % @doc Compares two VClocks for equality. -spec equal(VClockA :: vclock(), - VClockB :: vclock()) -> boolean(). + VClockB :: vclock()) -> boolean(). equal(VA, VB) -> lists:sort(VA) =:= lists:sort(VB). % @doc Possibly shrink the size of a vclock, depending on current age and size. -spec prune(V :: vclock(), Now :: integer(), - BucketProps :: term()) -> vclock(). + BucketProps :: term()) -> vclock(). prune(V, Now, BucketProps) -> %% This sort need to be deterministic, to avoid spurious merge conflicts later. %% We achieve this by using the node ID as secondary key. SortV = lists:sort(fun ({N1, {_, T1}}, {N2, {_, T2}}) -> - {T1, N1} < {T2, N2} - end, - V), + {T1, N1} < {T2, N2} + end, + V), prune_vclock1(SortV, Now, BucketProps). % @private prune_vclock1(V, Now, BProps) -> case length(V) =< get_property(small_vclock, BProps) of - true -> V; - false -> - {_, {_, HeadTime}} = hd(V), - case Now - HeadTime < get_property(young_vclock, BProps) - of - true -> V; - false -> prune_vclock1(V, Now, BProps, HeadTime) - end + true -> V; + false -> + {_, {_, HeadTime}} = hd(V), + case Now - HeadTime < get_property(young_vclock, BProps) + of + true -> V; + false -> prune_vclock1(V, Now, BProps, HeadTime) + end end. % @private prune_vclock1(V, Now, BProps, HeadTime) -> % has a precondition that V is longer than small and older than young case length(V) > get_property(big_vclock, BProps) orelse - Now - HeadTime > get_property(old_vclock, BProps) - of - true -> prune_vclock1(tl(V), Now, BProps); - false -> V + Now - HeadTime > get_property(old_vclock, BProps) + of + true -> prune_vclock1(tl(V), Now, BProps); + false -> V end. get_property(Key, PairList) -> case lists:keyfind(Key, 1, PairList) of - {_Key, Value} -> Value; - false -> undefined + {_Key, Value} -> Value; + false -> undefined end. %% =================================================================== @@ -331,22 +331,22 @@ prune_small_test() -> Now = riak_core_util:moment(), OldTime = Now - 32000000, SmallVC = [{<<"1">>, {1, OldTime}}, - {<<"2">>, {2, OldTime}}, - {<<"3">>, {3, OldTime}}], + {<<"2">>, {2, OldTime}}, + {<<"3">>, {3, OldTime}}], Props = [{small_vclock, 4}], ?assertEqual((lists:sort(SmallVC)), - (lists:sort(prune(SmallVC, Now, Props)))). + (lists:sort(prune(SmallVC, Now, Props)))). 
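A short, hedged shell sketch of the lifecycle the surrounding tests exercise: two actors increment independently, merge/1 reconciles, and descends/2 orders the results.

    A  = vclock:increment(a, vclock:fresh()),
    AB = vclock:increment(b, A),
    true  = vclock:descends(AB, A),
    false = vclock:descends(A, AB),
    true  = vclock:equal(vclock:merge([A, AB]), AB).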
prune_young_test() -> % vclock with all entries younger than young_vclock will be untouched Now = riak_core_util:moment(), NewTime = Now - 1, VC = [{<<"1">>, {1, NewTime}}, - {<<"2">>, {2, NewTime}}, - {<<"3">>, {3, NewTime}}], + {<<"2">>, {2, NewTime}}, + {<<"3">>, {3, NewTime}}], Props = [{small_vclock, 1}, {young_vclock, 1000}], ?assertEqual((lists:sort(VC)), - (lists:sort(prune(VC, Now, Props)))). + (lists:sort(prune(VC, Now, Props)))). prune_big_test() -> % vclock not preserved by small or young will be pruned down to @@ -354,12 +354,12 @@ prune_big_test() -> Now = riak_core_util:moment(), NewTime = Now - 1000, VC = [{<<"1">>, {1, NewTime}}, - {<<"2">>, {2, NewTime}}, - {<<"3">>, {3, NewTime}}], + {<<"2">>, {2, NewTime}}, + {<<"3">>, {3, NewTime}}], Props = [{small_vclock, 1}, - {young_vclock, 1}, - {big_vclock, 2}, - {old_vclock, 100000}], + {young_vclock, 1}, + {big_vclock, 2}, + {old_vclock, 100000}], ?assert((length(prune(VC, Now, Props)) =:= 2)). prune_old_test() -> @@ -369,12 +369,12 @@ prune_old_test() -> NewTime = Now - 1000, OldTime = Now - 100000, VC = [{<<"1">>, {1, NewTime}}, - {<<"2">>, {2, OldTime}}, - {<<"3">>, {3, OldTime}}], + {<<"2">>, {2, OldTime}}, + {<<"3">>, {3, OldTime}}], Props = [{small_vclock, 1}, - {young_vclock, 1}, - {big_vclock, 2}, - {old_vclock, 10000}], + {young_vclock, 1}, + {big_vclock, 2}, + {old_vclock, 10000}], ?assert((length(prune(VC, Now, Props)) =:= 1)). prune_order_test() -> @@ -383,14 +383,14 @@ prune_order_test() -> Now = riak_core_util:moment(), OldTime = Now - 100000, VC1 = [{<<"1">>, {1, OldTime}}, - {<<"2">>, {2, OldTime}}], + {<<"2">>, {2, OldTime}}], VC2 = lists:reverse(VC1), Props = [{small_vclock, 1}, - {young_vclock, 1}, - {big_vclock, 2}, - {old_vclock, 10000}], + {young_vclock, 1}, + {big_vclock, 2}, + {old_vclock, 10000}], ?assertEqual((prune(VC1, Now, Props)), - (prune(VC2, Now, Props))). + (prune(VC2, Now, Props))). accessor_test() -> VC = [{<<"1">>, {1, 1}}, {<<"2">>, {2, 2}}], @@ -404,44 +404,44 @@ accessor_test() -> merge_test() -> VC1 = [{<<"1">>, {1, 1}}, - {<<"2">>, {2, 2}}, - {<<"4">>, {4, 4}}], + {<<"2">>, {2, 2}}, + {<<"4">>, {4, 4}}], VC2 = [{<<"3">>, {3, 3}}, {<<"4">>, {3, 3}}], ?assertEqual([], (merge(vclock:fresh()))), ?assertEqual([{<<"1">>, {1, 1}}, - {<<"2">>, {2, 2}}, - {<<"3">>, {3, 3}}, - {<<"4">>, {4, 4}}], - (merge([VC1, VC2]))). + {<<"2">>, {2, 2}}, + {<<"3">>, {3, 3}}, + {<<"4">>, {4, 4}}], + (merge([VC1, VC2]))). merge_less_left_test() -> VC1 = [{<<"5">>, {5, 5}}], VC2 = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}], ?assertEqual([{<<"5">>, {5, 5}}, - {<<"6">>, {6, 6}}, - {<<"7">>, {7, 7}}], - (vclock:merge([VC1, VC2]))). + {<<"6">>, {6, 6}}, + {<<"7">>, {7, 7}}], + (vclock:merge([VC1, VC2]))). merge_less_right_test() -> VC1 = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}], VC2 = [{<<"5">>, {5, 5}}], ?assertEqual([{<<"5">>, {5, 5}}, - {<<"6">>, {6, 6}}, - {<<"7">>, {7, 7}}], - (vclock:merge([VC1, VC2]))). + {<<"6">>, {6, 6}}, + {<<"7">>, {7, 7}}], + (vclock:merge([VC1, VC2]))). merge_same_id_test() -> VC1 = [{<<"1">>, {1, 2}}, {<<"2">>, {1, 4}}], VC2 = [{<<"1">>, {1, 3}}, {<<"3">>, {1, 5}}], ?assertEqual([{<<"1">>, {1, 3}}, - {<<"2">>, {1, 4}}, - {<<"3">>, {1, 5}}], - (vclock:merge([VC1, VC2]))). + {<<"2">>, {1, 4}}, + {<<"3">>, {1, 5}}], + (vclock:merge([VC1, VC2]))). 
get_entry_test() -> VC = vclock:fresh(), VC1 = increment(a, - increment(c, increment(b, increment(a, VC)))), + increment(c, increment(b, increment(a, VC)))), ?assertMatch({ok, {a, {2, _}}}, (get_dot(a, VC1))), ?assertMatch({ok, {b, {1, _}}}, (get_dot(b, VC1))), ?assertMatch({ok, {c, {1, _}}}, (get_dot(c, VC1))), @@ -451,7 +451,7 @@ valid_entry_test() -> VC = vclock:fresh(), VC1 = increment(c, increment(b, increment(a, VC))), [begin - {ok, E} = get_dot(Actor, VC1), ?assert((valid_dot(E))) + {ok, E} = get_dot(Actor, VC1), ?assert((valid_dot(E))) end || Actor <- [a, b, c]], ?assertNot((valid_dot(undefined))), From c3ce2dc8322c0ee3ffe2dbfdb6f6e233d0957955 Mon Sep 17 00:00:00 2001 From: woelki Date: Mon, 5 Oct 2020 17:24:27 +0200 Subject: [PATCH 4/5] Merge with "Finish removing bucket properties and buckets" --- src/gen_fsm_compat.erl | 912 +++++++-------------- src/gen_nb_server.erl | 189 ++--- src/riak_core.app.src | 35 +- src/riak_core.erl | 338 +++----- src/riak_core_apl.erl | 227 ++---- src/riak_core_bucket.erl | 225 ------ src/riak_core_bucket_props.erl | 336 -------- src/riak_core_claim.erl | 4 - src/riak_core_claimant.erl | 756 ++++++++---------- src/riak_core_gossip.erl | 263 +++--- src/riak_core_handoff_manager.erl | 756 ++++++++---------- src/riak_core_handoff_receiver.erl | 103 ++- src/riak_core_node_watcher.erl | 4 +- src/riak_core_rand.erl | 58 -- src/riak_core_ring.erl | 1188 ++++++++++++---------------- src/riak_core_ring_handler.erl | 172 ++-- src/riak_core_ring_manager.erl | 529 +++++-------- src/riak_core_ring_util.erl | 5 +- src/riak_core_send_msg.erl | 2 +- src/riak_core_util.erl | 585 +++++--------- src/riak_core_vnode.erl | 142 ++-- src/riak_core_vnode_manager.erl | 664 +++++++--------- src/riak_core_vnode_master.erl | 126 +-- src/riak_core_vnode_proxy.erl | 365 ++++----- src/riak_core_vnode_worker.erl | 37 +- test/pqc/bprops_eqc.erl | 243 ------ test/pqc/bucket_eqc_utils.erl | 49 -- 27 files changed, 2944 insertions(+), 5369 deletions(-) delete mode 100644 src/riak_core_bucket.erl delete mode 100644 src/riak_core_bucket_props.erl delete mode 100644 src/riak_core_rand.erl delete mode 100644 test/pqc/bprops_eqc.erl delete mode 100644 test/pqc/bucket_eqc_utils.erl diff --git a/src/gen_fsm_compat.erl b/src/gen_fsm_compat.erl index 7622c14b6..f137bdb16 100644 --- a/src/gen_fsm_compat.erl +++ b/src/gen_fsm_compat.erl @@ -105,34 +105,18 @@ %%% %%% --------------------------------------------------- --export([start/3, - start/4, - start_link/3, - start_link/4, - stop/1, - stop/3, - send_event/2, - sync_send_event/2, - sync_send_event/3, - send_all_state_event/2, +-export([start/3, start/4, start_link/3, start_link/4, + stop/1, stop/3, send_event/2, sync_send_event/2, + sync_send_event/3, send_all_state_event/2, sync_send_all_state_event/2, - sync_send_all_state_event/3, - reply/2, - start_timer/2, - send_event_after/2, - cancel_timer/1, - enter_loop/4, - enter_loop/5, - enter_loop/6, - wake_hib/7]). + sync_send_all_state_event/3, reply/2, start_timer/2, + send_event_after/2, cancel_timer/1, enter_loop/4, + enter_loop/5, enter_loop/6, wake_hib/7]). %% Internal exports --export([init_it/6, - system_continue/3, - system_terminate/4, - system_code_change/4, - system_get_state/1, - system_replace_state/2, +-export([init_it/6, system_continue/3, + system_terminate/4, system_code_change/4, + system_get_state/1, system_replace_state/2, format_status/2]). -import(error_logger, [format/2]). 
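Before the callback specifications that follow, a minimal hedged sketch of a module implementing this behaviour; the state names, events, and counter payload are invented, but the return shapes match the -callback specs below:

    -module(my_switch_fsm).
    -behaviour(gen_fsm_compat).
    -export([init/1, off/2, on/2, handle_event/3,
             handle_sync_event/4]).

    init([]) -> {ok, off, 0}.

    %% state functions: Module:StateName(Event, StateData)
    off(flip, Count) -> {next_state, on, Count + 1};
    off(_Event, Count) -> {next_state, off, Count}.

    on(flip, Count) -> {next_state, off, Count + 1};
    on(_Event, Count) -> {next_state, on, Count}.

    handle_event(_Event, StateName, Count) ->
        {next_state, StateName, Count}.

    handle_sync_event(get_count, _From, StateName, Count) ->
        {reply, Count, StateName, Count}.

Such a module would be started with gen_fsm_compat:start_link(my_switch_fsm, [], []) and driven with send_event/2 and sync_send_all_state_event/2 from the API above.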
@@ -145,8 +129,7 @@ StateName :: atom(), StateData :: term()} | {ok, StateName :: atom(), StateData :: term(), timeout() | hibernate} | - {stop, Reason :: term()} | - ignore. + {stop, Reason :: term()} | ignore. -callback handle_event(Event :: term(), StateName :: atom(), @@ -194,10 +177,8 @@ {stop, Reason :: normal | term(), NewStateData :: term()}. --callback terminate(Reason :: normal | - shutdown | - {shutdown, term()} | - term(), +-callback terminate(Reason :: normal | shutdown | + {shutdown, term()} | term(), StateName :: atom(), StateData :: term()) -> term(). -callback code_change(OldVsn :: term() | {down, term()}, @@ -214,10 +195,8 @@ State :: term(), Status :: term(). --optional_callbacks([handle_info/3, - terminate/3, - code_change/4, - format_status/2]). +-optional_callbacks([handle_info/3, terminate/3, + code_change/4, format_status/2]). %%% --------------------------------------------------- %%% Starts a generic state machine. @@ -227,7 +206,7 @@ %%% start_link(Name, Mod, Args, Options) where: %%% Name ::= {local, atom()} | {global, term()} | {via, atom(), term()} %%% Mod ::= atom(), callback module implementing the 'real' fsm -%%% Args ::= term(), init arguments (to Mod:init/1) +%%% Args ::= term(), init arguments (to Module:init/1) %%% Options ::= [{debug, [Flag]}] %%% Flag ::= trace | log | {logfile, File} | statistics | debug %%% (debug == log && statistics) @@ -253,69 +232,60 @@ stop(Name, Reason, Timeout) -> gen:stop(Name, Reason, Timeout). send_event({global, Name}, Event) -> - catch global:send(Name, {'$gen_event', Event}), - ok; -send_event({via, Mod, Name}, Event) -> - catch Mod:send(Name, {'$gen_event', Event}), - ok; + catch global:send(Name, {'$gen_event', Event}), ok; +send_event({via, Module, Name}, Event) -> + catch Module:send(Name, {'$gen_event', Event}), ok; send_event(Name, Event) -> - Name ! {'$gen_event', Event}, - ok. + Name ! {'$gen_event', Event}, ok. sync_send_event(Name, Event) -> case catch gen:call(Name, '$gen_sync_event', Event) of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_event, [Name, Event]}}) + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_event, [Name, Event]}}) end. sync_send_event(Name, Event, Timeout) -> - case catch gen:call(Name, - '$gen_sync_event', - Event, + case catch gen:call(Name, '$gen_sync_event', Event, Timeout) of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_event, [Name, Event, Timeout]}}) + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_event, [Name, Event, Timeout]}}) end. send_all_state_event({global, Name}, Event) -> catch global:send(Name, {'$gen_all_state_event', Event}), ok; -send_all_state_event({via, Mod, Name}, Event) -> - catch Mod:send(Name, {'$gen_all_state_event', Event}), +send_all_state_event({via, Module, Name}, Event) -> + catch Module:send(Name, + {'$gen_all_state_event', Event}), ok; send_all_state_event(Name, Event) -> - Name ! {'$gen_all_state_event', Event}, - ok. + Name ! {'$gen_all_state_event', Event}, ok. sync_send_all_state_event(Name, Event) -> - case catch gen:call(Name, - '$gen_sync_all_state_event', + case catch gen:call(Name, '$gen_sync_all_state_event', Event) of - {ok, Res} -> Res; - {'EXIT', Reason} -> - exit({Reason, - {?MODULE, sync_send_all_state_event, [Name, Event]}}) + {ok, Res} -> Res; + {'EXIT', Reason} -> + exit({Reason, + {?MODULE, sync_send_all_state_event, [Name, Event]}}) end. 
sync_send_all_state_event(Name, Event, Timeout) ->
-    case catch gen:call(Name,
-			'$gen_sync_all_state_event',
-			Event,
-			Timeout)
+    case catch gen:call(Name, '$gen_sync_all_state_event',
+			Event, Timeout)
 	of
-      {ok, Res} -> Res;
-      {'EXIT', Reason} ->
-	  exit({Reason,
-		{?MODULE,
-		 sync_send_all_state_event,
-		 [Name, Event, Timeout]}})
+	{ok, Res} -> Res;
+	{'EXIT', Reason} ->
+	    exit({Reason,
+		  {?MODULE, sync_send_all_state_event,
+		   [Name, Event, Timeout]}})
     end.
 
 %% Designed to be only callable within one of the callbacks
@@ -337,9 +307,9 @@ send_event_after(Time, Event) ->
 %% an active timer/send_event_after, false otherwise.
 cancel_timer(Ref) ->
     case erlang:cancel_timer(Ref) of
-      false ->
-	  receive {timeout, Ref, _} -> 0 after 0 -> false end;
-      RemainingTime -> RemainingTime
+	false ->
+	    receive {timeout, Ref, _} -> 0 after 0 -> false end;
+	RemainingTime -> RemainingTime
     end.
 
 %% enter_loop/4,5,6
@@ -351,37 +321,21 @@ cancel_timer(Ref) ->
 %% The user is responsible for any initialization of the process,
 %% including registering a name for it.
 enter_loop(Mod, Options, StateName, StateData) ->
-    enter_loop(Mod,
-	       Options,
-	       StateName,
-	       StateData,
-	       self(),
+    enter_loop(Mod, Options, StateName, StateData, self(),
 	       infinity).
 
 enter_loop(Mod, Options, StateName, StateData,
 	   {Scope, _} = ServerName)
     when Scope == local; Scope == global ->
-    enter_loop(Mod,
-	       Options,
-	       StateName,
-	       StateData,
-	       ServerName,
-	       infinity);
+    enter_loop(Mod, Options, StateName, StateData,
+	       ServerName, infinity);
 enter_loop(Mod, Options, StateName, StateData,
 	   {via, _, _} = ServerName) ->
-    enter_loop(Mod,
-	       Options,
-	       StateName,
-	       StateData,
-	       ServerName,
-	       infinity);
+    enter_loop(Mod, Options, StateName, StateData,
+	       ServerName, infinity);
 enter_loop(Mod, Options, StateName, StateData, Timeout) ->
-    enter_loop(Mod,
-	       Options,
-	       StateName,
-	       StateData,
-	       self(),
+    enter_loop(Mod, Options, StateName, StateData, self(),
 	       Timeout).
 
 enter_loop(Mod, Options, StateName, StateData,
@@ -390,65 +344,48 @@ enter_loop(Mod, Options, StateName, StateData,
     Parent = gen:get_parent(),
     Debug = gen:debug_options(Name, Options),
    HibernateAfterTimeout = gen:hibernate_after(Options),
-    loop(Parent,
-	 Name,
-	 StateName,
-	 StateData,
-	 Mod,
-	 Timeout,
-	 HibernateAfterTimeout,
-	 Debug).
+    loop(Parent, Name, StateName, StateData, Mod, Timeout,
+	 HibernateAfterTimeout, Debug).
 
 %%% ---------------------------------------------------
 %%% Initiate the new process.
 %%% Register the name using the Rfunc function
-%%% Calls the Mod:init/Args function.
+%%% Calls the Module:init/Args function.
 %%% Finally an acknowledge is sent to Parent and the main
 %%% loop is entered.
%%% --------------------------------------------------- init_it(Starter, self, Name, Mod, Args, Options) -> init_it(Starter, self(), Name, Mod, Args, Options); -init_it(Starter, Parent, Name0, Mod, Args, Options) -> +init_it(Starter, Parent, Name0, Module, Args, + Options) -> Name = gen:name(Name0), Debug = gen:debug_options(Name, Options), HibernateAfterTimeout = gen:hibernate_after(Options), - case catch Mod:init(Args) of - {ok, StateName, StateData} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, - Name, - StateName, - StateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug); - {ok, StateName, StateData, Timeout} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, - Name, - StateName, - StateData, - Mod, - Timeout, - HibernateAfterTimeout, - Debug); - {stop, Reason} -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - ignore -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, ignore), - exit(normal); - {'EXIT', Reason} -> - gen:unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - Else -> - Error = {bad_return_value, Else}, - proc_lib:init_ack(Starter, {error, Error}), - exit(Error) + case catch Module:init(Args) of + {ok, StateName, StateData} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, StateName, StateData, Module, + infinity, HibernateAfterTimeout, Debug); + {ok, StateName, StateData, Timeout} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, StateName, StateData, Module, + Timeout, HibernateAfterTimeout, Debug); + {stop, Reason} -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + ignore -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, ignore), + exit(normal); + {'EXIT', Reason} -> + gen:unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + Else -> + Error = {bad_return_value, Else}, + proc_lib:init_ack(Starter, {error, Error}), + exit(Error) end. %%----------------------------------------------------------------- @@ -456,202 +393,96 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> %%----------------------------------------------------------------- loop(Parent, Name, StateName, StateData, Mod, hibernate, HibernateAfterTimeout, Debug) -> - proc_lib:hibernate(?MODULE, - wake_hib, - [Parent, - Name, - StateName, - StateData, - Mod, - HibernateAfterTimeout, - Debug]); + proc_lib:hibernate(?MODULE, wake_hib, + [Parent, Name, StateName, StateData, Mod, + HibernateAfterTimeout, Debug]); loop(Parent, Name, StateName, StateData, Mod, infinity, HibernateAfterTimeout, Debug) -> receive - Msg -> - decode_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug, - false) - after HibernateAfterTimeout -> - loop(Parent, - Name, - StateName, - StateData, - Mod, - hibernate, - HibernateAfterTimeout, - Debug) + Msg -> + decode_msg(Msg, Parent, Name, StateName, StateData, Mod, + infinity, HibernateAfterTimeout, Debug, false) + after HibernateAfterTimeout -> + loop(Parent, Name, StateName, StateData, Mod, hibernate, + HibernateAfterTimeout, Debug) end; loop(Parent, Name, StateName, StateData, Mod, Time, HibernateAfterTimeout, Debug) -> Msg = receive - Input -> Input after Time -> {'$gen_event', timeout} + Input -> Input after Time -> {'$gen_event', timeout} end, - decode_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout, - Debug, - false). 
+ decode_msg(Msg, Parent, Name, StateName, StateData, Mod, + Time, HibernateAfterTimeout, Debug, false). wake_hib(Parent, Name, StateName, StateData, Mod, HibernateAfterTimeout, Debug) -> Msg = receive Input -> Input end, - decode_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - hibernate, - HibernateAfterTimeout, - Debug, - true). + decode_msg(Msg, Parent, Name, StateName, StateData, Mod, + hibernate, HibernateAfterTimeout, Debug, true). decode_msg(Msg, Parent, Name, StateName, StateData, Mod, Time, HibernateAfterTimeout, Debug, Hib) -> case Msg of - {system, From, Req} -> - sys:handle_system_msg(Req, - From, - Parent, - ?MODULE, - Debug, - [Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout], - Hib); - {'EXIT', Parent, Reason} -> - terminate(Reason, - Name, - Msg, - Mod, - StateName, - StateData, - Debug); - _Msg when Debug =:= [] -> - handle_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout); - _Msg -> - Debug1 = sys:handle_debug(Debug, - fun print_event/3, - {Name, StateName}, - {in, Msg}), - handle_msg(Msg, - Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout, - Debug1) + {system, From, Req} -> + sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, + [Name, StateName, StateData, Mod, Time, + HibernateAfterTimeout], + Hib); + {'EXIT', Parent, Reason} -> + terminate(Reason, Name, Msg, Mod, StateName, StateData, + Debug); + _Msg when Debug =:= [] -> + handle_msg(Msg, Parent, Name, StateName, StateData, Mod, + Time, HibernateAfterTimeout); + _Msg -> + Debug1 = sys:handle_debug(Debug, fun print_event/3, + {Name, StateName}, {in, Msg}), + handle_msg(Msg, Parent, Name, StateName, StateData, Mod, + Time, HibernateAfterTimeout, Debug1) end. %%----------------------------------------------------------------- %% Callback functions for system messages handling. %%----------------------------------------------------------------- system_continue(Parent, Debug, - [Name, - StateName, - StateData, - Mod, - Time, + [Name, StateName, StateData, Mod, Time, HibernateAfterTimeout]) -> - loop(Parent, - Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout, - Debug). + loop(Parent, Name, StateName, StateData, Mod, Time, + HibernateAfterTimeout, Debug). -spec system_terminate(term(), _, _, [term(), ...]) -> no_return(). system_terminate(Reason, _Parent, Debug, - [Name, - StateName, - StateData, - Mod, - _Time, + [Name, StateName, StateData, Mod, _Time, _HibernateAfterTimeout]) -> - terminate(Reason, - Name, - [], - Mod, - StateName, - StateData, + terminate(Reason, Name, [], Mod, StateName, StateData, Debug). -system_code_change([Name, - StateName, - StateData, - Mod, - Time, - HibernateAfterTimeout], +system_code_change([Name, StateName, StateData, Module, + Time, HibernateAfterTimeout], _Module, OldVsn, Extra) -> - case catch Mod:code_change(OldVsn, - StateName, - StateData, - Extra) + case catch Module:code_change(OldVsn, StateName, + StateData, Extra) of - {ok, NewStateName, NewStateData} -> - {ok, - [Name, - NewStateName, - NewStateData, - Mod, - Time, - HibernateAfterTimeout]}; - Else -> Else + {ok, NewStateName, NewStateData} -> + {ok, + [Name, NewStateName, NewStateData, Module, Time, + HibernateAfterTimeout]}; + Else -> Else end. -system_get_state([_Name, - StateName, - StateData, - _Mod, - _Time, - _HibernateAfterTimeout]) -> +system_get_state([_Name, StateName, StateData, _Mod, + _Time, _HibernateAfterTimeout]) -> {ok, {StateName, StateData}}. 
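
%% system_get_state/1 above and system_replace_state/2 below are the
%% hooks the sys module calls into; e.g., against a running fsm
%% registered as my_fsm (a placeholder name):
%%
%%   {StateName, StateData} = sys:get_state(my_fsm),
%%   sys:replace_state(my_fsm, fun ({SN, SD}) -> {SN, SD} end).
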
system_replace_state(StateFun, - [Name, - StateName, - StateData, - Mod, - Time, + [Name, StateName, StateData, Mod, Time, HibernateAfterTimeout]) -> Result = {NStateName, NStateData} = StateFun({StateName, StateData}), - {ok, - Result, - [Name, - NStateName, - NStateData, - Mod, - Time, + {ok, Result, + [Name, NStateName, NStateData, Mod, Time, HibernateAfterTimeout]}. %%----------------------------------------------------------------- @@ -660,27 +491,23 @@ system_replace_state(StateFun, %%----------------------------------------------------------------- print_event(Dev, {in, Msg}, {Name, StateName}) -> case Msg of - {'$gen_event', Event} -> - io:format(Dev, - "*DBG* ~tp got event ~tp in state ~tw~n", - [Name, Event, StateName]); - {'$gen_all_state_event', Event} -> - io:format(Dev, - "*DBG* ~tp got all_state_event ~tp in " - "state ~tw~n", - [Name, Event, StateName]); - {timeout, Ref, {'$gen_timer', Message}} -> - io:format(Dev, - "*DBG* ~tp got timer ~tp in state ~tw~n", - [Name, {timeout, Ref, Message}, StateName]); - {timeout, _Ref, {'$gen_event', Event}} -> - io:format(Dev, - "*DBG* ~tp got timer ~tp in state ~tw~n", - [Name, Event, StateName]); - _ -> - io:format(Dev, - "*DBG* ~tp got ~tp in state ~tw~n", - [Name, Msg, StateName]) + {'$gen_event', Event} -> + io:format(Dev, "*DBG* ~tp got event ~tp in state ~tw~n", + [Name, Event, StateName]); + {'$gen_all_state_event', Event} -> + io:format(Dev, + "*DBG* ~tp got all_state_event ~tp in " + "state ~tw~n", + [Name, Event, StateName]); + {timeout, Ref, {'$gen_timer', Message}} -> + io:format(Dev, "*DBG* ~tp got timer ~tp in state ~tw~n", + [Name, {timeout, Ref, Message}, StateName]); + {timeout, _Ref, {'$gen_event', Event}} -> + io:format(Dev, "*DBG* ~tp got timer ~tp in state ~tw~n", + [Name, Event, StateName]); + _ -> + io:format(Dev, "*DBG* ~tp got ~tp in state ~tw~n", + [Name, Msg, StateName]) end; print_event(Dev, {out, Msg, To, StateName}, Name) -> io:format(Dev, @@ -688,8 +515,7 @@ print_event(Dev, {out, Msg, To, StateName}, Name) -> "switched to state ~tw~n", [Name, Msg, To, StateName]); print_event(Dev, return, {Name, StateName}) -> - io:format(Dev, - "*DBG* ~tp switched to state ~tw~n", + io:format(Dev, "*DBG* ~tp switched to state ~tw~n", [Name, StateName]). 
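
%% The print_event/3 output above only appears when sys debugging is
%% enabled, either at start time or at runtime (my_fsm and my_mod are
%% placeholder names):
%%
%%   {ok, _Pid} = gen_fsm_compat:start_link({local, my_fsm}, my_mod,
%%                                          [], [{debug, [trace]}]),
%%   sys:trace(my_fsm, true).
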
handle_msg(Msg, Parent, Name, StateName, StateData, Mod, @@ -697,208 +523,108 @@ handle_msg(Msg, Parent, Name, StateName, StateData, Mod, %No debug here From = from(Msg), case catch dispatch(Msg, Mod, StateName, StateData) of - {next_state, NStateName, NStateData} -> - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - []); - {next_state, NStateName, NStateData, Time1} -> - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - []); - {reply, Reply, NStateName, NStateData} - when From =/= undefined -> - reply(From, Reply), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - []); - {reply, Reply, NStateName, NStateData, Time1} - when From =/= undefined -> - reply(From, Reply), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - []); - {stop, Reason, NStateData} -> - terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - []); - {stop, Reason, Reply, NStateData} - when From =/= undefined -> - {'EXIT', R} = (catch terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - [])), - reply(From, Reply), - exit(R); - {'EXIT', - {undef, [{Mod, handle_info, [_, _, _], _} | _]}} -> - error_logger:warning_msg("** Undefined handle_info in ~p~n** Unhandled " - "message: ~tp~n", - [Mod, Msg]), - loop(Parent, - Name, - StateName, - StateData, - Mod, - infinity, - HibernateAfterTimeout, - []); - {'EXIT', What} -> - terminate(What, - Name, - Msg, - Mod, - StateName, - StateData, - []); - Reply -> - terminate({bad_return_value, Reply}, - Name, - Msg, - Mod, - StateName, - StateData, - []) + {next_state, NStateName, NStateData} -> + loop(Parent, Name, NStateName, NStateData, Mod, + infinity, HibernateAfterTimeout, []); + {next_state, NStateName, NStateData, Time1} -> + loop(Parent, Name, NStateName, NStateData, Mod, Time1, + HibernateAfterTimeout, []); + {reply, Reply, NStateName, NStateData} + when From =/= undefined -> + reply(From, Reply), + loop(Parent, Name, NStateName, NStateData, Mod, + infinity, HibernateAfterTimeout, []); + {reply, Reply, NStateName, NStateData, Time1} + when From =/= undefined -> + reply(From, Reply), + loop(Parent, Name, NStateName, NStateData, Mod, Time1, + HibernateAfterTimeout, []); + {stop, Reason, NStateData} -> + terminate(Reason, Name, Msg, Mod, StateName, NStateData, + []); + {stop, Reason, Reply, NStateData} + when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, + StateName, NStateData, [])), + reply(From, Reply), + exit(R); + {'EXIT', + {undef, [{Mod, handle_info, [_, _, _], _} | _]}} -> + error_logger:warning_msg("** Undefined handle_info in ~p~n** Unhandled " + "message: ~tp~n", + [Mod, Msg]), + loop(Parent, Name, StateName, StateData, Mod, infinity, + HibernateAfterTimeout, []); + {'EXIT', What} -> + terminate(What, Name, Msg, Mod, StateName, StateData, + []); + Reply -> + terminate({bad_return_value, Reply}, Name, Msg, Mod, + StateName, StateData, []) end. 
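
%% handle_msg/8 above consumes the tuples a callback module returns
%% from its state functions; a minimal callback sketch (door_fsm is a
%% hypothetical module, not part of this patch):
%%
%%   init([]) -> {ok, locked, 0}.
%%
%%   locked({button, _Digit}, Count) ->   %% async, via send_event/2
%%       {next_state, locked, Count + 1};
%%   locked(_Event, Count) ->
%%       {next_state, locked, Count}.
%%
%%   locked(count, _From, Count) ->       %% sync, via sync_send_event/2,3
%%       {reply, Count, locked, Count}.
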
handle_msg(Msg, Parent, Name, StateName, StateData, Mod, _Time, HibernateAfterTimeout, Debug) -> From = from(Msg), case catch dispatch(Msg, Mod, StateName, StateData) of - {next_state, NStateName, NStateData} -> - Debug1 = sys:handle_debug(Debug, - fun print_event/3, - {Name, NStateName}, - return), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug1); - {next_state, NStateName, NStateData, Time1} -> - Debug1 = sys:handle_debug(Debug, - fun print_event/3, - {Name, NStateName}, - return), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - Debug1); - {reply, Reply, NStateName, NStateData} - when From =/= undefined -> - Debug1 = reply(Name, From, Reply, Debug, NStateName), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - infinity, - HibernateAfterTimeout, - Debug1); - {reply, Reply, NStateName, NStateData, Time1} - when From =/= undefined -> - Debug1 = reply(Name, From, Reply, Debug, NStateName), - loop(Parent, - Name, - NStateName, - NStateData, - Mod, - Time1, - HibernateAfterTimeout, - Debug1); - {stop, Reason, NStateData} -> - terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - Debug); - {stop, Reason, Reply, NStateData} - when From =/= undefined -> - {'EXIT', R} = (catch terminate(Reason, - Name, - Msg, - Mod, - StateName, - NStateData, - Debug)), - _ = reply(Name, From, Reply, Debug, StateName), - exit(R); - {'EXIT', What} -> - terminate(What, - Name, - Msg, - Mod, - StateName, - StateData, - Debug); - Reply -> - terminate({bad_return_value, Reply}, - Name, - Msg, - Mod, - StateName, - StateData, - Debug) + {next_state, NStateName, NStateData} -> + Debug1 = sys:handle_debug(Debug, fun print_event/3, + {Name, NStateName}, return), + loop(Parent, Name, NStateName, NStateData, Mod, + infinity, HibernateAfterTimeout, Debug1); + {next_state, NStateName, NStateData, Time1} -> + Debug1 = sys:handle_debug(Debug, fun print_event/3, + {Name, NStateName}, return), + loop(Parent, Name, NStateName, NStateData, Mod, Time1, + HibernateAfterTimeout, Debug1); + {reply, Reply, NStateName, NStateData} + when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, Name, NStateName, NStateData, Mod, + infinity, HibernateAfterTimeout, Debug1); + {reply, Reply, NStateName, NStateData, Time1} + when From =/= undefined -> + Debug1 = reply(Name, From, Reply, Debug, NStateName), + loop(Parent, Name, NStateName, NStateData, Mod, Time1, + HibernateAfterTimeout, Debug1); + {stop, Reason, NStateData} -> + terminate(Reason, Name, Msg, Mod, StateName, NStateData, + Debug); + {stop, Reason, Reply, NStateData} + when From =/= undefined -> + {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, + StateName, NStateData, Debug)), + _ = reply(Name, From, Reply, Debug, StateName), + exit(R); + {'EXIT', What} -> + terminate(What, Name, Msg, Mod, StateName, StateData, + Debug); + Reply -> + terminate({bad_return_value, Reply}, Name, Msg, Mod, + StateName, StateData, Debug) end. 
-dispatch({'$gen_event', Event}, Mod, StateName, +dispatch({'$gen_event', Event}, Module, StateName, StateData) -> - Mod:StateName(Event, StateData); -dispatch({'$gen_all_state_event', Event}, Mod, + Module:StateName(Event, StateData); +dispatch({'$gen_all_state_event', Event}, Module, StateName, StateData) -> - Mod:handle_event(Event, StateName, StateData); -dispatch({'$gen_sync_event', From, Event}, Mod, + Module:handle_event(Event, StateName, StateData); +dispatch({'$gen_sync_event', From, Event}, Module, StateName, StateData) -> - Mod:StateName(Event, From, StateData); + Module:StateName(Event, From, StateData); dispatch({'$gen_sync_all_state_event', From, Event}, - Mod, StateName, StateData) -> - Mod:handle_sync_event(Event, - From, - StateName, - StateData); -dispatch({timeout, Ref, {'$gen_timer', Msg}}, Mod, + Module, StateName, StateData) -> + Module:handle_sync_event(Event, From, StateName, + StateData); +dispatch({timeout, Ref, {'$gen_timer', Msg}}, Module, StateName, StateData) -> - Mod:StateName({timeout, Ref, Msg}, StateData); -dispatch({timeout, _Ref, {'$gen_event', Event}}, Mod, + Module:StateName({timeout, Ref, Msg}, StateData); +dispatch({timeout, _Ref, {'$gen_event', Event}}, Module, StateName, StateData) -> - Mod:StateName(Event, StateData); -dispatch(Info, Mod, StateName, StateData) -> - Mod:handle_info(Info, StateName, StateData). + Module:StateName(Event, StateData); +dispatch(Info, Module, StateName, StateData) -> + Module:handle_info(Info, StateName, StateData). from({'$gen_sync_event', From, _Event}) -> From; from({'$gen_sync_all_state_event', From, _Event}) -> @@ -910,9 +636,7 @@ reply({To, Tag}, Reply) -> catch To ! {Tag, Reply}. reply(Name, {To, Tag}, Reply, Debug, StateName) -> reply({To, Tag}, Reply), - sys:handle_debug(Debug, - fun print_event/3, - Name, + sys:handle_debug(Debug, fun print_event/3, Name, {out, Reply, To, StateName}). %%% --------------------------------------------------- @@ -922,69 +646,56 @@ reply(Name, {To, Tag}, Reply, Debug, StateName) -> -spec terminate(term(), _, _, atom(), _, _, _) -> no_return(). -terminate(Reason, Name, Msg, Mod, StateName, StateData, - Debug) -> - case erlang:function_exported(Mod, terminate, 3) of - true -> - case catch Mod:terminate(Reason, StateName, StateData) - of - {'EXIT', R} -> - FmtStateData = format_status(terminate, - Mod, - get(), - StateData), - error_info(R, - Name, - Msg, - StateName, - FmtStateData, - Debug), - exit(R); - _ -> ok - end; - false -> ok +terminate(Reason, Name, Msg, Module, StateName, + StateData, Debug) -> + case erlang:function_exported(Module, terminate, 3) of + true -> + case catch Module:terminate(Reason, StateName, + StateData) + of + {'EXIT', R} -> + FmtStateData = format_status(terminate, Module, get(), + StateData), + error_info(R, Name, Msg, StateName, FmtStateData, + Debug), + exit(R); + _ -> ok + end; + false -> ok end, case Reason of - normal -> exit(normal); - shutdown -> exit(shutdown); - {shutdown, _} = Shutdown -> exit(Shutdown); - _ -> - FmtStateData1 = format_status(terminate, - Mod, - get(), - StateData), - error_info(Reason, - Name, - Msg, - StateName, - FmtStateData1, - Debug), - exit(Reason) + normal -> exit(normal); + shutdown -> exit(shutdown); + {shutdown, _} = Shutdown -> exit(Shutdown); + _ -> + FmtStateData1 = format_status(terminate, Module, get(), + StateData), + error_info(Reason, Name, Msg, StateName, FmtStateData1, + Debug), + exit(Reason) end. 
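
%% Note that terminate/3 is optional (erlang:function_exported/3 is
%% consulted above), so a callback module that needs no cleanup may
%% omit it; when present, the trivial form is just:
%%
%%   terminate(_Reason, _StateName, _StateData) -> ok.
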
error_info(Reason, Name, Msg, StateName, StateData, Debug) -> Reason1 = case Reason of - {undef, [{M, F, A, L} | MFAs]} -> - case code:is_loaded(M) of - false -> - {'module could not be loaded', - [{M, F, A, L} | MFAs]}; - _ -> - case erlang:function_exported(M, F, length(A)) of - true -> Reason; - false -> - {'function not exported', - [{M, F, A, L} | MFAs]} - end - end; - _ -> Reason + {undef, [{M, F, A, L} | MFAs]} -> + case code:is_loaded(M) of + false -> + {'module could not be loaded', [{M, F, A, L} | MFAs]}; + _ -> + case erlang:function_exported(M, F, length(A)) of + true -> Reason; + false -> + {'function not exported', [{M, F, A, L} | MFAs]} + end + end; + _ -> Reason end, Str = "** State machine ~tp terminating \n" ++ - get_msg_str(Msg) ++ - "** When State == ~tp~n** Data " - "== ~tp~n** Reason for termination = " - "~n** ~tp~n", + get_msg_str(Msg) ++ + "** When State == ~tp~n** Data " + "== ~tp~n** Reason for termination = " + "~n** ~tp~n", format(Str, [Name, get_msg(Msg), StateName, StateData, Reason1]), sys:print_log(Debug), @@ -1022,15 +733,8 @@ get_msg(Msg) -> Msg. -if((?OTP_RELEASE) >= 22). format_status(Opt, StatusData) -> - [PDict, - SysState, - Parent, - Debug, - [Name, - StateName, - StateData, - Mod, - _Time, + [PDict, SysState, Parent, Debug, + [Name, StateName, StateData, Mod, _Time, _HibernateAfterTimeout]] = StatusData, Header = @@ -1040,29 +744,20 @@ format_status(Opt, StatusData) -> Specfic = format_status(Opt, Mod, PDict, StateData), Specfic = case format_status(Opt, Mod, PDict, StateData) of - S when is_list(S) -> S; - S -> [S] + S when is_list(S) -> S; + S -> [S] end, [{header, Header}, {data, - [{"Status", SysState}, - {"Parent", Parent}, - {"Logged events", Log}, - {"StateName", StateName}]} + [{"Status", SysState}, {"Parent", Parent}, + {"Logged events", Log}, {"StateName", StateName}]} | Specfic]. -elif((?OTP_RELEASE) >= 21). format_status(Opt, StatusData) -> - [PDict, - SysState, - Parent, - Debug, - [Name, - StateName, - StateData, - Mod, - _Time, + [PDict, SysState, Parent, Debug, + [Name, StateName, StateData, Mod, _Time, _HibernateAfterTimeout]] = StatusData, Header = @@ -1073,29 +768,28 @@ format_status(Opt, StatusData) -> Specfic = format_status(Opt, Mod, PDict, StateData), Specfic = case format_status(Opt, Mod, PDict, StateData) of - S when is_list(S) -> S; - S -> [S] + S when is_list(S) -> S; + S -> [S] end, [{header, Header}, {data, - [{"Status", SysState}, - {"Parent", Parent}, - {"Logged events", Log}, - {"StateName", StateName}]} + [{"Status", SysState}, {"Parent", Parent}, + {"Logged events", Log}, {"StateName", StateName}]} | Specfic]. -endif. -format_status(Opt, Mod, PDict, State) -> +format_status(Opt, Module, PDict, State) -> DefStatus = case Opt of - terminate -> State; - _ -> [{data, [{"StateData", State}]}] + terminate -> State; + _ -> [{data, [{"StateData", State}]}] end, - case erlang:function_exported(Mod, format_status, 2) of - true -> - case catch Mod:format_status(Opt, [PDict, State]) of - {'EXIT', _} -> DefStatus; - Else -> Else - end; - _ -> DefStatus + case erlang:function_exported(Module, format_status, 2) + of + true -> + case catch Module:format_status(Opt, [PDict, State]) of + {'EXIT', _} -> DefStatus; + Else -> Else + end; + _ -> DefStatus end. diff --git a/src/gen_nb_server.erl b/src/gen_nb_server.erl index 3fb24d90f..7db7e7d1c 100644 --- a/src/gen_nb_server.erl +++ b/src/gen_nb_server.erl @@ -94,125 +94,108 @@ {stop, Reason :: term(), NewState :: term()}. 
-%% @spec start_link(CallbackModule, IpAddr, Port, InitParams) -> Result -%% CallbackModule = atom() +%% @spec start_link(Module, IpAddr, Port, InitParams) -> Result +%% Module = atom() %% IpAddr = string() %% Port = integer() %% InitParams = [any()] %% Result = {ok, pid()} | {error, any()} %% @doc Start server listening on IpAddr:Port -start_link(CallbackModule, IpAddr, Port, InitParams) -> +start_link(Module, IpAddr, Port, InitParams) -> gen_server:start_link(?MODULE, - [CallbackModule, IpAddr, Port, InitParams], - []). + [Module, IpAddr, Port, InitParams], []). %% @hidden -init([CallbackModule, IpAddr, Port, InitParams]) -> - case CallbackModule:init(InitParams) of - {ok, ServerState} -> - case listen_on(CallbackModule, IpAddr, Port) of - {ok, Sock} -> - {ok, - #state{cb = CallbackModule, sock = Sock, - server_state = ServerState}}; - Error -> - CallbackModule:terminate(Error, ServerState), - Error - end; - Err -> Err +init([Module, IpAddr, Port, InitParams]) -> + case Module:init(InitParams) of + {ok, ServerState} -> + case listen_on(Module, IpAddr, Port) of + {ok, Sock} -> + {ok, + #state{cb = Module, sock = Sock, + server_state = ServerState}}; + Error -> Module:terminate(Error, ServerState), Error + end; + Err -> Err end. %% @hidden handle_call(Request, From, - #state{cb = Callback, server_state = ServerState} = + #state{cb = Module, server_state = ServerState} = State) -> - case Callback:handle_call(Request, From, ServerState) of - {reply, Reply, NewServerState} -> - {reply, - Reply, - State#state{server_state = NewServerState}}; - {reply, Reply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {reply, - Reply, - State#state{server_state = NewServerState}, - Arg}; - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, - State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}}; - {stop, Reason, Reply, NewServerState} -> - {stop, - Reason, - Reply, - State#state{server_state = NewServerState}} + case Module:handle_call(Request, From, ServerState) of + {reply, Reply, NewServerState} -> + {reply, Reply, + State#state{server_state = NewServerState}}; + {reply, Reply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {reply, Reply, + State#state{server_state = NewServerState}, Arg}; + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, Reason, + State#state{server_state = NewServerState}}; + {stop, Reason, Reply, NewServerState} -> + {stop, Reason, Reply, + State#state{server_state = NewServerState}} end. 
%% @hidden handle_cast(Msg, - #state{cb = Callback, server_state = ServerState} = + #state{cb = Module, server_state = ServerState} = State) -> - case Callback:handle_cast(Msg, ServerState) of - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, - State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}} + case Module:handle_cast(Msg, ServerState) of + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, Reason, + State#state{server_state = NewServerState}} end. %% @hidden -handle_info({inet_async, - ListSock, - _Ref, +handle_info({inet_async, ListSock, _Ref, {ok, CliSocket}}, - #state{cb = Callback, server_state = ServerState} = + #state{cb = Module, server_state = ServerState} = State) -> inet_db:register_socket(CliSocket, inet_tcp), - case Callback:new_connection(CliSocket, ServerState) of - {ok, NewServerState} -> - {ok, _} = prim_inet:async_accept(ListSock, -1), - {noreply, State#state{server_state = NewServerState}}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}} + case Module:new_connection(CliSocket, ServerState) of + {ok, NewServerState} -> + {ok, _} = prim_inet:async_accept(ListSock, -1), + {noreply, State#state{server_state = NewServerState}}; + {stop, Reason, NewServerState} -> + {stop, Reason, + State#state{server_state = NewServerState}} end; handle_info(Info, - #state{cb = Callback, server_state = ServerState} = + #state{cb = Module, server_state = ServerState} = State) -> - case Callback:handle_info(Info, ServerState) of - {noreply, NewServerState} -> - {noreply, State#state{server_state = NewServerState}}; - {noreply, NewServerState, Arg} - when Arg =:= hibernate orelse is_number(Arg) -> - {noreply, - State#state{server_state = NewServerState}, - Arg}; - {stop, Reason, NewServerState} -> - {stop, - Reason, - State#state{server_state = NewServerState}} + case Module:handle_info(Info, ServerState) of + {noreply, NewServerState} -> + {noreply, State#state{server_state = NewServerState}}; + {noreply, NewServerState, Arg} + when Arg =:= hibernate orelse is_number(Arg) -> + {noreply, State#state{server_state = NewServerState}, + Arg}; + {stop, Reason, NewServerState} -> + {stop, Reason, + State#state{server_state = NewServerState}} end. %% @hidden terminate(Reason, - #state{cb = Callback, sock = Sock, + #state{cb = Module, sock = Sock, server_state = ServerState}) -> gen_tcp:close(Sock), - Callback:terminate(Reason, ServerState), + Module:terminate(Reason, ServerState), ok. %% @hidden @@ -221,28 +204,28 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
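
%% A callback module for this behaviour only has to provide init/1,
%% sock_opts/0, new_connection/2 and the usual gen_server callbacks;
%% a sketch of a trivial echo server (echo_server is a hypothetical
%% module name):
%%
%%   init(_Args) -> {ok, #{}}.
%%   sock_opts() -> [binary, {active, false}, {reuseaddr, true}].
%%   new_connection(Sock, State) ->
%%       {ok, Data} = gen_tcp:recv(Sock, 0),
%%       ok = gen_tcp:send(Sock, Data),
%%       ok = gen_tcp:close(Sock),
%%       {ok, State}.
%%
%% and it would be started as:
%%
%%   gen_nb_server:start_link(echo_server, "127.0.0.1", 8000, []).
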
%% Internal functions %% @hidden -%% @spec listen_on(CallbackModule, IpAddr, Port) -> Result -%% CallbackModule = atom() +%% @spec listen_on(Module, IpAddr, Port) -> Result +%% Module = atom() %% IpAddr = string() | tuple() %% Port = integer() %% Result = {ok, port()} | {error, any()} -listen_on(CallbackModule, IpAddr, Port) +listen_on(Module, IpAddr, Port) when is_tuple(IpAddr) andalso - (8 =:= size(IpAddr) orelse 4 =:= size(IpAddr)) -> - SockOpts = [{ip, IpAddr} | CallbackModule:sock_opts()], + (8 =:= size(IpAddr) orelse 4 =:= size(IpAddr)) -> + SockOpts = [{ip, IpAddr} | Module:sock_opts()], case gen_tcp:listen(Port, SockOpts) of - {ok, LSock} -> - {ok, _Ref} = prim_inet:async_accept(LSock, -1), - {ok, LSock}; - Err -> Err + {ok, LSock} -> + {ok, _Ref} = prim_inet:async_accept(LSock, -1), + {ok, LSock}; + Err -> Err end; -listen_on(CallbackModule, IpAddrStr, Port) -> +listen_on(Module, IpAddrStr, Port) -> case inet_parse:address(IpAddrStr) of - {ok, IpAddr} -> listen_on(CallbackModule, IpAddr, Port); - Err -> - logger:critical("Cannot start listener for ~p\n " - " on invalid address " - "~p:~p", - [CallbackModule, IpAddrStr, Port]), - Err + {ok, IpAddr} -> listen_on(Module, IpAddr, Port); + Err -> + logger:critical("Cannot start listener for ~p\n " + " on invalid address " + "~p:~p", + [Module, IpAddrStr, Port]), + Err end. diff --git a/src/riak_core.app.src b/src/riak_core.app.src index fff574bf7..f5f327bd9 100644 --- a/src/riak_core.app.src +++ b/src/riak_core.app.src @@ -1,35 +1,32 @@ -{application,riak_core, - [{description,"Riak Core Lite"}, +{application, riak_core, + [{description, "Riak Core Lite"}, {vsn,"0.10.1"}, {applications, - [kernel,stdlib,crypto,os_mon,poolboy] + [kernel, stdlib, crypto, os_mon, poolboy] }, - {mod,{riak_core_app,[]}}, + {mod, {riak_core_app, []}}, {env, - [{cluster_name,"default"}, - {platform_data_dir,"data"}, - {ring_state_dir,"data/ring"}, - {ring_creation_size,64}, + [{cluster_name, "default"}, + {platform_data_dir, "data"}, + {ring_state_dir, "data/ring"}, + {ring_creation_size, 64}, {gossip_interval, 60000}, - {target_n_val,4}, + {target_n_val, 4}, {vnode_inactivity_timeout, 60000}, {vnode_check_interval, 5000}, {vnode_overload_threshold, 10000}, {vnode_modules, []}, {vnode_routing, proxy}, - {handoff_concurrency,2}, + {handoff_concurrency, 2}, {handoff_receive_timeout, 300000}, % 5 minutes TCP timeout {handoff_receive_vnode_timeout, 60000}, % timeout for vnode to process the hand-off data msg - {default_bucket_props, []}, - {handoff_port,8099}, - {handoff_ip,"0.0.0.0"}, - {bucket_fixups, []}, - {bucket_validators, []}, + {handoff_port, 8099}, + {handoff_ip, "0.0.0.0"}, {stat_mods, []}, {health_checks, []} ]}, - {pkg_name,"riak_core_lite"}, - {maintainers,["riak_core_lite","Albert Schimpf"]}, - {licenses,["Apache"]}, - {links,[{"Github","https://github.com/riak-core-lite/riak_core_lite"}]} + {pkg_name, "riak_core_lite"}, + {maintainers, ["riak_core_lite", "Albert Schimpf"]}, + {licenses, ["Apache"]}, + {links, [{"Github", "https://github.com/riak-core-lite/riak_core_lite"}]} ]}. diff --git a/src/riak_core.erl b/src/riak_core.erl index e3a303bed..1ee62dbfb 100644 --- a/src/riak_core.erl +++ b/src/riak_core.erl @@ -21,22 +21,12 @@ %% ------------------------------------------------------------------- -module(riak_core). --export([stop/0, - stop/1, - join/1, - join/4, - staged_join/1, - remove/1, - down/1, - leave/0, - remove_from_cluster/1]). 
+-export([stop/0, stop/1, join/1, join/4, staged_join/1, + remove/1, down/1, leave/0, remove_from_cluster/1]). -export([vnode_modules/0, health_check/1]). --export([register/1, - register/2, - bucket_fixups/0, - bucket_validators/0]). +-export([register/1, register/2]). -export([stat_mods/0, stat_prefix/0]). @@ -95,24 +85,22 @@ join(_, Node, Auto) -> join(node(), Node, false, Auto). join(_, Node, Rejoin, Auto) -> case net_adm:ping(Node) of - pang -> {error, not_reachable}; - pong -> standard_join(Node, Rejoin, Auto) + pang -> {error, not_reachable}; + pong -> standard_join(Node, Rejoin, Auto) end. get_other_ring(Node) -> - riak_core_util:safe_rpc(Node, - riak_core_ring_manager, - get_raw_ring, - []). + riak_core_util:safe_rpc(Node, riak_core_ring_manager, + get_raw_ring, []). standard_join(Node, Rejoin, Auto) when is_atom(Node) -> case net_adm:ping(Node) of - pong -> - case get_other_ring(Node) of - {ok, Ring} -> standard_join(Node, Ring, Rejoin, Auto); - _ -> {error, unable_to_get_join_ring} - end; - pang -> {error, not_reachable} + pong -> + case get_other_ring(Node) of + {ok, Ring} -> standard_join(Node, Ring, Rejoin, Auto); + _ -> {error, unable_to_get_join_ring} + end; + pang -> {error, not_reachable} end. %% `init:get_status/0' will return a 2-tuple reflecting the init @@ -127,43 +115,37 @@ standard_join(Node, Ring, Rejoin, Auto) -> {ok, MyRing} = riak_core_ring_manager:get_raw_ring(), InitComplete = init_complete(init:get_status()), SameSize = riak_core_ring:num_partitions(MyRing) =:= - riak_core_ring:num_partitions(Ring), + riak_core_ring:num_partitions(Ring), Singleton = [node()] =:= - riak_core_ring:all_members(MyRing), + riak_core_ring:all_members(MyRing), case {InitComplete, Rejoin or Singleton, SameSize} of - {false, _, _} -> {error, node_still_starting}; - {_, false, _} -> {error, not_single_node}; - {_, _, false} -> {error, different_ring_sizes}; - _ -> - Ring2 = riak_core_ring:add_member(node(), Ring, node()), - Ring3 = riak_core_ring:set_owner(Ring2, node()), - Ring4 = riak_core_ring:update_member_meta(node(), - Ring3, - node(), - gossip_vsn, - 2), - Ring5 = Ring4, - Ring6 = maybe_auto_join(Auto, node(), Ring5), - riak_core_ring_manager:set_my_ring(Ring6), - riak_core_gossip:send_ring(Node, node()) + {false, _, _} -> {error, node_still_starting}; + {_, false, _} -> {error, not_single_node}; + {_, _, false} -> {error, different_ring_sizes}; + _ -> + Ring2 = riak_core_ring:add_member(node(), Ring, node()), + Ring3 = riak_core_ring:set_owner(Ring2, node()), + Ring4 = riak_core_ring:update_member_meta(node(), Ring3, + node(), gossip_vsn, 2), + Ring5 = Ring4, + Ring6 = maybe_auto_join(Auto, node(), Ring5), + riak_core_ring_manager:set_my_ring(Ring6), + riak_core_gossip:send_ring(Node, node()) end. maybe_auto_join(false, _Node, Ring) -> Ring; maybe_auto_join(true, Node, Ring) -> - riak_core_ring:update_member_meta(Node, - Ring, - Node, - '$autojoin', - true). + riak_core_ring:update_member_meta(Node, Ring, Node, + '$autojoin', true). remove(Node) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case {riak_core_ring:all_members(Ring), riak_core_ring:member_status(Ring, Node)} of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - _ -> standard_remove(Node) + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + _ -> standard_remove(Node) end. 
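
%% A typical operator flow over this membership API, run from the
%% console of the node in question (node names are examples):
%%
%%   riak_core:join('dev1@127.0.0.1'),    %% or staged_join/1
%%   riak_core:leave(),                   %% retire the local node
%%   riak_core:down('dev3@127.0.0.1').    %% mark an unreachable node down
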
standard_remove(Node) -> @@ -183,27 +165,27 @@ standard_remove(Node) -> down(Node) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case net_adm:ping(Node) of - pong -> {error, is_up}; - pang -> - case {riak_core_ring:all_members(Ring), - riak_core_ring:member_status(Ring, Node)} - of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - _ -> - riak_core_ring_manager:ring_trans(fun (Ring2, _) -> - Ring3 = - riak_core_ring:down_member(node(), - Ring2, - Node), - Ring4 = - riak_core_ring:ring_changed(node(), - Ring3), - {new_ring, Ring4} - end, - []), - ok - end + pong -> {error, is_up}; + pang -> + case {riak_core_ring:all_members(Ring), + riak_core_ring:member_status(Ring, Node)} + of + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + _ -> + riak_core_ring_manager:ring_trans(fun (Ring2, _) -> + Ring3 = + riak_core_ring:down_member(node(), + Ring2, + Node), + Ring4 = + riak_core_ring:ring_changed(node(), + Ring3), + {new_ring, Ring4} + end, + []), + ok + end end. leave() -> @@ -212,10 +194,10 @@ leave() -> case {riak_core_ring:all_members(Ring), riak_core_ring:member_status(Ring, Node)} of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - {_, valid} -> standard_leave(Node); - {_, _} -> {error, already_leaving} + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + {_, valid} -> standard_leave(Node); + {_, _} -> {error, already_leaving} end. standard_leave(Node) -> @@ -238,45 +220,32 @@ remove_from_cluster(ExitingNode) vnode_modules() -> case application:get_env(riak_core, vnode_modules) of - undefined -> []; - {ok, Mods} -> Mods - end. - -bucket_fixups() -> - case application:get_env(riak_core, bucket_fixups) of - undefined -> []; - {ok, Mods} -> Mods - end. - -bucket_validators() -> - case application:get_env(riak_core, bucket_validators) - of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. stat_mods() -> case application:get_env(riak_core, stat_mods) of - undefined -> []; - {ok, Mods} -> Mods + undefined -> []; + {ok, Mods} -> Mods end. health_check(App) -> case application:get_env(riak_core, health_checks) of - undefined -> undefined; - {ok, Mods} -> - case lists:keyfind(App, 1, Mods) of - false -> undefined; - {App, MFA} -> MFA - end + undefined -> undefined; + {ok, Mods} -> + case lists:keyfind(App, 1, Mods) of + false -> undefined; + {App, MFA} -> MFA + end end. %% Get the application name if not supplied, first by get_application %% then by searching by module name get_app(undefined, Module) -> {ok, App} = case application:get_application(self()) of - {ok, AppName} -> {ok, AppName}; - undefined -> app_for_module(Module) + {ok, AppName} -> {ok, AppName}; + undefined -> app_for_module(Module) end, App; get_app(App, _Module) -> App. 
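
%% Applications register their vnodes through the register/1,2 clauses
%% in the next hunk; e.g. (my_app and my_app_vnode are placeholders):
%%
%%   riak_core:register(my_app, [{vnode_module, my_app_vnode}]).
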
@@ -296,80 +265,35 @@ register(_App, []) -> undefined), riak_core_ring_events:force_sync_update(), ok; -register(App, [{bucket_fixup, FixupMod} | T]) -> - register_mod(get_app(App, FixupMod), - FixupMod, - bucket_fixups), - register(App, T); -register(App, [{repl_helper, FixupMod} | T]) -> - register_mod(get_app(App, FixupMod), - FixupMod, - repl_helper), - register(App, T); register(App, [{vnode_module, VNodeMod} | T]) -> - register_mod(get_app(App, VNodeMod), - VNodeMod, + register_mod(get_app(App, VNodeMod), VNodeMod, vnode_modules), register(App, T); register(App, [{health_check, HealthMFA} | T]) -> - register_metadata(get_app(App, HealthMFA), - HealthMFA, + register_metadata(get_app(App, HealthMFA), HealthMFA, health_checks), - register(App, T); -register(App, - [{bucket_validator, ValidationMod} | T]) -> - register_mod(get_app(App, ValidationMod), - ValidationMod, - bucket_validators), - register(App, T); -register(App, [{stat_mod, StatMod} | T]) -> - register_mod(App, StatMod, stat_mods), - register(App, T); -register(App, [{permissions, Permissions} | T]) -> - register_mod(App, Permissions, permissions), - register(App, T); -register(App, [{auth_mod, {AuthType, AuthMod}} | T]) -> - register_proplist({AuthType, AuthMod}, auth_mods), register(App, T). register_mod(App, Module, Type) when is_atom(Type) -> case Type of - vnode_modules -> - riak_core_vnode_proxy_sup:start_proxies(Module); - stat_mods -> - %% STATS - %% riak_core_stats_sup:start_server(Module); - logger:warning("Metric collection disabled"), - ok; - _ -> ok + vnode_modules -> + riak_core_vnode_proxy_sup:start_proxies(Module) end, case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{App, Module}]); - {ok, Mods} -> - application:set_env(riak_core, - Type, - lists:usort([{App, Module} | Mods])) + undefined -> + application:set_env(riak_core, Type, [{App, Module}]); + {ok, Mods} -> + application:set_env(riak_core, Type, + lists:usort([{App, Module} | Mods])) end. register_metadata(App, Value, Type) -> case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{App, Value}]); - {ok, Values} -> - application:set_env(riak_core, - Type, - lists:usort([{App, Value} | Values])) - end. - -register_proplist({Key, Value}, Type) -> - case application:get_env(riak_core, Type) of - undefined -> - application:set_env(riak_core, Type, [{Key, Value}]); - {ok, Values} -> - application:set_env(riak_core, - Type, - lists:keystore(Key, 1, Values, {Key, Value})) + undefined -> + application:set_env(riak_core, Type, [{App, Value}]); + {ok, Values} -> + application:set_env(riak_core, Type, + lists:usort([{App, Value} | Values])) end. %% @spec add_guarded_event_handler(HandlerMod, Handler, Args) -> AddResult @@ -378,9 +302,7 @@ register_proplist({Key, Value}, Type) -> %% Args = list() %% AddResult = ok | {error, Reason::term()} add_guarded_event_handler(HandlerMod, Handler, Args) -> - add_guarded_event_handler(HandlerMod, - Handler, - Args, + add_guarded_event_handler(HandlerMod, Handler, Args, undefined). %% @spec add_guarded_event_handler(HandlerMod, Handler, Args, ExitFun) -> AddResult @@ -398,9 +320,7 @@ add_guarded_event_handler(HandlerMod, Handler, Args) -> add_guarded_event_handler(HandlerMod, Handler, Args, ExitFun) -> riak_core_eventhandler_sup:start_guarded_handler(HandlerMod, - Handler, - Args, - ExitFun). + Handler, Args, ExitFun). 
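
%% Usage sketch for the guarded-handler API above; my_ring_handler is
%% a hypothetical gen_event handler module, and the two-argument exit
%% callback is an assumption about the ExitFun shape:
%%
%%   riak_core:add_guarded_event_handler(riak_core_ring_events,
%%                                       my_ring_handler, []),
%%   riak_core:add_guarded_event_handler(riak_core_ring_events,
%%                                       my_ring_handler, [],
%%                                       fun (_Handler, Reason) ->
%%                                           logger:warning("handler exited: ~p",
%%                                                          [Reason])
%%                                       end).
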
%% @spec delete_guarded_event_handler(HandlerMod, Handler, Args) -> Result %% HandlerMod = module() @@ -421,8 +341,7 @@ add_guarded_event_handler(HandlerMod, Handler, Args, delete_guarded_event_handler(HandlerMod, Handler, Args) -> riak_core_eventhandler_sup:stop_guarded_handler(HandlerMod, - Handler, - Args). + Handler, Args). app_for_module(Mod) -> app_for_module(application:which_applications(), Mod). @@ -431,38 +350,37 @@ app_for_module([], _Mod) -> {ok, undefined}; app_for_module([{App, _, _} | T], Mod) -> {ok, Mods} = application:get_key(App, modules), case lists:member(Mod, Mods) of - true -> {ok, App}; - false -> app_for_module(T, Mod) + true -> {ok, App}; + false -> app_for_module(T, Mod) end. wait_for_application(App) -> wait_for_application(App, 0). wait_for_application(App, Elapsed) -> - case lists:keymember(App, - 1, + case lists:keymember(App, 1, application:which_applications()) of - true when Elapsed == 0 -> ok; - true when Elapsed > 0 -> - logger:info("Wait complete for application ~p (~p " - "seconds)", - [App, Elapsed div 1000]), - ok; - false -> - %% Possibly print a notice. - ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, - case ShouldPrint of - true -> - logger:info("Waiting for application ~p to start\n " - " " - "(~p seconds).", - [App, Elapsed div 1000]); - false -> skip - end, - timer:sleep(?WAIT_POLL_INTERVAL), - wait_for_application(App, - Elapsed + (?WAIT_POLL_INTERVAL)) + true when Elapsed == 0 -> ok; + true when Elapsed > 0 -> + logger:info("Wait complete for application ~p (~p " + "seconds)", + [App, Elapsed div 1000]), + ok; + false -> + %% Possibly print a notice. + ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, + case ShouldPrint of + true -> + logger:info("Waiting for application ~p to start\n " + " " + "(~p seconds).", + [App, Elapsed div 1000]); + false -> skip + end, + timer:sleep(?WAIT_POLL_INTERVAL), + wait_for_application(App, + Elapsed + (?WAIT_POLL_INTERVAL)) end. wait_for_service(Service) -> @@ -472,25 +390,25 @@ wait_for_service(Service, Elapsed) -> case lists:member(Service, riak_core_node_watcher:services(node())) of - true when Elapsed == 0 -> ok; - true when Elapsed > 0 -> - logger:info("Wait complete for service ~p (~p seconds)", - [Service, Elapsed div 1000]), - ok; - false -> - %% Possibly print a notice. - ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, - case ShouldPrint of - true -> - logger:info("Waiting for service ~p to start\n " - " (~p " - "seconds)", - [Service, Elapsed div 1000]); - false -> skip - end, - timer:sleep(?WAIT_POLL_INTERVAL), - wait_for_service(Service, - Elapsed + (?WAIT_POLL_INTERVAL)) + true when Elapsed == 0 -> ok; + true when Elapsed > 0 -> + logger:info("Wait complete for service ~p (~p seconds)", + [Service, Elapsed div 1000]), + ok; + false -> + %% Possibly print a notice. + ShouldPrint = Elapsed rem (?WAIT_PRINT_INTERVAL) == 0, + case ShouldPrint of + true -> + logger:info("Waiting for service ~p to start\n " + " (~p " + "seconds)", + [Service, Elapsed div 1000]); + false -> skip + end, + timer:sleep(?WAIT_POLL_INTERVAL), + wait_for_service(Service, + Elapsed + (?WAIT_POLL_INTERVAL)) end. stat_prefix() -> diff --git a/src/riak_core_apl.erl b/src/riak_core_apl.erl index d693e7db5..df4778746 100644 --- a/src/riak_core_apl.erl +++ b/src/riak_core_apl.erl @@ -24,23 +24,13 @@ %% ------------------------------------------------------------------- -module(riak_core_apl). 
--export([active_owners/1, - active_owners/2, - get_apl/3, - get_apl/4, - get_apl_ann/2, - get_apl_ann/3, - get_apl_ann/4, - get_apl_ann_with_pnum/1, - get_primary_apl/3, - get_primary_apl/4, - get_primary_apl_chbin/4, - first_up/2, - offline_owners/1, - offline_owners/2]). - --export_type([preflist/0, - preflist_ann/0, +-export([active_owners/1, active_owners/2, get_apl/3, + get_apl/4, get_apl_ann/2, get_apl_ann/3, get_apl_ann/4, + get_apl_ann_with_pnum/1, get_primary_apl/3, + get_primary_apl/4, get_primary_apl_chbin/4, first_up/2, + offline_owners/1, offline_owners/2]). + +-export_type([preflist/0, preflist_ann/0, preflist_with_pnum_ann/0]). -ifdef(TEST). @@ -49,6 +39,8 @@ -endif. +-type bucket() :: binary() | {binary(), binary()}. + -type index() :: chash:index_as_int(). -type n_val() :: non_neg_integer(). @@ -97,9 +89,7 @@ active_owners(Ring, UpNodes) -> get_apl(DocIdx, N, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), - get_apl_chbin(DocIdx, - N, - CHBin, + get_apl_chbin(DocIdx, N, CHBin, riak_core_node_watcher:nodes(Service)). %% @doc Get the active preflist taking account of which nodes are up @@ -143,12 +133,11 @@ get_apl_ann(DocIdx, N, Ring, UpNodes) -> %% @doc Get the active preflist for a given {bucket, key} and list of nodes %% and annotate each node with type of primary/fallback. --spec get_apl_ann(riak_core_bucket:bucket(), - [node()]) -> preflist_ann(). +-spec get_apl_ann(bucket(), [node()]) -> preflist_ann(). get_apl_ann({Bucket, Key}, UpNodes) -> - BucketProps = riak_core_bucket:get_bucket(Bucket), - NVal = proplists:get_value(n_val, BucketProps), + {ok, NVal} = application:get_env(riak_core, + target_n_val), DocIdx = riak_core_util:chash_key({Bucket, Key}), get_apl_ann(DocIdx, NVal, UpNodes). @@ -156,7 +145,7 @@ get_apl_ann({Bucket, Key}, UpNodes) -> %% for a given {bucket, key} and annotate each node with type of %% primary/fallback -spec - get_apl_ann_with_pnum(riak_core_bucket:bucket()) -> preflist_with_pnum_ann(). + get_apl_ann_with_pnum(bucket()) -> preflist_with_pnum_ann(). get_apl_ann_with_pnum(BKey) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), @@ -184,9 +173,7 @@ get_apl_ann_chbin(DocIdx, N, CHBin, UpNodes) -> get_primary_apl(DocIdx, N, Service) -> {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), - get_primary_apl_chbin(DocIdx, - N, - CHBin, + get_primary_apl_chbin(DocIdx, N, CHBin, riak_core_node_watcher:nodes(Service)). %% @doc Same as get_apl, but returns only the primaries. @@ -250,13 +237,11 @@ check_up([], _UpNodes, Up, Pangs) -> check_up([{Partition, Node} | Rest], UpNodes, Up, Pangs) -> case is_up(Node, UpNodes) of - true -> - check_up(Rest, - UpNodes, - [{{Partition, Node}, primary} | Up], - Pangs); - false -> - check_up(Rest, UpNodes, Up, [{Partition, Node} | Pangs]) + true -> + check_up(Rest, UpNodes, + [{{Partition, Node}, primary} | Up], Pangs); + false -> + check_up(Rest, UpNodes, Up, [{Partition, Node} | Pangs]) end. %% @doc Find fallbacks for downed nodes in the preference list. 
@@ -270,13 +255,11 @@ find_fallbacks([], _Fallbacks, _UpNodes, Secondaries) -> find_fallbacks([{Partition, _Node} | Rest] = Pangs, [{_, FN} | Fallbacks], UpNodes, Secondaries) -> case is_up(FN, UpNodes) of - true -> - find_fallbacks(Rest, - Fallbacks, - UpNodes, - [{{Partition, FN}, fallback} | Secondaries]); - false -> - find_fallbacks(Pangs, Fallbacks, UpNodes, Secondaries) + true -> + find_fallbacks(Rest, Fallbacks, UpNodes, + [{{Partition, FN}, fallback} | Secondaries]); + false -> + find_fallbacks(Pangs, Fallbacks, UpNodes, Secondaries) end. %% @doc Find fallbacks for downed nodes in the preference list. @@ -294,13 +277,11 @@ find_fallbacks_chbin([{Partition, _Node} | Rest] = {_, FN} = chashbin:itr_value(Itr), Itr2 = chashbin:itr_next(Itr), case is_up(FN, UpNodes) of - true -> - find_fallbacks_chbin(Rest, - Itr2, - UpNodes, - [{{Partition, FN}, fallback} | Secondaries]); - false -> - find_fallbacks_chbin(Pangs, Itr2, UpNodes, Secondaries) + true -> + find_fallbacks_chbin(Rest, Itr2, UpNodes, + [{{Partition, FN}, fallback} | Secondaries]); + false -> + find_fallbacks_chbin(Pangs, Itr2, UpNodes, Secondaries) end. %% @doc Return true if a node is up. @@ -338,9 +319,7 @@ four_node_test() -> {365375409332725729550921208179070754913983135744, nodec}, {0, noded}], - (get_apl(last_in_ring(), - 3, - Ring, + (get_apl(last_in_ring(), 3, Ring, [nodeb, nodec, noded]))), %% With two nodes down ?assertEqual([{365375409332725729550921208179070754913983135744, @@ -368,8 +347,7 @@ perfect_ring(RingSize, Nodes) Rest ++ [NewOwner]} end, {PerfectRing, _} = lists:foldl(TransferNode, - {Ring, Nodes}, - Owners), + {Ring, Nodes}, Owners), PerfectRing. last_in_ring() -> @@ -382,11 +360,8 @@ six_node_test() -> %DocIdx = riak_core_util:chash_key({<<"foo">>, <<"bar">>}), DocIdx = <<73, 212, 27, 234, 104, 13, 150, 207, 0, 82, 86, 183, 125, 225, 172, 154, 135, 46, 6, 112>>, - Nodes = ['dev1@127.0.0.1', - 'dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1', - 'dev5@127.0.0.1', + Nodes = ['dev1@127.0.0.1', 'dev2@127.0.0.1', + 'dev3@127.0.0.1', 'dev4@127.0.0.1', 'dev5@127.0.0.1', 'dev6@127.0.0.1'], %% Fallbacks should be selected by finding the next-highest partition after %% the DocIdx of the key, in this case the 433883 partition. 
The N @@ -406,9 +381,7 @@ six_node_test() -> 'dev4@127.0.0.1'}, {433883298582611803841718934712646521460354973696, 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- ['dev2@127.0.0.1']))), ?assertEqual([{479555224749202520035584085735030365824602865664, 'dev4@127.0.0.1'}, @@ -416,9 +389,7 @@ six_node_test() -> 'dev5@127.0.0.1'}, {456719261665907161938651510223838443642478919680, 'dev6@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- ['dev2@127.0.0.1', 'dev3@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev5@127.0.0.1'}, @@ -426,36 +397,27 @@ six_node_test() -> 'dev6@127.0.0.1'}, {479555224749202520035584085735030365824602865664, 'dev1@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- - ['dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1']))), + ['dev2@127.0.0.1', 'dev3@127.0.0.1', + 'dev4@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev5@127.0.0.1'}, {456719261665907161938651510223838443642478919680, 'dev6@127.0.0.1'}, {479555224749202520035584085735030365824602865664, 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- - ['dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1', - 'dev1@127.0.0.1']))), + ['dev2@127.0.0.1', 'dev3@127.0.0.1', + 'dev4@127.0.0.1', 'dev1@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev2@127.0.0.1'}, {456719261665907161938651510223838443642478919680, 'dev3@127.0.0.1'}, {479555224749202520035584085735030365824602865664, 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- ['dev4@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev2@127.0.0.1'}, @@ -463,9 +425,7 @@ six_node_test() -> 'dev5@127.0.0.1'}, {479555224749202520035584085735030365824602865664, 'dev6@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- ['dev4@127.0.0.1', 'dev3@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev2@127.0.0.1'}, @@ -473,68 +433,49 @@ six_node_test() -> 'dev5@127.0.0.1'}, {479555224749202520035584085735030365824602865664, 'dev1@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- - ['dev4@127.0.0.1', - 'dev3@127.0.0.1', - 'dev6@127.0.0.1']))), + ['dev4@127.0.0.1', 'dev3@127.0.0.1', + 'dev6@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev2@127.0.0.1'}, {456719261665907161938651510223838443642478919680, 'dev5@127.0.0.1'}, {479555224749202520035584085735030365824602865664, 'dev2@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- - ['dev4@127.0.0.1', - 'dev3@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1']))), + ['dev4@127.0.0.1', 'dev3@127.0.0.1', + 'dev6@127.0.0.1', 'dev1@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev2@127.0.0.1'}, {456719261665907161938651510223838443642478919680, 'dev2@127.0.0.1'}, {479555224749202520035584085735030365824602865664, 'dev2@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- - ['dev4@127.0.0.1', - 'dev3@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev5@127.0.0.1']))), + ['dev4@127.0.0.1', 'dev3@127.0.0.1', + 'dev6@127.0.0.1', 'dev1@127.0.0.1', + 'dev5@127.0.0.1']))), ?assertEqual([{433883298582611803841718934712646521460354973696, 'dev2@127.0.0.1'}, 
{479555224749202520035584085735030365824602865664, 'dev4@127.0.0.1'}, {456719261665907161938651510223838443642478919680, 'dev5@127.0.0.1'}], - (get_apl(DocIdx, - 3, - Ring, + (get_apl(DocIdx, 3, Ring, Nodes -- ['dev3@127.0.0.1']))), ok. six_node_bucket_key_ann_test() -> {ok, [Ring]} = file:consult("test/my_ring"), - Nodes = ['dev1@127.0.0.1', - 'dev2@127.0.0.1', - 'dev3@127.0.0.1', - 'dev4@127.0.0.1', - 'dev5@127.0.0.1', + Nodes = ['dev1@127.0.0.1', 'dev2@127.0.0.1', + 'dev3@127.0.0.1', 'dev4@127.0.0.1', 'dev5@127.0.0.1', 'dev6@127.0.0.1'], Bucket = <<"favorite">>, Key = <<"jethrotull">>, - application:set_env(riak_core, - default_bucket_props, - [{n_val, 3}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}]), + application:set_env(riak_core, target_n_val, 3), riak_core_ring_manager:setup_ets(test), riak_core_ring_manager:set_ring_global(Ring), Size = riak_core_ring:num_partitions(Ring), @@ -549,53 +490,53 @@ six_node_bucket_key_ann_test() -> {{34, 'dev2@127.0.0.1'}, fallback}], (apl_with_partition_nums(get_apl_ann({Bucket, Key}, Nodes -- - ['dev5@127.0.0.1']), + ['dev5@127.0.0.1']), Size))), ?assertEqual([{{36, 'dev1@127.0.0.1'}, primary}, {{34, 'dev2@127.0.0.1'}, fallback}, {{35, 'dev3@127.0.0.1'}, fallback}], (apl_with_partition_nums(get_apl_ann({Bucket, Key}, Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1']), + ['dev5@127.0.0.1', + 'dev6@127.0.0.1']), Size))), ?assertEqual([{{34, 'dev2@127.0.0.1'}, fallback}, {{35, 'dev3@127.0.0.1'}, fallback}, {{36, 'dev4@127.0.0.1'}, fallback}], (apl_with_partition_nums(get_apl_ann({Bucket, Key}, Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1']), + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1']), Size))), ?assertEqual([{{34, 'dev3@127.0.0.1'}, fallback}, {{35, 'dev4@127.0.0.1'}, fallback}, {{36, 'dev3@127.0.0.1'}, fallback}], (apl_with_partition_nums(get_apl_ann({Bucket, Key}, Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev2@127.0.0.1']), + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev2@127.0.0.1']), Size))), ?assertEqual([{{34, 'dev4@127.0.0.1'}, fallback}, {{35, 'dev4@127.0.0.1'}, fallback}, {{36, 'dev4@127.0.0.1'}, fallback}], (apl_with_partition_nums(get_apl_ann({Bucket, Key}, Nodes -- - ['dev5@127.0.0.1', - 'dev6@127.0.0.1', - 'dev1@127.0.0.1', - 'dev2@127.0.0.1', - 'dev3@127.0.0.1']), + ['dev5@127.0.0.1', + 'dev6@127.0.0.1', + 'dev1@127.0.0.1', + 'dev2@127.0.0.1', + 'dev3@127.0.0.1']), Size))), ?assertEqual([{{34, 'dev5@127.0.0.1'}, primary}, {{35, 'dev6@127.0.0.1'}, primary}, {{36, 'dev3@127.0.0.1'}, fallback}], (apl_with_partition_nums(get_apl_ann({Bucket, Key}, Nodes -- - ['dev1@127.0.0.1', - 'dev2@127.0.0.1']), + ['dev1@127.0.0.1', + 'dev2@127.0.0.1']), Size))), riak_core_ring_manager:cleanup_ets(test), ok. 
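
%% Outside of the tests, the annotated-preflist API exercised above is
%% used roughly as follows (myservice is a placeholder service name):
%%
%%   DocIdx = riak_core_util:chash_key({<<"bucket">>, <<"key">>}),
%%   UpNodes = riak_core_node_watcher:nodes(myservice),
%%   Preflist = riak_core_apl:get_apl_ann(DocIdx, 3, UpNodes).
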
@@ -621,16 +562,14 @@ chbin_test_scenario(Size, NumNodes) -> Shuffled = riak_core_util:shuffle(Nodes), _ = CHBin, [begin - Up = max(0, NumNodes - Down), - UpNodes = lists:sublist(Shuffled, Up), - ?assertEqual((get_apl(HashKey, N, Ring, UpNodes)), - (get_apl_chbin(HashKey, N, CHBin, UpNodes))), - ?assertEqual((get_primary_apl(HashKey, - N, - Ring, - UpNodes)), - (get_primary_apl_chbin(HashKey, N, CHBin, UpNodes))), - ok + Up = max(0, NumNodes - Down), + UpNodes = lists:sublist(Shuffled, Up), + ?assertEqual((get_apl(HashKey, N, Ring, UpNodes)), + (get_apl_chbin(HashKey, N, CHBin, UpNodes))), + ?assertEqual((get_primary_apl(HashKey, N, Ring, + UpNodes)), + (get_primary_apl_chbin(HashKey, N, CHBin, UpNodes))), + ok end || HashKey <- HashKeys, N <- [1, 2, 3, 4], Down <- [0, 1, 2, Size div 2, Size - 1, Size]], diff --git a/src/riak_core_bucket.erl b/src/riak_core_bucket.erl deleted file mode 100644 index 5b547539b..000000000 --- a/src/riak_core_bucket.erl +++ /dev/null @@ -1,225 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% riak_core: Core Riak Application -%% -%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% @doc Functions for manipulating bucket properties. --module(riak_core_bucket). - --export([append_bucket_defaults/1, - set_bucket/2, - get_bucket/1, - get_bucket/2, - reset_bucket/1, - get_buckets/1, - bucket_nval_map/1, - default_object_nval/0, - merge_props/2, - name/1, - n_val/1, - get_value/2]). - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - --endif. - --type property() :: {PropName :: atom(), - PropValue :: any()}. - --type properties() :: [property()]. - --type - riak_core_ring() :: riak_core_ring:riak_core_ring(). - --type bucket_type() :: binary(). - --type nval_set() :: ordsets:ordset(pos_integer()). - --type bucket() :: binary() | {bucket_type(), binary()}. - --export_type([property/0, - properties/0, - bucket/0, - nval_set/0]). - -%% @doc Add a list of defaults to global list of defaults for new -%% buckets. If any item is in Items is already set in the -%% current defaults list, the new setting is omitted, and the old -%% setting is kept. Omitting the new setting is intended -%% behavior, to allow settings from app.config to override any -%% hard-coded values. -append_bucket_defaults(Items) when is_list(Items) -> - riak_core_bucket_props:append_defaults(Items). - -%% @doc Set the given BucketProps in Bucket or {BucketType, Bucket}. If BucketType does not -%% exist, or is not active, {error, no_type} is returned. --spec set_bucket(bucket(), [{atom(), any()}]) -> ok | - {error, - no_type | [{atom(), atom()}]}. - -set_bucket({<<"default">>, Name}, BucketProps) -> - set_bucket(Name, BucketProps); -set_bucket(Name, BucketProps0) -> - set_bucket(fun set_bucket_in_ring/2, - Name, - BucketProps0). 
- -set_bucket(StoreFun, Bucket, BucketProps0) -> - OldBucket = get_bucket(Bucket), - case riak_core_bucket_props:validate(update, - Bucket, - OldBucket, - BucketProps0) - of - {ok, BucketProps} -> - NewBucket = merge_props(BucketProps, OldBucket), - StoreFun(Bucket, NewBucket); - {error, Details} -> - logger:error("Bucket properties validation failed " - "~p~n", - [Details]), - {error, Details} - end. - -set_bucket_in_ring(Bucket, BucketMeta) -> - F = fun (Ring, _Args) -> - {new_ring, - riak_core_ring:update_meta(bucket_key(Bucket), - BucketMeta, - Ring)} - end, - {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), - ok. - -%% @spec merge_props(list(), list()) -> list() -%% @doc Merge two sets of bucket props. If duplicates exist, the -%% entries in Overriding are chosen before those in Other. -merge_props(Overriding, Other) -> - riak_core_bucket_props:merge(Overriding, Other). - -%% @spec get_bucket(riak_object:bucket()) -> -%% {ok, BucketProps :: riak_core_bucketprops()} | {error, no_type} -%% @doc Return the complete current list of properties for Bucket. -%% Properties include but are not limited to: -%%
-%% n_val: how many replicas of objects in this bucket (default: 3)
-%% allow_mult: can objects in this bucket have siblings? (default: false)
-%% linkfun: a function returning a m/r FunTerm for link extraction
-%% 
-%% -get_bucket({<<"default">>, Name}) -> get_bucket(Name); -get_bucket(Name) -> - Meta = riak_core_ring_manager:get_bucket_meta(Name), - get_bucket_props(Name, Meta). - -%% @spec get_bucket(Name, Ring::riak_core_ring()) -> -%% BucketProps :: riak_core_bucketprops() -%% @private -get_bucket({<<"default">>, Name}, Ring) -> - get_bucket(Name, Ring); -get_bucket({_Type, _Name} = Bucket, _Ring) -> - %% non-default type buckets are not stored in the ring, so just ignore it - get_bucket(Bucket). - -get_bucket_props(Name, undefined) -> - [{name, Name} | riak_core_bucket_props:defaults()]; -get_bucket_props(_Name, {ok, Bucket}) -> Bucket. - -%% @spec reset_bucket(binary()) -> ok -%% @doc Reset the bucket properties for Bucket to the settings -%% inherited from its Bucket Type -reset_bucket({<<"default">>, Name}) -> - reset_bucket(Name); -reset_bucket(Bucket) -> - F = fun (Ring, _Args) -> - {new_ring, - riak_core_ring:remove_meta(bucket_key(Bucket), Ring)} - end, - {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, - undefined), - ok. - -%% @doc Get bucket properties `Props' for all the buckets in the given -%% `Ring' and stored in metadata --spec get_buckets(riak_core_ring()) -> Props :: list(). - -get_buckets(Ring) -> - RingNames = riak_core_ring:get_buckets(Ring), - RingBuckets = [get_bucket(Name, Ring) - || Name <- RingNames], - RingBuckets. - -%% @doc returns a proplist containing all buckets and their respective N values --spec bucket_nval_map(riak_core_ring()) -> [{binary(), - integer()}]. - -bucket_nval_map(Ring) -> - [{riak_core_bucket:name(B), riak_core_bucket:n_val(B)} - || B <- riak_core_bucket:get_buckets(Ring)]. - -%% @doc returns the default n value for buckets that have not explicitly set the property --spec default_object_nval() -> integer(). - -default_object_nval() -> - riak_core_bucket:n_val(riak_core_bucket_props:defaults()). - -name(BProps) -> get_value(name, BProps). - -n_val(BProps) -> get_value(n_val, BProps). - -% a slighly faster version of proplists:get_value --spec get_value(atom(), properties()) -> any(). - -get_value(Key, Proplist) -> - case lists:keyfind(Key, 1, Proplist) of - {Key, Value} -> Value; - _ -> undefined - end. - -bucket_key({<<"default">>, Name}) -> bucket_key(Name); -bucket_key({_Type, _Name} = Bucket) -> Bucket; -bucket_key(Name) -> {bucket, Name}. - -%% =================================================================== -%% EUnit tests -%% =================================================================== --ifdef(TEST). - -simple_set_test() -> - application:load(riak_core), - application:set_env(riak_core, - ring_state_dir, - "_build/test/tmp"), - %% appending an empty list of defaults makes up for the fact that - %% riak_core_app:start/2 is not called during eunit runs - %% (that's where the usual defaults are set at startup), - %% while also not adding any trash that might affect other tests - append_bucket_defaults([]), - riak_core_ring_events:start_link(), - riak_core_ring_manager:start_link(test), - ok = set_bucket(a_bucket, [{key, value}]), - Bucket = get_bucket(a_bucket), - riak_core_ring_manager:stop(), - ?assertEqual(value, (proplists:get_value(key, Bucket))). - --endif. diff --git a/src/riak_core_bucket_props.erl b/src/riak_core_bucket_props.erl deleted file mode 100644 index 368163a8c..000000000 --- a/src/riak_core_bucket_props.erl +++ /dev/null @@ -1,336 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved. 
-%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- --module(riak_core_bucket_props). - --export([merge/2, - validate/4, - resolve/2, - defaults/0, - append_defaults/1]). - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - --endif. - --spec merge([{atom(), any()}], - [{atom(), any()}]) -> [{atom(), any()}]. - -merge(Overriding, Other) -> - lists:ukeymerge(1, - lists:ukeysort(1, Overriding), - lists:ukeysort(1, Other)). - --spec validate(create | update, - {riak_core_bucket:bucket_type(), undefined | binary()} | - binary(), - undefined | [{atom(), any()}], - [{atom(), any()}]) -> {ok, [{atom(), any()}]} | - {error, [{atom(), atom()}]}. - -validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps) -> - ReservedErrors = validate_reserved_names(Bucket), - CoreErrors = validate_core_props(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps), - validate(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps, - riak_core:bucket_validators(), - [ReservedErrors, CoreErrors]). - -validate(_CreateOrUpdate, _Bucket, _ExistingProps, - Props, [], ErrorLists) -> - case lists:flatten(ErrorLists) of - [] -> {ok, Props}; - Errors -> {error, Errors} - end; -validate(CreateOrUpdate, Bucket, ExistingProps, - BucketProps0, [{_App, Validator} | T], Errors0) -> - {BucketProps, Errors} = - Validator:validate(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps0), - validate(CreateOrUpdate, - Bucket, - ExistingProps, - BucketProps, - T, - [Errors | Errors0]). - -validate_core_props(CreateOrUpdate, Bucket, - ExistingProps, BucketProps) -> - lists:foldl(fun (Prop, Errors) -> - case validate_core_prop(CreateOrUpdate, - Bucket, - ExistingProps, - Prop) - of - true -> Errors; - Error -> [Error | Errors] - end - end, - [], - BucketProps). 
- -validate_core_prop(create, {_Bucket, undefined}, - undefined, {claimant, Claimant}) - when Claimant =:= node() -> - %% claimant valid on first call to create if claimant is this node - true; -validate_core_prop(create, {_Bucket, undefined}, - undefined, {claimant, _BadClaimant}) -> - %% claimant not valid on first call to create if claimant is not this node - {claimant, "Invalid claimant"}; -validate_core_prop(create, {_Bucket, undefined}, - Existing, {claimant, Claimant}) -> - %% subsequent creation calls cannot modify claimant and it should exist - case lists:keyfind(claimant, 1, Existing) of - false -> - {claimant, - "No claimant details found in existing " - "properties"}; - {claimant, Claimant} -> true; - {claimant, _Other} -> - {claimant, "Cannot modify claimant property"} - end; -validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {claimant, _Claimant}) -> - %% cannot update claimant - {claimant, "Cannot update claimant property"}; -validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {ddl, _DDL}) -> - %% cannot update time series DDL - {ddl, "Cannot update time series data definition"}; -validate_core_prop(update, {_Bucket, _BucketName}, - _Existing, {table_def, _DDL}) -> - %% cannot update time series DDL (or, if it slips past riak_kv_console, - %% the table_def SQL(ish) code that is parsed to make a DDL) - %% - %% Defining the table_def atom here also sidesteps occasional - %% errors from existing_atom functions - {ddl, "Cannot update time series data definition"}; -validate_core_prop(create, {_Bucket, undefined}, - undefined, {active, false}) -> - %% first creation call that sets active to false is always valid - true; -validate_core_prop(create, {_Bucket, undefined}, - _Existing, {active, false}) -> - %% subsequent creation calls that leaves active false is valid - true; -validate_core_prop(update, {_Bucket, _}, _Existing, - {active, true}) -> - %% calls to update that do not modify active are valid - true; -validate_core_prop(_, {_Bucket, _}, _Existing, - {active, _}) -> - %% subsequent creation calls or update calls cannot modify active (it is modified directly - %% by riak_core_claimant) - {active, "Cannot modify active property"}; -validate_core_prop(_, _, _, _) -> - %% all other properties are valid from the perspective of riak_core - true. - -validate_reserved_names(Bucket) -> - case validate_reserved_name(Bucket) of - ok -> []; - ErrStr -> [{reserved_name, ErrStr}] - end. - -validate_reserved_name({<<"any">>, _}) -> - "The name 'any' may not be used for bucket " - "types"; -validate_reserved_name(_) -> ok. - --spec defaults() -> [{atom(), any()}]. - -defaults() -> - application:get_env(riak_core, - default_bucket_props, - undefined). - --spec append_defaults([{atom(), any()}]) -> ok. - -append_defaults(Items) when is_list(Items) -> - OldDefaults = application:get_env(riak_core, - default_bucket_props, - []), - NewDefaults = merge(OldDefaults, Items), - FixedDefaults = case riak_core:bucket_fixups() of - [] -> NewDefaults; - Fixups -> - riak_core_ring_manager:run_fixups(Fixups, - default, - NewDefaults) - end, - application:set_env(riak_core, - default_bucket_props, - FixedDefaults), - %% do a noop transform on the ring, to make the fixups re-run - catch riak_core_ring_manager:ring_trans(fun (Ring, _) -> - {new_ring, Ring} - end, - undefined), - ok. - --spec resolve([{atom(), any()}], - [{atom(), any()}]) -> [{atom(), any()}]. 
- -%%noinspection ErlangUnusedVariable -resolve(PropsA, PropsB) - when is_list(PropsA) andalso is_list(PropsB) -> - PropsASorted = lists:ukeysort(1, PropsA), - PropsBSorted = lists:ukeysort(1, PropsB), - {_, Resolved} = lists:foldl(fun ({KeyA, _} = PropA, - {[{KeyA, _} = PropB | RestB], Acc}) -> - {RestB, - [{KeyA, resolve_prop(PropA, PropB)} - | Acc]}; - (PropA, {RestB, Acc}) -> - {RestB, [PropA | Acc]} - end, - {PropsBSorted, []}, - PropsASorted), - Resolved. - -resolve_prop({allow_mult, Mult1}, - {allow_mult, Mult2}) -> - Mult1 orelse - Mult2; %% assumes allow_mult=true is default -resolve_prop({basic_quorum, Basic1}, - {basic_quorum, Basic2}) -> - Basic1 andalso Basic2; -resolve_prop({big_vclock, Big1}, {big_vclock, Big2}) -> - max(Big1, Big2); -resolve_prop({chash_keyfun, KeyFun1}, - {chash_keyfun, _KeyFun2}) -> - KeyFun1; %% arbitrary choice -resolve_prop({dw, DW1}, {dw, DW2}) -> - %% 'quorum' wins over set numbers - max(DW1, DW2); -resolve_prop({last_write_wins, LWW1}, - {last_write_wins, LWW2}) -> - LWW1 andalso LWW2; -resolve_prop({linkfun, LinkFun1}, - {linkfun, _LinkFun2}) -> - LinkFun1; %% arbitrary choice -resolve_prop({n_val, N1}, {n_val, N2}) -> max(N1, N2); -resolve_prop({notfound_ok, NF1}, {notfound_ok, NF2}) -> - NF1 orelse NF2; -resolve_prop({old_vclock, Old1}, {old_vclock, Old2}) -> - max(Old1, Old2); -resolve_prop({postcommit, PC1}, {postcommit, PC2}) -> - resolve_hooks(PC1, PC2); -resolve_prop({pr, PR1}, {pr, PR2}) -> max(PR1, PR2); -resolve_prop({precommit, PC1}, {precommit, PC2}) -> - resolve_hooks(PC1, PC2); -resolve_prop({pw, PW1}, {pw, PW2}) -> max(PW1, PW2); -resolve_prop({r, R1}, {r, R2}) -> max(R1, R2); -resolve_prop({rw, RW1}, {rw, RW2}) -> max(RW1, RW2); -resolve_prop({small_vclock, Small1}, - {small_vclock, Small2}) -> - max(Small1, Small2); -resolve_prop({w, W1}, {w, W2}) -> max(W1, W2); -resolve_prop({young_vclock, Young1}, - {young_vclock, Young2}) -> - max(Young1, Young2); -resolve_prop({_, V1}, {_, _V2}) -> V1. - -resolve_hooks(Hooks1, Hooks2) -> - lists:usort(Hooks1 ++ Hooks2). - -%% =================================================================== -%% EUnit tests -%% =================================================================== - --ifdef(TEST). 
- -simple_resolve_test() -> - Props1 = [{name, <<"test">>}, - {allow_mult, false}, - {basic_quorum, false}, - {big_vclock, 50}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, quorum}, - {last_write_wins, false}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 3}, - {notfound_ok, true}, - {old_vclock, 86400}, - {postcommit, []}, - {pr, 0}, - {precommit, [{a, b}]}, - {pw, 0}, - {r, quorum}, - {rw, quorum}, - {small_vclock, 50}, - {w, quorum}, - {young_vclock, 20}], - Props2 = [{name, <<"test">>}, - {allow_mult, true}, - {basic_quorum, true}, - {big_vclock, 60}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, 3}, - {last_write_wins, true}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 5}, - {notfound_ok, false}, - {old_vclock, 86401}, - {postcommit, [{a, b}]}, - {pr, 1}, - {precommit, [{c, d}]}, - {pw, 3}, - {r, 3}, - {rw, 3}, - {w, 1}, - {young_vclock, 30}], - Expected = [{name, <<"test">>}, - {allow_mult, true}, - {basic_quorum, false}, - {big_vclock, 60}, - {chash_keyfun, {riak_core_util, chash_std_keyfun}}, - {dw, quorum}, - {last_write_wins, false}, - {linkfun, - {modfun, riak_kv_wm_link_walker, mapreduce_linkfun}}, - {n_val, 5}, - {notfound_ok, true}, - {old_vclock, 86401}, - {postcommit, [{a, b}]}, - {pr, 1}, - {precommit, [{a, b}, {c, d}]}, - {pw, 3}, - {r, quorum}, - {rw, quorum}, - {small_vclock, 50}, - {w, quorum}, - {young_vclock, 30}], - ?assertEqual((lists:ukeysort(1, Expected)), - (lists:ukeysort(1, resolve(Props1, Props2)))). - --endif. diff --git a/src/riak_core_claim.erl b/src/riak_core_claim.erl index bb6efa098..7b3fbb228 100644 --- a/src/riak_core_claim.erl +++ b/src/riak_core_claim.erl @@ -47,10 +47,6 @@ %% In that case, Riak will minimize the cases where the constraint is violated %% and they will all exist near the origin point of the ring. -%% A good way to decide on the setting of target_n_val for your application is -%% to set it to the largest value you expect to use for any bucket's n_val. The -%% default is 4. - -module(riak_core_claim). -export([claim/1, diff --git a/src/riak_core_claimant.erl b/src/riak_core_claimant.erl index c1a698862..100c2df2d 100644 --- a/src/riak_core_claimant.erl +++ b/src/riak_core_claimant.erl @@ -25,30 +25,18 @@ %% API -export([start_link/0]). --export([leave_member/1, - remove_member/1, - force_replace/2, - replace/2, - resize_ring/1, - abort_resize/0, - plan/0, - commit/0, - clear/0, +-export([leave_member/1, remove_member/1, + force_replace/2, replace/2, resize_ring/1, + abort_resize/0, plan/0, commit/0, clear/0, ring_changed/2]). -export([reassign_indices/1]). % helpers for claim sim %% gen_server callbacks --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). - --type action() :: leave | - remove | - {replace, node()} | +-export([init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3]). + +-type action() :: leave | remove | {replace, node()} | {force_replace, node()}. -type @@ -62,12 +50,12 @@ -record(state, {last_ring_id, %% The set of staged cluster changes - changes :: [{node(), action()}], + changes :: [{node(), action()}], %% Ring computed during the last planning stage based on %% applying a set of staged cluster changes. When commiting %% changes, the computed ring must match the previous planned %% ring to be allowed. 
- next_ring :: riak_core_ring() | undefined, + next_ring :: riak_core_ring() | undefined, %% Random number seed passed to remove_node to ensure the %% current randomized remove algorithm is deterministic %% between plan and commit phases @@ -83,9 +71,7 @@ %% @doc Spawn and register the riak_core_claimant server start_link() -> - gen_server:start_link({local, ?MODULE}, - ?MODULE, - [], + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). %% @doc Determine how the cluster will be affected by the staged changes, @@ -171,9 +157,7 @@ ring_changed(Node, Ring) -> %%%=================================================================== reassign_indices(CState) -> - reassign_indices(CState, - [], - riak_core_rand:rand_seed(), + reassign_indices(CState, [], erlang:timestamp(), fun no_log/2). %%%=================================================================== @@ -181,8 +165,7 @@ reassign_indices(CState) -> %%%=================================================================== stage(Node, Action) -> - gen_server:call(claimant(), - {stage, Node, Action}, + gen_server:call(claimant(), {stage, Node, Action}, infinity). claimant() -> @@ -195,42 +178,34 @@ claimant() -> init([]) -> schedule_tick(), - {ok, - #state{changes = [], - seed = riak_core_rand:rand_seed()}}. + {ok, #state{changes = [], seed = erlang:timestamp()}}. handle_call(clear, _From, State) -> - State2 = clear_staged(State), - {reply, ok, State2}; + State2 = clear_staged(State), {reply, ok, State2}; handle_call({stage, Node, Action}, _From, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), - {Reply, State2} = maybe_stage(Node, - Action, - Ring, + {Reply, State2} = maybe_stage(Node, Action, Ring, State), {reply, Reply, State2}; handle_call(plan, _From, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), case riak_core_ring:ring_ready(Ring) of - false -> - Reply = {error, ring_not_ready}, - {reply, Reply, State}; - true -> - {Reply, State2} = generate_plan(Ring, State), - {reply, Reply, State2} + false -> + Reply = {error, ring_not_ready}, {reply, Reply, State}; + true -> + {Reply, State2} = generate_plan(Ring, State), + {reply, Reply, State2} end; handle_call(commit, _From, State) -> {Reply, State2} = commit_staged(State), {reply, Reply, State2}; handle_call(_Request, _From, State) -> - Reply = ok, - {reply, Reply, State}. + Reply = ok, {reply, Reply, State}. handle_cast(_Msg, State) -> {noreply, State}. handle_info(tick, State) -> - State2 = tick(State), - {noreply, State2}; + State2 = tick(State), {noreply, State2}; handle_info(reset_ring_id, State) -> State2 = State#state{last_ring_id = undefined}, {noreply, State2}; @@ -250,12 +225,12 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. maybe_stage(Node, Action, Ring, State = #state{changes = Changes}) -> case valid_request(Node, Action, Changes, Ring) of - true -> - Changes2 = orddict:store(Node, Action, Changes), - Changes3 = filter_changes(Changes2, Ring), - State2 = State#state{changes = Changes3}, - {ok, State2}; - Error -> {Error, State} + true -> + Changes2 = orddict:store(Node, Action, Changes), + Changes3 = filter_changes(Changes2, Ring), + State2 = State#state{changes = Changes3}, + {ok, State2}; + Error -> {Error, State} end. 
%% @private @@ -276,13 +251,13 @@ generate_plan([], _, State) -> generate_plan(Changes, Ring, State = #state{seed = Seed}) -> case compute_all_next_rings(Changes, Seed, Ring) of - {error, invalid_resize_claim} -> - {{error, invalid_resize_claim}, State}; - {ok, NextRings} -> - {_, NextRing} = hd(NextRings), - State2 = State#state{next_ring = NextRing}, - Reply = {ok, Changes, NextRings}, - {Reply, State2} + {error, invalid_resize_claim} -> + {{error, invalid_resize_claim}, State}; + {ok, NextRings} -> + {_, NextRing} = hd(NextRings), + State2 = State#state{next_ring = NextRing}, + Reply = {ok, Changes, NextRings}, + {Reply, State2} end. %% @private @@ -292,13 +267,12 @@ commit_staged(State = #state{next_ring = undefined}) -> {{error, nothing_planned}, State}; commit_staged(State) -> case maybe_commit_staged(State) of - {ok, _} -> - State2 = State#state{next_ring = undefined, - changes = [], - seed = riak_core_rand:rand_seed()}, - {ok, State2}; - not_changed -> {error, State}; - {not_changed, Reason} -> {{error, Reason}, State} + {ok, _} -> + State2 = State#state{next_ring = undefined, + changes = [], seed = erlang:timestamp()}, + {ok, State2}; + not_changed -> {error, State}; + {not_changed, Reason} -> {{error, Reason}, State} end. %% @private @@ -311,10 +285,10 @@ maybe_commit_staged(Ring, State = #state{changes = Changes, seed = Seed}) -> Changes2 = filter_changes(Changes, Ring), case compute_next_ring(Changes2, Seed, Ring) of - {error, invalid_resize_claim} -> - {ignore, invalid_resize_claim}; - {ok, NextRing} -> - maybe_commit_staged(Ring, NextRing, State) + {error, invalid_resize_claim} -> + {ignore, invalid_resize_claim}; + {ok, NextRing} -> + maybe_commit_staged(Ring, NextRing, State) end. %% @private @@ -325,13 +299,13 @@ maybe_commit_staged(Ring, NextRing, IsClaimant = Claimant == node(), IsSamePlan = same_plan(PlannedRing, NextRing), case {IsReady, IsClaimant, IsSamePlan} of - {false, _, _} -> {ignore, ring_not_ready}; - {_, false, _} -> ignore; - {_, _, false} -> {ignore, plan_changed}; - _ -> - NewRing = riak_core_ring:increment_vclock(Claimant, - NextRing), - {new_ring, NewRing} + {false, _, _} -> {ignore, ring_not_ready}; + {_, false, _} -> ignore; + {_, _, false} -> {ignore, plan_changed}; + _ -> + NewRing = riak_core_ring:increment_vclock(Claimant, + NextRing), + {new_ring, NewRing} end. %% @private @@ -342,8 +316,7 @@ maybe_commit_staged(Ring, NextRing, %% call {@link clear/0}. clear_staged(State) -> remove_joining_nodes(), - State#state{changes = [], - seed = riak_core_rand:rand_seed()}. + State#state{changes = [], seed = erlang:timestamp()}. %% @private remove_joining_nodes() -> @@ -357,26 +330,22 @@ remove_joining_nodes(Ring, _) -> Joining = riak_core_ring:members(Ring, [joining]), AreJoining = Joining /= [], case IsClaimant and AreJoining of - false -> ignore; - true -> - NewRing = remove_joining_nodes_from_ring(Claimant, - Joining, - Ring), - {new_ring, NewRing} + false -> ignore; + true -> + NewRing = remove_joining_nodes_from_ring(Claimant, + Joining, Ring), + {new_ring, NewRing} end. %% @private remove_joining_nodes_from_ring(Claimant, Joining, Ring) -> NewRing = lists:foldl(fun (Node, RingAcc) -> - riak_core_ring:set_member(Claimant, - RingAcc, - Node, - invalid, + riak_core_ring:set_member(Claimant, RingAcc, + Node, invalid, same_vclock) end, - Ring, - Joining), + Ring, Joining), NewRing2 = riak_core_ring:increment_vclock(Claimant, NewRing), NewRing2. 
@@ -384,18 +353,16 @@ remove_joining_nodes_from_ring(Claimant, Joining, %% @private valid_request(Node, Action, Changes, Ring) -> case Action of - leave -> valid_leave_request(Node, Ring); - remove -> valid_remove_request(Node, Ring); - {replace, NewNode} -> - valid_replace_request(Node, NewNode, Changes, Ring); - {force_replace, NewNode} -> - valid_force_replace_request(Node, - NewNode, - Changes, - Ring); - {resize, NewRingSize} -> - valid_resize_request(NewRingSize, Changes, Ring); - abort_resize -> valid_resize_abort_request(Ring) + leave -> valid_leave_request(Node, Ring); + remove -> valid_remove_request(Node, Ring); + {replace, NewNode} -> + valid_replace_request(Node, NewNode, Changes, Ring); + {force_replace, NewNode} -> + valid_force_replace_request(Node, NewNode, Changes, + Ring); + {resize, NewRingSize} -> + valid_resize_request(NewRingSize, Changes, Ring); + abort_resize -> valid_resize_abort_request(Ring) end. %% @private @@ -403,24 +370,23 @@ valid_leave_request(Node, Ring) -> case {riak_core_ring:all_members(Ring), riak_core_ring:member_status(Ring, Node)} of - {_, invalid} -> {error, not_member}; - {[Node], _} -> {error, only_member}; - {_, valid} -> true; - {_, joining} -> true; - {_, _} -> {error, already_leaving} + {_, invalid} -> {error, not_member}; + {[Node], _} -> {error, only_member}; + {_, valid} -> true; + {_, joining} -> true; + {_, _} -> {error, already_leaving} end. %% @private valid_remove_request(Node, Ring) -> IsClaimant = Node == riak_core_ring:claimant(Ring), - case {IsClaimant, - riak_core_ring:all_members(Ring), + case {IsClaimant, riak_core_ring:all_members(Ring), riak_core_ring:member_status(Ring, Node)} of - {true, _, _} -> {error, is_claimant}; - {_, _, invalid} -> {error, not_member}; - {_, [Node], _} -> {error, only_member}; - _ -> true + {true, _, _} -> {error, is_claimant}; + {_, _, invalid} -> {error, not_member}; + {_, [Node], _} -> {error, only_member}; + _ -> true end. %% @private @@ -429,17 +395,16 @@ valid_replace_request(Node, NewNode, Changes, Ring) -> existing_replacements(Changes)), NewJoining = (riak_core_ring:member_status(Ring, NewNode) - == joining) - and not orddict:is_key(NewNode, Changes), + == joining) + and not orddict:is_key(NewNode, Changes), case {riak_core_ring:member_status(Ring, Node), - AlreadyReplacement, - NewJoining} + AlreadyReplacement, NewJoining} of - {invalid, _, _} -> {error, not_member}; - {leaving, _, _} -> {error, already_leaving}; - {_, true, _} -> {error, already_replacement}; - {_, _, false} -> {error, invalid_replacement}; - _ -> true + {invalid, _, _} -> {error, not_member}; + {leaving, _, _} -> {error, already_leaving}; + {_, true, _} -> {error, already_replacement}; + {_, _, false} -> {error, invalid_replacement}; + _ -> true end. 
%% @private @@ -450,41 +415,40 @@ valid_force_replace_request(Node, NewNode, Changes, existing_replacements(Changes)), NewJoining = (riak_core_ring:member_status(Ring, NewNode) - == joining) - and not orddict:is_key(NewNode, Changes), + == joining) + and not orddict:is_key(NewNode, Changes), case {IsClaimant, riak_core_ring:member_status(Ring, Node), - AlreadyReplacement, - NewJoining} + AlreadyReplacement, NewJoining} of - {true, _, _, _} -> {error, is_claimant}; - {_, invalid, _, _} -> {error, not_member}; - {_, _, true, _} -> {error, already_replacement}; - {_, _, _, false} -> {error, invalid_replacement}; - _ -> true + {true, _, _, _} -> {error, is_claimant}; + {_, invalid, _, _} -> {error, not_member}; + {_, _, true, _} -> {error, already_replacement}; + {_, _, _, false} -> {error, invalid_replacement}; + _ -> true end. %% @private %% restrictions preventing resize along with other operations are temporary valid_resize_request(NewRingSize, [], Ring) -> IsResizing = riak_core_ring:num_partitions(Ring) =/= - NewRingSize, + NewRingSize, NodeCount = length(riak_core_ring:all_members(Ring)), Changes = length(riak_core_ring:pending_changes(Ring)) > - 0, + 0, case {IsResizing, NodeCount, Changes} of - {true, N, false} when N > 1 -> true; - {false, _, _} -> {error, same_size}; - {_, 1, _} -> {error, single_node}; - {_, _, true} -> {error, pending_changes} + {true, N, false} when N > 1 -> true; + {false, _, _} -> {error, same_size}; + {_, 1, _} -> {error, single_node}; + {_, _, true} -> {error, pending_changes} end. valid_resize_abort_request(Ring) -> IsResizing = riak_core_ring:is_resizing(Ring), IsPostResize = riak_core_ring:is_post_resize(Ring), case IsResizing andalso not IsPostResize of - true -> true; - false -> {error, not_resizing} + true -> true; + false -> {error, not_resizing} end. %% @private @@ -502,14 +466,14 @@ filter_changes_pred(Node, {Change, NewNode}, Changes, Ring) when (Change == replace) or (Change == force_replace) -> IsMember = riak_core_ring:member_status(Ring, Node) /= - invalid, + invalid, IsJoining = riak_core_ring:member_status(Ring, NewNode) - == joining, + == joining, NotChanging = not orddict:is_key(NewNode, Changes), IsMember and IsJoining and NotChanging; filter_changes_pred(Node, _, _, Ring) -> IsMember = riak_core_ring:member_status(Ring, Node) /= - invalid, + invalid, IsMember. %% @private @@ -522,30 +486,27 @@ existing_replacements(Changes) -> %% Determine if two rings have logically equal cluster state same_plan(RingA, RingB) -> riak_core_ring:all_member_status(RingA) == - riak_core_ring:all_member_status(RingB) + riak_core_ring:all_member_status(RingB) + andalso + riak_core_ring:all_owners(RingA) == + riak_core_ring:all_owners(RingB) andalso - riak_core_ring:all_owners(RingA) == - riak_core_ring:all_owners(RingB) - andalso - riak_core_ring:pending_changes(RingA) == - riak_core_ring:pending_changes(RingB). + riak_core_ring:pending_changes(RingA) == + riak_core_ring:pending_changes(RingB). schedule_tick() -> - Tick = application:get_env(riak_core, - claimant_tick, + Tick = application:get_env(riak_core, claimant_tick, 10000), erlang:send_after(Tick, ?MODULE, tick). 
tick(State = #state{last_ring_id = LastID}) -> case riak_core_ring_manager:get_ring_id() of - LastID -> - schedule_tick(), - State; - RingID -> - {ok, Ring} = riak_core_ring_manager:get_raw_ring(), - maybe_force_ring_update(Ring), - schedule_tick(), - State#state{last_ring_id = RingID} + LastID -> schedule_tick(), State; + RingID -> + {ok, Ring} = riak_core_ring_manager:get_raw_ring(), + maybe_force_ring_update(Ring), + schedule_tick(), + State#state{last_ring_id = RingID} end. maybe_force_ring_update(Ring) -> @@ -554,25 +515,22 @@ maybe_force_ring_update(Ring) -> %% Do not force if we have any joining nodes unless any of them are %% auto-joining nodes. Otherwise, we will force update continuously. JoinBlock = are_joining_nodes(Ring) andalso - auto_joining_nodes(Ring) == [], + auto_joining_nodes(Ring) == [], case IsClaimant and IsReady and not JoinBlock of - true -> do_maybe_force_ring_update(Ring); - false -> ok + true -> do_maybe_force_ring_update(Ring); + false -> ok end. do_maybe_force_ring_update(Ring) -> - case compute_next_ring([], - riak_core_rand:rand_seed(), - Ring) - of - {ok, NextRing} -> - case same_plan(Ring, NextRing) of - false -> - logger:warning("Forcing update of stalled ring"), - riak_core_ring_manager:force_update(); - true -> ok - end; - _ -> ok + case compute_next_ring([], erlang:timestamp(), Ring) of + {ok, NextRing} -> + case same_plan(Ring, NextRing) of + false -> + logger:warning("Forcing update of stalled ring"), + riak_core_ring_manager:force_update(); + true -> ok + end; + _ -> ok end. %% ========================================================================= @@ -586,15 +544,15 @@ compute_all_next_rings(Changes, Seed, Ring) -> %% @private compute_all_next_rings(Changes, Seed, Ring, Acc) -> case compute_next_ring(Changes, Seed, Ring) of - {error, invalid_resize_claim} = Err -> Err; - {ok, NextRing} -> - Acc2 = [{Ring, NextRing} | Acc], - case not same_plan(Ring, NextRing) of - true -> - FutureRing = riak_core_ring:future_ring(NextRing), - compute_all_next_rings([], Seed, FutureRing, Acc2); - false -> {ok, lists:reverse(Acc2)} - end + {error, invalid_resize_claim} = Err -> Err; + {ok, NextRing} -> + Acc2 = [{Ring, NextRing} | Acc], + case not same_plan(Ring, NextRing) of + true -> + FutureRing = riak_core_ring:future_ring(NextRing), + compute_all_next_rings([], Seed, FutureRing, Acc2); + false -> {ok, lists:reverse(Acc2)} + end end. %% @private @@ -603,14 +561,12 @@ compute_next_ring(Changes, Seed, Ring) -> || {Node, {replace, NewNode}} <- Changes], Ring2 = apply_changes(Ring, Changes), {_, Ring3} = maybe_handle_joining(node(), Ring2), - {_, Ring4} = do_claimant_quiet(node(), - Ring3, - Replacing, + {_, Ring4} = do_claimant_quiet(node(), Ring3, Replacing, Seed), {Valid, Ring5} = maybe_compute_resize(Ring, Ring4), case Valid of - false -> {error, invalid_resize_claim}; - true -> {ok, Ring5} + false -> {error, invalid_resize_claim}; + true -> {ok, Ring5} end. %% @private @@ -618,9 +574,9 @@ maybe_compute_resize(Orig, MbResized) -> OrigSize = riak_core_ring:num_partitions(Orig), NewSize = riak_core_ring:num_partitions(MbResized), case OrigSize =/= NewSize of - false -> {true, MbResized}; - true -> - validate_resized_ring(compute_resize(Orig, MbResized)) + false -> {true, MbResized}; + true -> + validate_resized_ring(compute_resize(Orig, MbResized)) end. 
%% @private @@ -634,10 +590,10 @@ compute_resize(Orig, Resized) -> %% need to operate on balanced, future ring (apply changes determined by claim) CState0 = riak_core_ring:future_ring(Resized), Type = case riak_core_ring:num_partitions(Orig) < - riak_core_ring:num_partitions(Resized) + riak_core_ring:num_partitions(Resized) of - true -> larger; - false -> smaller + true -> larger; + false -> smaller end, %% Each index in the original ring must perform several transfers %% to properly resize the ring. The first transfer for each index @@ -647,19 +603,16 @@ compute_resize(Orig, Resized) -> %% indexes being abandoned in a shrinking ring have %% no next owner NextOwner = try - riak_core_ring:index_owner(CStateAcc, - Idx) + riak_core_ring:index_owner(CStateAcc, + Idx) catch - error:{badmatch, false} -> - none + error:{badmatch, false} -> none end, - schedule_first_resize_transfer(Type, - IdxOwner, + schedule_first_resize_transfer(Type, IdxOwner, NextOwner, CStateAcc) end, - CState0, - riak_core_ring:all_owners(Orig)), + CState0, riak_core_ring:all_owners(Orig)), riak_core_ring:set_pending_resize(CState1, Orig). %% @private @@ -673,8 +626,7 @@ schedule_first_resize_transfer(smaller, Target = hd(riak_core_ring:preflist(<>, Resized)), riak_core_ring:schedule_resize_transfer(Resized, - IdxOwner, - Target); + IdxOwner, Target); schedule_first_resize_transfer(_Type, {Idx, Owner} = IdxOwner, Owner, Resized) -> %% partition is not being moved during expansion, first predecessor will @@ -682,15 +634,13 @@ schedule_first_resize_transfer(_Type, Target = hd(chash:predecessors(Idx - 1, riak_core_ring:chash(Resized))), riak_core_ring:schedule_resize_transfer(Resized, - IdxOwner, - Target); + IdxOwner, Target); schedule_first_resize_transfer(_, {Idx, _Owner} = IdxOwner, NextOwner, Resized) -> %% partition is being moved during expansion, schedule transfer to partition %% on new owner since it will still own some of its data riak_core_ring:schedule_resize_transfer(Resized, - IdxOwner, - {Idx, NextOwner}). + IdxOwner, {Idx, NextOwner}). %% @doc verify that resized ring was properly claimed (no owners are the dummy %% resized owner) in both the current and future ring @@ -707,8 +657,8 @@ validate_resized_ring(Ring) -> || {Idx, Owner} <- FutureOwners, not lists:member(Owner, FutureMembers)], case Invalid1 ++ Invalid2 of - [] -> {true, Ring}; - _ -> {false, Ring} + [] -> {true, Ring}; + _ -> {false, Ring} end. 
%% @private @@ -728,13 +678,13 @@ change({join, Node}, Ring) -> change({leave, Node}, Ring) -> Members = riak_core_ring:all_members(Ring), lists:member(Node, Members) orelse - throw(invalid_member), + throw(invalid_member), Ring2 = riak_core_ring:leave_member(Node, Ring, Node), Ring2; change({remove, Node}, Ring) -> Members = riak_core_ring:all_members(Ring), lists:member(Node, Members) orelse - throw(invalid_member), + throw(invalid_member), Ring2 = riak_core_ring:remove_member(Node, Ring, Node), Ring2; change({{replace, _NewNode}, Node}, Ring) -> @@ -744,15 +694,14 @@ change({{replace, _NewNode}, Node}, Ring) -> change({{force_replace, NewNode}, Node}, Ring) -> Indices = riak_core_ring:indices(Ring, Node), Reassign = [{Idx, NewNode} || Idx <- Indices], - Ring2 = riak_core_ring:add_member(NewNode, - Ring, + Ring2 = riak_core_ring:add_member(NewNode, Ring, NewNode), Ring3 = riak_core_ring:change_owners(Ring2, Reassign), Ring4 = riak_core_ring:remove_member(Node, Ring3, Node), case riak_core_ring:is_resizing(Ring4) of - true -> - replace_node_during_resize(Ring4, Node, NewNode); - false -> Ring4 + true -> + replace_node_during_resize(Ring4, Node, NewNode); + false -> Ring4 end; change({{resize, NewRingSize}, _Node}, Ring) -> riak_core_ring:resize(Ring, NewRingSize); @@ -761,8 +710,7 @@ change({abort_resize, _Node}, Ring) -> %%noinspection ErlangUnboundVariable internal_ring_changed(Node, CState) -> - {Changed, CState5} = do_claimant(Node, - CState, + {Changed, CState5} = do_claimant(Node, CState, fun log/2), inform_removed_nodes(Node, CState, CState5), %% Start/stop converge and rebalance delay timers @@ -775,61 +723,59 @@ internal_ring_changed(Node, CState) -> %% IsClaimant = riak_core_ring:claimant(CState5) =:= Node, WasPending = [] /= - riak_core_ring:pending_changes(CState), + riak_core_ring:pending_changes(CState), IsPending = [] /= - riak_core_ring:pending_changes(CState5), + riak_core_ring:pending_changes(CState5), %% Outer case statement already checks for ring_ready case {IsClaimant, Changed} of - {true, true} -> - %% STATS - %% riak_core_stat:update(converge_timer_end), - %% STATS - %% riak_core_stat:update(converge_timer_begin); - ok; - {true, false} -> - %% STATS - %% riak_core_stat:update(converge_timer_end); - ok; - _ -> ok + {true, true} -> + %% STATS + %% riak_core_stat:update(converge_timer_end), + %% STATS + %% riak_core_stat:update(converge_timer_begin); + ok; + {true, false} -> + %% STATS + %% riak_core_stat:update(converge_timer_end); + ok; + _ -> ok end, case {IsClaimant, WasPending, IsPending} of - {true, false, true} -> - %% STATS - %% riak_core_stat:update(rebalance_timer_begin); - ok; - {true, true, false} -> - %% STATS - %% riak_core_stat:update(rebalance_timer_end); - ok; - _ -> ok + {true, false, true} -> + %% STATS + %% riak_core_stat:update(rebalance_timer_begin); + ok; + {true, true, false} -> + %% STATS + %% riak_core_stat:update(rebalance_timer_end); + ok; + _ -> ok end, %% Set cluster name if it is undefined case {IsClaimant, riak_core_ring:cluster_name(CState5)} of - {true, undefined} -> - ClusterName = {Node, riak_core_rand:rand_seed()}, - {_, _} = - riak_core_util:rpc_every_member(riak_core_ring_manager, - set_cluster_name, - [ClusterName], - 1000), - ok; - _ -> - ClusterName = riak_core_ring:cluster_name(CState5), - ok + {true, undefined} -> + ClusterName = {Node, erlang:timestamp()}, + {_, _} = + riak_core_util:rpc_every_member(riak_core_ring_manager, + set_cluster_name, [ClusterName], + 1000), + ok; + _ -> + ClusterName = 
riak_core_ring:cluster_name(CState5), ok end, case Changed of - true -> - CState6 = riak_core_ring:set_cluster_name(CState5, - ClusterName), - riak_core_ring:increment_vclock(Node, CState6); - false -> CState5 + true -> + CState6 = riak_core_ring:set_cluster_name(CState5, + ClusterName), + riak_core_ring:increment_vclock(Node, CState6); + false -> CState5 end. inform_removed_nodes(Node, OldRing, NewRing) -> CName = riak_core_ring:cluster_name(NewRing), Exiting = riak_core_ring:members(OldRing, [exiting]) -- - [Node], + [Node], Invalid = riak_core_ring:members(NewRing, [invalid]), Changed = ordsets:intersection(ordsets:from_list(Exiting), @@ -841,18 +787,11 @@ inform_removed_nodes(Node, OldRing, NewRing) -> ok. do_claimant_quiet(Node, CState, Replacing, Seed) -> - do_claimant(Node, - CState, - Replacing, - Seed, + do_claimant(Node, CState, Replacing, Seed, fun no_log/2). do_claimant(Node, CState, Log) -> - do_claimant(Node, - CState, - [], - riak_core_rand:rand_seed(), - Log). + do_claimant(Node, CState, [], erlang:timestamp(), Log). do_claimant(Node, CState, Replacing, Seed, Log) -> AreJoining = are_joining_nodes(CState), @@ -860,18 +799,15 @@ do_claimant(Node, CState, Replacing, Seed, Log) -> {C2, CState3} = maybe_handle_auto_joining(Node, CState2), case AreJoining of - true -> - %% Do not rebalance if there are joining nodes - Changed = C1 or C2, - CState5 = CState3; - false -> - {C3, CState4} = maybe_update_ring(Node, - CState3, - Replacing, - Seed, - Log), - {C4, CState5} = maybe_remove_exiting(Node, CState4), - Changed = C1 or C2 or C3 or C4 + true -> + %% Do not rebalance if there are joining nodes + Changed = C1 or C2, + CState5 = CState3; + false -> + {C3, CState4} = maybe_update_ring(Node, CState3, + Replacing, Seed, Log), + {C4, CState5} = maybe_remove_exiting(Node, CState4), + Changed = C1 or C2 or C3 or C4 end, {Changed, CState5}. @@ -883,65 +819,61 @@ maybe_update_claimant(Node, CState) -> NextClaimant = hd(Members ++ [undefined]), ClaimantMissing = not lists:member(Claimant, Members), case {ClaimantMissing, NextClaimant} of - {true, Node} -> - %% Become claimant - CState2 = riak_core_ring:set_claimant(CState, Node), - CState3 = - riak_core_ring:increment_ring_version(Claimant, - CState2), - {true, CState3}; - _ -> {false, CState} + {true, Node} -> + %% Become claimant + CState2 = riak_core_ring:set_claimant(CState, Node), + CState3 = + riak_core_ring:increment_ring_version(Claimant, + CState2), + {true, CState3}; + _ -> {false, CState} end. %% @private maybe_update_ring(Node, CState, Replacing, Seed, Log) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - case riak_core_ring:claiming_members(CState) of - [] -> - %% Consider logging an error/warning here or even - %% intentionally crashing. This state makes no logical - %% sense given that it represents a cluster without any - %% active nodes. - {false, CState}; - _ -> - Resizing = riak_core_ring:is_resizing(CState), - {Changed, CState2} = update_ring(Node, - CState, - Replacing, - Seed, - Log, - Resizing), - {Changed, CState2} - end; - _ -> {false, CState} + Node -> + case riak_core_ring:claiming_members(CState) of + [] -> + %% Consider logging an error/warning here or even + %% intentionally crashing. This state makes no logical + %% sense given that it represents a cluster without any + %% active nodes. 
+ {false, CState}; + _ -> + Resizing = riak_core_ring:is_resizing(CState), + {Changed, CState2} = update_ring(Node, CState, + Replacing, Seed, Log, + Resizing), + {Changed, CState2} + end; + _ -> {false, CState} end. %% @private maybe_remove_exiting(Node, CState) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - %% Change exiting nodes to invalid, skipping this node. - Exiting = riak_core_ring:members(CState, [exiting]) -- - [Node], - Changed = Exiting /= [], - CState2 = lists:foldl(fun (ENode, CState0) -> - ClearedCS = - riak_core_ring:clear_member_meta(Node, - CState0, - ENode), - riak_core_ring:set_member(Node, - ClearedCS, - ENode, - invalid, - same_vclock) - end, - CState, - Exiting), - {Changed, CState2}; - _ -> {false, CState} + Node -> + %% Change exiting nodes to invalid, skipping this node. + Exiting = riak_core_ring:members(CState, [exiting]) -- + [Node], + Changed = Exiting /= [], + CState2 = lists:foldl(fun (ENode, CState0) -> + ClearedCS = + riak_core_ring:clear_member_meta(Node, + CState0, + ENode), + riak_core_ring:set_member(Node, + ClearedCS, + ENode, + invalid, + same_vclock) + end, + CState, Exiting), + {Changed, CState2}; + _ -> {false, CState} end. %% @private @@ -955,11 +887,10 @@ auto_joining_nodes(CState) -> %% case application:get_env(riak_core, staged_joins, true) of false -> Joining; true -> [Member || Member <- Joining, - riak_core_ring:get_member_meta(CState, - Member, + riak_core_ring:get_member_meta(CState, Member, '$autojoin') - == - true].%% end. + == + true].%% end. %% @private maybe_handle_auto_joining(Node, CState) -> @@ -975,19 +906,16 @@ maybe_handle_joining(Node, CState) -> maybe_handle_joining(Node, Joining, CState) -> Claimant = riak_core_ring:claimant(CState), case Claimant of - Node -> - Changed = Joining /= [], - CState2 = lists:foldl(fun (JNode, CState0) -> - riak_core_ring:set_member(Node, - CState0, - JNode, - valid, - same_vclock) - end, - CState, - Joining), - {Changed, CState2}; - _ -> {false, CState} + Node -> + Changed = Joining /= [], + CState2 = lists:foldl(fun (JNode, CState0) -> + riak_core_ring:set_member(Node, CState0, + JNode, valid, + same_vclock) + end, + CState, Joining), + {Changed, CState2}; + _ -> {false, CState} end. %% @private @@ -996,10 +924,7 @@ update_ring(CNode, CState, Replacing, Seed, Log, Next0 = riak_core_ring:pending_changes(CState), ?ROUT("Members: ~p~n", [riak_core_ring:members(CState, - [joining, - valid, - leaving, - exiting, + [joining, valid, leaving, exiting, invalid])]), ?ROUT("Updating ring :: next0 : ~p~n", [Next0]), %% Remove tuples from next for removed nodes @@ -1009,7 +934,7 @@ update_ring(CNode, CState, Replacing, Seed, Log, {Owner, NextOwner, _} = riak_core_ring:next_owner(NInfo), not lists:member(Owner, InvalidMembers) and - not lists:member(NextOwner, InvalidMembers) + not lists:member(NextOwner, InvalidMembers) end, Next0), CState2 = riak_core_ring:set_pending_changes(CState, @@ -1021,9 +946,7 @@ update_ring(CNode, CState, Replacing, Seed, Log, [riak_core_ring:pending_changes(CState3)]), %% Ressign leaving/inactive indices {RingChanged2, CState4} = reassign_indices(CState3, - Replacing, - Seed, - Log), + Replacing, Seed, Log), ?ROUT("Updating ring :: next2 : ~p~n", [riak_core_ring:pending_changes(CState4)]), %% Rebalance the ring as necessary. 
If pending changes exist ring @@ -1037,20 +960,20 @@ update_ring(CNode, CState, Replacing, Seed, Log, NextChanged = Next0 /= Next4, Changed = NextChanged or RingChanged1 or RingChanged2, case Changed of - true -> - OldS = ordsets:from_list([{Idx, O, NO} - || {Idx, O, NO, _, _} <- Next0]), - NewS = ordsets:from_list([{Idx, O, NO} - || {Idx, O, NO, _, _} <- Next4]), - Diff = ordsets:subtract(NewS, OldS), - _ = [Log(next, NChange) || NChange <- Diff], - ?ROUT("Updating ring :: next3 : ~p~n", [Next4]), - CState5 = riak_core_ring:set_pending_changes(CState4, - Next4), - CState6 = riak_core_ring:increment_ring_version(CNode, - CState5), - {true, CState6}; - false -> {false, CState} + true -> + OldS = ordsets:from_list([{Idx, O, NO} + || {Idx, O, NO, _, _} <- Next0]), + NewS = ordsets:from_list([{Idx, O, NO} + || {Idx, O, NO, _, _} <- Next4]), + Diff = ordsets:subtract(NewS, OldS), + _ = [Log(next, NChange) || NChange <- Diff], + ?ROUT("Updating ring :: next3 : ~p~n", [Next4]), + CState5 = riak_core_ring:set_pending_changes(CState4, + Next4), + CState6 = riak_core_ring:increment_ring_version(CNode, + CState5), + {true, CState6}; + false -> {false, CState} end; update_ring(CNode, CState, _Replacing, _Seed, _Log, true) -> @@ -1060,17 +983,17 @@ update_ring(CNode, CState, _Replacing, _Seed, _Log, riak_core_ring:maybe_abort_resize(CState1), Changed = Installed orelse Aborted, case Changed of - true -> - CState3 = riak_core_ring:increment_ring_version(CNode, - CState2), - {true, CState3}; - false -> {false, CState} + true -> + CState3 = riak_core_ring:increment_ring_version(CNode, + CState2), + {true, CState3}; + false -> {false, CState} end. maybe_install_resized_ring(CState) -> case riak_core_ring:is_resize_complete(CState) of - true -> {true, riak_core_ring:future_ring(CState)}; - false -> {false, CState} + true -> {true, riak_core_ring:future_ring(CState)}; + false -> {false, CState} end. 
%% @private @@ -1081,28 +1004,25 @@ transfer_ownership(CState, Log) -> {_, NewOwner, S} = riak_core_ring:next_owner(NInfo), not - ((S == complete) and - (riak_core_ring:index_owner(CState, - Idx) - =:= NewOwner)) + ((S == complete) and + (riak_core_ring:index_owner(CState, Idx) + =:= NewOwner)) end, Next), CState2 = lists:foldl(fun (NInfo = {Idx, _, _, _, _}, CState0) -> case riak_core_ring:next_owner(NInfo) of - {_, Node, complete} -> - Log(ownership, {Idx, Node, CState0}), - riak_core_ring:transfer_node(Idx, - Node, - CState0); - _ -> CState0 + {_, Node, complete} -> + Log(ownership, {Idx, Node, CState0}), + riak_core_ring:transfer_node(Idx, Node, + CState0); + _ -> CState0 end end, - CState, - Next2), + CState, Next2), NextChanged = Next2 /= Next, RingChanged = riak_core_ring:all_owners(CState) /= - riak_core_ring:all_owners(CState2), + riak_core_ring:all_owners(CState2), Changed = NextChanged or RingChanged, CState3 = riak_core_ring:set_pending_changes(CState2, Next2), @@ -1113,28 +1033,18 @@ reassign_indices(CState, Replacing, Seed, Log) -> Next = riak_core_ring:pending_changes(CState), Invalid = riak_core_ring:members(CState, [invalid]), CState2 = lists:foldl(fun (Node, CState0) -> - remove_node(CState0, - Node, - invalid, - Replacing, - Seed, - Log) + remove_node(CState0, Node, invalid, Replacing, + Seed, Log) end, - CState, - Invalid), + CState, Invalid), CState3 = case Next of [] -> Leaving = riak_core_ring:members(CState, [leaving]), lists:foldl(fun (Node, CState0) -> - remove_node(CState0, - Node, - leaving, - Replacing, - Seed, - Log) + remove_node(CState0, Node, leaving, + Replacing, Seed, Log) end, - CState2, - Leaving); + CState2, Leaving); _ -> CState2 end, Owners1 = riak_core_ring:all_owners(CState), @@ -1166,17 +1076,15 @@ handle_down_nodes(CState, Next) -> [leaving, invalid]), DownMembers = riak_core_ring:members(CState, [down]), Next2 = [begin - OwnerLeaving = lists:member(O, LeavingMembers), - NextDown = lists:member(NO, DownMembers), - case OwnerLeaving and NextDown of - true -> - Active = riak_core_ring:active_members(CState) -- [O], - RNode = - lists:nth(riak_core_rand:uniform(length(Active)), - Active), - {Idx, O, RNode, Mods, Status}; - _ -> T - end + OwnerLeaving = lists:member(O, LeavingMembers), + NextDown = lists:member(NO, DownMembers), + case OwnerLeaving and NextDown of + true -> + Active = riak_core_ring:active_members(CState) -- [O], + RNode = lists:nth(rand:uniform(length(Active)), Active), + {Idx, O, RNode, Mods, Status}; + _ -> T + end end || T = {Idx, O, NO, Mods, Status} <- Next], Next3 = [T @@ -1196,12 +1104,7 @@ reassign_indices_to(Node, NewNode, Ring) -> remove_node(CState, Node, Status, Replacing, Seed, Log) -> Indices = riak_core_ring:indices(CState, Node), - remove_node(CState, - Node, - Status, - Replacing, - Seed, - Log, + remove_node(CState, Node, Status, Replacing, Seed, Log, Indices). 
%% @private @@ -1217,8 +1120,7 @@ remove_node(CState, Node, Status, Replacing, Seed, Log, CStateT2 = reassign_indices_to(Node, NewNode, CStateT1); error -> CStateT2 = - riak_core_gossip:remove_from_cluster(CStateT1, - Node, + riak_core_gossip:remove_from_cluster(CStateT1, Node, Seed) end, Owners1 = riak_core_ring:all_owners(CState), @@ -1249,10 +1151,8 @@ remove_node(CState, Node, Status, Replacing, Seed, Log, replace_node_during_resize(CState0, Node, NewNode) -> PostResize = riak_core_ring:is_post_resize(CState0), - CState1 = replace_node_during_resize(CState0, - Node, - NewNode, - PostResize), + CState1 = replace_node_during_resize(CState0, Node, + NewNode, PostResize), riak_core_ring:increment_ring_version(riak_core_ring:claimant(CState1), CState1). @@ -1262,16 +1162,14 @@ replace_node_during_resize(CState0, Node, NewNode, %% transfers where the target is owned by Node. CState1 = riak_core_ring:reschedule_resize_transfers(CState0, - Node, - NewNode), + Node, NewNode), %% since the resized chash is carried directly in state vs. being rebuilt via next %% list, perform reassignment {ok, FutureCHash} = riak_core_ring:resized_ring(CState1), FutureCState = riak_core_ring:set_chash(CState1, FutureCHash), - ReassignedFuture = reassign_indices_to(Node, - NewNode, + ReassignedFuture = reassign_indices_to(Node, NewNode, FutureCState), ReassignedCHash = riak_core_ring:chash(ReassignedFuture), diff --git a/src/riak_core_gossip.erl b/src/riak_core_gossip.erl index 5f517c4fd..ade325237 100644 --- a/src/riak_core_gossip.erl +++ b/src/riak_core_gossip.erl @@ -35,22 +35,13 @@ -export([start_link/0, stop/0]). --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). - --export([distribute_ring/1, - send_ring/1, - send_ring/2, - remove_from_cluster/2, - remove_from_cluster/3, - random_gossip/1, - recursive_gossip/1, - random_recursive_gossip/1, - rejoin/2]). +-export([init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3]). + +-export([distribute_ring/1, send_ring/1, send_ring/2, + remove_from_cluster/2, remove_from_cluster/3, + random_gossip/1, recursive_gossip/1, + random_recursive_gossip/1, rejoin/2]). %% Default gossip rate: allow at most 45 gossip messages every 10 seconds -define(DEFAULT_LIMIT, {45, 10000}). @@ -80,9 +71,7 @@ send_ring(FromNode, ToNode) -> {send_ring_to, ToNode}). start_link() -> - gen_server:start_link({local, ?MODULE}, - ?MODULE, - [], + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). stop() -> gen_server:cast(?MODULE, stop). @@ -93,9 +82,9 @@ rejoin(Node, Ring) -> %% @doc Gossip state to a random node in the ring. random_gossip(Ring) -> case riak_core_ring:random_other_active_node(Ring) of - no_node -> % must be single node cluster - ok; - RandomNode -> send_ring(node(), RandomNode) + no_node -> % must be single node cluster + ok; + RandomNode -> send_ring(node(), RandomNode) end. %% @doc Gossip state to a fixed set of nodes determined from a binary @@ -119,15 +108,13 @@ recursive_gossip(Ring) -> %% and therefore we fallback to random_recursive_gossip as necessary. Active = riak_core_ring:active_members(Ring), case lists:member(node(), Active) of - true -> recursive_gossip(Ring, node()); - false -> random_recursive_gossip(Ring) + true -> recursive_gossip(Ring, node()); + false -> random_recursive_gossip(Ring) end. 
random_recursive_gossip(Ring) -> Active = riak_core_ring:active_members(Ring), - RNode = - lists:nth(riak_core_rand:uniform(length(Active)), - Active), + RNode = lists:nth(rand:uniform(length(Active)), Active), recursive_gossip(Ring, RNode). %% =================================================================== @@ -138,8 +125,7 @@ random_recursive_gossip(Ring) -> init(_State) -> schedule_next_reset(), {Tokens, _} = application:get_env(riak_core, - gossip_limit, - ?DEFAULT_LIMIT), + gossip_limit, ?DEFAULT_LIMIT), State = #state{gossip_tokens = Tokens}, {ok, State}. @@ -164,8 +150,7 @@ handle_cast({distribute_ring, Ring}, State) -> riak_core_ring:check_tainted(Ring, "Error: riak_core_gossip/distribute_ring " ":: Sending tainted ring over gossip"), - gen_server:abcast(Nodes, - ?MODULE, + gen_server:abcast(Nodes, ?MODULE, {reconcile_ring, Ring}), {noreply, State}; handle_cast({reconcile_ring, OtherRing}, State) -> @@ -184,18 +169,18 @@ handle_cast(gossip_ring, State) -> handle_cast({rejoin, OtherRing}, State) -> {ok, Ring} = riak_core_ring_manager:get_raw_ring(), SameCluster = riak_core_ring:cluster_name(Ring) =:= - riak_core_ring:cluster_name(OtherRing), + riak_core_ring:cluster_name(OtherRing), case SameCluster of - true -> - OtherNode = riak_core_ring:owner_node(OtherRing), - case riak_core:join(node(), OtherNode, true, true) of - ok -> ok; - {error, Reason} -> - logger:error("Could not rejoin cluster: ~p", [Reason]), - ok - end, - {noreply, State}; - false -> {noreply, State} + true -> + OtherNode = riak_core_ring:owner_node(OtherRing), + case riak_core:join(node(), OtherNode, true, true) of + ok -> ok; + {error, Reason} -> + logger:error("Could not rejoin cluster: ~p", [Reason]), + ok + end, + {noreply, State}; + false -> {noreply, State} end; handle_cast(_, State) -> {noreply, State}. @@ -203,8 +188,7 @@ handle_info(reset_tokens, State) -> schedule_next_reset(), gen_server:cast(?MODULE, gossip_ring), {Tokens, _} = application:get_env(riak_core, - gossip_limit, - ?DEFAULT_LIMIT), + gossip_limit, ?DEFAULT_LIMIT), {noreply, State#state{gossip_tokens = Tokens}}; handle_info(_Info, State) -> {noreply, State}. @@ -220,8 +204,7 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. schedule_next_reset() -> {_, Reset} = application:get_env(riak_core, - gossip_limit, - ?DEFAULT_LIMIT), + gossip_limit, ?DEFAULT_LIMIT), erlang:send_after(Reset, ?MODULE, reset_tokens). %%noinspection ErlangUnboundVariable @@ -233,44 +216,42 @@ reconcile(Ring0, [OtherRing0]) -> Members = riak_core_ring:reconcile_members(Ring, OtherRing), WrongCluster = riak_core_ring:cluster_name(Ring) /= - riak_core_ring:cluster_name(OtherRing), + riak_core_ring:cluster_name(OtherRing), PreStatus = riak_core_ring:member_status(Members, OtherNode), IgnoreGossip = WrongCluster or (PreStatus =:= invalid) - or (PreStatus =:= down), + or (PreStatus =:= down), case IgnoreGossip of - true -> - Ring2 = Ring, - Changed = false; - false -> - {Changed, Ring2} = riak_core_ring:reconcile(OtherRing, - Ring) + true -> Ring2 = Ring, Changed = false; + false -> + {Changed, Ring2} = riak_core_ring:reconcile(OtherRing, + Ring) end, OtherStatus = riak_core_ring:member_status(Ring2, OtherNode), case {WrongCluster, OtherStatus, Changed} of - {true, _, _} -> - %% TODO: Tell other node to stop gossiping to this node. - %% STATS - % riak_core_stat:update(ignored_gossip), - ignore; - {_, down, _} -> - %% Tell other node to rejoin the cluster. 
- riak_core_gossip:rejoin(OtherNode, Ring2), - ignore; - {_, invalid, _} -> - %% Exiting/Removed node never saw shutdown cast, re-send. - ClusterName = riak_core_ring:cluster_name(Ring), - riak_core_ring_manager:refresh_ring(OtherNode, - ClusterName), - ignore; - {_, _, new_ring} -> - Ring3 = riak_core_ring:ring_changed(Node, Ring2), - %% STATS - % riak_core_stat:update(rings_reconciled), - log_membership_changes(Ring, Ring3), - {reconciled_ring, Ring3}; - {_, _, _} -> ignore + {true, _, _} -> + %% TODO: Tell other node to stop gossiping to this node. + %% STATS + % riak_core_stat:update(ignored_gossip), + ignore; + {_, down, _} -> + %% Tell other node to rejoin the cluster. + riak_core_gossip:rejoin(OtherNode, Ring2), + ignore; + {_, invalid, _} -> + %% Exiting/Removed node never saw shutdown cast, re-send. + ClusterName = riak_core_ring:cluster_name(Ring), + riak_core_ring_manager:refresh_ring(OtherNode, + ClusterName), + ignore; + {_, _, new_ring} -> + Ring3 = riak_core_ring:ring_changed(Node, Ring2), + %% STATS + % riak_core_stat:update(rings_reconciled), + log_membership_changes(Ring, Ring3), + {reconciled_ring, Ring3}; + {_, _, _} -> ignore end.
log_membership_changes(OldRing, NewRing) -> @@ -327,50 +308,41 @@ log_node_removed(Node, Old) -> [Node, Old]). remove_from_cluster(Ring, ExitingNode) -> - remove_from_cluster(Ring, - ExitingNode, - riak_core_rand:rand_seed()). + remove_from_cluster(Ring, ExitingNode, + erlang:timestamp()).
+%% rand:uniform_s/2 (reached via attempt_simple_transfer/7 below) requires a +%% rand state, not a raw {A, B, C} time tuple, so normalise legacy seeds here; +%% exsplus is the algorithm the removed riak_core_rand module defaulted to. +remove_from_cluster(Ring, ExitingNode, {A, B, C}) + when is_integer(A), is_integer(B), is_integer(C) -> + remove_from_cluster(Ring, ExitingNode, + rand:seed_s(exsplus, {A, B, C})); remove_from_cluster(Ring, ExitingNode, Seed) -> % Get a list of indices owned by the ExitingNode... AllOwners = riak_core_ring:all_owners(Ring), % Transfer indexes to other nodes... - ExitRing = case attempt_simple_transfer(Seed, - Ring, - AllOwners, - ExitingNode) + ExitRing = case attempt_simple_transfer(Seed, Ring, + AllOwners, ExitingNode) of - {ok, NR} -> NR; - target_n_fail -> - %% re-diagonalize - %% first hand off all claims to *any* one else, - %% just so rebalance doesn't include exiting node - Members = riak_core_ring:claiming_members(Ring), - Other = hd(lists:delete(ExitingNode, Members)), - TempRing = lists:foldl(fun ({I, N}, R) - when N == ExitingNode -> - riak_core_ring:transfer_node(I, - Other, - R); - (_, R) -> R - end, - Ring, - AllOwners), - riak_core_claim:claim_rebalance_n(TempRing, Other) + {ok, NR} -> NR; + target_n_fail -> + %% re-diagonalize + %% first hand off all claims to *any* one else, + %% just so rebalance doesn't include exiting node + Members = riak_core_ring:claiming_members(Ring), + Other = hd(lists:delete(ExitingNode, Members)), + TempRing = lists:foldl(fun ({I, N}, R) + when N == ExitingNode -> + riak_core_ring:transfer_node(I, + Other, + R); + (_, R) -> R + end, + Ring, AllOwners), + riak_core_claim:claim_rebalance_n(TempRing, Other) end, ExitRing.
attempt_simple_transfer(Seed, Ring, Owners, ExitingNode) -> - TargetN = application:get_env(riak_core, - target_n_val, + TargetN = application:get_env(riak_core, target_n_val, undefined), - attempt_simple_transfer(Seed, - Ring, - Owners, - TargetN, - ExitingNode, - 0, + attempt_simple_transfer(Seed, Ring, Owners, TargetN, + ExitingNode, 0, [{O, -TargetN} || O <- riak_core_ring:claiming_members(Ring), O /= ExitingNode]).
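%% The final argument threaded through attempt_simple_transfer/7 is a last-seen
%% table seeded with -TargetN so that every claiming member is initially
%% eligible: a partition at ring position Idx may only be handed to a node
%% whose previous position I satisfies Idx - I >= TargetN, which keeps any
%% TargetN consecutive partitions on distinct nodes. Sketch of the eligibility
%% test the clauses below apply (eligible/3 is illustrative only):
eligible(Idx, TargetN, LastSeen) ->
    [N || {N, I} <- LastSeen, Idx - I >= TargetN].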
@@ -379,52 +351,41 @@ attempt_simple_transfer(Seed, Ring, [{P, Exit} | Rest], TargetN, Exit, Idx, Last) -> %% handoff case [N || {N, I} <- Last, Idx - I >= TargetN] of - [] -> target_n_fail; - Candidates -> - %% these nodes don't violate target_n in the reverse direction - StepsToNext = fun (Node) -> - length(lists:takewhile(fun ({_, Owner}) -> - Node /= Owner - end, - Rest)) - end, - case lists:filter(fun (N) -> - Next = StepsToNext(N), - Next + 1 >= TargetN orelse - Next == length(Rest) - end, - Candidates) - of - [] -> target_n_fail; - Qualifiers -> - %% these nodes don't violate target_n forward - {Rand, Seed2} = - riak_core_rand:uniform_s(length(Qualifiers), Seed), - Chosen = lists:nth(Rand, Qualifiers), - %% choose one, and do the rest of the ring - attempt_simple_transfer(Seed2, - riak_core_ring:transfer_node(P, - Chosen, - Ring), - Rest, - TargetN, - Exit, - Idx + 1, - lists:keyreplace(Chosen, - 1, - Last, - {Chosen, Idx})) - end + [] -> target_n_fail; + Candidates -> + %% these nodes don't violate target_n in the reverse direction + StepsToNext = fun (Node) -> + length(lists:takewhile(fun ({_, Owner}) -> + Node /= Owner + end, + Rest)) + end, + case lists:filter(fun (N) -> + Next = StepsToNext(N), + Next + 1 >= TargetN orelse + Next == length(Rest) + end, + Candidates) + of + [] -> target_n_fail; + Qualifiers -> + %% these nodes don't violate target_n forward + {Rand, Seed2} = rand:uniform_s(length(Qualifiers), + Seed), + Chosen = lists:nth(Rand, Qualifiers), + %% choose one, and do the rest of the ring + attempt_simple_transfer(Seed2, + riak_core_ring:transfer_node(P, Chosen, + Ring), + Rest, TargetN, Exit, Idx + 1, + lists:keyreplace(Chosen, 1, Last, + {Chosen, Idx})) + end end; attempt_simple_transfer(Seed, Ring, [{_, N} | Rest], TargetN, Exit, Idx, Last) -> %% just keep track of seeing this node - attempt_simple_transfer(Seed, - Ring, - Rest, - TargetN, - Exit, - Idx + 1, - lists:keyreplace(N, 1, Last, {N, Idx})); + attempt_simple_transfer(Seed, Ring, Rest, TargetN, Exit, + Idx + 1, lists:keyreplace(N, 1, Last, {N, Idx})); attempt_simple_transfer(_, Ring, [], _, _, _, _) -> {ok, Ring}. diff --git a/src/riak_core_handoff_manager.erl b/src/riak_core_handoff_manager.erl index 3f78c0027..e023a06f7 100644 --- a/src/riak_core_handoff_manager.erl +++ b/src/riak_core_handoff_manager.erl @@ -18,32 +18,19 @@ -behaviour(gen_server). %% gen_server api --export([start_link/0, - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, +-export([start_link/0, init/1, handle_call/3, + handle_cast/2, handle_info/2, terminate/2, code_change/3]). %% exclusion api --export([add_exclusion/2, - get_exclusions/1, +-export([add_exclusion/2, get_exclusions/1, remove_exclusion/2]). %% handoff api --export([add_outbound/6, - add_outbound/7, - add_inbound/0, - xfer/3, - kill_xfer/3, - status/0, - status/1, - status_update/2, - set_concurrency/1, - get_concurrency/0, - set_recv_data/2, - kill_handoffs/0, +-export([add_outbound/6, add_outbound/7, add_inbound/0, + xfer/3, kill_xfer/3, status/0, status/1, + status_update/2, set_concurrency/1, get_concurrency/0, + set_recv_data/2, kill_handoffs/0, kill_handoffs_in_direction/1, handoff_change_enabled_setting/2]). @@ -58,26 +45,24 @@ -endif. -record(state, - {excl, handoffs = [] :: [handoff_status()]}). + {excl, handoffs = [] :: [handoff_status()]}). %% this can be overridden with riak_core handoff_concurrency -define(HANDOFF_CONCURRENCY, 2). 
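%% The cap counts inbound and outbound transfers together (see
%% handoff_concurrency_limit_reached/0 further down). A shell-style sketch of
%% runtime tuning through the exported API:
Limit = riak_core_handoff_manager:get_concurrency(),
ok = riak_core_handoff_manager:set_concurrency(Limit + 2).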
-define(HO_EQ(HOA, HOB), HOA#handoff_status.mod_src_tgt == - HOB#handoff_status.mod_src_tgt - andalso - HOA#handoff_status.timestamp == - HOB#handoff_status.timestamp). + HOB#handoff_status.mod_src_tgt + andalso + HOA#handoff_status.timestamp == + HOB#handoff_status.timestamp). %%%=================================================================== %%% API %%%=================================================================== start_link() -> - gen_server:start_link({local, ?MODULE}, - ?MODULE, - [], + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). init([]) -> @@ -85,12 +70,7 @@ init([]) -> add_outbound(HOType, Module, Idx, Node, VnodePid, Opts) -> - add_outbound(HOType, - Module, - Idx, - Idx, - Node, - VnodePid, + add_outbound(HOType, Module, Idx, Idx, Node, VnodePid, Opts). add_outbound(HOType, Module, SrcIdx, TargetIdx, Node, @@ -98,26 +78,20 @@ add_outbound(HOType, Module, SrcIdx, TargetIdx, Node, case application:get_env(riak_core, disable_outbound_handoff) of - {ok, true} -> {error, max_concurrency}; - _ -> - gen_server:call(?MODULE, - {add_outbound, - HOType, - Module, - SrcIdx, - TargetIdx, - Node, - VnodePid, - Opts}, - infinity) + {ok, true} -> {error, max_concurrency}; + _ -> + gen_server:call(?MODULE, + {add_outbound, HOType, Module, SrcIdx, TargetIdx, + Node, VnodePid, Opts}, + infinity) end. add_inbound() -> case application:get_env(riak_core, disable_inbound_handoff) of - {ok, true} -> {error, max_concurrency}; - _ -> gen_server:call(?MODULE, {add_inbound}, infinity) + {ok, true} -> {error, max_concurrency}; + _ -> gen_server:call(?MODULE, {add_inbound}, infinity) end. %% @doc Initiate a transfer from `SrcPartition' to `TargetPartition' @@ -130,19 +104,15 @@ xfer({SrcPartition, SrcOwner}, %% NOTE: This will not work with old nodes ReqOrigin = node(), gen_server:cast({?MODULE, SrcOwner}, - {send_handoff, - repair, - Module, - {SrcPartition, TargetPartition}, - ReqOrigin, + {send_handoff, repair, Module, + {SrcPartition, TargetPartition}, ReqOrigin, FilterModFun}). %% @doc Associate `Data' with the inbound handoff `Recv'. -spec set_recv_data(pid(), proplists:proplist()) -> ok. set_recv_data(Recv, Data) -> - gen_server:call(?MODULE, - {set_recv_data, Recv, Data}, + gen_server:call(?MODULE, {set_recv_data, Recv, Data}, infinity). status() -> status(none). @@ -159,8 +129,7 @@ status_update(ModSrcTgt, Stats) -> {status_update, ModSrcTgt, Stats}). set_concurrency(Limit) -> - gen_server:call(?MODULE, - {set_concurrency, Limit}, + gen_server:call(?MODULE, {set_concurrency, Limit}, infinity). get_concurrency() -> @@ -179,8 +148,7 @@ kill_handoffs() -> set_concurrency(0). outbound) -> ok. kill_handoffs_in_direction(Direction) -> - gen_server:call(?MODULE, - {kill_in_direction, Direction}, + gen_server:call(?MODULE, {kill_in_direction, Direction}, infinity). add_exclusion(Module, Index) -> @@ -192,8 +160,7 @@ remove_exclusion(Module, Index) -> {del_exclusion, {Module, Index}}). get_exclusions(Module) -> - gen_server:call(?MODULE, - {get_exclusions, Module}, + gen_server:call(?MODULE, {get_exclusions, Module}, infinity). 
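%% Exclusions record {Module, Index} pairs whose data has already been handed
%% off, so the local vnode is not resurrected for them. A shell-style sketch of
%% the round trip (riak_kv_vnode and index 0 as in the tests further down;
%% add/remove are casts, so they return ok immediately):
ok = riak_core_handoff_manager:add_exclusion(riak_kv_vnode, 0),
{ok, Excluded} = riak_core_handoff_manager:get_exclusions(riak_kv_vnode),
ok = riak_core_handoff_manager:remove_exclusion(riak_kv_vnode, 0).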
%%%=================================================================== @@ -205,73 +172,58 @@ handle_call({get_exclusions, Module}, _From, Reply = [I || {M, I} <- sets:to_list(Excl), M =:= Module], {reply, {ok, Reply}, State}; -handle_call({add_outbound, - Type, - Mod, - SrcIdx, - TargetIdx, - Node, - Pid, - Opts}, +handle_call({add_outbound, Type, Mod, SrcIdx, TargetIdx, + Node, Pid, Opts}, _From, State = #state{handoffs = HS}) -> - case send_handoff(Type, - {Mod, SrcIdx, TargetIdx}, - Node, - Pid, - HS, - Opts) + case send_handoff(Type, {Mod, SrcIdx, TargetIdx}, Node, + Pid, HS, Opts) of - {ok, - Handoff = #handoff_status{transport_pid = Sender}} -> - HS2 = HS ++ [Handoff], - {reply, {ok, Sender}, State#state{handoffs = HS2}}; - {false, - _ExistingHandoff = #handoff_status{transport_pid = - Sender}} -> - {reply, {ok, Sender}, State}; - Error -> {reply, Error, State} + {ok, + Handoff = #handoff_status{transport_pid = Sender}} -> + HS2 = HS ++ [Handoff], + {reply, {ok, Sender}, State#state{handoffs = HS2}}; + {false, + _ExistingHandoff = #handoff_status{transport_pid = + Sender}} -> + {reply, {ok, Sender}, State}; + Error -> {reply, Error, State} end; handle_call({add_inbound}, _From, State = #state{handoffs = HS}) -> case receive_handoff() of - {ok, - Handoff = #handoff_status{transport_pid = Receiver}} -> - HS2 = HS ++ [Handoff], - {reply, {ok, Receiver}, State#state{handoffs = HS2}}; - Error -> {reply, Error, State} + {ok, + Handoff = #handoff_status{transport_pid = Receiver}} -> + HS2 = HS ++ [Handoff], + {reply, {ok, Receiver}, State#state{handoffs = HS2}}; + Error -> {reply, Error, State} end; handle_call({set_recv_data, Recv, Data}, _From, State = #state{handoffs = HS}) -> - case lists:keyfind(Recv, - #handoff_status.transport_pid, + case lists:keyfind(Recv, #handoff_status.transport_pid, HS) of - false -> - throw({error, - "set_recv_data called for non-existing " - "receiver", - Recv, - Data}); - #handoff_status{} = H -> - H2 = H#handoff_status{mod_src_tgt = - proplists:get_value(mod_src_tgt, Data), - vnode_pid = - proplists:get_value(vnode_pid, Data)}, - HS2 = lists:keyreplace(Recv, - #handoff_status.transport_pid, - HS, - H2), - {reply, ok, State#state{handoffs = HS2}} + false -> + throw({error, + "set_recv_data called for non-existing " + "receiver", + Recv, Data}); + #handoff_status{} = H -> + H2 = H#handoff_status{mod_src_tgt = + proplists:get_value(mod_src_tgt, Data), + vnode_pid = + proplists:get_value(vnode_pid, Data)}, + HS2 = lists:keyreplace(Recv, + #handoff_status.transport_pid, HS, H2), + {reply, ok, State#state{handoffs = HS2}} end; handle_call({xfer_status, Xfer}, _From, State = #state{handoffs = HS}) -> TP = Xfer#handoff_status.transport_pid, - case lists:keyfind(TP, - #handoff_status.transport_pid, + case lists:keyfind(TP, #handoff_status.transport_pid, HS) of - false -> {reply, not_found, State}; - _ -> {reply, in_progress, State} + false -> {reply, not_found, State}; + _ -> {reply, in_progress, State} end; handle_call({status, Filter}, _From, State = #state{handoffs = HS}) -> @@ -280,20 +232,19 @@ handle_call({status, Filter}, _From, {reply, Status, State}; handle_call({set_concurrency, Limit}, _From, State = #state{handoffs = HS}) -> - application:set_env(riak_core, - handoff_concurrency, + application:set_env(riak_core, handoff_concurrency, Limit), case Limit < erlang:length(HS) of - true -> - %% Note: we don't update the state with the handoffs that we're - %% keeping because we'll still get the 'DOWN' messages with - %% a reason of 'max_concurrency' and we 
want to be able to do - %% something with that if necessary. - {_Keep, Discard} = lists:split(Limit, HS), - _ = [erlang:exit(Pid, max_concurrency) - || #handoff_status{transport_pid = Pid} <- Discard], - {reply, ok, State}; - false -> {reply, ok, State} + true -> + %% Note: we don't update the state with the handoffs that we're + %% keeping because we'll still get the 'DOWN' messages with + %% a reason of 'max_concurrency' and we want to be able to do + %% something with that if necessary. + {_Keep, Discard} = lists:split(Limit, HS), + _ = [erlang:exit(Pid, max_concurrency) + || #handoff_status{transport_pid = Pid} <- Discard], + {reply, ok, State}; + false -> {reply, ok, State} end; handle_call(get_concurrency, _From, State) -> Concurrency = get_concurrency_limit(), @@ -326,47 +277,34 @@ handle_cast({add_exclusion, {Mod, Idx}}, handle_cast({status_update, ModSrcTgt, StatsUpdate}, State = #state{handoffs = HS}) -> case lists:keyfind(ModSrcTgt, - #handoff_status.mod_src_tgt, - HS) + #handoff_status.mod_src_tgt, HS) of - false -> - logger:error("status_update for non-existing handoff ~p", - [ModSrcTgt]), - {noreply, State}; - HO -> - Stats2 = update_stats(StatsUpdate, - HO#handoff_status.stats), - HO2 = HO#handoff_status{stats = Stats2}, - HS2 = lists:keyreplace(ModSrcTgt, - #handoff_status.mod_src_tgt, - HS, - HO2), - {noreply, State#state{handoffs = HS2}} + false -> + logger:error("status_update for non-existing handoff ~p", + [ModSrcTgt]), + {noreply, State}; + HO -> + Stats2 = update_stats(StatsUpdate, + HO#handoff_status.stats), + HO2 = HO#handoff_status{stats = Stats2}, + HS2 = lists:keyreplace(ModSrcTgt, + #handoff_status.mod_src_tgt, HS, HO2), + {noreply, State#state{handoffs = HS2}} end; -handle_cast({send_handoff, - Type, - Mod, - {Src, Target}, - ReqOrigin, - {FilterMod, FilterFun} = FMF}, +handle_cast({send_handoff, Type, Mod, {Src, Target}, + ReqOrigin, {Module, FilterFun} = FMF}, State = #state{handoffs = HS}) -> - Filter = FilterMod:FilterFun(Target), + Filter = Module:FilterFun(Target), %% TODO: make a record? 
{ok, VNode} = riak_core_vnode_manager:get_vnode_pid(Src, Mod), - case send_handoff(Type, - {Mod, Src, Target}, - ReqOrigin, - VNode, - HS, - {Filter, FMF}, - ReqOrigin, - []) + case send_handoff(Type, {Mod, Src, Target}, ReqOrigin, + VNode, HS, {Filter, FMF}, ReqOrigin, []) of - {ok, Handoff} -> - HS2 = HS ++ [Handoff], - {noreply, State#state{handoffs = HS2}}; - _ -> {noreply, State} + {ok, Handoff} -> + HS2 = HS ++ [Handoff], + {noreply, State#state{handoffs = HS2}}; + _ -> {noreply, State} end; handle_cast({kill_xfer, ModSrcTarget, Reason}, State) -> HS = State#state.handoffs, @@ -375,76 +313,75 @@ handle_cast({kill_xfer, ModSrcTarget, Reason}, State) -> handle_info({'DOWN', Ref, process, _Pid, Reason}, State = #state{handoffs = HS}) -> - case lists:keytake(Ref, - #handoff_status.transport_mon, + case lists:keytake(Ref, #handoff_status.transport_mon, HS) of - {value, - #handoff_status{mod_src_tgt = {M, S, I}, - direction = Dir, vnode_pid = Vnode, vnode_mon = VnodeM, - req_origin = Origin}, - NewHS} -> - WarnVnode = case Reason of - %% if the reason the handoff process died was anything other - %% than 'normal' we should log the reason why as an error - normal -> false; - X - when X == max_concurrency orelse - element(1, X) == shutdown andalso - element(2, X) == max_concurrency -> - logger:info("An ~w handoff of partition ~w ~w was " - "terminated\n " - " for reason: ~w~n", - [Dir, M, I, Reason]), - true; - _ -> - logger:error("An ~w handoff of partition ~w ~w was " - "terminated\n " - " for reason: ~w~n", - [Dir, M, I, Reason]), - true - end, - %% if we have the vnode process pid, tell the vnode why the - %% handoff stopped so it can clean up its state - case WarnVnode andalso is_pid(Vnode) of - true -> - riak_core_vnode:handoff_error(Vnode, 'DOWN', Reason); - _ -> - case Origin of - none -> ok; + {value, + #handoff_status{mod_src_tgt = {M, S, I}, + direction = Dir, vnode_pid = Vnode, vnode_mon = VnodeM, + req_origin = Origin}, + NewHS} -> + WarnVnode = case Reason of + %% if the reason the handoff process died was anything other + %% than 'normal' we should log the reason why as an error + normal -> false; + X + when X == max_concurrency orelse + element(1, X) == shutdown andalso + element(2, X) == max_concurrency -> + logger:info("An ~w handoff of partition ~w ~w was " + "terminated\n " + " for reason: ~w~n", + [Dir, M, I, Reason]), + true; _ -> - %% Use proplist instead so it's more - %% flexible in future, or does - %% capabilities nullify that? - Msg = {M, S, I}, - riak_core_vnode_manager:xfer_complete(Origin, Msg) - end, - ok - end, - %% No monitor on vnode for receiver - if VnodeM /= undefined -> demonitor(VnodeM); - true -> ok - end, - %% removed the handoff from the list of active handoffs - {noreply, State#state{handoffs = NewHS}}; - false -> - case lists:keytake(Ref, #handoff_status.vnode_mon, HS) - of - {value, - #handoff_status{mod_src_tgt = {M, _, I}, - direction = Dir, transport_pid = Trans, - transport_mon = TransM}, - NewHS} -> - %% In this case the vnode died and the handoff - %% sender must be killed. 
- logger:error("An ~w handoff of partition ~w ~w was " - "terminated because the vnode died", - [Dir, M, I]), - demonitor(TransM), - exit(Trans, vnode_died), - {noreply, State#state{handoffs = NewHS}}; - _ -> {noreply, State} - end + logger:error("An ~w handoff of partition ~w ~w was " + "terminated\n " + " for reason: ~w~n", + [Dir, M, I, Reason]), + true + end, + %% if we have the vnode process pid, tell the vnode why the + %% handoff stopped so it can clean up its state + case WarnVnode andalso is_pid(Vnode) of + true -> + riak_core_vnode:handoff_error(Vnode, 'DOWN', Reason); + _ -> + case Origin of + none -> ok; + _ -> + %% Use proplist instead so it's more + %% flexible in future, or does + %% capabilities nullify that? + Msg = {M, S, I}, + riak_core_vnode_manager:xfer_complete(Origin, Msg) + end, + ok + end, + %% No monitor on vnode for receiver + if VnodeM /= undefined -> demonitor(VnodeM); + true -> ok + end, + %% removed the handoff from the list of active handoffs + {noreply, State#state{handoffs = NewHS}}; + false -> + case lists:keytake(Ref, #handoff_status.vnode_mon, HS) + of + {value, + #handoff_status{mod_src_tgt = {M, _, I}, + direction = Dir, transport_pid = Trans, + transport_mon = TransM}, + NewHS} -> + %% In this case the vnode died and the handoff + %% sender must be killed. + logger:error("An ~w handoff of partition ~w ~w was " + "terminated because the vnode died", + [Dir, M, I]), + demonitor(TransM), + exit(Trans, vnode_died), + {noreply, State#state{handoffs = NewHS}}; + _ -> {noreply, State} + end end. terminate(_Reason, _State) -> ok. @@ -462,37 +399,29 @@ build_status(HO) -> transport_pid = TPid, type = Type} = HO, {status_v2, - [{mod, Mod}, - {src_partition, SrcP}, - {target_partition, TargetP}, - {src_node, SrcNode}, - {target_node, TargetNode}, - {direction, Dir}, - {status, Status}, - {start_ts, StartTS}, - {sender_pid, TPid}, - {stats, calc_stats(HO)}, + [{mod, Mod}, {src_partition, SrcP}, + {target_partition, TargetP}, {src_node, SrcNode}, + {target_node, TargetNode}, {direction, Dir}, + {status, Status}, {start_ts, StartTS}, + {sender_pid, TPid}, {stats, calc_stats(HO)}, {type, Type}]}. calc_stats(#handoff_status{stats = Stats, timestamp = StartTS, size = Size}) -> case dict:find(last_update, Stats) of - error -> no_stats; - {ok, LastUpdate} -> - Objs = dict:fetch(objs, Stats), - Bytes = dict:fetch(bytes, Stats), - CalcSize = get_size(Size), - Done = calc_pct_done(Objs, Bytes, CalcSize), - ElapsedS = timer:now_diff(LastUpdate, StartTS) / - 1000000, - ObjsS = round(Objs / ElapsedS), - BytesS = round(Bytes / ElapsedS), - [{objs_total, Objs}, - {objs_per_s, ObjsS}, - {bytes_per_s, BytesS}, - {last_update, LastUpdate}, - {size, CalcSize}, - {pct_done_decimal, Done}] + error -> no_stats; + {ok, LastUpdate} -> + Objs = dict:fetch(objs, Stats), + Bytes = dict:fetch(bytes, Stats), + CalcSize = get_size(Size), + Done = calc_pct_done(Objs, Bytes, CalcSize), + ElapsedS = timer:now_diff(LastUpdate, StartTS) / + 1000000, + ObjsS = round(Objs / ElapsedS), + BytesS = round(Bytes / ElapsedS), + [{objs_total, Objs}, {objs_per_s, ObjsS}, + {bytes_per_s, BytesS}, {last_update, LastUpdate}, + {size, CalcSize}, {pct_done_decimal, Done}] end. get_size({F, dynamic}) -> F(); @@ -506,59 +435,49 @@ filter(none) -> fun (_) -> true end; filter({Key, Value} = _Filter) -> fun ({status_v2, Status}) -> case proplists:get_value(Key, Status) of - Value -> true; - _ -> false + Value -> true; + _ -> false end end. 
-resize_transfer_filter(Ring, Mod, Src, Target) -> +resize_transfer_filter(Ring, Module, Src, Target) -> fun (K) -> - {_, Hashed} = Mod:object_info(K), - riak_core_ring:is_future_index(Hashed, - Src, - Target, + {_, Hashed} = Module:object_info(K), + riak_core_ring:is_future_index(Hashed, Src, Target, Ring) end. -resize_transfer_notsent_fun(Ring, Mod, Src) -> +resize_transfer_notsent_fun(Ring, Module, Src) -> Shrinking = riak_core_ring:num_partitions(Ring) > - riak_core_ring:future_num_partitions(Ring), - case Shrinking of - false -> NValMap = DefaultN = undefined; - true -> - NValMap = Mod:nval_map(Ring), - DefaultN = riak_core_bucket:default_object_nval() - end, + riak_core_ring:future_num_partitions(Ring), + {NValMap, DefaultN} = case Shrinking of + false -> {undefined, undefined}; + true -> + {ok, DefN} = application:get_env(riak_core, + target_n_val), + {Module:nval_map(Ring), DefN} + end, fun (Key, Acc) -> - record_seen_index(Ring, - Shrinking, - NValMap, - DefaultN, - Mod, - Src, - Key, - Acc) + record_seen_index(Ring, Shrinking, NValMap, DefaultN, + Module, Src, Key, Acc) end. record_seen_index(Ring, Shrinking, NValMap, DefaultN, - Mod, Src, Key, Seen) -> - {Bucket, Hashed} = Mod:object_info(Key), + Module, Src, Key, Seen) -> + {Bucket, Hashed} = Module:object_info(Key), CheckNVal = case Shrinking of - false -> undefined; - true -> proplists:get_value(Bucket, NValMap, DefaultN) + false -> undefined; + true -> proplists:get_value(Bucket, NValMap, DefaultN) end, - case riak_core_ring:future_index(Hashed, - Src, - CheckNVal, + case riak_core_ring:future_index(Hashed, Src, CheckNVal, Ring) of - undefined -> Seen; - FutureIndex -> ordsets:add_element(FutureIndex, Seen) + undefined -> Seen; + FutureIndex -> ordsets:add_element(FutureIndex, Seen) end. get_concurrency_limit() -> - application:get_env(riak_core, - handoff_concurrency, + application:get_env(riak_core, handoff_concurrency, ?HANDOFF_CONCURRENCY). %% true if handoff_concurrency (inbound + outbound) hasn't yet been reached @@ -571,18 +490,12 @@ handoff_concurrency_limit_reached() -> Receivers), ActiveSenders = proplists:get_value(active, Senders), get_concurrency_limit() =< - ActiveReceivers + ActiveSenders. + ActiveReceivers + ActiveSenders. send_handoff(HOType, ModSrcTarget, Node, Pid, HS, Opts) -> - send_handoff(HOType, - ModSrcTarget, - Node, - Pid, - HS, - {none, none}, - none, - Opts). + send_handoff(HOType, ModSrcTarget, Node, Pid, HS, + {none, none}, none, Opts). 
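%% resize_transfer_notsent_fun/3 above returns a fun with the (Key, Acc) shape
%% lists:foldl expects, threading an ordset of future indices that still need
%% the key's data. A usage sketch (Ring, Module, Src and Keys are assumptions,
%% standing in for the handoff fold that normally supplies them):
NotSent = resize_transfer_notsent_fun(Ring, Module, Src),
SeenFutureIdxs = lists:foldl(NotSent, ordsets:new(), Keys).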
%% @private %% @@ -601,97 +514,87 @@ send_handoff(HOType, ModSrcTarget, Node, Pid, HS, send_handoff(HOType, {Mod, Src, Target}, Node, Vnode, HS, {Filter, FilterModFun}, Origin, Opts) -> case handoff_concurrency_limit_reached() of - true -> {error, max_concurrency}; - false -> - ShouldHandoff = case lists:keyfind({Mod, Src, Target}, - #handoff_status.mod_src_tgt, - HS) - of - false -> true; - Handoff = #handoff_status{target_node = Node, - vnode_pid = Vnode} -> - {false, Handoff}; - #handoff_status{transport_pid = Sender} -> - %% found a running handoff with a different vnode - %% source or a different target node, kill the current - %% one and the new one will start up - erlang:exit(Sender, - resubmit_handoff_change), - true - end, - case ShouldHandoff of - true -> - VnodeM = monitor(process, Vnode), - %% start the sender process - BaseOpts = [{src_partition, Src}, - {target_partition, Target}], - case HOType of - repair -> - HOFilter = Filter, - HOAcc0 = undefined, - HONotSentFun = undefined; - resize -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - HOFilter = resize_transfer_filter(Ring, - Mod, - Src, - Target), - HOAcc0 = ordsets:new(), - HONotSentFun = resize_transfer_notsent_fun(Ring, - Mod, - Src); - _ -> - HOFilter = none, - HOAcc0 = undefined, - HONotSentFun = undefined - end, - HOOpts = [{filter, HOFilter}, - {notsent_acc0, HOAcc0}, - {notsent_fun, HONotSentFun} - | BaseOpts], - {ok, Pid} = - riak_core_handoff_sender_sup:start_sender(HOType, - Mod, - Node, - Vnode, - HOOpts), - PidM = monitor(process, Pid), - Size = validate_size(proplists:get_value(size, Opts)), - %% successfully started up a new sender handoff - {ok, - #handoff_status{transport_pid = Pid, - transport_mon = PidM, direction = outbound, - timestamp = os:timestamp(), - src_node = node(), target_node = Node, - mod_src_tgt = {Mod, Src, Target}, - vnode_pid = Vnode, vnode_mon = VnodeM, - status = [], stats = dict:new(), - type = HOType, req_origin = Origin, - filter_mod_fun = FilterModFun, - size = Size}}; - %% handoff already going, just return it - AlreadyExists = {false, _CurrentHandoff} -> - AlreadyExists - end + true -> {error, max_concurrency}; + false -> + ShouldHandoff = case lists:keyfind({Mod, Src, Target}, + #handoff_status.mod_src_tgt, HS) + of + false -> true; + Handoff = #handoff_status{target_node = Node, + vnode_pid = Vnode} -> + {false, Handoff}; + #handoff_status{transport_pid = Sender} -> + %% found a running handoff with a different vnode + %% source or a different target node, kill the current + %% one and the new one will start up + erlang:exit(Sender, resubmit_handoff_change), + true + end, + case ShouldHandoff of + true -> + VnodeM = monitor(process, Vnode), + %% start the sender process + BaseOpts = [{src_partition, Src}, + {target_partition, Target}], + case HOType of + repair -> + HOFilter = Filter, + HOAcc0 = undefined, + HONotSentFun = undefined; + resize -> + {ok, Ring} = riak_core_ring_manager:get_my_ring(), + HOFilter = resize_transfer_filter(Ring, Mod, Src, + Target), + HOAcc0 = ordsets:new(), + HONotSentFun = resize_transfer_notsent_fun(Ring, Mod, + Src); + _ -> + HOFilter = none, + HOAcc0 = undefined, + HONotSentFun = undefined + end, + HOOpts = [{filter, HOFilter}, {notsent_acc0, HOAcc0}, + {notsent_fun, HONotSentFun} + | BaseOpts], + {ok, Pid} = + riak_core_handoff_sender_sup:start_sender(HOType, Mod, + Node, Vnode, + HOOpts), + PidM = monitor(process, Pid), + Size = validate_size(proplists:get_value(size, Opts)), + %% successfully started up a new sender handoff + {ok, + 
#handoff_status{transport_pid = Pid, + transport_mon = PidM, direction = outbound, + timestamp = os:timestamp(), src_node = node(), + target_node = Node, + mod_src_tgt = {Mod, Src, Target}, + vnode_pid = Vnode, vnode_mon = VnodeM, + status = [], stats = dict:new(), type = HOType, + req_origin = Origin, + filter_mod_fun = FilterModFun, size = Size}}; + %% handoff already going, just return it + AlreadyExists = {false, _CurrentHandoff} -> + AlreadyExists + end end. %% spawn a receiver process receive_handoff() -> case handoff_concurrency_limit_reached() of - true -> {error, max_concurrency}; - false -> - {ok, Pid} = - riak_core_handoff_receiver_sup:start_receiver(), - PidM = monitor(process, Pid), - %% successfully started up a new receiver - {ok, - #handoff_status{transport_pid = Pid, - transport_mon = PidM, direction = inbound, - timestamp = os:timestamp(), - mod_src_tgt = {undefined, undefined, undefined}, - src_node = undefined, target_node = undefined, - status = [], stats = dict:new(), - req_origin = none}} + true -> {error, max_concurrency}; + false -> + {ok, Pid} = + riak_core_handoff_receiver_sup:start_receiver(), + PidM = monitor(process, Pid), + %% successfully started up a new receiver + {ok, + #handoff_status{transport_pid = Pid, + transport_mon = PidM, direction = inbound, + timestamp = os:timestamp(), + mod_src_tgt = {undefined, undefined, undefined}, + src_node = undefined, target_node = undefined, + status = [], stats = dict:new(), req_origin = none}} end. update_stats(StatsUpdate, Stats) -> @@ -704,7 +607,7 @@ update_stats(StatsUpdate, Stats) -> validate_size(Size = {N, U}) when is_number(N) andalso - N > 0 andalso (U =:= bytes orelse U =:= objects) -> + N > 0 andalso (U =:= bytes orelse U =:= objects) -> Size; validate_size(Size = {F, dynamic}) when is_function(F) -> @@ -718,64 +621,52 @@ validate_size(_) -> undefined. %% can have two simultaneous inbound xfers. kill_xfer_i(ModSrcTarget, Reason, HS) -> case lists:keytake(ModSrcTarget, - #handoff_status.mod_src_tgt, - HS) + #handoff_status.mod_src_tgt, HS) of - false -> HS; - {value, Xfer, HS2} -> - #handoff_status{mod_src_tgt = - {Mod, SrcPartition, TargetPartition}, - type = Type, target_node = TargetNode, - src_node = SrcNode, transport_pid = TP} = - Xfer, - Msg = "~p transfer of ~p from ~p ~p to ~p ~p " - "killed for reason ~p", - case Type of - undefined -> ok; - _ -> - logger:info(Msg, - [Type, - Mod, - SrcNode, - SrcPartition, - TargetNode, - TargetPartition, - Reason]) - end, - exit(TP, {kill_xfer, Reason}), - kill_xfer_i(ModSrcTarget, Reason, HS2) + false -> HS; + {value, Xfer, HS2} -> + #handoff_status{mod_src_tgt = + {Mod, SrcPartition, TargetPartition}, + type = Type, target_node = TargetNode, + src_node = SrcNode, transport_pid = TP} = + Xfer, + Msg = "~p transfer of ~p from ~p ~p to ~p ~p " + "killed for reason ~p", + case Type of + undefined -> ok; + _ -> + logger:info(Msg, + [Type, Mod, SrcNode, SrcPartition, TargetNode, + TargetPartition, Reason]) + end, + exit(TP, {kill_xfer, Reason}), + kill_xfer_i(ModSrcTarget, Reason, HS2) end. handoff_change_enabled_setting(EnOrDis, Direction) -> SetFun = case EnOrDis of - enable -> fun handoff_enable/1; - disable -> fun handoff_disable/1 + enable -> fun handoff_enable/1; + disable -> fun handoff_disable/1 end, case Direction of - inbound -> SetFun(inbound); - outbound -> SetFun(outbound); - both -> - SetFun(inbound), - SetFun(outbound) + inbound -> SetFun(inbound); + outbound -> SetFun(outbound); + both -> SetFun(inbound), SetFun(outbound) end. 
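%% Disable is deliberately stronger than enable: it flips the app flag and then
%% kills in-flight transfers in that direction, while enable only clears the
%% flag. Shell-style sketch (assuming the kill_in_direction call replies ok):
ok = riak_core_handoff_manager:handoff_change_enabled_setting(disable, inbound),
ok = riak_core_handoff_manager:handoff_change_enabled_setting(enable, both).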
handoff_enable(inbound) -> - application:set_env(riak_core, - disable_inbound_handoff, + application:set_env(riak_core, disable_inbound_handoff, false); handoff_enable(outbound) -> - application:set_env(riak_core, - disable_outbound_handoff, + application:set_env(riak_core, disable_outbound_handoff, false). handoff_disable(inbound) -> - application:set_env(riak_core, - disable_inbound_handoff, + application:set_env(riak_core, disable_inbound_handoff, true), kill_handoffs_in_direction(inbound); handoff_disable(outbound) -> - application:set_env(riak_core, - disable_outbound_handoff, + application:set_env(riak_core, disable_outbound_handoff, true), kill_handoffs_in_direction(outbound). @@ -810,12 +701,8 @@ simple_handoff() -> ?assertEqual(ok, (set_concurrency(0))), ?assertEqual({error, max_concurrency}, (add_inbound())), ?assertEqual({error, max_concurrency}, - (add_outbound(ownership, - riak_kv_vnode, - 0, - node(), - self(), - []))), + (add_outbound(ownership, riak_kv_vnode, 0, node(), + self(), []))), %% allow for a single handoff ?assertEqual(ok, (set_concurrency(1))), %% done @@ -835,11 +722,9 @@ config_disable() -> Ref = monitor(process, Pid), CatchDownFun = fun () -> receive - {'DOWN', Ref, process, Pid, max_concurrency} -> - ok; - Other -> {error, unexpected_message, Other} - after 1000 -> - {error, timeout_waiting_for_down_msg} + {'DOWN', Ref, process, Pid, max_concurrency} -> ok; + Other -> {error, unexpected_message, Other} + after 1000 -> {error, timeout_waiting_for_down_msg} end end, ?assertEqual(ok, (handoff_disable(inbound))), @@ -862,11 +747,10 @@ config_disable() -> wait_until(Fun, Retry, Delay) when Retry > 0 -> Res = Fun(), case Res of - true -> ok; - _ when Retry == 1 -> {fail, Res}; - _ -> - timer:sleep(Delay), - wait_until(Fun, Retry - 1, Delay) + true -> ok; + _ when Retry == 1 -> {fail, Res}; + _ -> + timer:sleep(Delay), wait_until(Fun, Retry - 1, Delay) end. -endif. diff --git a/src/riak_core_handoff_receiver.erl b/src/riak_core_handoff_receiver.erl index 31d36402d..d6558297e 100644 --- a/src/riak_core_handoff_receiver.erl +++ b/src/riak_core_handoff_receiver.erl @@ -26,26 +26,20 @@ -behaviour(gen_server). --export([start_link/0, - set_socket/2, +-export([start_link/0, set_socket/2, supports_batching/0]). --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). +-export([init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3]). -record(state, - {sock :: port() | undefined, - peer :: term(), - recv_timeout_len :: non_neg_integer(), - vnode_timeout_len :: non_neg_integer(), - partition :: non_neg_integer() | undefined, - vnode_mod = riak_kv_vnode :: module(), - vnode :: pid() | undefined, - count = 0 :: non_neg_integer()}). + {sock :: port() | undefined, peer :: term(), + recv_timeout_len :: non_neg_integer(), + vnode_timeout_len :: non_neg_integer(), + partition :: non_neg_integer() | undefined, + vnode_mod = riak_kv_vnode :: module(), + vnode :: pid() | undefined, + count = 0 :: non_neg_integer()}). %% set the TCP receive timeout to five minutes to be conservative. -define(RECV_TIMEOUT, 300000). @@ -63,8 +57,7 @@ supports_batching() -> true. 
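%% ?RECV_TIMEOUT above only seeds the default; the effective value is read from
%% the app env in init (next hunk), so deployments can widen it without
%% recompiling. It takes effect for receivers started afterwards:
application:set_env(riak_core, handoff_receive_timeout, 600000).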
init([]) -> {ok, #state{recv_timeout_len = - application:get_env(riak_core, - handoff_receive_timeout, + application:get_env(riak_core, handoff_receive_timeout, ?RECV_TIMEOUT), vnode_timeout_len = application:get_env(riak_core, @@ -96,26 +89,22 @@ handle_info({tcp_error, _Socket, Reason}, handle_info({tcp, Socket, Data}, State) -> [MsgType | MsgData] = Data, case catch process_message(MsgType, MsgData, State) of - {'EXIT', Reason} -> - logger:error("Handoff receiver for partition ~p exited " - "abnormally after processing ~p objects " - "from ~p: ~p", - [State#state.partition, - State#state.count, - State#state.peer, - Reason]), - {stop, normal, State}; - NewState when is_record(NewState, state) -> - InetMod = inet, - InetMod:setopts(Socket, [{active, once}]), - {noreply, NewState, State#state.recv_timeout_len} + {'EXIT', Reason} -> + logger:error("Handoff receiver for partition ~p exited " + "abnormally after processing ~p objects " + "from ~p: ~p", + [State#state.partition, State#state.count, + State#state.peer, Reason]), + {stop, normal, State}; + NewState when is_record(NewState, state) -> + inet:setopts(Socket, [{active, once}]), + {noreply, NewState, State#state.recv_timeout_len} end; handle_info(timeout, State) -> logger:error("Handoff receiver for partition ~p timed " "out after processing ~p objects from " "~p.", - [State#state.partition, - State#state.count, + [State#state.partition, State#state.count, State#state.peer]), {stop, normal, State}. @@ -136,24 +125,20 @@ process_message(?PT_MSG_BATCH, MsgData, State) -> lists:foldl(fun (Obj, StateAcc) -> process_message(?PT_MSG_OBJ, Obj, StateAcc) end, - State, - binary_to_term(MsgData)); + State, binary_to_term(MsgData)); process_message(?PT_MSG_OBJ, MsgData, State = #state{vnode = VNode, count = Count, vnode_timeout_len = VNodeTimeout}) -> - try riak_core_vnode:handoff_data(VNode, - MsgData, + try riak_core_vnode:handoff_data(VNode, MsgData, VNodeTimeout) of - ok -> State#state{count = Count + 1}; - E = {error, _} -> exit(E) + ok -> State#state{count = Count + 1}; + E = {error, _} -> exit(E) catch - exit:{timeout, _} -> - exit({error, - {vnode_timeout, - VNodeTimeout, - size(MsgData), - binary:part(MsgData, {0, min(size(MsgData), 128)})}}) + exit:{timeout, _} -> + exit({error, + {vnode_timeout, VNodeTimeout, size(MsgData), + binary:part(MsgData, {0, min(size(MsgData), 128)})}}) end; process_message(?PT_MSG_OLDSYNC, MsgData, State = #state{sock = Socket}) -> @@ -168,14 +153,14 @@ process_message(?PT_MSG_SYNC, _MsgData, process_message(?PT_MSG_VERIFY_NODE, ExpectedName, State = #state{sock = Socket, peer = Peer}) -> case binary_to_term(ExpectedName) of - _Node when _Node =:= node() -> - gen_tcp:send(Socket, <<(?PT_MSG_VERIFY_NODE):8>>), - State; - Node -> - logger:error("Handoff from ~p expects us to be ~s " - "but we are ~s.", - [Peer, Node, node()]), - exit({error, {wrong_node, Node}}) + _Node when _Node =:= node() -> + gen_tcp:send(Socket, <<(?PT_MSG_VERIFY_NODE):8>>), + State; + Node -> + logger:error("Handoff from ~p expects us to be ~s " + "but we are ~s.", + [Peer, Node, node()]), + exit({error, {wrong_node, Node}}) end; process_message(?PT_MSG_CONFIGURE, MsgData, State) -> ConfProps = binary_to_term(MsgData), @@ -194,10 +179,10 @@ terminate(_Reason, _State) -> ok. code_change(_OldVsn, State, _Extra) -> {ok, State}. 
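%% A ?PT_MSG_BATCH frame is just term_to_binary of a list of object blobs, each
%% replayed through the ?PT_MSG_OBJ clause above. Sender-side sketch (Socket,
%% Obj1 and Obj2 are assumptions; length framing is left to the socket's
%% {packet, 4} option, and ?PT_MSG_BATCH comes from the handoff header):
Batch = term_to_binary([Obj1, Obj2]),
ok = gen_tcp:send(Socket, [?PT_MSG_BATCH, Batch]).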
-safe_peername(Skt, Mod) -> - case Mod:peername(Skt) of - {ok, {Host, Port}} -> {inet_parse:ntoa(Host), Port}; - _ -> - {unknown, - unknown} % Real info is {Addr, Port} +safe_peername(Skt, Module) -> + case Module:peername(Skt) of + {ok, {Host, Port}} -> {inet_parse:ntoa(Host), Port}; + _ -> + {unknown, + unknown} % Real info is {Addr, Port} end. diff --git a/src/riak_core_node_watcher.erl b/src/riak_core_node_watcher.erl index 05ff058fd..dfd009fd2 100644 --- a/src/riak_core_node_watcher.erl +++ b/src/riak_core_node_watcher.erl @@ -444,8 +444,8 @@ broadcast(Nodes, State) -> up -> Msg = {up, node(), State#state.services}; down -> Msg = {down, node()} end, - {Mod, Fn} = State#state.bcast_mod, - Mod:Fn(Nodes, ?MODULE, Msg), + {Module, Fn} = State#state.bcast_mod, + Module:Fn(Nodes, ?MODULE, Msg), schedule_broadcast(State). schedule_broadcast(State) -> diff --git a/src/riak_core_rand.erl b/src/riak_core_rand.erl deleted file mode 100644 index 03ac0fd47..000000000 --- a/src/riak_core_rand.erl +++ /dev/null @@ -1,58 +0,0 @@ -%% Generalized random module that offers a backwards compatible API -%% around some of the changes in rand, crypto and for time units. - --module(riak_core_rand). - -%% API --export([uniform/0, - uniform/1, - uniform_s/2, - seed/0, - seed/1, - rand_seed/0, - rand_bytes/1]). - -%% As the algorithm is not changed in any place we can use the default -%% algorithm for all call here. --define(ALGO, exsplus). - -uniform() -> rand:uniform(). - -uniform(N) -> rand:uniform(N). - -%% The old random:uniform_s took a 3 touple however this is no longer -%% the case, so what we need to do if we see such a situation is to first -%% create a state using seed_s (which can take the old data) and then -%% using uniform_s with this newly generated state. -%% -%% Note that seed_s does **not** change the current seed but just -%% create a new seed state. -uniform_s(N, {A, B, C}) -> - State = rand:seed_s(?ALGO, {A, B, C}), - rand:uniform_s(N, State); -uniform_s(N, State) -> rand:uniform_s(N, State). - -seed() -> rand:seed(?ALGO). - -%% We are a bit tricky here, while random:seed did return the **prior** seed -%% rand:seed will return the **new** seed. We can work around this by first -%% getting the exported seed then using this instead. --spec seed({integer(), integer(), integer()} | - rand:export_state()) -> rand:export_state() | undefined. - -seed({_, _, _} = Seed) -> - Old = rand:export_seed(), - _New = rand:seed(?ALGO, Seed), - Old; -seed(Seed) -> - Old = rand:export_seed(), - _New = rand:seed(Seed), - Old. - -rand_bytes(Size) -> crypto:strong_rand_bytes(Size). - -%%%=================================================================== -%%% General functions -%%%=================================================================== - -rand_seed() -> erlang:timestamp(). diff --git a/src/riak_core_ring.erl b/src/riak_core_ring.erl index 0859f802f..eaecd191a 100644 --- a/src/riak_core_ring.erl +++ b/src/riak_core_ring.erl @@ -28,114 +28,50 @@ -module(riak_core_ring). --export([all_members/1, - all_owners/1, - all_preflists/2, - diff_nodes/2, - equal_rings/2, - fresh/0, - fresh/1, - fresh/2, - get_meta/2, - get_buckets/1, - index_owner/2, - my_indices/1, - num_partitions/1, - owner_node/1, - preflist/2, - random_node/1, - random_other_index/1, - random_other_index/2, - random_other_node/1, - reconcile/2, - rename_node/3, - responsible_index/2, - transfer_node/3, - update_meta/3, - remove_meta/2]). 
- --export([cluster_name/1, - set_tainted/1, - check_tainted/2, - nearly_equal/2, - claimant/1, - member_status/2, - pretty_print/2, - all_member_status/1, - update_member_meta/5, - clear_member_meta/3, - get_member_meta/3, - add_member/3, - remove_member/3, - leave_member/3, - exit_member/3, - down_member/3, - set_member/4, - set_member/5, - members/2, - set_claimant/2, - increment_vclock/2, - ring_version/1, - increment_ring_version/2, - set_pending_changes/2, - active_members/1, - claiming_members/1, - ready_members/1, - random_other_active_node/1, - down_members/1, - set_owner/2, - indices/2, - future_indices/2, - future_ring/1, - disowning_indices/2, - cancel_transfers/1, - pending_changes/1, - next_owner/1, - next_owner/2, - next_owner/3, - completed_next_owners/2, - all_next_owners/1, - change_owners/2, - handoff_complete/3, - ring_ready/0, - ring_ready/1, - ring_ready_info/1, - ring_changed/2, - set_cluster_name/2, - reconcile_names/2, - reconcile_members/2, - is_primary/2, - chash/1, - set_chash/2, - resize/2, - set_pending_resize/2, - set_pending_resize_abort/1, - maybe_abort_resize/1, - schedule_resize_transfer/3, - awaiting_resize_transfer/3, - resize_transfer_status/4, - resize_transfer_complete/4, +-export([all_members/1, all_owners/1, all_preflists/2, + diff_nodes/2, equal_rings/2, fresh/0, fresh/1, fresh/2, + get_meta/2, index_owner/2, my_indices/1, + num_partitions/1, owner_node/1, preflist/2, + random_node/1, random_other_index/1, + random_other_index/2, random_other_node/1, reconcile/2, + rename_node/3, responsible_index/2, transfer_node/3, + update_meta/3, remove_meta/2]). + +-export([cluster_name/1, set_tainted/1, check_tainted/2, + nearly_equal/2, claimant/1, member_status/2, + pretty_print/2, all_member_status/1, + update_member_meta/5, clear_member_meta/3, + get_member_meta/3, add_member/3, remove_member/3, + leave_member/3, exit_member/3, down_member/3, + set_member/4, set_member/5, members/2, set_claimant/2, + increment_vclock/2, ring_version/1, + increment_ring_version/2, set_pending_changes/2, + active_members/1, claiming_members/1, ready_members/1, + random_other_active_node/1, down_members/1, set_owner/2, + indices/2, future_indices/2, future_ring/1, + disowning_indices/2, cancel_transfers/1, + pending_changes/1, next_owner/1, next_owner/2, + next_owner/3, completed_next_owners/2, + all_next_owners/1, change_owners/2, handoff_complete/3, + ring_ready/0, ring_ready/1, ring_ready_info/1, + ring_changed/2, set_cluster_name/2, reconcile_names/2, + reconcile_members/2, is_primary/2, chash/1, set_chash/2, + resize/2, set_pending_resize/2, + set_pending_resize_abort/1, maybe_abort_resize/1, + schedule_resize_transfer/3, awaiting_resize_transfer/3, + resize_transfer_status/4, resize_transfer_complete/4, complete_resize_transfers/3, - reschedule_resize_transfers/3, - is_resizing/1, - is_post_resize/1, - is_resize_complete/1, - resized_ring/1, - set_resized_ring/2, - future_index/3, - future_index/4, - future_index/5, - is_future_index/4, - future_owner/2, - future_num_partitions/1, - vnode_type/2, + reschedule_resize_transfers/3, is_resizing/1, + is_post_resize/1, is_resize_complete/1, resized_ring/1, + set_resized_ring/2, future_index/3, future_index/4, + future_index/5, is_future_index/4, future_owner/2, + future_num_partitions/1, vnode_type/2, deletion_complete/3]). %% upgrade/1, %% downgrade/2, --export_type([riak_core_ring/0, - ring_size/0, +-export_type([riak_core_ring/0, ring_size/0, partition_id/0]). -ifdef(TEST). @@ -145,37 +81,32 @@ -endif. 
-record(chstate, - {nodename :: + {nodename :: term(), % the Node responsible for this chstate - vclock :: + vclock :: vclock:vclock() | undefined, % for this chstate object, entries are % {Node, Ctr} - chring :: + chring :: chash:chash() | undefined, % chash ring of {IndexAsInt, Node} mappings - meta :: dict:dict() | undefined, - % dict of cluster-wide other data (primarily - % bucket N-value, etc) - clustername :: {term(), term()} | undefined, - next :: + meta :: dict:dict() | undefined, + % dict of cluster-wide other data (primarily N-value, etc) + clustername :: {term(), term()} | undefined, + next :: [{integer(), term(), term(), [module()], awaiting | complete}], - members :: + members :: [{node(), {member_status(), vclock:vclock(), [{atom(), term()}]}}] | undefined, - claimant :: term(), - seen :: [{term(), vclock:vclock()}] | undefined, - rvsn :: vclock:vclock() | undefined}). + claimant :: term(), + seen :: [{term(), vclock:vclock()}] | undefined, + rvsn :: vclock:vclock() | undefined}). --type member_status() :: joining | - valid | - invalid | - leaving | - exiting | - down. +-type member_status() :: joining | valid | invalid | + leaving | exiting | down. %% type meta_entry(). Record for each entry in #chstate.meta -record(meta_entry, @@ -209,17 +140,12 @@ set_tainted(Ring) -> update_meta(riak_core_ring_tainted, true, Ring). check_tainted(Ring = #chstate{}, Msg) -> - Exit = application:get_env(riak_core, - exit_when_tainted, + Exit = application:get_env(riak_core, exit_when_tainted, false), case {get_meta(riak_core_ring_tainted, Ring), Exit} of - {{ok, true}, true} -> - riak_core:stop(Msg), - ok; - {{ok, true}, false} -> - logger:error(Msg), - ok; - _ -> ok + {{ok, true}, true} -> riak_core:stop(Msg), ok; + {{ok, true}, false} -> logger:error(Msg), ok; + _ -> ok end. %% @doc Verify that the two rings are identical expect that metadata can @@ -308,8 +234,8 @@ equal_rings(_A = #chstate{chring = RA, meta = MA}, MDA = lists:sort(dict:to_list(MA)), MDB = lists:sort(dict:to_list(MB)), case MDA =:= MDB of - false -> false; - true -> RA =:= RB + false -> false; + true -> RA =:= RB end. %% @doc This is used only when this node is creating a brand new cluster. @@ -324,8 +250,7 @@ fresh() -> -spec fresh(NodeName :: term()) -> chstate(). fresh(NodeName) -> - fresh(application:get_env(riak_core, - ring_creation_size, + fresh(application:get_env(riak_core, ring_creation_size, undefined), NodeName). @@ -363,11 +288,11 @@ resize(State, NewRingSize) -> get_meta(Key, State) -> case dict:find(Key, State#chstate.meta) of - error -> undefined; - {ok, '$removed'} -> undefined; - {ok, M} when M#meta_entry.value =:= '$removed' -> - undefined; - {ok, M} -> {ok, M#meta_entry.value} + error -> undefined; + {ok, '$removed'} -> undefined; + {ok, M} when M#meta_entry.value =:= '$removed' -> + undefined; + {ok, M} -> {ok, M#meta_entry.value} end. -spec get_meta(term(), term(), chstate()) -> {ok, @@ -375,22 +300,10 @@ get_meta(Key, State) -> get_meta(Key, Default, State) -> case get_meta(Key, State) of - undefined -> {ok, Default}; - Res -> Res + undefined -> {ok, Default}; + Res -> Res end. -%% @doc return the names of all the custom buckets stored in the ring. --spec get_buckets(State :: chstate()) -> [term()]. - -get_buckets(State) -> - Keys = dict:fetch_keys(State#chstate.meta), - lists:foldl(fun ({bucket, Bucket}, Acc) -> - [Bucket | Acc]; - (_, Acc) -> Acc - end, - [], - Keys). - %% @doc Return the node that owns the given index. 
-spec index_owner(State :: chstate(), Idx :: chash:index_as_int()) -> Node :: term(). @@ -429,8 +342,8 @@ num_partitions(State) -> future_num_partitions(State = #chstate{chring = CHRing}) -> case resized_ring(State) of - {ok, C} -> chash:size(C); - undefined -> chash:size(CHRing) + {ok, C} -> chash:size(C); + undefined -> chash:size(CHRing) end. %% @doc Return the node that is responsible for a given chstate. @@ -452,7 +365,7 @@ preflist(Key, State) -> random_node(State) -> L = all_members(State), - lists:nth(riak_core_rand:uniform(length(L)), L). + lists:nth(rand:uniform(length(L)), L). %% @doc Return a partition index not owned by the node executing this function. %% If this node owns all partitions, return any index. @@ -464,8 +377,8 @@ random_other_index(State) -> || {I, Owner} <- (?MODULE):all_owners(State), Owner =/= node()], case L of - [] -> hd(my_indices(State)); - _ -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> hd(my_indices(State)); + _ -> lists:nth(rand:uniform(length(L)), L) end. -spec random_other_index(State :: chstate(), @@ -478,8 +391,8 @@ random_other_index(State, Exclude) || {I, Owner} <- (?MODULE):all_owners(State), Owner =/= node(), not lists:member(I, Exclude)], case L of - [] -> no_indices; - _ -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> no_indices; + _ -> lists:nth(rand:uniform(length(L)), L) end. %% @doc Return a randomly-chosen node from amongst the owners other than this one. @@ -488,8 +401,8 @@ random_other_index(State, Exclude) random_other_node(State) -> case lists:delete(node(), all_members(State)) of - [] -> no_node; - L -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> no_node; + L -> lists:nth(rand:uniform(length(L)), L) end. %% @doc Return a randomly-chosen active node other than this one. @@ -498,8 +411,8 @@ random_other_node(State) -> random_other_active_node(State) -> case lists:delete(node(), active_members(State)) of - [] -> no_node; - L -> lists:nth(riak_core_rand:uniform(length(L)), L) + [] -> no_node; + L -> lists:nth(rand:uniform(length(L)), L) end. %% @doc Incorporate another node's state into our view of the Riak world. @@ -515,8 +428,8 @@ reconcile(ExternState, MyState) -> "Error: riak_core_ring/reconcile :: reconcilin" "g tainted internal ring"), case internal_reconcile(MyState, ExternState) of - {false, State} -> {no_change, State}; - {true, State} -> {new_ring, State} + {false, State} -> {no_change, State}; + {true, State} -> {new_ring, State} end. %% @doc Rename OldNode to NewNode in a Riak ring. @@ -531,15 +444,13 @@ rename_node(State = #chstate{chring = Ring, State#chstate{chring = lists:foldl(fun ({Idx, Owner}, AccIn) -> case Owner of - OldNode -> - chash:update(Idx, - NewNode, - AccIn); - _ -> AccIn + OldNode -> + chash:update(Idx, NewNode, + AccIn); + _ -> AccIn end end, - Ring, - riak_core_ring:all_owners(State)), + Ring, riak_core_ring:all_owners(State)), members = orddict:from_list(proplists:substitute_aliases([{OldNode, NewNode}], @@ -550,13 +461,13 @@ rename_node(State = #chstate{chring = Ring, Seen)), nodename = case ThisNode of - OldNode -> NewNode; - _ -> ThisNode + OldNode -> NewNode; + _ -> ThisNode end, claimant = case Claimant of - OldNode -> NewNode; - _ -> Claimant + OldNode -> NewNode; + _ -> Claimant end, vclock = vclock:increment(NewNode, State#chstate.vclock)}. 
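%% With riak_core_rand deleted, every random pick in this module is now the
%% plain rand one-liner used by random_node/1 and friends above; factored out
%% it is just (pick_random/1 is illustrative, not exported):
pick_random(L) when L =/= [] ->
    lists:nth(rand:uniform(length(L)), L).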
@@ -591,10 +502,7 @@ future_index(CHashKey, OrigIdx, State) -> future_index(CHashKey, OrigIdx, NValCheck, State) -> OrigCount = num_partitions(State), NextCount = future_num_partitions(State), - future_index(CHashKey, - OrigIdx, - NValCheck, - OrigCount, + future_index(CHashKey, OrigIdx, NValCheck, OrigCount, NextCount). future_index(CHashKey, OrigIdx, NValCheck, OrigCount, @@ -609,42 +517,41 @@ future_index(CHashKey, OrigIdx, NValCheck, OrigCount, %% Determine position of the source partition in the ring %% if OrigIdx is 0 we know the position is OrigCount (number of partitions) OrigPos = case OrigIdx of - 0 -> OrigCount; - _ -> OrigIdx div OrigInc + 0 -> OrigCount; + _ -> OrigIdx div OrigInc end, %% The distance between the key's owner (head of preflist) and the source partition %% is the position of the source in the preflist, the distance may be negative %% in which case we have wrapped around the ring. distance of zero means the source %% is the head of the preflist. OrigDist = case OrigPos - OwnerPos of - P when P < 0 -> OrigCount + P; - P -> P + P when P < 0 -> OrigCount + P; + P -> P end, %% In the case that the ring is shrinking the future index for a key whose position %% in the preflist is >= ring size may be calculated, any transfer is invalid in %% this case, return undefined. The position may also be >= an optional N value for %% the key, if this is true undefined is also returned - case check_invalid_future_index(OrigDist, - NextCount, + case check_invalid_future_index(OrigDist, NextCount, NValCheck) of - true -> undefined; - false -> - %% Determine the partition (head of preflist) that will own the key in the future ring - FuturePos = CHashInt div NextInc + 1, - NextOwner = FuturePos * NextInc, - %% Determine the partition that the key should be transferred to (has same position - %% in future preflist as source partition does in current preflist) - RingTop = trunc(math:pow(2, 160) - 1), - (NextOwner + NextInc * OrigDist) rem RingTop + true -> undefined; + false -> + %% Determine the partition (head of preflist) that will own the key in the future ring + FuturePos = CHashInt div NextInc + 1, + NextOwner = FuturePos * NextInc, + %% Determine the partition that the key should be transferred to (has same position + %% in future preflist as source partition does in current preflist) + RingTop = trunc(math:pow(2, 160) - 1), + (NextOwner + NextInc * OrigDist) rem RingTop end. check_invalid_future_index(OrigDist, NextCount, NValCheck) -> OverRingSize = OrigDist >= NextCount, OverNVal = case NValCheck of - undefined -> false; - _ -> OrigDist >= NValCheck + undefined -> false; + _ -> OrigDist >= NValCheck end, OverRingSize orelse OverNVal. @@ -656,9 +563,7 @@ check_invalid_future_index(OrigDist, NextCount, integer(), chstate()) -> boolean(). is_future_index(CHashKey, OrigIdx, TargetIdx, State) -> - FutureIndex = future_index(CHashKey, - OrigIdx, - undefined, + FutureIndex = future_index(CHashKey, OrigIdx, undefined, State), FutureIndex =:= TargetIdx. 
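%% Worked example of the arithmetic above, with a toy RingTop of 32 instead of
%% 2^160 and the owner position computed as CHashInt div OrigInc + 1 (that part
%% lives in elided context): growing OrigCount = 4 -> NextCount = 8 gives
%% OrigInc = 8 and NextInc = 4. A key with CHashInt = 13 has OwnerPos = 2; if it
%% currently sits on OrigIdx = 24 (OrigPos = 3), then OrigDist = 1. The future
%% head is FuturePos = 13 div 4 + 1 = 4, so NextOwner = 16 and the key lands on
%% (16 + 4 * 1) rem 32 = 20, preserving its preflist position one increment past
%% the new owner. Incidentally, trunc(math:pow(2, 160) - 1) evaluates to exactly
%% 2^160 in floating point (the - 1 is lost to rounding), which is precisely the
%% modulus that makes the rem wraparound correct.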
@@ -667,14 +572,13 @@ is_future_index(CHashKey, OrigIdx, TargetIdx, State) -> transfer_node(Idx, Node, MyState) -> case chash:lookup(Idx, MyState#chstate.chring) of - Node -> MyState; - _ -> - Me = MyState#chstate.nodename, - VClock = vclock:increment(Me, MyState#chstate.vclock), - CHRing = chash:update(Idx, - Node, - MyState#chstate.chring), - MyState#chstate{vclock = VClock, chring = CHRing} + Node -> MyState; + _ -> + Me = MyState#chstate.nodename, + VClock = vclock:increment(Me, MyState#chstate.vclock), + CHRing = chash:update(Idx, Node, + MyState#chstate.chring), + MyState#chstate{vclock = VClock, chring = CHRing} end. % @doc Set a key in the cluster metadata dict @@ -683,8 +587,8 @@ transfer_node(Idx, Node, MyState) -> update_meta(Key, Val, State) -> Change = case dict:find(Key, State#chstate.meta) of - {ok, OldM} -> Val /= OldM#meta_entry.value; - error -> true + {ok, OldM} -> Val /= OldM#meta_entry.value; + error -> true end, if Change -> M = #meta_entry{lastmod = @@ -703,8 +607,8 @@ update_meta(Key, Val, State) -> remove_meta(Key, State) -> case dict:find(Key, State#chstate.meta) of - {ok, _} -> update_meta(Key, '$removed', State); - error -> State + {ok, _} -> update_meta(Key, '$removed', State); + error -> State end. %% @doc Return the current claimant. @@ -727,10 +631,10 @@ set_cluster_name(State, Name) -> reconcile_names(RingA = #chstate{clustername = NameA}, RingB = #chstate{clustername = NameB}) -> case (NameA =:= undefined) or (NameB =:= undefined) of - true -> - {RingA#chstate{clustername = undefined}, - RingB#chstate{clustername = undefined}}; - false -> {RingA, RingB} + true -> + {RingA#chstate{clustername = undefined}, + RingB#chstate{clustername = undefined}}; + false -> {RingA, RingB} end. increment_vclock(Node, State) -> @@ -751,8 +655,8 @@ member_status(#chstate{members = Members}, Node) -> member_status(Members, Node); member_status(Members, Node) -> case orddict:find(Node, Members) of - {ok, {Status, _, _}} -> Status; - _ -> invalid + {ok, {Status, _, _}} -> Status; + _ -> invalid end. %% @doc Returns the current membership status for all nodes in the cluster. @@ -766,54 +670,48 @@ all_member_status(#chstate{members = Members}) -> get_member_meta(State, Member, Key) -> case orddict:find(Member, State#chstate.members) of - error -> undefined; - {ok, {_, _, Meta}} -> - case orddict:find(Key, Meta) of - error -> undefined; - {ok, Value} -> Value - end + error -> undefined; + {ok, {_, _, Meta}} -> + case orddict:find(Key, Meta) of + error -> undefined; + {ok, Value} -> Value + end end. %% @doc Set a key in the member metadata orddict update_member_meta(Node, State, Member, Key, Val) -> VClock = vclock:increment(Node, State#chstate.vclock), - State2 = update_member_meta(Node, - State, - Member, - Key, - Val, - same_vclock), + State2 = update_member_meta(Node, State, Member, Key, + Val, same_vclock), State2#chstate{vclock = VClock}. update_member_meta(Node, State, Member, Key, Val, same_vclock) -> Members = State#chstate.members, case orddict:is_key(Member, Members) of - true -> - Members2 = orddict:update(Member, - fun ({Status, VC, MD}) -> - {Status, - vclock:increment(Node, VC), - orddict:store(Key, Val, MD)} - end, - Members), - State#chstate{members = Members2}; - false -> State + true -> + Members2 = orddict:update(Member, + fun ({Status, VC, MD}) -> + {Status, vclock:increment(Node, VC), + orddict:store(Key, Val, MD)} + end, + Members), + State#chstate{members = Members2}; + false -> State end. 
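%% Shell-style sketch of the member-metadata round trip (Member and the
%% gossip_vsn key are illustrative, and Member must already be in the ring for
%% the update to take effect; note get_member_meta returns the bare value):
Ring2 = riak_core_ring:update_member_meta(node(), Ring1, Member, gossip_vsn, 2),
2 = riak_core_ring:get_member_meta(Ring2, Member, gossip_vsn).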
clear_member_meta(Node, State, Member) -> Members = State#chstate.members, case orddict:is_key(Member, Members) of - true -> - Members2 = orddict:update(Member, - fun ({Status, VC, _MD}) -> - {Status, - vclock:increment(Node, VC), - orddict:new()} - end, - Members), - State#chstate{members = Members2}; - false -> State + true -> + Members2 = orddict:update(Member, + fun ({Status, VC, _MD}) -> + {Status, vclock:increment(Node, VC), + orddict:new()} + end, + Members), + State#chstate{members = Members2}; + false -> State end. add_member(PNode, State, Node) -> @@ -834,10 +732,7 @@ down_member(PNode, State, Node) -> set_member(Node, CState, Member, Status) -> VClock = vclock:increment(Node, CState#chstate.vclock), - CState2 = set_member(Node, - CState, - Member, - Status, + CState2 = set_member(Node, CState, Member, Status, same_vclock), CState2#chstate{vclock = VClock}. @@ -846,8 +741,7 @@ set_member(Node, CState, Member, Status, same_vclock) -> fun ({_, VC, MD}) -> {Status, vclock:increment(Node, VC), MD} end, - {Status, - vclock:increment(Node, vclock:fresh()), + {Status, vclock:increment(Node, vclock:fresh()), []}, CState#chstate.members), CState#chstate{members = Members2}. @@ -904,34 +798,33 @@ change_owners(CState, Reassign) -> %% ignore the error try riak_core_ring:transfer_node(Idx, NewOwner, CState0) catch - error:{badmatch, _} -> CState0 + error:{badmatch, _} -> CState0 end end, - CState, - Reassign). + CState, Reassign). %% @doc Return all indices that a node is scheduled to give to another. disowning_indices(State, Node) -> case is_resizing(State) of - false -> - [Idx - || {Idx, Owner, _NextOwner, _Mods, _Status} - <- State#chstate.next, - Owner =:= Node]; - true -> - [Idx - || {Idx, Owner} <- all_owners(State), Owner =:= Node, - disowned_during_resize(State, Idx, Owner)] + false -> + [Idx + || {Idx, Owner, _NextOwner, _Mods, _Status} + <- State#chstate.next, + Owner =:= Node]; + true -> + [Idx + || {Idx, Owner} <- all_owners(State), Owner =:= Node, + disowned_during_resize(State, Idx, Owner)] end. disowned_during_resize(CState, Idx, Owner) -> %% catch error when index doesn't exist, we are disowning it if its going away NextOwner = try future_owner(CState, Idx) catch - _:_ -> undefined + _:_ -> undefined end, case NextOwner of - Owner -> false; - _ -> true + Owner -> false; + _ -> true end. %% @doc Returns a list of all pending ownership transfers. @@ -987,14 +880,14 @@ maybe_abort_resize(State) -> PostResize = is_post_resize(State), PendingAbort = is_resize_aborted(State), case PendingAbort andalso - Resizing andalso not PostResize + Resizing andalso not PostResize of - true -> - State1 = State#chstate{next = []}, - State2 = clear_all_resize_transfers(State1), - State3 = remove_meta('$resized_ring_abort', State2), - {true, remove_meta('$resized_ring', State3)}; - false -> {false, State} + true -> + State1 = State#chstate{next = []}, + State2 = clear_all_resize_transfers(State1), + State3 = remove_meta('$resized_ring_abort', State2), + {true, remove_meta('$resized_ring', State3)}; + false -> {false, State} end. -spec set_pending_resize_abort(chstate()) -> chstate(). 
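The membership helpers above compose as exercised by membership_test/0 later in this patch; a minimal sketch:

    Ring0 = riak_core_ring:fresh(nodeA),
    Ring1 = riak_core_ring:add_member(nodeA, Ring0, nodeB),
    joining = riak_core_ring:member_status(Ring1, nodeB),
    Ring2 = riak_core_ring:set_member(nodeA, Ring1, nodeB, valid),
    valid = riak_core_ring:member_status(Ring2, nodeB).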
@@ -1009,8 +902,7 @@ set_pending_resize_abort(State) -> schedule_resize_transfer(State, Source, TargetIdx) when is_integer(TargetIdx) -> TargetNode = index_owner(future_ring(State), TargetIdx), - schedule_resize_transfer(State, - Source, + schedule_resize_transfer(State, Source, {TargetIdx, TargetNode}); schedule_resize_transfer(State, Source, Source) -> State; @@ -1018,13 +910,11 @@ schedule_resize_transfer(State, Source, Target) -> Transfers = resize_transfers(State, Source), %% ignore if we have already scheduled a transfer from source -> target case lists:keymember(Target, 1, Transfers) of - true -> State; - false -> - Transfers1 = lists:keystore(Target, - 1, - Transfers, - {Target, ordsets:new(), awaiting}), - set_resize_transfers(State, Source, Transfers1) + true -> State; + false -> + Transfers1 = lists:keystore(Target, 1, Transfers, + {Target, ordsets:new(), awaiting}), + set_resize_transfers(State, Source, Transfers1) end. %% @doc reassign all outbound and inbound resize transfers from `Node' to `NewNode' @@ -1041,59 +931,45 @@ reschedule_resize_transfers(State = #chstate{next = Entry, StateAcc) end, - State, - Next), + State, Next), NewState#chstate{next = NewNext}. reschedule_resize_operation(N, NewNode, {Idx, N, '$resize', _Mods, _Status}, State) -> - NewEntry = {Idx, - NewNode, - '$resize', - ordsets:new(), + NewEntry = {Idx, NewNode, '$resize', ordsets:new(), awaiting}, NewState = reschedule_outbound_resize_transfers(State, - Idx, - N, - NewNode), + Idx, N, NewNode), {NewEntry, NewState}; reschedule_resize_operation(Node, NewNode, {Idx, OtherNode, '$resize', _Mods, _Status} = Entry, State) -> {Changed, NewState} = reschedule_inbound_resize_transfers({Idx, OtherNode}, - Node, - NewNode, - State), + Node, NewNode, State), case Changed of - true -> - NewEntry = {Idx, - OtherNode, - '$resize', - ordsets:new(), - awaiting}, - {NewEntry, NewState}; - false -> {Entry, State} + true -> + NewEntry = {Idx, OtherNode, '$resize', ordsets:new(), + awaiting}, + {NewEntry, NewState}; + false -> {Entry, State} end. reschedule_inbound_resize_transfers(Source, Node, NewNode, State) -> F = fun (Transfer, Acc) -> {NewXfer, NewAcc} = - reschedule_inbound_resize_transfer(Transfer, - Node, + reschedule_inbound_resize_transfer(Transfer, Node, NewNode), {NewXfer, NewAcc orelse Acc} end, - {ResizeTransfers, Changed} = lists:mapfoldl(F, - false, + {ResizeTransfers, Changed} = lists:mapfoldl(F, false, resize_transfers(State, Source)), {Changed, set_resize_transfers(State, Source, ResizeTransfers)}. -reschedule_inbound_resize_transfer({{Idx, Target}, - _, +reschedule_inbound_resize_transfer({{Idx, Target}, _, _}, Target, NewNode) -> {{{Idx, NewNode}, ordsets:new(), awaiting}, true}; @@ -1112,8 +988,7 @@ reschedule_outbound_resize_transfers(State, Idx, Node, || {Target, _, _} <- Transfers], set_resize_transfers(clear_resize_transfers(OldSource, State), - NewSource, - NewTransfers). + NewSource, NewTransfers). %% @doc returns the first awaiting resize_transfer for a {SourceIdx, SourceNode} %% pair. If all transfers for the pair are complete, undefined is returned @@ -1128,8 +1003,8 @@ awaiting_resize_transfer(State, Source, Mod) -> || {Target, Mods, Status} <- ResizeTransfers, Status =/= complete, not ordsets:is_element(Mod, Mods)], case Awaiting of - [] -> undefined; - [{Target, _, _} | _] -> Target + [] -> undefined; + [{Target, _, _} | _] -> Target end. %% @doc return the status of a resize_transfer for `Source' (an index-node pair). 
undefined @@ -1142,19 +1017,18 @@ awaiting_resize_transfer(State, Source, Mod) -> resize_transfer_status(State, Source, Target, Mod) -> ResizeTransfers = resize_transfers(State, Source), - IsComplete = case lists:keyfind(Target, - 1, + IsComplete = case lists:keyfind(Target, 1, ResizeTransfers) of - false -> undefined; - {Target, _, complete} -> true; - {Target, Mods, awaiting} -> - ordsets:is_element(Mod, Mods) + false -> undefined; + {Target, _, complete} -> true; + {Target, Mods, awaiting} -> + ordsets:is_element(Mod, Mods) end, case IsComplete of - true -> complete; - false -> awaiting; - undefined -> undefined + true -> complete; + false -> awaiting; + undefined -> undefined end. %% @doc mark a resize_transfer from `Source' to `Target' for `Mod' complete. @@ -1171,67 +1045,65 @@ resize_transfer_complete(State, {SrcIdx, _} = Source, ResizeTransfers = resize_transfers(State, Source), Transfer = lists:keyfind(Target, 1, ResizeTransfers), case Transfer of - {Target, Mods, Status} -> - VNodeMods = ordsets:from_list([VMod - || {_, VMod} - <- riak_core:vnode_modules()]), - Mods2 = ordsets:add_element(Mod, Mods), - Status2 = case {Status, Mods2} of - {complete, _} -> complete; - {awaiting, VNodeMods} -> complete; - _ -> awaiting - end, - ResizeTransfers2 = lists:keyreplace(Target, - 1, - ResizeTransfers, - {Target, Mods2, Status2}), - State1 = set_resize_transfers(State, - Source, - ResizeTransfers2), - AllComplete = lists:all(fun ({_, _, complete}) -> true; - ({_, Ms, awaiting}) -> - ordsets:is_element(Mod, Ms) - end, - ResizeTransfers2), - case AllComplete of - true -> transfer_complete(State1, SrcIdx, Mod); - false -> State1 - end; - _ -> State + {Target, Mods, Status} -> + VNodeMods = ordsets:from_list([VMod + || {_, VMod} + <- riak_core:vnode_modules()]), + Mods2 = ordsets:add_element(Mod, Mods), + Status2 = case {Status, Mods2} of + {complete, _} -> complete; + {awaiting, VNodeMods} -> complete; + _ -> awaiting + end, + ResizeTransfers2 = lists:keyreplace(Target, 1, + ResizeTransfers, + {Target, Mods2, Status2}), + State1 = set_resize_transfers(State, Source, + ResizeTransfers2), + AllComplete = lists:all(fun ({_, _, complete}) -> true; + ({_, Ms, awaiting}) -> + ordsets:is_element(Mod, Ms) + end, + ResizeTransfers2), + case AllComplete of + true -> transfer_complete(State1, SrcIdx, Mod); + false -> State1 + end; + _ -> State end. -spec is_resizing(chstate()) -> boolean(). is_resizing(State) -> case resized_ring(State) of - undefined -> false; - {ok, _} -> true + undefined -> false; + {ok, _} -> true end. -spec is_post_resize(chstate()) -> boolean(). is_post_resize(State) -> case get_meta('$resized_ring', State) of - {ok, '$cleanup'} -> true; - _ -> false + {ok, '$cleanup'} -> true; + _ -> false end. -spec is_resize_aborted(chstate()) -> boolean(). is_resize_aborted(State) -> case get_meta('$resized_ring_abort', State) of - {ok, true} -> true; - _ -> false + {ok, true} -> true; + _ -> false end. -spec is_resize_complete(chstate()) -> boolean(). is_resize_complete(#chstate{next = Next}) -> not - lists:any(fun ({_, _, _, _, awaiting}) -> true; - ({_, _, _, _, complete}) -> false - end, - Next). + lists:any(fun ({_, _, _, _, awaiting}) -> true; + ({_, _, _, _, complete}) -> false + end, + Next). -spec complete_resize_transfers(chstate(), {integer(), term()}, atom()) -> [{integer(), @@ -1242,7 +1114,7 @@ complete_resize_transfers(State, Source, Mod) -> || {Target, Mods, Status} <- resize_transfers(State, Source), Status =:= complete orelse - ordsets:is_element(Mod, Mods)]. 
+ ordsets:is_element(Mod, Mods)]. -spec deletion_complete(chstate(), integer(), atom()) -> chstate(). @@ -1265,8 +1137,7 @@ set_resize_transfers(State, Source, Transfers) -> update_meta({resize, Source}, Transfers, State). clear_all_resize_transfers(State) -> - lists:foldl(fun clear_resize_transfers/2, - State, + lists:foldl(fun clear_resize_transfers/2, State, all_owners(State)). clear_resize_transfers(Source, State) -> @@ -1277,9 +1148,9 @@ clear_resize_transfers(Source, State) -> resized_ring(State) -> case get_meta('$resized_ring', State) of - {ok, '$cleanup'} -> {ok, State#chstate.chring}; - {ok, CHRing} -> {ok, CHRing}; - _ -> undefined + {ok, '$cleanup'} -> {ok, State#chstate.chring}; + {ok, CHRing} -> {ok, CHRing}; + _ -> undefined end. -spec set_resized_ring(chstate(), @@ -1292,8 +1163,7 @@ cleanup_after_resize(State) -> update_meta('$resized_ring', '$cleanup', State). -spec vnode_type(chstate(), integer()) -> primary | - {fallback, term()} | - future_primary | + {fallback, term()} | future_primary | resized_primary. vnode_type(State, Idx) -> @@ -1301,16 +1171,16 @@ vnode_type(State, Idx) -> vnode_type(State, Idx, Node) -> try index_owner(State, Idx) of - Node -> primary; - Owner -> - case next_owner(State, Idx) of - {_, Node, _} -> future_primary; - _ -> {fallback, Owner} - end + Node -> primary; + Owner -> + case next_owner(State, Idx) of + {_, Node, _} -> future_primary; + _ -> {fallback, Owner} + end catch - error:{badmatch, _} -> - %% idx doesn't exist so must be an index in a resized ring - resized_primary + error:{badmatch, _} -> + %% idx doesn't exist so must be an index in a resized ring + resized_primary end. %% @doc Return details for a pending partition ownership change. @@ -1319,8 +1189,8 @@ vnode_type(State, Idx, Node) -> next_owner(State, Idx) -> case lists:keyfind(Idx, 1, State#chstate.next) of - false -> {undefined, undefined, undefined}; - NInfo -> next_owner(NInfo) + false -> {undefined, undefined, undefined}; + NInfo -> next_owner(NInfo) end. %% @doc Return details for a pending partition ownership change. @@ -1333,14 +1203,14 @@ next_owner(State, Idx, Mod) -> next_owner_status(NInfo, Mod) -> case NInfo of - false -> {undefined, undefined, undefined}; - {_, Owner, NextOwner, _Transfers, complete} -> - {Owner, NextOwner, complete}; - {_, Owner, NextOwner, Transfers, _Status} -> - case ordsets:is_element(Mod, Transfers) of - true -> {Owner, NextOwner, complete}; - false -> {Owner, NextOwner, awaiting} - end + false -> {undefined, undefined, undefined}; + {_, Owner, NextOwner, _Transfers, complete} -> + {Owner, NextOwner, complete}; + {_, Owner, NextOwner, Transfers, _Status} -> + case ordsets:is_element(Mod, Transfers) of + true -> {Owner, NextOwner, complete}; + false -> {Owner, NextOwner, awaiting} + end end. 
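For reference, the three shapes next_owner_status/2 can produce, given the five-tuple next entries used throughout this file (values illustrative, evaluated inside this module):

    Entry = {0, nodeA, nodeB, ordsets:from_list([riak_kv_vnode]), awaiting},
    {nodeA, nodeB, complete} = next_owner_status(Entry, riak_kv_vnode),
    {nodeA, nodeB, awaiting} = next_owner_status(Entry, riak_pipe_vnode),
    {undefined, undefined, undefined} = next_owner_status(false, riak_kv_vnode).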
%% @private @@ -1366,10 +1236,10 @@ ring_ready(State0) -> [valid, leaving, exiting]), VClock = State#chstate.vclock, R = [begin - case orddict:find(Node, Seen) of - error -> false; - {ok, VC} -> vclock:equal(VClock, VC) - end + case orddict:find(Node, Seen) of + error -> false; + {ok, VC} -> vclock:equal(VClock, VC) + end end || Node <- Members], Ready = lists:all(fun (X) -> X =:= true end, R), @@ -1387,15 +1257,14 @@ ring_ready_info(State0) -> [valid, leaving, exiting]), RecentVC = orddict:fold(fun (_, VC, Recent) -> case vclock:descends(VC, Recent) of - true -> VC; - false -> Recent + true -> VC; + false -> Recent end end, - State#chstate.vclock, - Seen), + State#chstate.vclock, Seen), Outdated = orddict:filter(fun (Node, VC) -> not vclock:equal(VC, RecentVC) and - lists:member(Node, Members) + lists:member(Node, Members) end, Seen), Outdated. @@ -1430,101 +1299,92 @@ future_ring(State, false) -> [leaving]), FutureState2 = lists:foldl(fun (Node, StateAcc) -> case indices(StateAcc, Node) of - [] -> - riak_core_ring:exit_member(Node, - StateAcc, - Node); - _ -> StateAcc + [] -> + riak_core_ring:exit_member(Node, + StateAcc, + Node); + _ -> StateAcc end end, - FutureState, - Leaving), + FutureState, Leaving), FutureState2#chstate{next = []}; future_ring(State0 = #chstate{next = OldNext}, true) -> case is_post_resize(State0) of - false -> - {ok, FutureCHash} = resized_ring(State0), - State1 = cleanup_after_resize(State0), - State2 = clear_all_resize_transfers(State1), - Resized = State2#chstate{chring = FutureCHash}, - Next = lists:foldl(fun ({Idx, Owner, '$resize', _, _}, - Acc) -> - DeleteEntry = {Idx, - Owner, - '$delete', - [], - awaiting}, - %% catch error when index doesn't exist in new ring - try index_owner(Resized, Idx) of - Owner -> Acc; - _ -> [DeleteEntry | Acc] - catch - error:{badmatch, _} -> - [DeleteEntry | Acc] - end - end, - [], - OldNext), - Resized#chstate{next = Next}; - true -> - State1 = remove_meta('$resized_ring', State0), - State1#chstate{next = []} + false -> + {ok, FutureCHash} = resized_ring(State0), + State1 = cleanup_after_resize(State0), + State2 = clear_all_resize_transfers(State1), + Resized = State2#chstate{chring = FutureCHash}, + Next = lists:foldl(fun ({Idx, Owner, '$resize', _, _}, + Acc) -> + DeleteEntry = {Idx, Owner, '$delete', [], + awaiting}, + %% catch error when index doesn't exist in new ring + try index_owner(Resized, Idx) of + Owner -> Acc; + _ -> [DeleteEntry | Acc] + catch + error:{badmatch, _} -> + [DeleteEntry | Acc] + end + end, + [], OldNext), + Resized#chstate{next = Next}; + true -> + State1 = remove_meta('$resized_ring', State0), + State1#chstate{next = []} end. 
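future_ring/1 is what the future_indices helpers consult; a typical pairing with the current ring, using only functions referenced elsewhere in this patch (assuming indices/2 is exported as used here):

    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    Future = riak_core_ring:future_ring(Ring),
    %% indices this node will own once pending transfers complete
    FutureIdxs = riak_core_ring:indices(Future, node()).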
pretty_print(Ring, Opts) -> OptNumeric = lists:member(numeric, Opts), OptLegend = lists:member(legend, Opts), Out = proplists:get_value(out, Opts, standard_io), - TargetN = proplists:get_value(target_n, - Opts, - application:get_env(riak_core, - target_n_val, + TargetN = proplists:get_value(target_n, Opts, + application:get_env(riak_core, target_n_val, undefined)), Owners = riak_core_ring:all_members(Ring), Indices = riak_core_ring:all_owners(Ring), RingSize = length(Indices), Numeric = OptNumeric orelse length(Owners) > 26, case Numeric of - true -> - Ids = [integer_to_list(N) - || N <- lists:seq(1, length(Owners))]; - false -> - Ids = [[Letter] - || Letter <- lists:seq(97, 96 + length(Owners))] + true -> + Ids = [integer_to_list(N) + || N <- lists:seq(1, length(Owners))]; + false -> + Ids = [[Letter] + || Letter <- lists:seq(97, 96 + length(Owners))] end, Names = lists:zip(Owners, Ids), case OptLegend of - true -> - io:format(Out, "~36..=s Nodes ~36..=s~n", ["", ""]), - _ = [begin - NodeIndices = [Idx - || {Idx, Owner} <- Indices, Owner =:= Node], - RingPercent = length(NodeIndices) * 100 / RingSize, - io:format(Out, - "Node ~s: ~w (~5.1f%) ~s~n", - [Name, length(NodeIndices), RingPercent, Node]) - end - || {Node, Name} <- Names], - io:format(Out, "~36..=s Ring ~37..=s~n", ["", ""]); - false -> ok + true -> + io:format(Out, "~36..=s Nodes ~36..=s~n", ["", ""]), + _ = [begin + NodeIndices = [Idx + || {Idx, Owner} <- Indices, Owner =:= Node], + RingPercent = length(NodeIndices) * 100 / RingSize, + io:format(Out, "Node ~s: ~w (~5.1f%) ~s~n", + [Name, length(NodeIndices), RingPercent, Node]) + end + || {Node, Name} <- Names], + io:format(Out, "~36..=s Ring ~37..=s~n", ["", ""]); + false -> ok end, case Numeric of - true -> - Ownership = [orddict:fetch(Owner, Names) - || {_Idx, Owner} <- Indices], - io:format(Out, "~p~n", [Ownership]); - false -> - lists:foldl(fun ({_, Owner}, N) -> - Name = orddict:fetch(Owner, Names), - case N rem TargetN of - 0 -> io:format(Out, "~s|", [[Name]]); - _ -> io:format(Out, "~s", [[Name]]) - end, - N + 1 - end, - 1, - Indices), - io:format(Out, "~n", []) + true -> + Ownership = [orddict:fetch(Owner, Names) + || {_Idx, Owner} <- Indices], + io:format(Out, "~p~n", [Ownership]); + false -> + lists:foldl(fun ({_, Owner}, N) -> + Name = orddict:fetch(Owner, Names), + case N rem TargetN of + 0 -> io:format(Out, "~s|", [[Name]]); + _ -> io:format(Out, "~s", [[Name]]) + end, + N + 1 + end, + 1, Indices), + io:format(Out, "~n", []) end. %% @doc Return a ring with all transfers cancelled - for claim sim @@ -1538,8 +1398,8 @@ cancel_transfers(Ring) -> Ring#chstate{next = []}. internal_ring_changed(Node, CState0) -> CState = update_seen(Node, CState0), case ring_ready(CState) of - false -> CState; - true -> riak_core_claimant:ring_changed(Node, CState) + false -> CState; + true -> riak_core_claimant:ring_changed(Node, CState) end. %% @private @@ -1547,18 +1407,17 @@ merge_meta({N1, M1}, {N2, M2}) -> Meta = dict:merge(fun (_, D1, D2) -> pick_val({N1, D1}, {N2, D2}) end, - M1, - M2), + M1, M2), log_meta_merge(M1, M2, Meta), Meta. %% @private pick_val({N1, M1}, {N2, M2}) -> case {M1#meta_entry.lastmod, N1} > - {M2#meta_entry.lastmod, N2} + {M2#meta_entry.lastmod, N2} of - true -> M1; - false -> M2 + true -> M1; + false -> M2 end. 
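merge_meta/2 resolves a conflicting metadata entry with pick_val/2, which simply takes the larger {lastmod, nodename} pair; a sketch using this module's #meta_entry{} record (timestamps illustrative):

    A = #meta_entry{lastmod = {1599, 694483, 0}, value = old},
    B = #meta_entry{lastmod = {1599, 694484, 0}, value = new},
    B = pick_val({nodeA, A}, {nodeB, B}),           %% newer lastmod wins
    A2 = A#meta_entry{lastmod = B#meta_entry.lastmod},
    A2 = pick_val({nodeZ, A2}, {nodeB, B}).         %% tie broken by node name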
%% @private @@ -1597,35 +1456,33 @@ internal_reconcile(State, OtherState) -> VMerge2 = vclock:merge([VC2, VC1]), case {vclock:equal(VMerge1, VMerge2), VMerge1 < VMerge2} of - {true, _} -> VC3 = VMerge1; - {_, true} -> VC3 = VMerge1; - {_, false} -> VC3 = VMerge2 + {true, _} -> VC3 = VMerge1; + {_, true} -> VC3 = VMerge1; + {_, false} -> VC3 = VMerge2 end, Newer = vclock:descends(VC1, VC2), Older = vclock:descends(VC2, VC1), Equal = equal_cstate(State3, OtherState3), case {Equal, Newer, Older} of - {_, true, false} -> - {SeenChanged, State3#chstate{vclock = VC3}}; - {_, false, true} -> - {true, - OtherState3#chstate{nodename = VNode, vclock = VC3}}; - {true, _, _} -> - {SeenChanged, State3#chstate{vclock = VC3}}; - {_, true, true} -> - %% Exceptional condition that should only occur during - %% rolling upgrades and manual setting of the ring. - %% Merge as a divergent case. - State4 = reconcile_divergent(VNode, - State3, - OtherState3), - {true, State4#chstate{nodename = VNode}}; - {_, false, false} -> - %% Unable to reconcile based on vector clock, merge rings. - State4 = reconcile_divergent(VNode, - State3, - OtherState3), - {true, State4#chstate{nodename = VNode}} + {_, true, false} -> + {SeenChanged, State3#chstate{vclock = VC3}}; + {_, false, true} -> + {true, + OtherState3#chstate{nodename = VNode, vclock = VC3}}; + {true, _, _} -> + {SeenChanged, State3#chstate{vclock = VC3}}; + {_, true, true} -> + %% Exceptional condition that should only occur during + %% rolling upgrades and manual setting of the ring. + %% Merge as a divergent case. + State4 = reconcile_divergent(VNode, State3, + OtherState3), + {true, State4#chstate{nodename = VNode}}; + {_, false, false} -> + %% Unable to reconcile based on vector clock, merge rings. + State4 = reconcile_divergent(VNode, State3, + OtherState3), + {true, State4#chstate{nodename = VNode}} end. %% @private @@ -1637,8 +1494,7 @@ reconcile_divergent(VNode, StateA, StateB) -> Meta = merge_meta({StateA#chstate.nodename, StateA#chstate.meta}, {StateB#chstate.nodename, StateB#chstate.meta}), - NewState = reconcile_ring(StateA, - StateB, + NewState = reconcile_ring(StateA, StateB, get_members(Members)), NewState1 = NewState#chstate{vclock = VClock, members = Members, meta = Meta}, @@ -1655,29 +1511,26 @@ reconcile_members(StateA, StateB) -> New2 = vclock:descends(VC2, VC1), MergeVC = vclock:merge([VC1, VC2]), case {New1, New2} of - {true, false} -> - MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), - {Valid1, MergeVC, MergeMeta}; - {false, true} -> - MergeMeta = lists:ukeysort(1, Meta2 ++ Meta1), - {Valid2, MergeVC, MergeMeta}; - {_, _} -> - MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), - {merge_status(Valid1, Valid2), - MergeVC, - MergeMeta} + {true, false} -> + MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), + {Valid1, MergeVC, MergeMeta}; + {false, true} -> + MergeMeta = lists:ukeysort(1, Meta2 ++ Meta1), + {Valid2, MergeVC, MergeMeta}; + {_, _} -> + MergeMeta = lists:ukeysort(1, Meta1 ++ Meta2), + {merge_status(Valid1, Valid2), MergeVC, + MergeMeta} end end, - StateA#chstate.members, - StateB#chstate.members). + StateA#chstate.members, StateB#chstate.members). %% @private reconcile_seen(StateA, StateB) -> orddict:merge(fun (_, VC1, VC2) -> vclock:merge([VC1, VC2]) end, - StateA#chstate.seen, - StateB#chstate.seen). + StateA#chstate.seen, StateB#chstate.seen). %% @private merge_next_status(complete, _) -> complete; @@ -1688,20 +1541,14 @@ merge_next_status(awaiting, awaiting) -> awaiting. 
%% @doc Merge two next lists that must be of the same size and have %% the same Idx/Owner pair. reconcile_next(Next1, Next2) -> - lists:zipwith(fun ({Idx, - Owner, - Node, - Transfers1, + lists:zipwith(fun ({Idx, Owner, Node, Transfers1, Status1}, {Idx, Owner, Node, Transfers2, Status2}) -> - {Idx, - Owner, - Node, + {Idx, Owner, Node, ordsets:union(Transfers1, Transfers2), merge_next_status(Status1, Status2)} end, - Next1, - Next2). + Next1, Next2). %% @private %% @doc Merge two next lists that may be of different sizes and @@ -1710,34 +1557,28 @@ reconcile_next(Next1, Next2) -> %% the merge is the same as in reconcile_next/2. reconcile_divergent_next(BaseNext, OtherNext) -> MergedNext = substitute(1, BaseNext, OtherNext), - lists:zipwith(fun ({Idx, - Owner1, - Node1, - Transfers1, + lists:zipwith(fun ({Idx, Owner1, Node1, Transfers1, Status1}, {Idx, Owner2, Node2, Transfers2, Status2}) -> Same = {Owner1, Node1} =:= {Owner2, Node2}, case {Same, Status1, Status2} of - {false, _, _} -> - {Idx, Owner1, Node1, Transfers1, Status1}; - _ -> - {Idx, - Owner1, - Node1, - ordsets:union(Transfers1, Transfers2), - merge_next_status(Status1, Status2)} + {false, _, _} -> + {Idx, Owner1, Node1, Transfers1, Status1}; + _ -> + {Idx, Owner1, Node1, + ordsets:union(Transfers1, Transfers2), + merge_next_status(Status1, Status2)} end end, - BaseNext, - MergedNext). + BaseNext, MergedNext). %% @private substitute(Idx, TL1, TL2) -> lists:map(fun (T) -> Key = element(Idx, T), case lists:keyfind(Key, Idx, TL2) of - false -> T; - T2 -> T2 + false -> T; + T2 -> T2 end end, TL1). @@ -1752,60 +1593,60 @@ reconcile_ring(StateA = #chstate{claimant = Claimant1, V1Newer = vclock:descends(VC1, VC2), V2Newer = vclock:descends(VC2, VC1), EqualVC = vclock:equal(VC1, VC2) and - (Claimant1 =:= Claimant2), + (Claimant1 =:= Claimant2), case {EqualVC, V1Newer, V2Newer} of - {true, _, _} -> - Next = reconcile_next(Next1, Next2), - StateA#chstate{next = Next}; - {_, true, false} -> - Next = reconcile_divergent_next(Next1, Next2), - StateA#chstate{next = Next}; - {_, false, true} -> - Next = reconcile_divergent_next(Next2, Next1), - StateB#chstate{next = Next}; - {_, _, _} -> - %% Ring versions were divergent, so fall back to reconciling based - %% on claimant. Under normal operation, divergent ring versions - %% should only occur if there are two different claimants, and one - %% claimant is invalid. For example, when a claimant is removed and - %% a new claimant has just taken over. We therefore chose the ring - %% with the valid claimant. - CValid1 = lists:member(Claimant1, Members), - CValid2 = lists:member(Claimant2, Members), - case {CValid1, CValid2} of - {true, false} -> - Next = reconcile_divergent_next(Next1, Next2), - StateA#chstate{next = Next}; - {false, true} -> - Next = reconcile_divergent_next(Next2, Next1), - StateB#chstate{next = Next}; - {false, false} -> - %% This can occur when removed/down nodes are still - %% up and gossip to each other. We need to pick a - %% claimant to handle this case, although the choice - %% is irrelevant as a correct valid claimant will - %% eventually emerge when the ring converges. - %TODO False-false and true-true are the same. _-_ maybe better not repitition - case Claimant1 < Claimant2 of - true -> - Next = reconcile_divergent_next(Next1, Next2), - StateA#chstate{next = Next}; - false -> - Next = reconcile_divergent_next(Next2, Next1), - StateB#chstate{next = Next} - end; - {true, true} -> - %% This should never happen in normal practice. 
-		%% But, we need to handle it for exceptional cases.
-		case Claimant1 < Claimant2 of
-		    true ->
-			Next = reconcile_divergent_next(Next1, Next2),
-			StateA#chstate{next = Next};
-		    false ->
-			Next = reconcile_divergent_next(Next2, Next1),
-			StateB#chstate{next = Next}
-		end
-	  end
+      {true, _, _} ->
+	  Next = reconcile_next(Next1, Next2),
+	  StateA#chstate{next = Next};
+      {_, true, false} ->
+	  Next = reconcile_divergent_next(Next1, Next2),
+	  StateA#chstate{next = Next};
+      {_, false, true} ->
+	  Next = reconcile_divergent_next(Next2, Next1),
+	  StateB#chstate{next = Next};
+      {_, _, _} ->
+	  %% Ring versions were divergent, so fall back to reconciling based
+	  %% on claimant. Under normal operation, divergent ring versions
+	  %% should only occur if there are two different claimants, and one
+	  %% claimant is invalid. For example, when a claimant is removed and
+	  %% a new claimant has just taken over. We therefore choose the ring
+	  %% with the valid claimant.
+	  CValid1 = lists:member(Claimant1, Members),
+	  CValid2 = lists:member(Claimant2, Members),
+	  case {CValid1, CValid2} of
+	    {true, false} ->
+		Next = reconcile_divergent_next(Next1, Next2),
+		StateA#chstate{next = Next};
+	    {false, true} ->
+		Next = reconcile_divergent_next(Next2, Next1),
+		StateB#chstate{next = Next};
+	    {false, false} ->
+		%% This can occur when removed/down nodes are still
+		%% up and gossip to each other. We need to pick a
+		%% claimant to handle this case, although the choice
+		%% is irrelevant as a correct valid claimant will
+		%% eventually emerge when the ring converges.
+		%% TODO: the false-false and true-true branches are identical; consider deduplicating.
+		case Claimant1 < Claimant2 of
+		    true ->
+			Next = reconcile_divergent_next(Next1, Next2),
+			StateA#chstate{next = Next};
+		    false ->
+			Next = reconcile_divergent_next(Next2, Next1),
+			StateB#chstate{next = Next}
+		end;
+	    {true, true} ->
+		%% This should never happen in normal practice.
+		%% But, we need to handle it for exceptional cases.
+		case Claimant1 < Claimant2 of
+		    true ->
+			Next = reconcile_divergent_next(Next1, Next2),
+			StateA#chstate{next = Next};
+		    false ->
+			Next = reconcile_divergent_next(Next2, Next1),
+			StateB#chstate{next = Next}
+		end
+	  end
     end.

%% @private
@@ -1833,13 +1674,11 @@ transfer_complete(CState = #chstate{next = Next,
     VNodeMods = ordsets:from_list([VMod
				    || {_, VMod} <- riak_core:vnode_modules()]),
     Status2 = case {Status, Transfers2} of
-		{complete, _} -> complete;
-		{awaiting, VNodeMods} -> complete;
-		_ -> awaiting
+	        {complete, _} -> complete;
+	        {awaiting, VNodeMods} -> complete;
+	        _ -> awaiting
	       end,
-    Next2 = lists:keyreplace(Idx,
-			     1,
-			     Next,
+    Next2 = lists:keyreplace(Idx, 1, Next,
			     {Idx, Owner, NextOwner, Transfers2, Status2}),
     VClock2 = vclock:increment(Owner, VClock),
     CState#chstate{next = Next2, vclock = VClock2}.
@@ -1860,8 +1699,7 @@ update_seen(Node, CState = #chstate{vclock = VClock, seen = Seen}) ->
     Seen2 = orddict:update(Node,
			   fun (SeenVC) -> vclock:merge([SeenVC, VClock]) end,
-			   VClock,
-			   Seen),
+			   VClock, Seen),
     CState#chstate{seen = Seen2}.

%% @private
@@ -1895,10 +1733,9 @@ equal_members(M1, M2) ->
     L = orddict:merge(fun (_, {Status1, VC1, Meta1},
			   {Status2, VC2, Meta2}) ->
			       Status1 =:= Status2 andalso
-				 vclock:equal(VC1, VC2) andalso Meta1 =:= Meta2
+			       vclock:equal(VC1, VC2) andalso Meta1 =:= Meta2
		       end,
-		       M1,
-		       M2),
+		       M1, M2),
     {_, R} = lists:unzip(L),
     lists:all(fun (X) -> X =:= true end, R).
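Per the TODO retained above, the {false, false} and {true, true} clauses run identical tie-break code. One possible factoring, shown only as a sketch and not part of this patch; both clauses could then delegate to this helper while keeping their distinct comments in place:

    tiebreak_by_claimant(Claimant1, Claimant2, StateA, StateB, Next1, Next2) ->
        case Claimant1 < Claimant2 of
            true ->
                Next = reconcile_divergent_next(Next1, Next2),
                StateA#chstate{next = Next};
            false ->
                Next = reconcile_divergent_next(Next2, Next1),
                StateB#chstate{next = Next}
        end.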
@@ -1909,19 +1746,18 @@ equal_seen(StateA, StateB) -> L = orddict:merge(fun (_, VC1, VC2) -> vclock:equal(VC1, VC2) end, - Seen1, - Seen2), + Seen1, Seen2), {_, R} = lists:unzip(L), lists:all(fun (X) -> X =:= true end, R). %% @private filtered_seen(State = #chstate{seen = Seen}) -> case get_members(State#chstate.members) of - [] -> Seen; - Members -> - orddict:filter(fun (N, _) -> lists:member(N, Members) - end, - Seen) + [] -> Seen; + Members -> + orddict:filter(fun (N, _) -> lists:member(N, Members) + end, + Seen) end. %% =================================================================== @@ -2067,40 +1903,36 @@ membership_test() -> {_, RingA7} = reconcile(RingB2, RingA6), ?assertEqual([nodeA, nodeB, nodeC], (all_members(RingA7))), - Priority = [{invalid, 1}, - {down, 2}, - {joining, 3}, - {valid, 4}, - {exiting, 5}, - {leaving, 6}], + Priority = [{invalid, 1}, {down, 2}, {joining, 3}, + {valid, 4}, {exiting, 5}, {leaving, 6}], RingX1 = fresh(nodeA), RingX2 = add_member(nodeA, RingX1, nodeB), RingX3 = add_member(nodeA, RingX2, nodeC), ?assertEqual(joining, (member_status(RingX3, nodeC))), %% Parallel/sibling status changes merge based on priority [begin - RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), - ?assertEqual(StatusA, (member_status(RingT1, nodeC))), - RingT2 = set_member(nodeB, RingX3, nodeC, StatusB), - ?assertEqual(StatusB, (member_status(RingT2, nodeC))), - StatusC = case PriorityA < PriorityB of - true -> StatusA; - false -> StatusB - end, - {_, RingT3} = reconcile(RingT2, RingT1), - ?assertEqual(StatusC, (member_status(RingT3, nodeC))) + RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), + ?assertEqual(StatusA, (member_status(RingT1, nodeC))), + RingT2 = set_member(nodeB, RingX3, nodeC, StatusB), + ?assertEqual(StatusB, (member_status(RingT2, nodeC))), + StatusC = case PriorityA < PriorityB of + true -> StatusA; + false -> StatusB + end, + {_, RingT3} = reconcile(RingT2, RingT1), + ?assertEqual(StatusC, (member_status(RingT3, nodeC))) end || {StatusA, PriorityA} <- Priority, {StatusB, PriorityB} <- Priority], %% Related status changes merge to descendant [begin - RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), - ?assertEqual(StatusA, (member_status(RingT1, nodeC))), - RingT2 = set_member(nodeB, RingT1, nodeC, StatusB), - ?assertEqual(StatusB, (member_status(RingT2, nodeC))), - RingT3 = set_member(nodeA, RingT1, nodeA, valid), - {_, RingT4} = reconcile(RingT2, RingT3), - ?assertEqual(StatusB, (member_status(RingT4, nodeC))) + RingT1 = set_member(nodeA, RingX3, nodeC, StatusA), + ?assertEqual(StatusA, (member_status(RingT1, nodeC))), + RingT2 = set_member(nodeB, RingT1, nodeC, StatusB), + ?assertEqual(StatusB, (member_status(RingT2, nodeC))), + RingT3 = set_member(nodeA, RingT1, nodeA, valid), + {_, RingT4} = reconcile(RingT2, RingT3), + ?assertEqual(StatusB, (member_status(RingT4, nodeC))) end || {StatusA, _} <- Priority, {StatusB, _} <- Priority], ok. 
@@ -2156,11 +1988,8 @@ reconcile_next_test() -> Next2 = [{0, nodeA, nodeB, [riak_kv_vnode], complete}, {1, nodeA, nodeB, [], awaiting}, {2, nodeA, nodeB, [], awaiting}], - Next3 = [{0, - nodeA, - nodeB, - [riak_kv_vnode, riak_pipe_vnode], - complete}, + Next3 = [{0, nodeA, nodeB, + [riak_kv_vnode, riak_pipe_vnode], complete}, {1, nodeA, nodeB, [riak_pipe_vnode], awaiting}, {2, nodeA, nodeB, [riak_pipe_vnode], complete}], ?assertEqual(Next3, (reconcile_next(Next1, Next2))), @@ -2171,10 +2000,7 @@ reconcile_next_test() -> {2, nodeA, nodeB, [riak_kv_vnode], complete}], Next6 = [{0, nodeA, nodeB, [riak_pipe_vnode], awaiting}, {1, nodeA, nodeB, [], awaiting}, - {2, - nodeA, - nodeB, - [riak_kv_vnode, riak_pipe_vnode], + {2, nodeA, nodeB, [riak_kv_vnode, riak_pipe_vnode], complete}], ?assertEqual(Next6, (reconcile_divergent_next(Next4, Next5))). @@ -2207,15 +2033,13 @@ resize_xfer_test_() -> fun () -> meck:unload(), meck:new(riak_core, [passthrough]), - meck:expect(riak_core, - vnode_modules, + meck:expect(riak_core, vnode_modules, fun () -> [{some_app, fake_vnode}, {other_app, other_vnode}] end) end, - fun (_) -> meck:unload() end, - fun test_resize_xfers/0}. + fun (_) -> meck:unload() end, fun test_resize_xfers/0}. test_resize_xfers() -> Ring0 = riak_core_ring:fresh(4, a), @@ -2225,65 +2049,44 @@ test_resize_xfers() -> {730750818665451459101842416358141509827966271488, a}, TargetIdx2 = 365375409332725729550921208179070754913983135744, - Ring2 = schedule_resize_transfer(Ring1, - Source1, + Ring2 = schedule_resize_transfer(Ring1, Source1, Target1), ?assertEqual(Target1, (awaiting_resize_transfer(Ring2, Source1, fake_vnode))), ?assertEqual(awaiting, - (resize_transfer_status(Ring2, - Source1, - Target1, + (resize_transfer_status(Ring2, Source1, Target1, fake_vnode))), %% use Target1 since we haven't used it as a source index ?assertEqual(undefined, (awaiting_resize_transfer(Ring2, Target1, fake_vnode))), ?assertEqual(undefined, - (resize_transfer_status(Ring2, - Target1, - Source1, + (resize_transfer_status(Ring2, Target1, Source1, fake_vnode))), - Ring3 = schedule_resize_transfer(Ring2, - Source1, + Ring3 = schedule_resize_transfer(Ring2, Source1, TargetIdx2), - Ring4 = resize_transfer_complete(Ring3, - Source1, - Target1, - fake_vnode), + Ring4 = resize_transfer_complete(Ring3, Source1, + Target1, fake_vnode), ?assertEqual({TargetIdx2, a}, (awaiting_resize_transfer(Ring4, Source1, fake_vnode))), ?assertEqual(awaiting, - (resize_transfer_status(Ring4, - Source1, - {TargetIdx2, a}, + (resize_transfer_status(Ring4, Source1, {TargetIdx2, a}, fake_vnode))), ?assertEqual(complete, - (resize_transfer_status(Ring4, - Source1, - Target1, + (resize_transfer_status(Ring4, Source1, Target1, fake_vnode))), - Ring5 = resize_transfer_complete(Ring4, - Source1, - {TargetIdx2, a}, - fake_vnode), - {_, '$resize', Status1} = next_owner(Ring5, - 0, + Ring5 = resize_transfer_complete(Ring4, Source1, + {TargetIdx2, a}, fake_vnode), + {_, '$resize', Status1} = next_owner(Ring5, 0, fake_vnode), ?assertEqual(complete, Status1), - Ring6 = resize_transfer_complete(Ring5, - Source1, - {TargetIdx2, a}, - other_vnode), - Ring7 = resize_transfer_complete(Ring6, - Source1, - Target1, - other_vnode), - {_, '$resize', Status2} = next_owner(Ring7, - 0, + Ring6 = resize_transfer_complete(Ring5, Source1, + {TargetIdx2, a}, other_vnode), + Ring7 = resize_transfer_complete(Ring6, Source1, + Target1, other_vnode), + {_, '$resize', Status2} = next_owner(Ring7, 0, fake_vnode), ?assertEqual(complete, Status2), - {_, '$resize', Status3} 
= next_owner(Ring7, - 0, + {_, '$resize', Status3} = next_owner(Ring7, 0, other_vnode), ?assertEqual(complete, Status3), {_, '$resize', complete} = next_owner(Ring7, 0). @@ -2291,9 +2094,8 @@ test_resize_xfers() -> valid_resize(Ring0, Ring1) -> lists:foreach(fun ({Idx, Owner}) -> case lists:keyfind(Idx, 1, all_owners(Ring0)) of - false -> - ?assertEqual('$dummyhost@resized', Owner); - {Idx, OrigOwner} -> ?assertEqual(OrigOwner, Owner) + false -> ?assertEqual('$dummyhost@resized', Owner); + {Idx, OrigOwner} -> ?assertEqual(OrigOwner, Owner) end end, all_owners(Ring1)). diff --git a/src/riak_core_ring_handler.erl b/src/riak_core_ring_handler.erl index 6ca16cfae..c61a49359 100644 --- a/src/riak_core_ring_handler.erl +++ b/src/riak_core_ring_handler.erl @@ -19,12 +19,8 @@ -behaviour(gen_event). %% gen_event callbacks --export([init/1, - handle_event/2, - handle_call/2, - handle_info/2, - terminate/2, - code_change/3]). +-export([init/1, handle_event/2, handle_call/2, + handle_info/2, terminate/2, code_change/3]). -export([ensure_vnodes_started/1]). @@ -59,38 +55,36 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. ensure_vnodes_started(Ring) -> case riak_core:vnode_modules() of - [] -> ok; - AppMods -> - case ensure_vnodes_started(AppMods, Ring, []) of - [] -> - Ready = riak_core_ring:ring_ready(Ring), - FutureIndices = riak_core_ring:future_indices(Ring, - node()), - Status = riak_core_ring:member_status(Ring, node()), - case {Ready, FutureIndices, Status} of - {true, [], leaving} -> - case ready_to_exit(AppMods) of - true -> - exit_ring_trans(), - maybe_shutdown(Ring); - false -> ok - end; - {_, _, invalid} -> - riak_core_ring_manager:refresh_my_ring(); - {_, _, exiting} -> - %% Deliberately do nothing. - ok; - {_, _, _} -> ok - end; - _ -> ok - end + [] -> ok; + AppMods -> + case ensure_vnodes_started(AppMods, Ring, []) of + [] -> + Ready = riak_core_ring:ring_ready(Ring), + FutureIndices = riak_core_ring:future_indices(Ring, + node()), + Status = riak_core_ring:member_status(Ring, node()), + case {Ready, FutureIndices, Status} of + {true, [], leaving} -> + case ready_to_exit(AppMods) of + true -> exit_ring_trans(), maybe_shutdown(Ring); + false -> ok + end; + {_, _, invalid} -> + riak_core_ring_manager:refresh_my_ring(); + {_, _, exiting} -> + %% Deliberately do nothing. + ok; + {_, _, _} -> ok + end; + _ -> ok + end end. %% Shutdown if we are the only node in the cluster maybe_shutdown(Ring) -> case riak_core_ring:random_other_node(Ring) of - no_node -> riak_core_ring_manager:refresh_my_ring(); - _ -> ok + no_node -> riak_core_ring_manager:refresh_my_ring(); + _ -> ok end. exit_ring_trans() -> @@ -104,23 +98,22 @@ exit_ring_trans() -> []). ready_to_exit([]) -> true; -ready_to_exit([{_App, Mod} | AppMods]) -> - case erlang:function_exported(Mod, ready_to_exit, 0) - andalso not Mod:ready_to_exit() +ready_to_exit([{_App, Module} | AppMods]) -> + case erlang:function_exported(Module, ready_to_exit, 0) + andalso not Module:ready_to_exit() of - true -> false; - false -> ready_to_exit(AppMods) + true -> false; + false -> ready_to_exit(AppMods) end. ensure_vnodes_started([], _Ring, Acc) -> lists:flatten(Acc); ensure_vnodes_started([{App, Mod} | T], Ring, Acc) -> - ensure_vnodes_started(T, - Ring, + ensure_vnodes_started(T, Ring, [ensure_vnodes_started({App, Mod}, Ring) | Acc]). -ensure_vnodes_started({App, Mod}, Ring) -> - Startable = startable_vnodes(Mod, Ring), +ensure_vnodes_started({App, Module}, Ring) -> + Startable = startable_vnodes(Module, Ring), %% NOTE: This following is a hack. 
There's a basic %% dependency/race between riak_core (want to start vnodes %% right away to trigger possible handoffs) and riak_kv @@ -129,30 +122,29 @@ ensure_vnodes_started({App, Mod}, Ring) -> spawn_link(fun () -> %% Use a registered name as a lock to prevent the same %% vnode module from being started twice. - ModList = atom_to_list(Mod), + ModList = atom_to_list(Module), RegName = "riak_core_ring_handler_ensure_" ++ ModList, try erlang:register(list_to_atom(RegName), self()) catch - error:badarg -> exit(normal) + error:badarg -> exit(normal) end, %% Let the app finish starting... ok = riak_core:wait_for_application(App), %% Start the vnodes. HasStartVnodes = lists:member({start_vnodes, 1}, - Mod:module_info(exports)), + Module:module_info(exports)), case HasStartVnodes of - true -> Mod:start_vnodes(Startable); - false -> [Mod:start_vnode(I) || I <- Startable] + true -> Module:start_vnodes(Startable); + false -> [Module:start_vnode(I) || I <- Startable] end, %% Mark the service as up. SupName = list_to_atom(atom_to_list(App) ++ "_sup"), SupPid = erlang:whereis(SupName), case riak_core:health_check(App) of - undefined -> - riak_core_node_watcher:service_up(App, SupPid); - HealthMFA -> - riak_core_node_watcher:service_up(App, - SupPid, - HealthMFA) + undefined -> + riak_core_node_watcher:service_up(App, SupPid); + HealthMFA -> + riak_core_node_watcher:service_up(App, SupPid, + HealthMFA) end, exit(normal) end), @@ -161,22 +153,22 @@ ensure_vnodes_started({App, Mod}, Ring) -> startable_vnodes(Mod, Ring) -> AllMembers = riak_core_ring:all_members(Ring), case {length(AllMembers), hd(AllMembers) =:= node()} of - {1, true} -> riak_core_ring:my_indices(Ring); - _ -> - {ok, ModExcl} = - riak_core_handoff_manager:get_exclusions(Mod), - Excl = ModExcl -- - riak_core_ring:disowning_indices(Ring, node()), - case riak_core_ring:random_other_index(Ring, Excl) of - no_indices -> - case length(Excl) =:= - riak_core_ring:num_partitions(Ring) - of - true -> []; - false -> riak_core_ring:my_indices(Ring) - end; - RO -> [RO | riak_core_ring:my_indices(Ring)] - end + {1, true} -> riak_core_ring:my_indices(Ring); + _ -> + {ok, ModExcl} = + riak_core_handoff_manager:get_exclusions(Mod), + Excl = ModExcl -- + riak_core_ring:disowning_indices(Ring, node()), + case riak_core_ring:random_other_index(Ring, Excl) of + no_indices -> + case length(Excl) =:= + riak_core_ring:num_partitions(Ring) + of + true -> []; + false -> riak_core_ring:my_indices(Ring) + end; + RO -> [RO | riak_core_ring:my_indices(Ring)] + end end. maybe_start_vnode_proxies(Ring) -> @@ -185,30 +177,30 @@ maybe_start_vnode_proxies(Ring) -> FutureSize = riak_core_ring:future_num_partitions(Ring), Larger = Size < FutureSize, case Larger of - true -> - FutureIdxs = - riak_core_ring:all_owners(riak_core_ring:future_ring(Ring)), - _ = [riak_core_vnode_proxy_sup:start_proxy(Mod, Idx) - || {Idx, _} <- FutureIdxs, Mod <- Mods], - ok; - false -> ok + true -> + FutureIdxs = + riak_core_ring:all_owners(riak_core_ring:future_ring(Ring)), + _ = [riak_core_vnode_proxy_sup:start_proxy(Mod, Idx) + || {Idx, _} <- FutureIdxs, Mod <- Mods], + ok; + false -> ok end. 
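The erlang:register/2 call inside the spawned fun above doubles as a run-once lock: a second starter for the same vnode module hits badarg and exits quietly. The idiom in isolation (names illustrative):

    ensure_singleton(Name, Fun) when is_atom(Name) ->
        try erlang:register(Name, self()) of
            true -> Fun()
        catch
            error:badarg -> exit(normal)  %% another instance holds the name
        end.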
maybe_stop_vnode_proxies(Ring) ->
    Mods = [M || {_, M} <- riak_core:vnode_modules()],
    case riak_core_ring:pending_changes(Ring) of
-      [] ->
-	  Idxs = [{I, M}
-		  || {I, _} <- riak_core_ring:all_owners(Ring),
-		     M <- Mods],
-	  ProxySpecs =
-	      supervisor:which_children(riak_core_vnode_proxy_sup),
-	  Running = [{I, M}
-		     || {{M, I}, _, _, _} <- ProxySpecs,
-			lists:member(M, Mods)],
-	  ToShutdown = Running -- Idxs,
-	  _ = [riak_core_vnode_proxy_sup:stop_proxy(M, I)
-	       || {I, M} <- ToShutdown],
-	  ok;
-      _ -> ok
+      [] ->
+	  Idxs = [{I, M}
+		  || {I, _} <- riak_core_ring:all_owners(Ring),
+		     M <- Mods],
+	  ProxySpecs =
+	      supervisor:which_children(riak_core_vnode_proxy_sup),
+	  Running = [{I, M}
+		     || {{M, I}, _, _, _} <- ProxySpecs,
+			lists:member(M, Mods)],
+	  ToShutdown = Running -- Idxs,
+	  _ = [riak_core_vnode_proxy_sup:stop_proxy(M, I)
+	       || {I, M} <- ToShutdown],
+	  ok;
+      _ -> ok
    end.
diff --git a/src/riak_core_ring_manager.erl b/src/riak_core_ring_manager.erl
index ee970b157..24a4893e7 100644
--- a/src/riak_core_ring_manager.erl
+++ b/src/riak_core_ring_manager.erl
@@ -39,8 +39,8 @@
 %%
 %% To alleviate the slow down while in the ETS phase, `riak_core'
 %% exploits the fact that most time sensitive operations access the ring
-%% in order to read only a subset of its data: bucket properties and
-%% partition ownership. Therefore, these pieces of information are
+%% in order to read only a subset of its data: partition ownership.
+%% Therefore, this information is
 %% extracted from the ring and stored in the ETS table as well to
 %% minimize copying overhead. Furthermore, the partition ownership
 %% information (represented by the {@link chash} structure) is converted
@@ -51,9 +51,9 @@
 %% structure for normal operations.
 %%
 %% As of Riak 1.4, it is therefore recommended that operations that
-%% can be performed by directly using the bucket properties API or
-%% `chashbin' structure do so using those methods rather than
-%% retrieving the ring via `get_my_ring/0' or `get_raw_ring/0'.
+%% can be performed directly against the `chashbin' structure do so,
+%% rather than retrieving the ring via
+%% `get_my_ring/0' or `get_raw_ring/0'.

 -module(riak_core_ring_manager).

@@ -61,34 +61,17 @@

 -behaviour(gen_server).

--export([start_link/0,
-	 start_link/1,
-	 get_my_ring/0,
-	 get_raw_ring/0,
-	 get_raw_ring_chashbin/0,
-	 get_chash_bin/0,
-	 get_ring_id/0,
-	 get_bucket_meta/1,
-	 refresh_my_ring/0,
-	 refresh_ring/2,
-	 set_my_ring/1,
-	 write_ringfile/0,
-	 prune_ringfiles/0,
-	 read_ringfile/1,
-	 find_latest_ringfile/0,
-	 force_update/0,
-	 do_write_ringfile/1,
-	 ring_trans/2,
-	 run_fixups/3,
-	 set_cluster_name/1,
+-export([start_link/0, start_link/1, get_my_ring/0,
+	 get_raw_ring/0, get_raw_ring_chashbin/0,
+	 get_chash_bin/0, get_ring_id/0, refresh_my_ring/0,
+	 refresh_ring/2, set_my_ring/1, write_ringfile/0,
+	 prune_ringfiles/0, read_ringfile/1,
+	 find_latest_ringfile/0, force_update/0,
+	 do_write_ringfile/1, ring_trans/2, set_cluster_name/1,
	 is_stable_ring/0]).

--export([init/1,
-	 handle_call/3,
-	 handle_cast/2,
-	 handle_info/2,
-	 terminate/2,
-	 code_change/3]).
+-export([init/1, handle_call/3, handle_cast/2,
+	 handle_info/2, terminate/2, code_change/3]).

 -ifdef(TEST).

@@ -99,9 +82,7 @@

 -record(state, {mode, raw_ring, ring_changed_time, inactivity_timer}).

--export([setup_ets/1,
-	 cleanup_ets/1,
-	 set_ring_global/1,
+-export([setup_ets/1, cleanup_ets/1, set_ring_global/1,
	 promote_ring/0]).
%% For EUnit testing @@ -121,16 +102,12 @@ %% =================================================================== start_link() -> - gen_server:start_link({local, ?MODULE}, - ?MODULE, - [live], + gen_server:start_link({local, ?MODULE}, ?MODULE, [live], []). %% Testing entry point start_link(test) -> - gen_server:start_link({local, ?MODULE}, - ?MODULE, - [test], + gen_server:start_link({local, ?MODULE}, ?MODULE, [test], []). -spec get_my_ring() -> {ok, @@ -139,23 +116,23 @@ start_link(test) -> get_my_ring() -> Ring = case persistent_term:get(?RING_KEY, undefined) of - ets -> - case ets:lookup(?ETS, ring) of - [{_, RingETS}] -> RingETS; - _ -> undefined - end; - RingMochi -> RingMochi + ets -> + case ets:lookup(?ETS, ring) of + [{_, RingETS}] -> RingETS; + _ -> undefined + end; + RingMochi -> RingMochi end, case Ring of - Ring when is_tuple(Ring) -> {ok, Ring}; - undefined -> {error, no_ring} + Ring when is_tuple(Ring) -> {ok, Ring}; + undefined -> {error, no_ring} end. get_raw_ring() -> try Ring = ets:lookup_element(?ETS, raw_ring, 2), {ok, Ring} catch - _:_ -> gen_server:call(?MODULE, get_raw_ring, infinity) + _:_ -> gen_server:call(?MODULE, get_raw_ring, infinity) end. get_raw_ring_chashbin() -> @@ -163,10 +140,9 @@ get_raw_ring_chashbin() -> {ok, CHBin} = get_chash_bin(), {ok, Ring, CHBin} catch - _:_ -> - gen_server:call(?MODULE, - get_raw_ring_chashbin, - infinity) + _:_ -> + gen_server:call(?MODULE, get_raw_ring_chashbin, + infinity) end. %% @spec refresh_my_ring() -> ok @@ -183,31 +159,15 @@ set_my_ring(Ring) -> get_ring_id() -> case ets:lookup(?ETS, id) of - [{_, Id}] -> Id; - _ -> {0, 0} - end. - -%% @doc Return metadata for the given bucket. If a bucket -%% for the non-default type is provided {error, no_type} -%% is returned when the type does not exist -get_bucket_meta({<<"default">>, Name}) -> - get_bucket_meta(Name); -get_bucket_meta({_Type, _Name} = Bucket) -> - %% reads from cluster metadata ets table - %% these aren't stored in ring manager ever - riak_core_bucket:get_bucket(Bucket); -get_bucket_meta(Bucket) -> - case ets:lookup(?ETS, {bucket, Bucket}) of - [] -> undefined; - [{_, undefined}] -> undefined; - [{_, Meta}] -> {ok, Meta} + [{_, Id}] -> Id; + _ -> {0, 0} end. %% @doc Return the {@link chashbin} generated from the current ring get_chash_bin() -> case ets:lookup(?ETS, chashbin) of - [{chashbin, CHBin}] -> {ok, CHBin}; - _ -> {error, no_ring} + [{chashbin, CHBin}] -> {ok, CHBin}; + _ -> {error, no_ring} end. %% @spec write_ringfile() -> ok @@ -215,13 +175,11 @@ write_ringfile() -> gen_server:cast(?MODULE, write_ringfile). ring_trans(Fun, Args) -> - gen_server:call(?MODULE, - {ring_trans, Fun, Args}, + gen_server:call(?MODULE, {ring_trans, Fun, Args}, infinity). set_cluster_name(Name) -> - gen_server:call(?MODULE, - {set_cluster_name, Name}, + gen_server:call(?MODULE, {set_cluster_name, Name}, infinity). is_stable_ring() -> @@ -232,8 +190,7 @@ is_stable_ring() -> force_update() -> ring_trans(fun (Ring, _) -> NewRing = riak_core_ring:update_member_meta(node(), - Ring, - node(), + Ring, node(), unused, erlang:timestamp()), {new_ring, NewRing} @@ -243,18 +200,17 @@ force_update() -> do_write_ringfile(Ring) -> case ring_dir() of - "" -> nop; - Dir -> - {{Year, Month, Day}, {Hour, Minute, Second}} = - calendar:universal_time(), - TS = - io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - [Year, Month, Day, Hour, Minute, Second]), - Cluster = application:get_env(riak_core, - cluster_name, - undefined), - FN = Dir ++ "/riak_core_ring." 
++ Cluster ++ TS, - do_write_ringfile(Ring, FN) + "" -> nop; + Dir -> + {{Year, Month, Day}, {Hour, Minute, Second}} = + calendar:universal_time(), + TS = + io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + [Year, Month, Day, Hour, Minute, Second]), + {ok, Cluster} = application:get_env(riak_core, + cluster_name), + FN = Dir ++ "/riak_core_ring." ++ Cluster ++ TS, + do_write_ringfile(Ring, FN) end. do_write_ringfile(Ring, FN) -> @@ -262,91 +218,87 @@ do_write_ringfile(Ring, FN) -> try ok = riak_core_util:replace_file(FN, term_to_binary(Ring)) catch - _:Err -> - logger:error("Unable to write ring to \"~s\" - ~p\n", - [FN, Err]), - {error, Err} + _:Err -> + logger:error("Unable to write ring to \"~s\" - ~p\n", + [FN, Err]), + {error, Err} end. %% @spec find_latest_ringfile() -> string() find_latest_ringfile() -> Dir = ring_dir(), case file:list_dir(Dir) of - {ok, Filenames} -> - Cluster = application:get_env(riak_core, - cluster_name, - undefined), - Timestamps = [list_to_integer(TS) - || {"riak_core_ring", C1, TS} - <- [list_to_tuple(string:tokens(FN, ".")) - || FN <- Filenames], - C1 =:= Cluster], - SortedTimestamps = - lists:reverse(lists:sort(Timestamps)), - case SortedTimestamps of - [Latest | _] -> - {ok, - Dir ++ - "/riak_core_ring." ++ - Cluster ++ "." ++ integer_to_list(Latest)}; - _ -> {error, not_found} - end; - {error, Reason} -> {error, Reason} + {ok, Filenames} -> + {ok, Cluster} = application:get_env(riak_core, + cluster_name), + Timestamps = [list_to_integer(TS) + || {"riak_core_ring", C1, TS} + <- [list_to_tuple(string:tokens(FN, ".")) + || FN <- Filenames], + C1 =:= Cluster], + SortedTimestamps = + lists:reverse(lists:sort(Timestamps)), + case SortedTimestamps of + [Latest | _] -> + {ok, + Dir ++ + "/riak_core_ring." ++ + Cluster ++ "." ++ integer_to_list(Latest)}; + _ -> {error, not_found} + end; + {error, Reason} -> {error, Reason} end. %% @spec read_ringfile(string()) -> riak_core_ring:riak_core_ring() | {error, any()} read_ringfile(RingFile) -> case file:read_file(RingFile) of - {ok, Binary} -> binary_to_term(Binary); - {error, Reason} -> {error, Reason} + {ok, Binary} -> binary_to_term(Binary); + {error, Reason} -> {error, Reason} end. 
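For reference, the io_lib:format/2 timestamp above produces names that find_latest_ringfile/0 later splits on "."; illustratively (cluster name and date are examples only):

    TS = io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B",
                       [2020, 9, 10, 0, 54, 43]),
    ".20200910005443" = lists:flatten(TS),
    %% giving e.g. <ring_dir>/riak_core_ring.mycluster.20200910005443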
%% @spec prune_ringfiles() -> ok | {error, Reason} prune_ringfiles() -> case ring_dir() of - "" -> ok; - Dir -> - Cluster = application:get_env(riak_core, - cluster_name, - undefined), - case file:list_dir(Dir) of - {error, enoent} -> ok; - {error, Reason} -> {error, Reason}; - {ok, []} -> ok; - {ok, Filenames} -> - Timestamps = [TS - || {"riak_core_ring", C1, TS} - <- [list_to_tuple(string:tokens(FN, - ".")) - || FN <- Filenames], - C1 =:= Cluster], - if Timestamps /= [] -> - %% there are existing ring files - TSPat = [io_lib:fread("~4d~2d~2d~2d~2d~2d", TS) - || TS <- Timestamps], - TSL = lists:reverse(lists:sort([TS - || {ok, TS, []} - <- TSPat])), - Keep = prune_list(TSL), - KeepTSs = - [lists:flatten(io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - K)) - || K <- Keep], - DelFNs = [Dir ++ "/" ++ FN - || FN <- Filenames, - lists:all(fun (TS) -> - string:str(FN, TS) =:= - 0 - end, - KeepTSs)], - _ = [file:delete(DelFN) || DelFN <- DelFNs], - ok; - true -> - %% directory wasn't empty, but there are no ring - %% files in it - ok - end - end + "" -> ok; + Dir -> + Cluster = application:get_env(riak_core, cluster_name, + undefined), + case file:list_dir(Dir) of + {error, enoent} -> ok; + {error, Reason} -> {error, Reason}; + {ok, []} -> ok; + {ok, Filenames} -> + Timestamps = [TS + || {"riak_core_ring", C1, TS} + <- [list_to_tuple(string:tokens(FN, ".")) + || FN <- Filenames], + C1 =:= Cluster], + if Timestamps /= [] -> + %% there are existing ring files + TSPat = [io_lib:fread("~4d~2d~2d~2d~2d~2d", TS) + || TS <- Timestamps], + TSL = lists:reverse(lists:sort([TS + || {ok, TS, []} + <- TSPat])), + Keep = prune_list(TSL), + KeepTSs = + [lists:flatten(io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + K)) + || K <- Keep], + DelFNs = [Dir ++ "/" ++ FN + || FN <- Filenames, + lists:all(fun (TS) -> + string:str(FN, TS) =:= 0 + end, + KeepTSs)], + _ = [file:delete(DelFN) || DelFN <- DelFNs], + ok; + true -> + %% directory wasn't empty, but there are no ring + %% files in it + ok + end + end end. -ifdef(TEST). @@ -354,7 +306,7 @@ prune_ringfiles() -> %% @private (only used for test instances) stop() -> try gen_server:call(?MODULE, stop) catch - exit:{noproc, _} -> ok + exit:{noproc, _} -> ok end. -endif. @@ -373,21 +325,21 @@ init([Mode]) -> reload_ring(test) -> riak_core_ring:fresh(16, node()); reload_ring(live) -> case riak_core_ring_manager:find_latest_ringfile() of - {ok, RingFile} -> - case riak_core_ring_manager:read_ringfile(RingFile) of - {error, Reason} -> - logger:critical("Failed to read ring file: ~p", - [riak_core_util:posix_error(Reason)]), - throw({error, Reason}); - Ring -> Ring - end; - {error, not_found} -> - logger:warning("No ring file available."), - riak_core_ring:fresh(); - {error, Reason} -> - logger:critical("Failed to load ring file: ~p", - [riak_core_util:posix_error(Reason)]), - throw({error, Reason}) + {ok, RingFile} -> + case riak_core_ring_manager:read_ringfile(RingFile) of + {error, Reason} -> + logger:critical("Failed to read ring file: ~p", + [riak_core_util:posix_error(Reason)]), + throw({error, Reason}); + Ring -> Ring + end; + {error, not_found} -> + logger:warning("No ring file available."), + riak_core_ring:fresh(); + {error, Reason} -> + logger:critical("Failed to load ring file: ~p", + [riak_core_util:posix_error(Reason)]), + throw({error, Reason}) end. 
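prune_ringfiles/0 above keeps the newest ring file plus, via prune_list/1 (defined near the end of this patch), the newest file that differs in each of the year, month, day, hour, and minute positions. With [Y,Mo,D,H,Mi,S] timestamp lists sorted newest first, evaluated inside the module, illustratively:

    TSL = [[2020,9,10,0,54,43],   %% newest: always kept
           [2020,9,10,0,54,1],    %% same minute as the head: dropped
           [2020,9,9,23,59,0],    %% newest from an earlier day: kept
           [2020,8,1,0,0,0]],     %% newest from an earlier month: kept
    [[2020,8,1,0,0,0], [2020,9,9,23,59,0], [2020,9,10,0,54,43]] =
        prune_list(TSL).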
handle_call(get_raw_ring, _From, @@ -413,24 +365,24 @@ handle_call(refresh_my_ring, _From, State) -> handle_call({ring_trans, Fun, Args}, _From, State = #state{raw_ring = Ring}) -> case catch Fun(Ring, Args) of - {new_ring, NewRing} -> - State2 = prune_write_notify_ring(NewRing, State), - riak_core_gossip:random_recursive_gossip(NewRing), - {reply, {ok, NewRing}, State2}; - {set_only, NewRing} -> - State2 = prune_write_ring(NewRing, State), - {reply, {ok, NewRing}, State2}; - {reconciled_ring, NewRing} -> - State2 = prune_write_notify_ring(NewRing, State), - riak_core_gossip:recursive_gossip(NewRing), - {reply, {ok, NewRing}, State2}; - ignore -> {reply, not_changed, State}; - {ignore, Reason} -> - {reply, {not_changed, Reason}, State}; - Other -> - logger:error("ring_trans: invalid return value: ~p", - [Other]), - {reply, not_changed, State} + {new_ring, NewRing} -> + State2 = prune_write_notify_ring(NewRing, State), + riak_core_gossip:random_recursive_gossip(NewRing), + {reply, {ok, NewRing}, State2}; + {set_only, NewRing} -> + State2 = prune_write_ring(NewRing, State), + {reply, {ok, NewRing}, State2}; + {reconciled_ring, NewRing} -> + State2 = prune_write_notify_ring(NewRing, State), + riak_core_gossip:recursive_gossip(NewRing), + {reply, {ok, NewRing}, State2}; + ignore -> {reply, not_changed, State}; + {ignore, Reason} -> + {reply, {not_changed, Reason}, State}; + Other -> + logger:error("ring_trans: invalid return value: ~p", + [Other]), + {reply, not_changed, State} end; handle_call({set_cluster_name, Name}, _From, State = #state{raw_ring = Ring}) -> @@ -446,31 +398,29 @@ handle_call(stop, _From, State) -> handle_cast({refresh_my_ring, ClusterName}, State) -> {ok, Ring} = get_my_ring(), case riak_core_ring:cluster_name(Ring) of - ClusterName -> handle_cast(refresh_my_ring, State); - _ -> {noreply, State} + ClusterName -> handle_cast(refresh_my_ring, State); + _ -> {noreply, State} end; handle_cast(refresh_my_ring, State) -> - {_, _, State2} = handle_call(refresh_my_ring, - undefined, + {_, _, State2} = handle_call(refresh_my_ring, undefined, State), {noreply, State2}; handle_cast(write_ringfile, test) -> {noreply, test}; handle_cast(write_ringfile, State = #state{raw_ring = Ring}) -> - ok = do_write_ringfile(Ring), - {noreply, State}. + ok = do_write_ringfile(Ring), {noreply, State}. handle_info(inactivity_timeout, State) -> case is_stable_ring(State) of - {true, DeltaMS} -> - logger:debug("Promoting ring after ~p", [DeltaMS]), - promote_ring(), - State2 = State#state{inactivity_timer = undefined}, - {noreply, State2}; - {false, DeltaMS} -> - Remaining = (?PROMOTE_TIMEOUT) - DeltaMS, - State2 = set_timer(Remaining, State), - {noreply, State2} + {true, DeltaMS} -> + logger:debug("Promoting ring after ~p", [DeltaMS]), + promote_ring(), + State2 = State#state{inactivity_timer = undefined}, + {noreply, State2}; + {false, DeltaMS} -> + Remaining = (?PROMOTE_TIMEOUT) - DeltaMS, + State2 = set_timer(Remaining, State), + {noreply, State2} end; handle_info(_Info, State) -> {noreply, State}. @@ -485,55 +435,28 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
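The ring_trans/2 contract is visible in the handle_call clause above: the fun returns {new_ring, R}, {set_only, R}, {reconciled_ring, R}, ignore, or {ignore, Reason}. An illustrative transaction that names an unnamed cluster and otherwise leaves the ring alone:

    riak_core_ring_manager:ring_trans(
        fun (Ring, _Args) ->
                case riak_core_ring:cluster_name(Ring) of
                    undefined ->
                        {new_ring,
                         riak_core_ring:set_cluster_name(Ring, mycluster)};
                    _ -> ignore       %% caller gets not_changed back
                end
        end,
        []).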
%% =================================================================== ring_dir() -> - case application:get_env(riak_core, - ring_state_dir, + case application:get_env(riak_core, ring_state_dir, undefined) of - undefined -> - filename:join(application:get_env(riak_core, - platform_data_dir, - "data"), - "ring"); - D -> D + undefined -> + filename:join(application:get_env(riak_core, + platform_data_dir, "data"), + "ring"); + D -> D end. prune_list([X | Rest]) -> - lists:usort(lists:append([[X], - back(1, X, Rest), - back(2, X, Rest), - back(3, X, Rest), - back(4, X, Rest), - back(5, X, Rest)])). + lists:usort(lists:append([[X], back(1, X, Rest), + back(2, X, Rest), back(3, X, Rest), + back(4, X, Rest), back(5, X, Rest)])). back(_N, _X, []) -> []; back(N, X, [H | T]) -> case lists:nth(N, X) =:= lists:nth(N, H) of - true -> back(N, X, T); - false -> [H] + true -> back(N, X, T); + false -> [H] end. -%% @private -run_fixups([], _Bucket, BucketProps) -> BucketProps; -run_fixups([{App, Fixup} | T], BucketName, - BucketProps) -> - BP = try Fixup:fixup(BucketName, BucketProps) of - {ok, NewBucketProps} -> NewBucketProps; - {error, Reason} -> - logger:error("Error while running bucket fixup module " - "~p from application ~p on bucket ~p: " - "~p", - [Fixup, App, BucketName, Reason]), - BucketProps - catch - What:Why -> - logger:error("Crash while running bucket fixup module " - "~p from application ~p on bucket ~p " - ": ~p:~p", - [Fixup, App, BucketName, What, Why]), - BucketProps - end, - run_fixups(T, BucketName, BP). - set_ring(Ring, State) -> set_ring_global(Ring), Now = os:timestamp(), @@ -548,8 +471,7 @@ maybe_set_timer(Duration, maybe_set_timer(_Duration, State) -> State. set_timer(Duration, State) -> - Timer = erlang:send_after(Duration, - self(), + Timer = erlang:send_after(Duration, self(), inactivity_timeout), State#state{inactivity_timer = Timer}. @@ -558,8 +480,8 @@ setup_ets(Mode) -> %% eunit tests, but is unneeded for normal Riak operation. catch ets:delete(?ETS), Access = case Mode of - live -> protected; - test -> public + live -> protected; + test -> public end, (?ETS) = ets:new(?ETS, [named_table, Access, {read_concurrency, true}]), @@ -576,85 +498,28 @@ reset_ring_id() -> Epoch = case persistent_term:get(riak_ring_id_epoch, undefined) of - undefined -> 0; - Value -> Value + undefined -> 0; + Value -> Value end, persistent_term:put(riak_ring_id_epoch, Epoch + 1), {Epoch + 1, 0}. -%% Set the ring in persistent_term/ETS. Exported during unit testing +%% Set the ring in persistent_term/ETS. Exported during unit testing %% to make test setup simpler - no need to spin up a riak_core_ring_manager %% process. set_ring_global(Ring) -> - DefaultProps = case application:get_env(riak_core, - default_bucket_props) - of - {ok, Val} -> Val; - _ -> [] - end, - %% run fixups on the ring before storing it in persistent_term - FixedRing = case riak_core:bucket_fixups() of - [] -> Ring; - Fixups -> - Buckets = riak_core_ring:get_buckets(Ring), - lists:foldl(fun (Bucket, AccRing) -> - BucketProps = - riak_core_bucket:get_bucket(Bucket, - Ring), - %% Merge anything in the default properties but not in - %% the bucket's properties. This is to ensure default - %% properties added after the bucket is created are - %% inherited to the bucket. 
- MergedProps = - riak_core_bucket:merge_props(BucketProps, - DefaultProps), - %% fixup the ring - NewBucketProps = run_fixups(Fixups, - Bucket, - MergedProps), - %% update the bucket in the ring - riak_core_ring:update_meta({bucket, - Bucket}, - NewBucketProps, - AccRing) - end, - Ring, - Buckets) - end, %% Mark ring as tainted to check if it is ever leaked over gossip or %% relied upon for any non-local ring operations. - TaintedRing = riak_core_ring:set_tainted(FixedRing), - %% Extract bucket properties and place into ETS table. We want all bucket - %% additions, modifications, and deletions to appear in a single atomic - %% operation. Since ETS does not provide a means to change + delete - %% multiple values in a single operation, we emulate the deletion by - %% overwriting all deleted buckets with the "undefined" atom that has - %% special meaning in `riak_core_bucket:get_bucket_props/2`. We then - %% cleanup these values in a subsequent `ets:match_delete`. - OldBuckets = ets:select(?ETS, - [{{{bucket, '$1'}, '_'}, [], ['$1']}]), - BucketDefaults = [{{bucket, Bucket}, undefined} - || Bucket <- OldBuckets], - BucketMeta = [{{bucket, Bucket}, Meta} - || Bucket <- riak_core_ring:get_buckets(TaintedRing), - {ok, Meta} - <- [riak_core_ring:get_meta({bucket, Bucket}, - TaintedRing)]], - BucketMeta2 = lists:ukeysort(1, - BucketMeta ++ BucketDefaults), + TaintedRing = riak_core_ring:set_tainted(Ring), CHBin = chashbin:create(riak_core_ring:chash(TaintedRing)), {Epoch, Id} = ets:lookup_element(?ETS, id, 2), - Actions = [{ring, TaintedRing}, - {raw_ring, Ring}, - {id, {Epoch, Id + 1}}, - {chashbin, CHBin} - | BucketMeta2], + Actions = [{ring, TaintedRing}, {raw_ring, Ring}, + {id, {Epoch, Id + 1}}, {chashbin, CHBin}], ets:insert(?ETS, Actions), - ets:match_delete(?ETS, {{bucket, '_'}, undefined}), case persistent_term:get(?RING_KEY, undefined) of - ets -> ok; - _ -> persistent_term:put(?RING_KEY, ets) + ets -> ok; + _ -> persistent_term:put(?RING_KEY, ets) end, ok. 
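With the bucket fixup and per-bucket metadata handling removed, set_ring_global/1 now publishes exactly four keys in a single atomic ets:insert/2: the tainted ring, the raw ring, the bumped id, and the chashbin. A sketch of the corresponding fast read path (illustrative; assumes the ?ETS and ?RING_KEY macros defined elsewhere in this module, and a hypothetical function name):

lookup_chashbin() ->
    %% persistent_term holds only the flag saying "read from ETS"
    case persistent_term:get(?RING_KEY, undefined) of
        ets -> {ok, ets:lookup_element(?ETS, chashbin, 2)};
        _ -> {error, ring_not_ready}
    end.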
@@ -690,10 +555,7 @@ is_stable_ring(#state{ring_changed_time = Then}) -> back_test() -> X = [1, 2, 3], - List1 = [[1, 2, 3], - [4, 2, 3], - [7, 8, 3], - [11, 12, 13], + List1 = [[1, 2, 3], [4, 2, 3], [7, 8, 3], [11, 12, 13], [1, 2, 3]], List2 = [[7, 8, 9], [1, 2, 3]], List3 = [[1, 2, 3]], @@ -705,15 +567,11 @@ back_test() -> prune_list_test() -> TSList1 = [[2011, 2, 28, 16, 32, 16], - [2011, 2, 28, 16, 32, 36], - [2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16], - [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 32, 36], [2011, 2, 28, 16, 30, 27], + [2011, 2, 28, 16, 32, 16], [2011, 2, 28, 16, 32, 36]], TSList2 = [[2011, 2, 28, 16, 32, 36], - [2011, 2, 28, 16, 31, 16], - [2011, 2, 28, 16, 30, 27], - [2011, 2, 28, 16, 32, 16], - [2011, 2, 28, 16, 32, 36]], + [2011, 2, 28, 16, 31, 16], [2011, 2, 28, 16, 30, 27], + [2011, 2, 28, 16, 32, 16], [2011, 2, 28, 16, 32, 36]], PrunedList1 = [[2011, 2, 28, 16, 30, 27], [2011, 2, 28, 16, 32, 16]], PrunedList2 = [[2011, 2, 28, 16, 31, 16], @@ -749,9 +607,9 @@ refresh_my_ring_test() -> {ring_state_dir, "_build/test/tmp"}, {cluster_name, "test"}], [begin - put({?MODULE, AppKey}, - application:get_env(riak_core, AppKey, undefined)), - ok = application:set_env(riak_core, AppKey, Val) + put({?MODULE, AppKey}, + application:get_env(riak_core, AppKey, undefined)), + ok = application:set_env(riak_core, AppKey, Val) end || {AppKey, Val} <- Core_Settings], stop_core_processes(), @@ -766,8 +624,7 @@ refresh_my_ring_test() -> %% Cleanup the ring file created for this test {ok, RingFile} = find_latest_ringfile(), file:delete(RingFile), - [ok = application:set_env(riak_core, - AppKey, + [ok = application:set_env(riak_core, AppKey, get({?MODULE, AppKey})) || {AppKey, _Val} <- Core_Settings], ok @@ -786,13 +643,14 @@ stop_core_processes() -> -define(TMP_RINGFILE, (?TEST_RINGFILE) ++ ".tmp"). do_write_ringfile_test() -> + application:set_env(riak_core, cluster_name, "test"), %% Make sure no data exists from previous runs file:change_mode(?TMP_RINGFILE, 8#00644), file:delete(?TMP_RINGFILE), %% Check happy path GenR = fun (Name) -> riak_core_ring:fresh(64, Name) end, ?assertEqual(ok, - (do_write_ringfile(GenR(happy), ?TEST_RINGFILE))), + (do_write_ringfile(GenR(happy), ?TMP_RINGFILE))), %% errors expected error_logger:tty(false), %% Check write fails (create .tmp file with no write perms) @@ -810,8 +668,7 @@ do_write_ringfile_test() -> ok = file:change_mode(?TEST_RINGDIR, 8#00755), error_logger:tty(true), %% Cleanup the ring file created for this test - {ok, RingFile} = find_latest_ringfile(), - file:delete(RingFile). + file:delete(?TMP_RINGFILE). is_stable_ring_test() -> {A, B, C} = Now = os:timestamp(), diff --git a/src/riak_core_ring_util.erl b/src/riak_core_ring_util.erl index 6138a0d80..01fd48269 100644 --- a/src/riak_core_ring_util.erl +++ b/src/riak_core_ring_util.erl @@ -51,9 +51,8 @@ check_ring() -> check_ring(R). check_ring(Ring) -> - {ok, Props} = application:get_env(riak_core, - default_bucket_props), - {n_val, Nval} = lists:keyfind(n_val, 1, Props), + {ok, Nval} = application:get_env(riak_core, + target_n_val), check_ring(Ring, Nval). 
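Since check_ring/1 now takes its n_val from the riak_core application environment rather than from the default bucket properties, target_n_val must be set before calling it. Illustrative usage (the value 4 is arbitrary):

ok = application:set_env(riak_core, target_n_val, 4),
%% Returns the preflists that do not span 4 distinct nodes;
%% [] means the ring satisfies the target n_val.
[] = riak_core_ring_util:check_ring().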
%% @doc Check a ring for any preflists that do not satisfy n_val diff --git a/src/riak_core_send_msg.erl b/src/riak_core_send_msg.erl index b2bc97086..e06094d84 100644 --- a/src/riak_core_send_msg.erl +++ b/src/riak_core_send_msg.erl @@ -55,7 +55,7 @@ cast_unreliable(Dest, Request) -> send_event_unreliable({global, _Name} = GlobalTo, Event) -> erlang:error({unimplemented_send, GlobalTo, Event}); -send_event_unreliable({via, _Mod, _Name} = ViaTo, +send_event_unreliable({via, _Module, _Name} = ViaTo, Event) -> erlang:error({unimplemented_send, ViaTo, Event}); send_event_unreliable(Name, Event) -> diff --git a/src/riak_core_util.erl b/src/riak_core_util.erl index 46825b483..a36dcd622 100644 --- a/src/riak_core_util.erl +++ b/src/riak_core_util.erl @@ -21,65 +21,25 @@ %% @doc Various functions that are useful throughout Riak. -module(riak_core_util). --export([moment/0, - make_tmp_dir/0, - replace_file/2, - compare_dates/2, - reload_all/1, - integer_to_list/2, - unique_id_62/0, - str_to_node/1, - chash_key/1, - chash_key/2, - chash_std_keyfun/1, - chash_bucketonly_keyfun/1, - mkclientid/1, - start_app_deps/1, - build_tree/3, - orddict_delta/2, - safe_rpc/4, - safe_rpc/5, - rpc_every_member/4, - rpc_every_member_ann/4, - count/2, - keydelete/2, - multi_keydelete/2, - multi_keydelete/3, - compose/1, - compose/2, - pmap/2, - pmap/3, - multi_rpc/4, - multi_rpc/5, - multi_rpc_ann/4, - multi_rpc_ann/5, - multicall_ann/4, - multicall_ann/5, - shuffle/1, - is_arch/1, - format_ip_and_port/2, - peername/2, - sockname/2, - sha/1, - md5/1, - make_fold_req/1, - make_fold_req/2, - make_fold_req/4, - make_newest_fold_req/1, - proxy_spawn/1, - proxy/2, - enable_job_class/1, - enable_job_class/2, - disable_job_class/1, - disable_job_class/2, - job_class_enabled/1, - job_class_enabled/2, +-export([moment/0, make_tmp_dir/0, replace_file/2, + compare_dates/2, reload_all/1, integer_to_list/2, + unique_id_62/0, str_to_node/1, chash_key/1, chash_key/2, + chash_std_keyfun/1, chash_bucketonly_keyfun/1, + mkclientid/1, start_app_deps/1, build_tree/3, + orddict_delta/2, safe_rpc/4, safe_rpc/5, + rpc_every_member/4, rpc_every_member_ann/4, count/2, + keydelete/2, multi_keydelete/2, multi_keydelete/3, + compose/1, compose/2, pmap/2, pmap/3, multi_rpc/4, + multi_rpc/5, multi_rpc_ann/4, multi_rpc_ann/5, + multicall_ann/4, multicall_ann/5, shuffle/1, is_arch/1, + format_ip_and_port/2, peername/2, sockname/2, sha/1, + md5/1, make_fold_req/1, make_fold_req/2, + make_fold_req/4, make_newest_fold_req/1, proxy_spawn/1, + proxy/2, enable_job_class/1, enable_job_class/2, + disable_job_class/1, disable_job_class/2, + job_class_enabled/1, job_class_enabled/2, job_class_disabled_message/2, - report_job_request_disposition/6, - responsible_preflists/1, - responsible_preflists/2, - get_index_n/1, - preflist_siblings/1, + report_job_request_disposition/6, get_index_n/1, posix_error/1]). -include("riak_core_vnode.hrl"). @@ -95,15 +55,11 @@ -include_lib("eunit/include/eunit.hrl"). --export([counter_loop/1, - incr_counter/1, +-export([counter_loop/1, incr_counter/1, decr_counter/1]). -endif. --type - riak_core_ring() :: riak_core_ring:riak_core_ring(). - -type index() :: non_neg_integer(). -type index_n() :: {index(), pos_integer()}. @@ -121,9 +77,9 @@ posix_error(Error) -> case erl_posix_msg:message(Error) of - "unknown POSIX error" -> - lists:flatten(io_lib:format("~p", [Error])); - Message -> Message + "unknown POSIX error" -> + lists:flatten(io_lib:format("~p", [Error])); + Message -> Message end. 
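posix_error/1 falls back to printing the raw term whenever erl_posix_msg has no message for it, so it is safe to call with any error reason. For example (shell session, illustrative):

1> riak_core_util:posix_error(enoent).
"no such file or directory"
2> riak_core_util:posix_error(some_odd_reason).
"some_odd_reason"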
%% @spec moment() -> integer()
@@ -160,13 +116,11 @@ rfc1123_to_now(String) when is_list(String) ->
 %% to the new directory.
 make_tmp_dir() ->
     TmpId = io_lib:format("riptemp.~p",
-			  [erlang:phash2({riak_core_rand:uniform(), self()})]),
+			  [erlang:phash2({rand:uniform(), self()})]),
     TempDir = filename:join("/tmp", TmpId),
     case filelib:is_dir(TempDir) of
-      true -> make_tmp_dir();
-      false ->
-	  ok = file:make_dir(TempDir),
-	  TempDir
+      true -> make_tmp_dir();
+      false -> ok = file:make_dir(TempDir), TempDir
     end.
 
 %% @doc Atomically/safely (to some reasonable level of durablity)
@@ -179,18 +133,18 @@ make_tmp_dir() ->
 replace_file(FN, Data) ->
     TmpFN = FN ++ ".tmp",
     case file:open(TmpFN, [write, raw]) of
-      {ok, FH} ->
-	  try ok = file:write(FH, Data),
-	      ok = file:sync(FH),
-	      ok = file:close(FH),
-	      ok = file:rename(TmpFN, FN),
-	      {ok, Contents} = read_file(FN),
-	      true = Contents == iolist_to_binary(Data),
-	      ok
-	  catch
-	    _:Err -> {error, Err}
-	  end;
-      Err -> Err
+      {ok, FH} ->
+	  try ok = file:write(FH, Data),
+	      ok = file:sync(FH),
+	      ok = file:close(FH),
+	      ok = file:rename(TmpFN, FN),
+	      {ok, Contents} = read_file(FN),
+	      true = Contents == iolist_to_binary(Data),
+	      ok
+	  catch
+	    _:Err -> {error, Err}
+	  end;
+      Err -> Err
     end.
 
 %% @doc Similar to {@link file:read_file/1} but uses raw file `I/O'
@@ -202,8 +156,8 @@ read_file(FName) ->
 
 read_file(FD, Acc) ->
     case file:read(FD, 4096) of
-      {ok, Data} -> read_file(FD, [Data | Acc]);
-      eof -> lists:reverse(Acc)
+      {ok, Data} -> read_file(FD, [Data | Acc]);
+      eof -> lists:reverse(Acc)
     end.
 
 %% @spec integer_to_list(Integer :: integer(), Base :: integer()) ->
@@ -267,31 +221,22 @@ reload_all(Module) ->
 mkclientid(RemoteNode) ->
     {{Y, Mo, D}, {H, Mi, S}} = erlang:universaltime(),
     {_, _, NowPart} = os:timestamp(),
-    Id = erlang:phash2([Y,
-			Mo,
-			D,
-			H,
-			Mi,
-			S,
-			node(),
-			RemoteNode,
-			NowPart,
-			self()]),
+    Id = erlang:phash2([Y, Mo, D, H, Mi, S, node(),
+			RemoteNode, NowPart, self()]),
     <<Id:32>>.
 
 %% @spec chash_key(BKey :: riak_object:bkey()) -> chash:index()
 %% @doc Create a binary used for determining replica placement.
-chash_key({Bucket, _Key} = BKey) ->
-    BucketProps = riak_core_bucket:get_bucket(Bucket),
-    chash_key(BKey, BucketProps).
+chash_key(BKey) ->
+    %% TODO remove
+    %% BucketProps = riak_core_bucket:get_bucket(Bucket),
+    chash_key(BKey, undefined).
 
 %% @spec chash_key(BKey :: riak_object:bkey(), [{atom(), any()}]) ->
 %%          chash:index()
 %% @doc Create a binary used for determining replica placement.
 chash_key({Bucket, Key}, _BucketProps) ->
-    %{_, {M, F}} = lists:keyfind(chash_keyfun, 1, BucketProps),
-    %M:F({Bucket,Key}).
-    % FIX static keyfun
+    % static keyfun
    chash_std_keyfun({Bucket, Key}).
 
 %% @spec chash_std_keyfun(BKey :: riak_object:bkey()) -> chash:index()
@@ -308,21 +253,21 @@ str_to_node(Node) when is_atom(Node) ->
     str_to_node(atom_to_list(Node));
 str_to_node(NodeStr) ->
     case string:tokens(NodeStr, "@") of
-      [NodeName] ->
-	  %% Node name only; no host name. If the local node has a hostname,
-	  %% append it
-	  case node_hostname() of
-	    [] -> list_to_atom(NodeName);
-	    Hostname -> list_to_atom(NodeName ++ "@" ++ Hostname)
-	  end;
-      _ -> list_to_atom(NodeStr)
+      [NodeName] ->
+	  %% Node name only; no host name. If the local node has a hostname,
+	  %% append it
+	  case node_hostname() of
+	    [] -> list_to_atom(NodeName);
+	    Hostname -> list_to_atom(NodeName ++ "@" ++ Hostname)
+	  end;
+      _ -> list_to_atom(NodeStr)
     end.
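replace_file/2 above implements the usual write-sync-close-rename sequence and then reads the file back to verify its contents; a rename within one directory is atomic on POSIX filesystems, so readers observe either the old file or the complete new one, never a partial write. Illustrative usage (path and term are hypothetical):

ok = riak_core_util:replace_file("/tmp/demo.state",
                                 io_lib:format("~p.~n", [{epoch, 42}])).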
node_hostname() -> NodeStr = atom_to_list(node()), case string:tokens(NodeStr, "@") of - [_NodeName, Hostname] -> Hostname; - _ -> [] + [_NodeName, Hostname] -> Hostname; + _ -> [] end. %% @spec start_app_deps(App :: atom()) -> ok @@ -336,8 +281,8 @@ start_app_deps(App) -> %% @doc Start the named application if not already started. ensure_started(App) -> case application:start(App) of - ok -> ok; - {error, {already_started, App}} -> ok + ok -> ok; + {error, {already_started, App}} -> ok end. %% @doc Applies `Pred' to each element in `List', and returns a count of how many @@ -348,8 +293,8 @@ ensure_started(App) -> count(Pred, List) -> FoldFun = fun (E, A) -> case Pred(E) of - false -> A; - true -> A + 1 + false -> A; + true -> A + 1 end end, lists:foldl(FoldFun, 0, List). @@ -380,8 +325,7 @@ multi_keydelete(KeysToDelete, N, TupleList) -> lists:foldl(fun (Key, Acc) -> lists:keydelete(Key, N, Acc) end, - TupleList, - KeysToDelete). + TupleList, KeysToDelete). %% @doc Function composition: returns a function that is the composition of %% `F' and `G'. @@ -415,20 +359,14 @@ pmap(F, L) -> spawn_link(fun () -> Parent ! {pmap, N, F(X)} end), N + 1 end, - 0, - L), + 0, L), L2 = [receive {pmap, N, R} -> {N, R} end || _ <- L], L3 = lists:keysort(1, L2), [R || {_, R} <- L3]. -record(pmap_acc, - {mapper, - fn, - n_pending = 0, - pending = sets:new(), - n_done = 0, - done = [], - max_concurrent = 1}). + {mapper, fn, n_pending = 0, pending = sets:new(), + n_done = 0, done = [], max_concurrent = 1}). %% @doc Parallel map with a cap on the number of concurrent worker processes. %% Note: Worker processes are linked to the parent, so a crash propagates. @@ -476,21 +414,21 @@ pmap_worker(X, %% @doc Waits for one pending pmap task to finish pmap_collect_one(Pending) -> receive - {pmap_result, Pid, Result} -> - Size = sets:size(Pending), - NewPending = sets:del_element(Pid, Pending), - case sets:size(NewPending) of - Size -> pmap_collect_one(Pending); - _ -> {Result, NewPending} - end + {pmap_result, Pid, Result} -> + Size = sets:size(Pending), + NewPending = sets:del_element(Pid, Pending), + case sets:size(NewPending) of + Size -> pmap_collect_one(Pending); + _ -> {Result, NewPending} + end end. pmap_collect_rest(Pending, Done) -> case sets:size(Pending) of - 0 -> Done; - _ -> - {Result, NewPending} = pmap_collect_one(Pending), - pmap_collect_rest(NewPending, [Result | Done]) + 0 -> Done; + _ -> + {Result, NewPending} = pmap_collect_one(Pending), + pmap_collect_rest(NewPending, [Result | Done]) end. %% @doc Wraps an rpc:call/4 in a try/catch to handle the case where the @@ -504,10 +442,10 @@ pmap_collect_rest(Pending, Done) -> safe_rpc(Node, Module, Function, Args) -> try rpc:call(Node, Module, Function, Args) of - Result -> Result + Result -> Result catch - exit:{noproc, _NoProcDetails} -> - {badrpc, rpc_process_down} + exit:{noproc, _NoProcDetails} -> + {badrpc, rpc_process_down} end. %% @doc Wraps an rpc:call/5 in a try/catch to handle the case where the @@ -520,10 +458,10 @@ safe_rpc(Node, Module, Function, Args) -> safe_rpc(Node, Module, Function, Args, Timeout) -> try rpc:call(Node, Module, Function, Args, Timeout) of - Result -> Result + Result -> Result catch - 'EXIT':{noproc, _NoProcDetails} -> - {badrpc, rpc_process_down} + 'EXIT':{noproc, _NoProcDetails} -> + {badrpc, rpc_process_down} end. 
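Both safe_rpc wrappers only catch the noproc exit raised when the local rex server is down; every other failure still surfaces as an ordinary {badrpc, Reason}. A caller-side sketch (function and node name are hypothetical):

check_peer(Peer) ->
    case riak_core_util:safe_rpc(Peer, erlang, node, [], 5000) of
        {badrpc, rpc_process_down} -> {error, local_rex_down};
        {badrpc, Reason} -> {error, Reason};
        Node when is_atom(Node) -> {ok, Node}
    end.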
%% @spec rpc_every_member(atom(), atom(), [term()], integer()|infinity) @@ -541,11 +479,8 @@ rpc_every_member(Module, Function, Args, Timeout) -> rpc_every_member_ann(Module, Function, Args, Timeout) -> {ok, MyRing} = riak_core_ring_manager:get_my_ring(), Nodes = riak_core_ring:all_members(MyRing), - {Results, Down} = multicall_ann(Nodes, - Module, - Function, - Args, - Timeout), + {Results, Down} = multicall_ann(Nodes, Module, Function, + Args, Timeout), {Results, Down}. %% @doc Perform an RPC call to a list of nodes in parallel, returning the @@ -629,9 +564,9 @@ multicall_ann(Nodes, Mod, Fun, Args, Timeout) -> build_tree(N, Nodes, Opts) -> case lists:member(cycles, Opts) of - true -> - Expand = lists:flatten(lists:duplicate(N + 1, Nodes)); - false -> Expand = Nodes + true -> + Expand = lists:flatten(lists:duplicate(N + 1, Nodes)); + false -> Expand = Nodes end, {Tree, _} = lists:foldl(fun (Elm, {Result, Worklist}) -> Len = erlang:min(N, length(Worklist)), @@ -640,27 +575,23 @@ build_tree(N, Nodes, Opts) -> NewResult = [{Elm, Children} | Result], {NewResult, Rest} end, - {[], tl(Expand)}, - Nodes), + {[], tl(Expand)}, Nodes), orddict:from_list(Tree). orddict_delta(A, B) -> %% Pad both A and B to the same length DummyA = [{Key, '$none'} || {Key, _} <- B], - A2 = orddict:merge(fun (_, Value, _) -> Value end, - A, + A2 = orddict:merge(fun (_, Value, _) -> Value end, A, DummyA), DummyB = [{Key, '$none'} || {Key, _} <- A], - B2 = orddict:merge(fun (_, Value, _) -> Value end, - B, + B2 = orddict:merge(fun (_, Value, _) -> Value end, B, DummyB), %% Merge and filter out equal values Merged = orddict:merge(fun (_, AVal, BVal) -> {AVal, BVal} end, - A2, - B2), - Diff = orddict:filter(fun (_, {Same, Same}) -> false; + A2, B2), + Diff = orddict:filter(fun (_, {_Same, _Same}) -> false; (_, _) -> true end, Merged), @@ -668,7 +599,7 @@ orddict_delta(A, B) -> shuffle(L) -> N = 134217727, %% Largest small integer on 32-bit Erlang - L2 = [{riak_core_rand:uniform(N), E} || E <- L], + L2 = [{rand:uniform(N), E} || E <- L], L3 = [E || {_, E} <- lists:sort(L2)], L3. @@ -695,20 +626,20 @@ format_ip_and_port(Ip, Port) when is_tuple(Ip) -> lists:flatten(io_lib:format("~s:~p", [inet_parse:ntoa(Ip), Port])). -peername(Socket, Transport) -> - case Transport:peername(Socket) of - {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); - {error, Reason} -> - %% just return a string so JSON doesn't blow up - lists:flatten(io_lib:format("error:~p", [Reason])) +peername(Socket, Module) -> + case Module:peername(Socket) of + {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); + {error, Reason} -> + %% just return a string so JSON doesn't blow up + lists:flatten(io_lib:format("error:~p", [Reason])) end. -sockname(Socket, Transport) -> - case Transport:sockname(Socket) of - {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); - {error, Reason} -> - %% just return a string so JSON doesn't blow up - lists:flatten(io_lib:format("error:~p", [Reason])) +sockname(Socket, Module) -> + case Module:sockname(Socket) of + {ok, {Ip, Port}} -> format_ip_and_port(Ip, Port); + {error, Reason} -> + %% just return a string so JSON doesn't blow up + lists:flatten(io_lib:format("error:~p", [Reason])) end. %% @doc Convert a #riak_core_fold_req_v? record to the cluster's maximum @@ -746,17 +677,16 @@ proxy_spawn(Fun) -> MRef = monitor(process, Pid), Pid ! 
{proxy, MRef}, receive - {proxy_reply, MRef, Result} -> - demonitor(MRef, [flush]), - Result; - {'DOWN', MRef, _, _, Reason} -> {error, Reason} + {proxy_reply, MRef, Result} -> + demonitor(MRef, [flush]), Result; + {'DOWN', MRef, _, _, Reason} -> {error, Reason} end. %% @private make_fold_reqv(_, FoldFun, Acc0, Forwardable, Opts) when is_function(FoldFun, 3) andalso - (Forwardable == true orelse Forwardable == false) - andalso is_list(Opts) -> + (Forwardable == true orelse Forwardable == false) + andalso is_list(Opts) -> #riak_core_fold_req_v2{foldfun = FoldFun, acc0 = Acc0, forwardable = Forwardable, opts = Opts}. @@ -764,10 +694,9 @@ make_fold_reqv(_, FoldFun, Acc0, Forwardable, Opts) proxy(Parent, Fun) -> _ = monitor(process, Parent), receive - {proxy, MRef} -> - Result = Fun(), - Parent ! {proxy_reply, MRef, Result}; - {'DOWN', _, _, _, _} -> ok + {proxy, MRef} -> + Result = Fun(), Parent ! {proxy_reply, MRef, Result}; + {'DOWN', _, _, _, _} -> ok end. -spec enable_job_class(atom(), atom()) -> ok | @@ -779,7 +708,7 @@ proxy(Parent, Fun) -> %% or its complement disable_job_class/2. enable_job_class(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> enable_job_class({Application, Operation}); enable_job_class(Application, Operation) -> {error, {badarg, {Application, Operation}}}. @@ -793,7 +722,7 @@ enable_job_class(Application, Operation) -> %% or its complement enable_job_class/2. disable_job_class(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> disable_job_class({Application, Operation}); disable_job_class(Application, Operation) -> {error, {badarg, {Application, Operation}}}. @@ -805,7 +734,7 @@ disable_job_class(Application, Operation) -> %% This is the public API for use via RPC. job_class_enabled(Application, Operation) when erlang:is_atom(Application) andalso - erlang:is_atom(Operation) -> + erlang:is_atom(Operation) -> job_class_enabled({Application, Operation}); job_class_enabled(Application, Operation) -> {error, {badarg, {Application, Operation}}}. @@ -819,22 +748,19 @@ job_class_enabled(Application, Operation) -> %% * Parameter types ARE NOT validated by the same rules as the public API! %% You are STRONGLY advised to use enable_job_class/2. enable_job_class(Class) -> - case application:get_env(riak_core, - job_accept_class, + case application:get_env(riak_core, job_accept_class, undefined) of - [_ | _] = EnabledClasses -> - case lists:member(Class, EnabledClasses) of - true -> ok; - _ -> - application:set_env(riak_core, - job_accept_class, - [Class | EnabledClasses]) - end; - _ -> - application:set_env(riak_core, - job_accept_class, - [Class]) + [_ | _] = EnabledClasses -> + case lists:member(Class, EnabledClasses) of + true -> ok; + _ -> + application:set_env(riak_core, job_accept_class, + [Class | EnabledClasses]) + end; + _ -> + application:set_env(riak_core, job_accept_class, + [Class]) end. -spec disable_job_class(Class :: term()) -> ok | @@ -846,19 +772,17 @@ enable_job_class(Class) -> %% * Parameter types ARE NOT validated by the same rules as the public API! %% You are STRONGLY advised to use disable_job_class/2. 
disable_job_class(Class) -> - case application:get_env(riak_core, - job_accept_class, + case application:get_env(riak_core, job_accept_class, undefined) of - [_ | _] = EnabledClasses -> - case lists:member(Class, EnabledClasses) of - false -> ok; - _ -> - application:set_env(riak_core, - job_accept_class, - lists:delete(Class, EnabledClasses)) - end; - _ -> ok + [_ | _] = EnabledClasses -> + case lists:member(Class, EnabledClasses) of + false -> ok; + _ -> + application:set_env(riak_core, job_accept_class, + lists:delete(Class, EnabledClasses)) + end; + _ -> ok end. -spec job_class_enabled(Class :: term()) -> boolean(). @@ -869,23 +793,22 @@ disable_job_class(Class) -> %% * Parameter types ARE NOT validated by the same rules as the public API! %% You are STRONGLY advised to use job_class_enabled/2. job_class_enabled(Class) -> - case application:get_env(riak_core, - job_accept_class, + case application:get_env(riak_core, job_accept_class, undefined) of - undefined -> true; - [] -> false; - [_ | _] = EnabledClasses -> - lists:member(Class, EnabledClasses); - Other -> - % Don't crash if it's not a list - that should never be the case, - % but since the value *can* be manipulated externally be more - % accommodating. If someone mucks it up, nothing's going to be - % allowed, but give them a chance to catch on instead of crashing. - _ = logger:error("riak_core.job_accept_class is not a " - "list: ~p", - [Other]), - false + undefined -> true; + [] -> false; + [_ | _] = EnabledClasses -> + lists:member(Class, EnabledClasses); + Other -> + % Don't crash if it's not a list - that should never be the case, + % but since the value *can* be manipulated externally be more + % accommodating. If someone mucks it up, nothing's going to be + % allowed, but give them a chance to catch on instead of crashing. + _ = logger:error("riak_core.job_accept_class is not a " + "list: ~p", + [Other]), + false end. -spec job_class_disabled_message(ReturnType :: atom(), @@ -946,100 +869,13 @@ report_job_request_disposition(false, Class, Mod, Func, -spec get_index_n({binary(), binary()}) -> index_n(). get_index_n({Bucket, Key}) -> - BucketProps = riak_core_bucket:get_bucket(Bucket), - N = proplists:get_value(n_val, BucketProps), + %% BucketProps = riak_core_bucket:get_bucket(Bucket), + {ok, N} = application:get_env(riak_core, target_n_val), ChashKey = riak_core_util:chash_key({Bucket, Key}), {ok, CHBin} = riak_core_ring_manager:get_chash_bin(), Index = chashbin:responsible_index(ChashKey, CHBin), {Index, N}. -%% @doc Given an index, determine all sibling indices that participate in one -%% or more preflists with the specified index. --spec preflist_siblings(index()) -> [index()]. - -preflist_siblings(Index) -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - preflist_siblings(Index, Ring). - -%% @doc See {@link preflist_siblings/1}. --spec preflist_siblings(index(), - riak_core_ring()) -> [index()]. - -preflist_siblings(Index, Ring) -> - MaxN = determine_max_n(Ring), - preflist_siblings(Index, MaxN, Ring). - --spec preflist_siblings(index(), pos_integer(), - riak_core_ring()) -> [index()]. - -preflist_siblings(Index, N, Ring) -> - IndexBin = <>, - PL = riak_core_ring:preflist(IndexBin, Ring), - Indices = [Idx || {Idx, _} <- PL], - RevIndices = lists:reverse(Indices), - {Succ, _} = lists:split(N - 1, Indices), - {Pred, _} = lists:split(N - 1, tl(RevIndices)), - lists:reverse(Pred) ++ Succ. - --spec responsible_preflists(index()) -> [index_n()]. 
- -responsible_preflists(Index) -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - responsible_preflists(Index, Ring). - --spec responsible_preflists(index(), - riak_core_ring()) -> [index_n()]. - -responsible_preflists(Index, Ring) -> - AllN = determine_all_n(Ring), - responsible_preflists(Index, AllN, Ring). - --spec responsible_preflists(index(), - [pos_integer(), ...], - riak_core_ring()) -> [index_n()]. - -responsible_preflists(Index, AllN, Ring) -> - IndexBin = <>, - PL = riak_core_ring:preflist(IndexBin, Ring), - Indices = [Idx || {Idx, _} <- PL], - RevIndices = lists:reverse(Indices), - lists:flatmap(fun (N) -> - responsible_preflists_n(RevIndices, N) - end, - AllN). - --spec responsible_preflists_n([index()], - pos_integer()) -> [index_n()]. - -responsible_preflists_n(RevIndices, N) -> - {Pred, _} = lists:split(N, RevIndices), - [{Idx, N} || Idx <- lists:reverse(Pred)]. - --spec - determine_max_n(riak_core_ring()) -> pos_integer(). - -determine_max_n(Ring) -> - lists:max(determine_all_n(Ring)). - --spec - determine_all_n(riak_core_ring()) -> [pos_integer(), ...]. - -determine_all_n(Ring) -> - Buckets = riak_core_ring:get_buckets(Ring), - BucketProps = [riak_core_bucket:get_bucket(Bucket, Ring) - || Bucket <- Buckets], - Default = application:get_env(riak_core, - default_bucket_props, - undefined), - DefaultN = proplists:get_value(n_val, Default), - AllN = lists:foldl(fun (Props, AllN) -> - N = proplists:get_value(n_val, Props), - ordsets:add_element(N, AllN) - end, - [DefaultN], - BucketProps), - AllN. - %% =================================================================== %% EUnit tests %% =================================================================== @@ -1054,89 +890,51 @@ clientid_uniqueness_test() -> ClientIds = [mkclientid(somenode@somehost) || _I <- lists:seq(0, 10000)], length(ClientIds) =:= - length(sets:to_list(sets:from_list(ClientIds))). + length(sets:to_list(sets:from_list(ClientIds))). build_tree_test() -> - Flat = [1, - 11, - 12, - 111, - 112, - 121, - 122, - 1111, - 1112, - 1121, - 1122, - 1211, - 1212, - 1221, - 1222], + Flat = [1, 11, 12, 111, 112, 121, 122, 1111, 1112, 1121, + 1122, 1211, 1212, 1221, 1222], %% 2-ary tree decomposition - ATree = [{1, [11, 12]}, - {11, [111, 112]}, - {12, [121, 122]}, - {111, [1111, 1112]}, - {112, [1121, 1122]}, - {121, [1211, 1212]}, - {122, [1221, 1222]}, - {1111, []}, - {1112, []}, - {1121, []}, - {1122, []}, - {1211, []}, - {1212, []}, - {1221, []}, + ATree = [{1, [11, 12]}, {11, [111, 112]}, + {12, [121, 122]}, {111, [1111, 1112]}, + {112, [1121, 1122]}, {121, [1211, 1212]}, + {122, [1221, 1222]}, {1111, []}, {1112, []}, {1121, []}, + {1122, []}, {1211, []}, {1212, []}, {1221, []}, {1222, []}], %% 2-ary tree decomposition with cyclic wrap-around - CTree = [{1, [11, 12]}, - {11, [111, 112]}, - {12, [121, 122]}, - {111, [1111, 1112]}, - {112, [1121, 1122]}, - {121, [1211, 1212]}, - {122, [1221, 1222]}, - {1111, [1, 11]}, - {1112, [12, 111]}, - {1121, [112, 121]}, - {1122, [122, 1111]}, - {1211, [1112, 1121]}, - {1212, [1122, 1211]}, - {1221, [1212, 1221]}, - {1222, [1222, 1]}], + CTree = [{1, [11, 12]}, {11, [111, 112]}, + {12, [121, 122]}, {111, [1111, 1112]}, + {112, [1121, 1122]}, {121, [1211, 1212]}, + {122, [1221, 1222]}, {1111, [1, 11]}, {1112, [12, 111]}, + {1121, [112, 121]}, {1122, [122, 1111]}, + {1211, [1112, 1121]}, {1212, [1122, 1211]}, + {1221, [1212, 1221]}, {1222, [1222, 1]}], ?assertEqual(ATree, (build_tree(2, Flat, []))), ?assertEqual(CTree, (build_tree(2, Flat, [cycles]))), ok. 
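A smaller worked example of the same decomposition, derived by hand from the fold in build_tree/3 (illustrative; the function name is hypothetical):

small_build_tree_demo() ->
    %% flat list: each node takes the next N worklist entries as children
    [{a, [b, c]}, {b, []}, {c, []}] =
        riak_core_util:build_tree(2, [a, b, c], []),
    %% with cycles the worklist wraps, so leaves point back into the tree
    [{a, [b, c]}, {b, [a, b]}, {c, [c, a]}] =
        riak_core_util:build_tree(2, [a, b, c], [cycles]),
    ok.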
counter_loop(N) -> receive - {up, Pid} -> - N2 = N + 1, - Pid ! {counter_value, N2}, - counter_loop(N2); - down -> counter_loop(N - 1); - exit -> exit(normal) + {up, Pid} -> + N2 = N + 1, Pid ! {counter_value, N2}, counter_loop(N2); + down -> counter_loop(N - 1); + exit -> exit(normal) end. incr_counter(CounterPid) -> CounterPid ! {up, self()}, receive - {counter_value, N} -> N after 3000 -> ?assert(false) + {counter_value, N} -> N after 3000 -> ?assert(false) end. decr_counter(CounterPid) -> CounterPid ! down. multi_keydelete_test_() -> - Languages = [{lisp, 1958}, - {ml, 1973}, - {erlang, 1986}, - {haskell, 1990}, - {ocaml, 1996}, - {clojure, 2007}, + Languages = [{lisp, 1958}, {ml, 1973}, {erlang, 1986}, + {haskell, 1990}, {ocaml, 1996}, {clojure, 2007}, {elixir, 2012}], - ?_assertMatch([{lisp, _}, - {ml, _}, - {erlang, _}, + ?_assertMatch([{lisp, _}, {ml, _}, {erlang, _}, {haskell, _}], (multi_keydelete([ocaml, clojure, elixir], Languages))). @@ -1148,8 +946,7 @@ compose_test_() -> Increment = fun (N) when is_integer(N) -> N + 1 end, Double = fun (N) when is_integer(N) -> N * 2 end, Square = fun (N) when is_integer(N) -> N * N end, - SquareDoubleIncrement = compose([Increment, - Double, + SquareDoubleIncrement = compose([Increment, Double, Square]), CompatibleTypes = compose(Increment, fun (X) when is_list(X) -> list_to_integer(X) @@ -1174,8 +971,7 @@ pmap_test_() -> end, Lin = [1, 2, 3, 4], Lout = [2, 4, 6, 8], - {setup, - fun () -> error_logger:tty(false) end, + {setup, fun () -> error_logger:tty(false) end, fun (_) -> error_logger:tty(true) end, [fun () -> % Test simple map case @@ -1192,8 +988,8 @@ pmap_test_() -> end), MonRef = monitor(process, Pid), receive - {'DOWN', MonRef, _, _, _} -> ok; - no_crash_yo -> ?assert(pmap_did_not_crash_as_expected) + {'DOWN', MonRef, _, _, _} -> ok; + no_crash_yo -> ?assert(pmap_did_not_crash_as_expected) end end]}. @@ -1203,7 +999,7 @@ bounded_pmap_test_() -> GFun = fun (Max) -> fun (X) -> ?assert((incr_counter(CountPid) =< - Max)), + Max)), timer:sleep(1), decr_counter(CountPid), Fun1(X) @@ -1211,8 +1007,7 @@ bounded_pmap_test_() -> end, [fun () -> ?assertEqual((lists:seq(Fun1(1), Fun1(N))), - (pmap(GFun(MaxP), - lists:seq(1, N), + (pmap(GFun(MaxP), lists:seq(1, N), MaxP))) end || MaxP <- lists:seq(1, 20), N <- lists:seq(0, 10)] @@ -1226,11 +1021,11 @@ bounded_pmap_test_() -> fun (Pid) -> Pid ! exit, receive - {'DOWN', _Ref, process, Pid, _Info} -> ok - after 3000 -> - ?debugMsg("pmap counter process did not go down " - "in time"), - ?assert(false) + {'DOWN', _Ref, process, Pid, _Info} -> ok + after 3000 -> + ?debugMsg("pmap counter process did not go down " + "in time"), + ?assert(false) end, ok end, @@ -1243,10 +1038,10 @@ proxy_spawn_test() -> ?assertEqual({error, killer_fun}, B), %% Ensure no errant 'DOWN' messages receive - {'DOWN', _, _, _, _} = Msg -> - throw({error, {badmsg, Msg}}); - _ -> ok - after 1000 -> ok + {'DOWN', _, _, _, _} = Msg -> + throw({error, {badmsg, Msg}}); + _ -> ok + after 1000 -> ok end. -ifdef(PROPER). @@ -1257,7 +1052,7 @@ count_test() -> prop_count_correct() -> ?FORALL(List, (list(bool())), (count(fun (E) -> E end, List) =:= - length([E || E <- List, E]))). + length([E || E <- List, E]))). -endif. %% EQC diff --git a/src/riak_core_vnode.erl b/src/riak_core_vnode.erl index b6616eb74..3e78ab22d 100644 --- a/src/riak_core_vnode.erl +++ b/src/riak_core_vnode.erl @@ -419,15 +419,15 @@ monitor(ignore) -> erlang:monitor(process, self()). 
manager_event_timer :: reference() | undefined,
	 inactivity_timeout :: non_neg_integer()}).
 
-init([Mod, Index, InitialInactivityTimeout, Forward]) ->
+init([Module, Index, InitialInactivityTimeout, Forward]) ->
     process_flag(trap_exit, true),
-    State = #state{index = Index, mod = Mod,
+    State = #state{index = Index, mod = Module,
		   forward = Forward,
		   inactivity_timeout = InitialInactivityTimeout},
     {ok, started, State, 0}.
 
 terminate(Reason, _StateName,
-	  #state{mod = Mod, modstate = ModState,
+	  #state{mod = Module, modstate = ModState,
		 pool_pid = Pool}) ->
     %% Shutdown if the pool is still alive and a normal `Reason' is
     %% given - there could be a race on delivery of the unregistered
@@ -446,10 +446,10 @@ terminate(Reason, _StateName,
			 [Type, Reason, Stacktrace])
     after
       case ModState of
-	%% Handoff completed, Mod:delete has been called, now terminate.
+	%% Handoff completed, Module:delete has been called, now terminate.
	{deleted, ModState1} ->
-	    Mod:terminate(Reason, ModState1);
-	_ -> Mod:terminate(Reason, ModState)
+	    Module:terminate(Reason, ModState1);
+	_ -> Module:terminate(Reason, ModState)
       end
     end.
 
@@ -485,8 +485,8 @@ started(wait_for_init, _From,
 %%active
 %%%%%%%%%%%%
 active(timeout,
-       State = #state{mod = Mod, index = Idx}) ->
-    riak_core_vnode_manager:vnode_event(Mod,
+       State = #state{mod = Module, index = Idx}) ->
+    riak_core_vnode_manager:vnode_event(Module,
					Idx,
					self(),
					inactive),
@@ -509,8 +509,8 @@ active(#riak_vnode_req_v1{sender = Sender,
			  request = Request},
       State = #state{handoff_type = resize,
		      handoff_target = {HOIdx, HONode}, index = Index,
-		      forward = Forward, mod = Mod}) ->
-    RequestHash = Mod:request_hash(Request),
+		      forward = Forward, mod = Module}) ->
+    RequestHash = Module:request_hash(Request),
     case RequestHash of
       %% will never have enough information to forward request so only handle locally
       undefined -> vnode_command(Sender, Request, State);
@@ -548,7 +548,7 @@ active(handoff_complete, State) ->
			      State),
     continue(State2);
 active({resize_transfer_complete, SeenIdxs},
-       State = #state{mod = Mod, modstate = ModState,
+       State = #state{mod = Module, modstate = ModState,
		      handoff_target = Target}) ->
     case Target of
       none -> continue(State);
@@ -573,25 +573,25 @@ active({trigger_handoff, TargetIdx, TargetNode},
       State) ->
     maybe_handoff(TargetIdx, TargetNode, State);
 active(trigger_delete,
-       State = #state{mod = Mod, modstate = ModState,
+       State = #state{mod = Module, modstate = ModState,
		      index = Idx}) ->
-    case mark_delete_complete(Idx, Mod) of
+    case mark_delete_complete(Idx, Module) of
       {ok, _NewRing} ->
-	  {ok, NewModState} = Mod:delete(ModState),
-	  logger:debug("~p ~p vnode deleted", [Idx, Mod]);
+	  {ok, NewModState} = Module:delete(ModState),
+	  logger:debug("~p ~p vnode deleted", [Idx, Module]);
       _ -> NewModState = ModState
     end,
     maybe_shutdown_pool(State),
-    riak_core_vnode_manager:unregister_vnode(Idx, Mod),
+    riak_core_vnode_manager:unregister_vnode(Idx, Module),
     continue(State#state{modstate = {deleted, NewModState}});
 active(unregistered,
-       State = #state{mod = Mod, index = Index}) ->
+       State = #state{mod = Module, index = Index}) ->
     %% Add exclusion so the ring handler will not try to spin this vnode
     %% up until it receives traffic.
- riak_core_handoff_manager:add_exclusion(Mod, Index), + riak_core_handoff_manager:add_exclusion(Module, Index), logger:debug("~p ~p vnode excluded and unregistered.", - [Index, Mod]), + [Index, Module]), {stop, normal, State#state{handoff_target = none, @@ -626,25 +626,25 @@ handle_event(finish_handoff, _StateName, stop_manager_event_timer(State), continue(State#state{handoff_target = none}); handle_event(finish_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState, + State = #state{mod = Module, modstate = ModState, handoff_target = Target}) -> stop_manager_event_timer(State), case Target of none -> continue(State); _ -> - {ok, NewModState} = Mod:handoff_finished(Target, + {ok, NewModState} = Module:handoff_finished(Target, ModState), finish_handoff(State#state{modstate = NewModState}) end; handle_event(cancel_handoff, _StateName, - State = #state{mod = Mod, modstate = ModState}) -> + State = #state{mod = Module, modstate = ModState}) -> %% it would be nice to pass {Err, Reason} to the vnode but the %% API doesn't currently allow for that. stop_manager_event_timer(State), case State#state.handoff_target of none -> continue(State); _ -> - {ok, NewModState} = Mod:handoff_cancelled(ModState), + {ok, NewModState} = Module:handoff_cancelled(ModState), continue(State#state{handoff_target = none, handoff_type = undefined, modstate = NewModState}) @@ -700,8 +700,8 @@ handle_sync_event({handoff_data, _BinObj}, _From, State#state.inactivity_timeout}; handle_sync_event({handoff_data, BinObj}, _From, StateName, - State = #state{mod = Mod, modstate = ModState}) -> - case Mod:handle_handoff_data(BinObj, ModState) of + State = #state{mod = Module, modstate = ModState}) -> + case Module:handle_handoff_data(BinObj, ModState) of {reply, ok, NewModState} -> {reply, ok, @@ -710,7 +710,7 @@ handle_sync_event({handoff_data, BinObj}, _From, State#state.inactivity_timeout}; {reply, {error, Err}, NewModState} -> logger:error("~p failed to store handoff obj: ~p", - [Mod, Err]), + [Module, Err]), {reply, {error, Err}, StateName, @@ -718,7 +718,7 @@ handle_sync_event({handoff_data, BinObj}, _From, State#state.inactivity_timeout} end; handle_sync_event(core_status, _From, StateName, - State = #state{index = Index, mod = Mod, + State = #state{index = Index, mod = Module, modstate = ModState, handoff_target = HT, forward = FN}) -> Mode = case {FN, HT} of @@ -727,7 +727,7 @@ handle_sync_event(core_status, _From, StateName, {FN, none} -> forward; _ -> undefined end, - Status = [{index, Index}, {mod, Mod}] ++ + Status = [{index, Index}, {mod, Module}] ++ case FN of undefined -> []; _ -> [{forward, FN}] @@ -760,14 +760,14 @@ handle_info({'$vnode_proxy_ping', From, Ref, Msgs}, State, State#state.inactivity_timeout}; handle_info({'EXIT', Pid, Reason}, _StateName, - State = #state{mod = Mod, index = Index, pool_pid = Pid, - pool_config = PoolConfig}) -> + State = #state{mod = Module, index = Index, + pool_pid = Pid, pool_config = PoolConfig}) -> case Reason of Reason when Reason == normal; Reason == shutdown -> continue(State#state{pool_pid = undefined}); _ -> logger:error("~p ~p worker pool crashed ~p\n", - [Index, Mod, Reason]), + [Index, Module, Reason]), {pool, WorkerModule, PoolSize, WorkerArgs} = PoolConfig, logger:debug("starting worker pool ~p with size of " "~p for vnode ~p.", @@ -788,20 +788,20 @@ handle_info({'DOWN', _Ref, process, _Pid, normal}, %% need them in other states continue(State); handle_info(Info, _StateName, - State = #state{mod = Mod, modstate = {deleted, _}, + State = #state{mod = 
Module, modstate = {deleted, _},
		   index = Index}) ->
     logger:info("~p ~p ignored handle_info ~p - vnode "
		 "unregistering\n",
-		[Index, Mod, Info]),
+		[Index, Module, Info]),
     continue(State);
 handle_info({'EXIT', Pid, Reason}, StateName,
-	    State = #state{mod = Mod, modstate = ModState}) ->
+	    State = #state{mod = Module, modstate = ModState}) ->
     %% A linked processes has died so use the
     %% handle_exit callback to allow the vnode
     %% process to take appropriate action.
     %% If the function is not implemented default
     %% to crashing the process.
-    try case Mod:handle_exit(Pid, Reason, ModState) of
+    try case Module:handle_exit(Pid, Reason, ModState) of
	  {noreply, NewModState} ->
	      {next_state,
	       StateName,
@@ -814,10 +814,10 @@ handle_info({'EXIT', Pid, Reason}, StateName,
       _ErrorType:undef -> {stop, linked_process_crash, State}
     end;
 handle_info(Info, StateName,
-	    State = #state{mod = Mod, modstate = ModState}) ->
-    case erlang:function_exported(Mod, handle_info, 2) of
+	    State = #state{mod = Module, modstate = ModState}) ->
+    case erlang:function_exported(Module, handle_info, 2) of
       true ->
-	  {ok, NewModState} = Mod:handle_info(Info, ModState),
+	  {ok, NewModState} = Module:handle_info(Info, ModState),
	  {next_state,
	   StateName,
	   State#state{modstate = NewModState},
@@ -834,17 +834,17 @@ handle_info(Info, StateName,
 %% Internal Helper Functions
 %% ========
 %% ========================
-do_init(State = #state{index = Index, mod = Mod,
+do_init(State = #state{index = Index, mod = Module,
			forward = Forward}) ->
-    {ModState, Props} = case Mod:init([Index]) of
+    {ModState, Props} = case Module:init([Index]) of
			  {ok, MS} -> {MS, []};
			  {ok, MS, P} -> {MS, P};
			  {error, R} -> {error, R}
			end,
-    case {ModState, Props} of
+    case {ModState, Props} of
       {error, Reason} -> {error, Reason};
       _ ->
	  case lists:keyfind(pool, 1, Props) of
	    {pool, WorkerModule, PoolSize, WorkerArgs} =
		PoolConfig ->
		logger:debug("starting worker pool ~p with size of "
			     "~p~n",
			     [WorkerModule, PoolSize]),
		{ok, PoolPid} =
		    riak_core_vnode_worker_pool:start_link(WorkerModule,
							   PoolSize, Index,
							   WorkerArgs,
							   worker_props);
	    _ -> PoolPid = PoolConfig = undefined
	  end,
-	  riak_core_handoff_manager:remove_exclusion(Mod, Index),
+	  riak_core_handoff_manager:remove_exclusion(Module, Index),
	  Timeout = application:get_env(riak_core,
					vnode_inactivity_timeout,
					?DEFAULT_TIMEOUT),
-	  Timeout2 = Timeout + riak_core_rand:uniform(Timeout),
+	  Timeout2 = Timeout + rand:uniform(Timeout),
	  State2 = State#state{modstate = ModState,
			       inactivity_timeout = Timeout2,
			       pool_pid = PoolPid, pool_config = PoolConfig},
	  logger:debug("vnode :: ~p/~p :: ~p~n",
-		       [Mod, Index, Forward]),
+		       [Module, Index, Forward]),
	  State3 = mod_set_forwarding(Forward, State2),
	  {ok, State3}
    end.
@@ -910,11 +910,11 @@ continue(State, NewModState) ->
 %% to a partition for which the transfer has already completed, are forwarded. All other
 %% requests are passed to handle_handoff_command.
forward_or_vnode_command(Sender, Request,
-			 State = #state{forward = Forward, mod = Mod,
+			 State = #state{forward = Forward, mod = Module,
					index = Index}) ->
     Resizing = is_list(Forward),
     RequestHash = case Resizing of
-		    true -> Mod:request_hash(Request);
+		    true -> Module:request_hash(Request);
		    false -> undefined
		  end,
     case {Forward, RequestHash} of
@@ -946,13 +946,13 @@ vnode_command(_Sender, _Request,
	      State = #state{modstate = {deleted, _}}) ->
     continue(State);
 vnode_command(Sender, Request,
-	      State = #state{mod = Mod, modstate = ModState,
+	      State = #state{mod = Module, modstate = ModState,
			     pool_pid = Pool}) ->
-    case catch Mod:handle_command(Request, Sender, ModState)
+    case catch Module:handle_command(Request, Sender, ModState)
	of
       {'EXIT', ExitReason} ->
	  reply(Sender, {vnode_error, ExitReason}),
-	  logger:error("~p command failed ~p", [Mod, ExitReason]),
+	  logger:error("~p command failed ~p", [Module, ExitReason]),
	  {stop, ExitReason, State#state{modstate = ModState}};
       continue -> continue(State, ModState);
       {reply, Reply, NewModState} ->
@@ -971,19 +971,19 @@ vnode_command(Sender, Request,
     end.
 
 vnode_coverage(Sender, Request, KeySpaces,
-	       State = #state{index = Index, mod = Mod,
+	       State = #state{index = Index, mod = Module,
			      modstate = ModState, pool_pid = Pool,
			      forward = Forward}) ->
     %% Check if we should forward
     case Forward of
       undefined ->
-	  Action = Mod:handle_coverage(Request,
+	  Action = Module:handle_coverage(Request,
				       KeySpaces,
				       Sender,
				       ModState);
       %% handle coverage requests locally during ring resize
       Forwards when is_list(Forwards) ->
-	  Action = Mod:handle_coverage(Request,
+	  Action = Module:handle_coverage(Request,
				       KeySpaces,
				       Sender,
				       ModState);
@@ -1015,10 +1015,10 @@ vnode_coverage(Sender, Request, KeySpaces,
     end.
 
 vnode_handoff_command(Sender, Request, ForwardTo,
-		      State = #state{mod = Mod, modstate = ModState,
+		      State = #state{mod = Module, modstate = ModState,
				     handoff_target = HOTarget,
				     handoff_type = HOType, pool_pid = Pool}) ->
-    case Mod:handle_handoff_command(Request,
+    case Module:handle_handoff_command(Request,
				    Sender,
				    ModState)
	of
@@ -1210,13 +1210,13 @@ mark_handoff_complete(Idx, {Idx, New}, [], Mod, _) ->
 finish_handoff(State) -> finish_handoff([], State).
 
 finish_handoff(SeenIdxs,
-	       State = #state{mod = Mod, modstate = ModState,
+	       State = #state{mod = Module, modstate = ModState,
			      index = Idx, handoff_target = Target,
			      handoff_type = HOType}) ->
     case mark_handoff_complete(Idx,
			       Target,
			       SeenIdxs,
-			       Mod,
+			       Module,
			       HOType)
	of
       continue ->
@@ -1236,10 +1236,10 @@ finish_handoff(SeenIdxs,
	  %% Shutdown the async pool beforehand, don't want callbacks
	  %% running on non-existant data.
	  maybe_shutdown_pool(State),
-	  {ok, NewModState} = Mod:delete(ModState),
+	  {ok, NewModState} = Module:delete(ModState),
	  logger:debug("~p ~p vnode finished handoff and deleted.",
-		       [Idx, Mod]),
-	  riak_core_vnode_manager:unregister_vnode(Idx, Mod),
+		       [Idx, Module]),
+	  riak_core_vnode_manager:unregister_vnode(Idx, Module),
	  logger:debug("vnode hn/fwd :: ~p/~p :: ~p -> ~p~n",
		       [State#state.mod,
			State#state.index,
@@ -1308,7 +1308,7 @@ maybe_handoff(_TargetIdx, _TargetNode,
     %% Modstate has been deleted, waiting for unregistered.  No handoff.
continue(State);
 maybe_handoff(TargetIdx, TargetNode,
-	      State = #state{index = Idx, mod = Mod,
+	      State = #state{index = Idx, mod = Module,
			     modstate = ModState,
			     handoff_target = CurrentTarget,
			     handoff_pid = HPid}) ->
@@ -1321,7 +1321,7 @@ maybe_handoff(TargetIdx, TargetNode,
	    _ ->
		logger:info("~s/~b: handoff request to ~p before "
			    "finishing handoff to ~p",
-			    [Mod, Idx, Target, CurrentTarget]),
+			    [Module, Idx, Target, CurrentTarget]),
		not ExistingHO
	  end,
     case ValidHN of
@@ -1334,7 +1334,7 @@ maybe_handoff(TargetIdx, TargetNode,
		     {_, true} -> ownership;
		     {_, false} -> hinted
		   end,
-	  case Mod:handoff_starting({HOType, Target}, ModState) of
+	  case Module:handoff_starting({HOType, Target}, ModState) of
	    {true, NewModState} ->
		start_handoff(HOType,
			      TargetIdx,
@@ -1346,8 +1346,8 @@ maybe_handoff(TargetIdx, TargetNode,
     end.
 
 start_handoff(HOType, TargetIdx, TargetNode,
-	      State = #state{mod = Mod, modstate = ModState}) ->
-    case Mod:is_empty(ModState) of
+	      State = #state{mod = Module, modstate = ModState}) ->
+    case Module:is_empty(ModState) of
       {true, NewModState} ->
	  finish_handoff(State#state{modstate = NewModState,
				     handoff_type = HOType,
@@ -1372,9 +1372,9 @@ start_handoff(HOType, TargetIdx, TargetNode,
     end.
 
 start_outbound(HOType, TargetIdx, TargetNode, Opts,
-	       State = #state{index = Idx, mod = Mod}) ->
+	       State = #state{index = Idx, mod = Module}) ->
     case riak_core_handoff_manager:add_outbound(HOType,
-						Mod,
+						Module,
						Idx,
						TargetIdx,
						TargetNode,
@@ -1386,7 +1386,7 @@ start_outbound(HOType, TargetIdx, TargetNode, Opts,
		      handoff_target = {TargetIdx, TargetNode}};
       {error, _Reason} ->
	  {ok, NewModState} =
-	      Mod:handoff_cancelled(State#state.modstate),
+	      Module:handoff_cancelled(State#state.modstate),
	  State#state{modstate = NewModState}
     end.
 
@@ -1399,8 +1399,8 @@ start_outbound(HOType, TargetIdx, TargetNode, Opts,
 %% messages until an appropriate message is received back from the vnode
 %% manager. The event timer functions below implement this logic.
 start_manager_event_timer(Event,
-			  State = #state{mod = Mod, index = Idx}) ->
-    riak_core_vnode_manager:vnode_event(Mod,
+			  State = #state{mod = Module, index = Idx}) ->
+    riak_core_vnode_manager:vnode_event(Module,
					Idx,
					self(),
					Event),
@@ -1421,9 +1421,9 @@ mod_set_forwarding(_Forward,
		   State = #state{modstate = {deleted, _}}) ->
     State;
 mod_set_forwarding(Forward,
-		   State = #state{mod = Mod, modstate = ModState}) ->
+		   State = #state{mod = Module, modstate = ModState}) ->
     case lists:member({set_vnode_forwarding, 2},
-		      Mod:module_info(exports))
+		      Module:module_info(exports))
	of
       true ->
-	  NewModState = Mod:set_vnode_forwarding(Forward,
+	  NewModState = Module:set_vnode_forwarding(Forward,
diff --git a/src/riak_core_vnode_manager.erl b/src/riak_core_vnode_manager.erl
index 5db955403..1c85dc542 100644
--- a/src/riak_core_vnode_manager.erl
+++ b/src/riak_core_vnode_manager.erl
@@ -26,28 +26,16 @@
 -export([start_link/0, stop/0]).
 
--export([init/1,
-	 handle_call/3,
-	 handle_cast/2,
-	 handle_info/2,
-	 terminate/2,
-	 code_change/3]).
-
--export([all_vnodes/0,
-	 all_vnodes/1,
-	 all_vnodes_status/0,
-	 force_handoffs/0,
-	 repair/3,
-	 all_handoffs/0,
-	 repair_status/1,
-	 xfer_complete/2,
+-export([init/1, handle_call/3, handle_cast/2,
+	 handle_info/2, terminate/2, code_change/3]).
+
+-export([all_vnodes/0, all_vnodes/1,
+	 all_vnodes_status/0, force_handoffs/0, repair/3,
+	 all_handoffs/0, repair_status/1, xfer_complete/2,
	 kill_repairs/1]).
--export([all_index_pid/1, - get_vnode_pid/2, - start_vnode/2, - unregister_vnode/2, - unregister_vnode/3, +-export([all_index_pid/1, get_vnode_pid/2, + start_vnode/2, unregister_vnode/2, unregister_vnode/3, vnode_event/4]). %% Field debugging @@ -58,31 +46,28 @@ -record(monrec, {monref, key}). -record(xfer_status, - {status :: pending | complete, - mod_src_target :: {module(), index(), index()}}). + {status :: pending | complete, + mod_src_target :: {module(), index(), index()}}). -type xfer_status() :: #xfer_status{}. -record(repair, - {mod_partition :: mod_partition(), - filter_mod_fun :: {module(), atom()}, - minus_one_xfer :: xfer_status(), - plus_one_xfer :: xfer_status(), - pairs :: [{index(), node()}]}). + {mod_partition :: mod_partition(), + filter_mod_fun :: {module(), atom()}, + minus_one_xfer :: xfer_status(), + plus_one_xfer :: xfer_status(), + pairs :: [{index(), node()}]}). -type repair() :: #repair{}. -type repairs() :: [repair()]. -record(state, - {idxtab, - forwarding :: dict:dict(), - handoff :: dict:dict(), - known_modules :: [term()], - never_started :: [{integer(), term()}], - vnode_start_tokens :: integer(), - last_ring_id :: term(), - repairs :: repairs()}). + {idxtab, forwarding :: dict:dict(), + handoff :: dict:dict(), known_modules :: [term()], + never_started :: [{integer(), term()}], + vnode_start_tokens :: integer(), + last_ring_id :: term(), repairs :: repairs()}). -include("riak_core_handoff.hrl"). @@ -107,9 +92,7 @@ %% =================================================================== start_link() -> - gen_server:start_link({local, ?MODULE}, - ?MODULE, - [], + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). stop() -> gen_server:cast(?MODULE, stop). @@ -142,8 +125,7 @@ repair_status({_Module, Partition} = ModPartition) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), Owner = riak_core_ring:index_owner(Ring, Partition), gen_server:call({?MODULE, Owner}, - {repair_status, ModPartition}, - ?LONG_TIMEOUT). + {repair_status, ModPartition}, ?LONG_TIMEOUT). %% @doc Get all handoffs known by this manager. -spec all_handoffs() -> [known_handoff()]. @@ -158,8 +140,7 @@ all_handoffs() -> xfer_complete(Origin, Xfer) -> gen_server:call({?MODULE, Origin}, - {xfer_complete, Xfer}, - ?LONG_TIMEOUT). + {xfer_complete, Xfer}, ?LONG_TIMEOUT). kill_repairs(Reason) -> gen_server:cast(?MODULE, {kill_repairs, Reason}). @@ -181,16 +162,15 @@ start_vnode(Index, VNodeMod) -> gen_server:cast(?MODULE, {Index, VNodeMod, start_vnode}). -vnode_event(Mod, Idx, Pid, Event) -> +vnode_event(Module, Idx, Pid, Event) -> gen_server:cast(?MODULE, - {vnode_event, Mod, Idx, Pid, Event}). + {vnode_event, Module, Idx, Pid, Event}). get_tab() -> gen_server:call(?MODULE, get_tab, infinity). get_vnode_pid(Index, VNodeMod) -> - gen_server:call(?MODULE, - {Index, VNodeMod, get_vnode}, + gen_server:call(?MODULE, {Index, VNodeMod, get_vnode}, infinity). %% =================================================================== @@ -201,49 +181,51 @@ get_vnode_pid(Index, VNodeMod) -> all_vnodes() -> case get_all_vnodes() of - [] -> - %% ETS error could produce empty list, call manager to be sure. - gen_server:call(?MODULE, all_vnodes, infinity); - Result -> Result + [] -> + %% ETS error could produce empty list, call manager to be sure. + gen_server:call(?MODULE, all_vnodes, infinity); + Result -> Result end. -all_vnodes(Mod) -> - case get_all_vnodes(Mod) of - [] -> - %% ETS error could produce empty list, call manager to be sure. 
- gen_server:call(?MODULE, {all_vnodes, Mod}, infinity); - Result -> Result +all_vnodes(Module) -> + case get_all_vnodes(Module) of + [] -> + %% ETS error could produce empty list, call manager to be sure. + gen_server:call(?MODULE, {all_vnodes, Module}, + infinity); + Result -> Result end. all_index_pid(VNodeMod) -> case get_all_index_pid(VNodeMod, ets_error) of - ets_error -> - gen_server:call(?MODULE, - {all_index_pid, VNodeMod}, - infinity); - Result -> Result + ets_error -> + gen_server:call(?MODULE, {all_index_pid, VNodeMod}, + infinity); + Result -> Result end. %% =================================================================== %% Protected ETS Accessors %% =================================================================== -get_all_index_pid(Mod, Default) -> +get_all_index_pid(Module, Default) -> try [list_to_tuple(L) || L - <- ets:match(?ETS, {idxrec, '_', '$1', Mod, '$2', '_'})] + <- ets:match(?ETS, + {idxrec, '_', '$1', Module, '$2', '_'})] catch - _:_ -> Default + _:_ -> Default end. get_all_vnodes() -> - Mods = [Mod - || {_App, Mod} <- riak_core:vnode_modules()], - get_all_vnodes(Mods). - -get_all_vnodes(Mods) when is_list(Mods) -> - lists:flatmap(fun (Mod) -> get_all_vnodes(Mod) end, - Mods); + Modules = [Module + || {_App, Module} <- riak_core:vnode_modules()], + get_all_vnodes(Modules). + +get_all_vnodes(Modules) when is_list(Modules) -> + lists:flatmap(fun (Module) -> get_all_vnodes(Module) + end, + Modules); get_all_vnodes(Mod) -> IdxPids = get_all_index_pid(Mod, []), [{Mod, Idx, Pid} || {Idx, Pid} <- IdxPids]. @@ -263,9 +245,7 @@ init(_State) -> repairs = []}, State2 = find_vnodes(State), AllVNodes = get_all_vnodes(Mods), - State3 = update_forwarding(AllVNodes, - Mods, - Ring, + State3 = update_forwarding(AllVNodes, Mods, Ring, State2), State4 = update_handoff(AllVNodes, Ring, CHBin, State3), schedule_management_timer(), @@ -290,14 +270,14 @@ find_vnodes(State) -> PidIdxs = lists:flatten([try [{Pid, riak_core_vnode:get_mod_index(Pid)}] catch - _:_Err -> [] + _:_Err -> [] end || Pid <- VnodePids]), %% Populate the ETS table with processes running this VNodeMod (filtered %% in the list comprehension) - F = fun (Pid, Idx, Mod) -> + F = fun (Pid, Idx, Module) -> Mref = erlang:monitor(process, Pid), - #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, + #idxrec{key = {Idx, Module}, idx = Idx, mod = Module, pid = Pid, monref = Mref} end, IdxRecs = [F(Pid, Idx, Mod) @@ -312,11 +292,9 @@ handle_call(all_vnodes_status, _From, State) -> Reply = get_all_vnodes_status(State), {reply, Reply, State}; handle_call(all_vnodes, _From, State) -> - Reply = get_all_vnodes(), - {reply, Reply, State}; + Reply = get_all_vnodes(), {reply, Reply, State}; handle_call({all_vnodes, Mod}, _From, State) -> - Reply = get_all_vnodes(Mod), - {reply, Reply, State}; + Reply = get_all_vnodes(Mod), {reply, Reply, State}; handle_call({all_index_pid, Mod}, _From, State) -> Reply = get_all_index_pid(Mod, []), {reply, Reply, State}; @@ -326,35 +304,27 @@ handle_call({Partition, Mod, get_vnode}, _From, {reply, {ok, Pid}, State}; handle_call(get_tab, _From, State) -> {reply, ets:tab2list(State#state.idxtab), State}; -handle_call({repair, - Service, - {Mod, Partition} = ModPartition, - FilterModFun}, +handle_call({repair, Service, + {Mod, Partition} = ModPartition, FilterModFun}, _From, #state{repairs = Repairs} = State) -> case get_repair(ModPartition, Repairs) of - none -> - maybe_create_repair(Partition, - Service, - ModPartition, - FilterModFun, - Mod, - Repairs, - State); - Repair -> - Pairs = 
Repair#repair.pairs, - {reply, {ok, Pairs}, State} + none -> + maybe_create_repair(Partition, Service, ModPartition, + FilterModFun, Mod, Repairs, State); + Repair -> + Pairs = Repair#repair.pairs, {reply, {ok, Pairs}, State} end; handle_call(all_handoffs, _From, State = #state{repairs = Repairs, handoff = HO}) -> Handoffs = dict:to_list(HO) ++ - transform_repair_records(Repairs), + transform_repair_records(Repairs), {reply, Handoffs, State}; handle_call({repair_status, ModPartition}, _From, State) -> Repairs = State#state.repairs, case get_repair(ModPartition, Repairs) of - none -> {reply, not_found, State}; - #repair{} -> {reply, in_progress, State} + none -> {reply, not_found, State}; + #repair{} -> {reply, in_progress, State} end; %% NOTE: The `xfer_complete' logic assumes two things: %% @@ -367,36 +337,34 @@ handle_call({xfer_complete, ModSrcTgt}, _From, State) -> {Mod, _, Partition} = ModSrcTgt, ModPartition = {Mod, Partition}, case get_repair(ModPartition, Repairs) of - none -> - logger:error("Received xfer_complete for non-existing " - "repair: ~p", - [ModPartition]), - {reply, ok, State}; - #repair{minus_one_xfer = MOX, plus_one_xfer = POX} = - R -> - R2 = if ?XFER_EQ(MOX, ModSrcTgt) -> - MOX2 = MOX#xfer_status{status = complete}, - R#repair{minus_one_xfer = MOX2}; - ?XFER_EQ(POX, ModSrcTgt) -> - POX2 = POX#xfer_status{status = complete}, - R#repair{plus_one_xfer = POX2}; - true -> - logger:error("Received xfer_complete for non-existing " - "xfer: ~p", - [ModSrcTgt]) - end, - case {?XFER_COMPLETE((R2#repair.minus_one_xfer)), - ?XFER_COMPLETE((R2#repair.plus_one_xfer))} - of - {true, true} -> - {reply, - ok, - State#state{repairs = remove_repair(R2, Repairs)}}; - _ -> - {reply, - ok, - State#state{repairs = replace_repair(R2, Repairs)}} - end + none -> + logger:error("Received xfer_complete for non-existing " + "repair: ~p", + [ModPartition]), + {reply, ok, State}; + #repair{minus_one_xfer = MOX, plus_one_xfer = POX} = + R -> + R2 = if ?XFER_EQ(MOX, ModSrcTgt) -> + MOX2 = MOX#xfer_status{status = complete}, + R#repair{minus_one_xfer = MOX2}; + ?XFER_EQ(POX, ModSrcTgt) -> + POX2 = POX#xfer_status{status = complete}, + R#repair{plus_one_xfer = POX2}; + true -> + logger:error("Received xfer_complete for non-existing " + "xfer: ~p", + [ModSrcTgt]) + end, + case {?XFER_COMPLETE((R2#repair.minus_one_xfer)), + ?XFER_COMPLETE((R2#repair.plus_one_xfer))} + of + {true, true} -> + {reply, ok, + State#state{repairs = remove_repair(R2, Repairs)}}; + _ -> + {reply, ok, + State#state{repairs = replace_repair(R2, Repairs)}} + end end; handle_call(_, _From, State) -> {reply, ok, State}. 
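A note on the pattern at work in this manager: vnode lookups hit the protected ETS table first and only fall back to the serialized gen_server call when the read fails or yields a suspicious empty result. The ets:match/2 pattern used by get_all_index_pid/2 binds the idx and pid fields of each #idxrec{} row; a throwaway shell session showing the same match (the table, module name, and printed pid are illustrative, not the manager's own):

1> T = ets:new(t, [set]).
2> ets:insert(T, {idxrec, {0, my_vnode}, 0, my_vnode, self(), make_ref()}).
true
3> [list_to_tuple(L) || L <- ets:match(T, {idxrec, '_', '$1', my_vnode, '$2', '_'})].
[{0,<0.84.0>}]

ets:match/2 returns one list per row, holding the '$1' and '$2' bindings in order, which list_to_tuple/1 turns into the {Index, Pid} pairs the callers expect.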
@@ -405,18 +373,15 @@ transform_repair_records(Repairs) -> %% module/node values in the `pairs' field against %% `minus_one_xfer' and `plus_one_xfer' lists:flatten(lists:map(fun (#repair{pairs = - [{M1SrcIdx, Mnode}, - _FixPartition, + [{M1SrcIdx, Mnode}, _FixPartition, {P1SrcIdx, Pnode}], minus_one_xfer = #xfer_status{mod_src_target = - {M1Mod, - M1SrcIdx, + {M1Mod, M1SrcIdx, _M1DstIdx}}, plus_one_xfer = #xfer_status{mod_src_target = - {P1Mod, - P1SrcIdx, + {P1Mod, P1SrcIdx, _P1DstIdx}}}) -> [{{M1Mod, M1SrcIdx}, {repair, inbound, Mnode}}, @@ -429,32 +394,25 @@ maybe_create_repair(Partition, Service, ModPartition, FilterModFun, Mod, Repairs, State) -> {ok, Ring} = riak_core_ring_manager:get_my_ring(), case riak_core_ring:pending_changes(Ring) of - [] -> - UpNodes = riak_core_node_watcher:nodes(Service), - Pairs = repair_pairs(Ring, Partition), - case check_up(Pairs, UpNodes) of - true -> - create_repair(Pairs, - ModPartition, - FilterModFun, - Mod, - Partition, - Repairs, - State); - {false, Down} -> {reply, {down, Down}, State} - end; - _ -> {reply, ownership_change_in_progress, State} + [] -> + UpNodes = riak_core_node_watcher:nodes(Service), + Pairs = repair_pairs(Ring, Partition), + case check_up(Pairs, UpNodes) of + true -> + create_repair(Pairs, ModPartition, FilterModFun, Mod, + Partition, Repairs, State); + {false, Down} -> {reply, {down, Down}, State} + end; + _ -> {reply, ownership_change_in_progress, State} end. create_repair(Pairs, ModPartition, FilterModFun, Mod, Partition, Repairs, State) -> {MOP, _} = MinusOne = get_minus_one(Pairs), {POP, _} = PlusOne = get_plus_one(Pairs), - riak_core_handoff_manager:xfer(MinusOne, - ModPartition, + riak_core_handoff_manager:xfer(MinusOne, ModPartition, FilterModFun), - riak_core_handoff_manager:xfer(PlusOne, - ModPartition, + riak_core_handoff_manager:xfer(PlusOne, ModPartition, FilterModFun), MOXStatus = #xfer_status{status = pending, mod_src_target = {Mod, MOP, Partition}}, @@ -470,8 +428,7 @@ create_repair(Pairs, ModPartition, FilterModFun, Mod, %% @private handle_cast({Partition, Mod, start_vnode}, State) -> - _ = get_vnode(Partition, Mod, State), - {noreply, State}; + _ = get_vnode(Partition, Mod, State), {noreply, State}; handle_cast({unregister, Index, Mod, Pid}, #state{idxtab = T} = State) -> %% Update forwarding state to ensure vnode is not restarted in @@ -517,15 +474,13 @@ handle_info(management_tick, State0) -> Transfers = riak_core_ring:pending_changes(Ring), %% Kill/cancel any repairs during ownership changes State3 = case Transfers of - [] -> State2; - _ -> - Repairs = State#state.repairs, - kill_repairs(Repairs, ownership_change), - trigger_ownership_handoff(Transfers, - Mods, - Ring, - State2), - State2#state{repairs = []} + [] -> State2; + _ -> + Repairs = State#state.repairs, + kill_repairs(Repairs, ownership_change), + trigger_ownership_handoff(Transfers, Mods, Ring, + State2), + State2#state{repairs = []} end, State4 = State3#state{vnode_start_tokens = ?DEFAULT_VNODE_ROLLING_START}, @@ -533,8 +488,7 @@ handle_info(management_tick, State0) -> Repairs2 = check_repairs(State4#state.repairs), {noreply, State5#state{repairs = Repairs2}}; handle_info({'DOWN', MonRef, process, _P, _I}, State) -> - delmon(MonRef, State), - {noreply, State}. + delmon(MonRef, State), {noreply, State}. %% @private handle_vnode_event(inactive, Mod, Idx, Pid, State) -> @@ -564,30 +518,24 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
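The repair bookkeeping above compares and tests transfers through two macros from riak_core_handoff.hrl. Their presumed definitions, inferred from how they are used here rather than quoted from the header, are:

-define(XFER_COMPLETE(X), X#xfer_status.status == complete).
-define(XFER_EQ(A, ModSrcTgt), A#xfer_status.mod_src_target == ModSrcTgt).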
maybe_ring_changed(RingID, Ring, CHBin, State = #state{last_ring_id = LastID}) -> case RingID of - LastID -> - maybe_ensure_vnodes_started(Ring), - State; - _ -> - ensure_vnodes_started(Ring), - State2 = ring_changed(Ring, CHBin, State), - State2#state{last_ring_id = RingID} + LastID -> maybe_ensure_vnodes_started(Ring), State; + _ -> + ensure_vnodes_started(Ring), + State2 = ring_changed(Ring, CHBin, State), + State2#state{last_ring_id = RingID} end. ring_changed(Ring, CHBin, State) -> %% Update vnode forwarding state AllVNodes = get_all_vnodes(), Mods = [Mod || {_, Mod} <- riak_core:vnode_modules()], - State2 = update_forwarding(AllVNodes, - Mods, - Ring, + State2 = update_forwarding(AllVNodes, Mods, Ring, State), %% Update handoff state State3 = update_handoff(AllVNodes, Ring, CHBin, State2), %% Trigger ownership transfers. Transfers = riak_core_ring:pending_changes(Ring), - trigger_ownership_handoff(Transfers, - Mods, - Ring, + trigger_ownership_handoff(Transfers, Mods, Ring, State3), State3. @@ -595,27 +543,23 @@ maybe_ensure_vnodes_started(Ring) -> ExitingStates = [leaving, exiting, invalid], Status = riak_core_ring:member_status(Ring, node()), case lists:member(Status, ExitingStates) of - true -> - ensure_vnodes_started(Ring), - ok; - _ -> ok + true -> ensure_vnodes_started(Ring), ok; + _ -> ok end. ensure_vnodes_started(Ring) -> spawn(fun () -> try riak_core_ring_handler:ensure_vnodes_started(Ring) catch - Type:Reason:Stacktrace -> - logger:error("~p", [{Type, Reason, Stacktrace}]) + Type:Reason:Stacktrace -> + logger:error("~p", [{Type, Reason, Stacktrace}]) end end). schedule_management_timer() -> ManagementTick = application:get_env(riak_core, - vnode_management_timer, - 10000), - erlang:send_after(ManagementTick, - ?MODULE, + vnode_management_timer, 10000), + erlang:send_after(ManagementTick, ?MODULE, management_tick). trigger_ownership_handoff(Transfers, Mods, Ring, @@ -651,21 +595,21 @@ limit_ownership_handoff(Limit, Transfers, true) -> %% @private idx2vnode(Idx, Mod, _State = #state{idxtab = T}) -> case ets:lookup(T, {Idx, Mod}) of - [I] -> I#idxrec.pid; - [] -> no_match + [I] -> I#idxrec.pid; + [] -> no_match end. %% @private delmon(MonRef, _State = #state{idxtab = T}) -> case ets:lookup(T, MonRef) of - [#monrec{key = {Index, Mod} = Key}] -> - _ = unregister_vnode_stats(Mod, Index), - ets:match_delete(T, - {idxrec, Key, '_', '_', '_', MonRef}), - ets:delete(T, MonRef); - [] -> - ets:match_delete(T, - {idxrec, '_', '_', '_', '_', MonRef}) + [#monrec{key = {Index, Mod} = Key}] -> + _ = unregister_vnode_stats(Mod, Index), + ets:match_delete(T, + {idxrec, Key, '_', '_', '_', MonRef}), + ets:delete(T, MonRef); + [] -> + ets:match_delete(T, + {idxrec, '_', '_', '_', '_', MonRef}) end. %% @private @@ -673,25 +617,23 @@ add_vnode_rec(I, _State = #state{idxtab = T}) -> ets:insert(T, I). 
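schedule_management_timer/0 above follows the standard self-rescheduling timer idiom: erlang:send_after/3 delivers a bare message (here to the registered ?MODULE name), and the handle_info/2 clause for that message re-arms the timer before doing the periodic work. A minimal, self-contained sketch of the idiom, assuming nothing from this module beyond the config key (the module name and message are made up, and it sends to self() rather than a registered name):

-module(tick_sketch).

-behaviour(gen_server).

-export([start_link/0]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2]).

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

init([]) -> schedule_tick(), {ok, #{}}.

handle_call(_Msg, _From, State) -> {reply, ok, State}.

handle_cast(_Msg, State) -> {noreply, State}.

%% Re-arm the timer, then run the periodic pass.
handle_info(tick, State) ->
    schedule_tick(),
    {noreply, do_periodic_work(State)}.

schedule_tick() ->
    Interval = application:get_env(riak_core, vnode_management_timer, 10000),
    erlang:send_after(Interval, self(), tick).

do_periodic_work(State) -> State.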
%% @private -get_vnode(Idx, Mod, State) when not is_list(Idx) -> - [Result] = get_vnode([Idx], Mod, State), - Result; -get_vnode(IdxList, Mod, State) -> - Initial = [case idx2vnode(Idx, Mod, State) of - no_match -> Idx; - Pid -> {Idx, Pid} +get_vnode(Idx, Module, State) when not is_list(Idx) -> + [Result] = get_vnode([Idx], Module, State), Result; +get_vnode(IdxList, Module, State) -> + Initial = [case idx2vnode(Idx, Module, State) of + no_match -> Idx; + Pid -> {Idx, Pid} end || Idx <- IdxList], {NotStarted, Started} = lists:partition(fun erlang:is_integer/1, Initial), StartFun = fun (Idx) -> - ForwardTo = get_forward(Mod, Idx, State), + ForwardTo = get_forward(Module, Idx, State), logger:debug("Will start VNode for partition ~p", [Idx]), - {ok, Pid} = riak_core_vnode_sup:start_vnode(Mod, - Idx, + {ok, Pid} = riak_core_vnode_sup:start_vnode(Module, Idx, ForwardTo), - register_vnode_stats(Mod, Idx, Pid), + register_vnode_stats(Module, Idx, Pid), logger:debug("Started VNode, waiting for initialization " "to\n complete " "~p, ~p ", @@ -702,55 +644,52 @@ get_vnode(IdxList, Mod, State) -> {Idx, Pid} end, Pairs = Started ++ - riak_core_util:pmap(StartFun, - NotStarted, - ?DEFAULT_VNODE_ROLLING_START), + riak_core_util:pmap(StartFun, NotStarted, + ?DEFAULT_VNODE_ROLLING_START), %% Return Pids in same order as input PairsDict = dict:from_list(Pairs), _ = [begin - Pid = dict:fetch(Idx, PairsDict), - MonRef = erlang:monitor(process, Pid), - IdxRec = #idxrec{key = {Idx, Mod}, idx = Idx, mod = Mod, - pid = Pid, monref = MonRef}, - MonRec = #monrec{monref = MonRef, key = {Idx, Mod}}, - add_vnode_rec([IdxRec, MonRec], State) + Pid = dict:fetch(Idx, PairsDict), + MonRef = erlang:monitor(process, Pid), + IdxRec = #idxrec{key = {Idx, Module}, idx = Idx, + mod = Module, pid = Pid, monref = MonRef}, + MonRec = #monrec{monref = MonRef, key = {Idx, Module}}, + add_vnode_rec([IdxRec, MonRec], State) end || Idx <- NotStarted], [dict:fetch(Idx, PairsDict) || Idx <- IdxList]. get_forward(Mod, Idx, #state{forwarding = Fwd}) -> case dict:find({Mod, Idx}, Fwd) of - {ok, ForwardTo} -> ForwardTo; - _ -> undefined + {ok, ForwardTo} -> ForwardTo; + _ -> undefined end. check_forward(Ring, Mod, Index) -> Node = node(), case riak_core_ring:next_owner(Ring, Index, Mod) of - {Node, '$resize', _} -> - Complete = - riak_core_ring:complete_resize_transfers(Ring, - {Index, Node}, - Mod), - {{Mod, Index}, Complete}; - {Node, '$delete', _} -> {{Mod, Index}, undefined}; - {Node, NextOwner, complete} -> - {{Mod, Index}, NextOwner}; - _ -> {{Mod, Index}, undefined} + {Node, '$resize', _} -> + Complete = + riak_core_ring:complete_resize_transfers(Ring, + {Index, Node}, Mod), + {{Mod, Index}, Complete}; + {Node, '$delete', _} -> {{Mod, Index}, undefined}; + {Node, NextOwner, complete} -> + {{Mod, Index}, NextOwner}; + _ -> {{Mod, Index}, undefined} end. check_forward_precomputed(Completed, Mod, Index, Node, Ring) -> case dict:find({Mod, Index}, Completed) of - {ok, '$resize'} -> - Complete = - riak_core_ring:complete_resize_transfers(Ring, - {Index, Node}, - Mod), - {{Mod, Index}, Complete}; - {ok, '$delete'} -> {{Mod, Index}, undefined}; - {ok, NextOwner} -> {{Mod, Index}, NextOwner}; - _ -> {{Mod, Index}, undefined} + {ok, '$resize'} -> + Complete = + riak_core_ring:complete_resize_transfers(Ring, + {Index, Node}, Mod), + {{Mod, Index}, Complete}; + {ok, '$delete'} -> {{Mod, Index}, undefined}; + {ok, NextOwner} -> {{Mod, Index}, NextOwner}; + _ -> {{Mod, Index}, undefined} end. 
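get_vnode/3 above takes care to return pids in the same order as the requested indices even though the missing vnodes are started in parallel: running entries are partitioned out, the rest are started via pmap, and a dict keyed by index restores the input order. The same shape reduced to a runnable sketch, where Lookup stands in for idx2vnode/3 and a plain spawn for the supervised start:

-module(ordered_start_sketch).

-export([ensure_all/2]).

%% Return one pid per index, in input order, reusing entries the
%% Lookup fun already knows and "starting" the missing ones.
ensure_all(IdxList, Lookup) ->
    Initial = [case Lookup(Idx) of
                   no_match -> Idx;
                   Pid -> {Idx, Pid}
               end || Idx <- IdxList],
    {NotStarted, Started} = lists:partition(fun erlang:is_integer/1, Initial),
    NewPairs = [{Idx, spawn(fun () -> receive stop -> ok end end)}
                || Idx <- NotStarted],
    PairsDict = dict:from_list(Started ++ NewPairs),
    [dict:fetch(Idx, PairsDict) || Idx <- IdxList].

For example, ordered_start_sketch:ensure_all([3, 1, 2], fun (_) -> no_match end). yields three fresh pids in the order 3, 1, 2.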
compute_forwarding(Mods, Ring) -> @@ -761,11 +700,8 @@ compute_forwarding(Mods, Ring) -> <- riak_core_ring:completed_next_owners(Mod, Ring), Owner =:= Node], Completed = dict:from_list(CL), - Forwarding = [check_forward_precomputed(Completed, - Mod, - I, - N, - Ring) + Forwarding = [check_forward_precomputed(Completed, Mod, + I, N, Ring) || {I, N} <- riak_core_ring:all_owners(Ring), Mod <- Mods], dict:from_list(Forwarding). @@ -781,56 +717,47 @@ update_forwarding(AllVNodes, Mods, Ring, end, NewForwarding), dict:fold(fun ({Mod, Idx}, ForwardTo, _) -> - change_forward(VNodes, Mod, Idx, ForwardTo), - ok + change_forward(VNodes, Mod, Idx, ForwardTo), ok end, - ok, - Diff), + ok, Diff), State#state{forwarding = NewForwarding}. update_forwarding({Mod, Idx}, Ring, State = #state{forwarding = Forwarding}) -> {_, ForwardTo} = check_forward(Ring, Mod, Idx), - NewForwarding = dict:store({Mod, Idx}, - ForwardTo, + NewForwarding = dict:store({Mod, Idx}, ForwardTo, Forwarding), State#state{forwarding = NewForwarding}. change_forward(VNodes, Mod, Idx, ForwardTo) -> case dict:find({Mod, Idx}, VNodes) of - error -> ok; - {ok, Pid} -> - riak_core_vnode:set_forwarding(Pid, ForwardTo), - ok + error -> ok; + {ok, Pid} -> + riak_core_vnode:set_forwarding(Pid, ForwardTo), ok end. update_handoff(AllVNodes, Ring, CHBin, State) -> case riak_core_ring:ring_ready(Ring) of - false -> State; - true -> - NewHO = lists:flatten([case should_handoff(Ring, - CHBin, - Mod, - Idx) - of - false -> []; - {true, primary, TargetNode} -> - [{{Mod, Idx}, - {ownership, - outbound, - TargetNode}}]; - {true, {fallback, _Node}, TargetNode} -> - [{{Mod, Idx}, - {hinted, outbound, TargetNode}}]; - {true, '$resize' = Action} -> - [{{Mod, Idx}, - {resize, outbound, Action}}]; - {true, '$delete' = Action} -> - [{{Mod, Idx}, - {delete, local, Action}}] - end - || {Mod, Idx, _Pid} <- AllVNodes]), - State#state{handoff = dict:from_list(NewHO)} + false -> State; + true -> + NewHO = lists:flatten([case should_handoff(Ring, CHBin, + Mod, Idx) + of + false -> []; + {true, primary, TargetNode} -> + [{{Mod, Idx}, + {ownership, outbound, TargetNode}}]; + {true, {fallback, _Node}, TargetNode} -> + [{{Mod, Idx}, + {hinted, outbound, TargetNode}}]; + {true, '$resize' = Action} -> + [{{Mod, Idx}, + {resize, outbound, Action}}]; + {true, '$delete' = Action} -> + [{{Mod, Idx}, {delete, local, Action}}] + end + || {Mod, Idx, _Pid} <- AllVNodes]), + State#state{handoff = dict:from_list(NewHO)} end. should_handoff(Ring, _CHBin, Mod, Idx) -> @@ -839,36 +766,31 @@ should_handoff(Ring, _CHBin, Mod, Idx) -> Type = riak_core_ring:vnode_type(Ring, Idx), Ready = riak_core_ring:ring_ready(Ring), IsResizing = riak_core_ring:is_resizing(Ring), - case determine_handoff_target(Type, - NextOwner, - Ready, + case determine_handoff_target(Type, NextOwner, Ready, IsResizing) of - undefined -> false; - Action - when Action =:= '$resize' orelse Action =:= '$delete' -> - {true, Action}; - TargetNode -> - case app_for_vnode_module(Mod) of - undefined -> false; - {ok, App} -> - case lists:member(TargetNode, - riak_core_node_watcher:nodes(App)) - of - false -> false; - true -> {true, Type, TargetNode} - end - end + undefined -> false; + Action + when Action =:= '$resize' orelse Action =:= '$delete' -> + {true, Action}; + TargetNode -> + case app_for_vnode_module(Mod) of + undefined -> false; + {ok, App} -> + case lists:member(TargetNode, + riak_core_node_watcher:nodes(App)) + of + false -> false; + true -> {true, Type, TargetNode} + end + end end. 
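update_forwarding/4 avoids poking every vnode on every ring event by diffing the freshly computed forwarding dict against the previous one and only calling change_forward/4 for entries whose value changed (the dict:filter/2 clause sits just above the visible hunk). The diff step in isolation, with made-up keys and nodes:

1> Old = dict:from_list([{a, node1}, {b, undefined}]).
2> New = dict:from_list([{a, node2}, {b, undefined}]).
3> Diff = dict:filter(fun (K, V) -> dict:find(K, Old) /= {ok, V} end, New).
4> dict:to_list(Diff).
[{a,node2}]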
determine_handoff_target(Type, NextOwner, RingReady, IsResize) -> Me = node(), - determine_handoff_target(Type, - NextOwner, - RingReady, - IsResize, - NextOwner =:= Me). + determine_handoff_target(Type, NextOwner, RingReady, + IsResize, NextOwner =:= Me). determine_handoff_target(primary, _, _, _, true) -> %% Never hand off to myself as a primary @@ -909,12 +831,12 @@ determine_handoff_target(_, _, _, _, _) -> undefined. app_for_vnode_module(Mod) when is_atom(Mod) -> case application:get_env(riak_core, vnode_modules) of - {ok, Mods} -> - case lists:keysearch(Mod, 2, Mods) of - {value, {App, Mod}} -> {ok, App}; - false -> undefined - end; - undefined -> undefined + {ok, Mods} -> + case lists:keysearch(Mod, 2, Mods) of + {value, {App, Mod}} -> {ok, App}; + false -> undefined + end; + undefined -> undefined end. maybe_trigger_handoff(Mod, Idx, State) -> @@ -924,24 +846,21 @@ maybe_trigger_handoff(Mod, Idx, State) -> maybe_trigger_handoff(Mod, Idx, Pid, _State = #state{handoff = HO}) -> case dict:find({Mod, Idx}, HO) of - {ok, {resize, _Direction, '$resize'}} -> - {ok, Ring} = riak_core_ring_manager:get_my_ring(), - case riak_core_ring:awaiting_resize_transfer(Ring, - {Idx, node()}, - Mod) - of - undefined -> ok; - {TargetIdx, TargetNode} -> - riak_core_vnode:trigger_handoff(Pid, - TargetIdx, - TargetNode) - end; - {ok, {delete, local, '$delete'}} -> - riak_core_vnode:trigger_delete(Pid); - {ok, {_Type, _Direction, TargetNode}} -> - riak_core_vnode:trigger_handoff(Pid, TargetNode), - ok; - error -> ok + {ok, {resize, _Direction, '$resize'}} -> + {ok, Ring} = riak_core_ring_manager:get_my_ring(), + case riak_core_ring:awaiting_resize_transfer(Ring, + {Idx, node()}, Mod) + of + undefined -> ok; + {TargetIdx, TargetNode} -> + riak_core_vnode:trigger_handoff(Pid, TargetIdx, + TargetNode) + end; + {ok, {delete, local, '$delete'}} -> + riak_core_vnode:trigger_delete(Pid); + {ok, {_Type, _Direction, TargetNode}} -> + riak_core_vnode:trigger_handoff(Pid, TargetNode), ok; + error -> ok end. get_all_vnodes_status(#state{forwarding = Forwarding, @@ -953,8 +872,8 @@ get_all_vnodes_status(#state{forwarding = Forwarding, || {_App, Mod} <- riak_core:vnode_modules()], ThisNode = node(), Types = [case Owner of - ThisNode -> {{Mod, Idx}, {type, primary}}; - _ -> {{Mod, Idx}, {type, secondary}} + ThisNode -> {{Mod, Idx}, {type, primary}}; + _ -> {{Mod, Idx}, {type, secondary}} end || {Idx, Owner} <- Owners, Mod <- Mods], Types2 = lists:keysort(1, Types), @@ -976,8 +895,7 @@ get_all_vnodes_status(#state{forwarding = Forwarding, Status = lists:foldl(fun (B, A) -> orddict:merge(MergeFn, A, B) end, - Types2, - [Pids2, Forwarding2, Handoff2]), + Types2, [Pids2, Forwarding2, Handoff2]), Status. update_never_started(Ring, @@ -986,15 +904,14 @@ update_never_started(Ring, || {_App, Mod} <- riak_core:vnode_modules(), not lists:member(Mod, KnownMods)], case UnknownMods of - [] -> State; - _ -> - Indices = [Idx - || {Idx, _} <- riak_core_ring:all_owners(Ring)], - lists:foldl(fun (Mod, StateAcc) -> - update_never_started(Mod, Indices, StateAcc) - end, - State, - UnknownMods) + [] -> State; + _ -> + Indices = [Idx + || {Idx, _} <- riak_core_ring:all_owners(Ring)], + lists:foldl(fun (Mod, StateAcc) -> + update_never_started(Mod, Indices, StateAcc) + end, + State, UnknownMods) end. 
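get_all_vnodes_status/1 above builds one proplist per {Mod, Index} key from several sources (type, pid, forwarding, handoff) and folds them together with orddict:merge/3, whose merge function concatenates the per-key lists. The merge in isolation, with illustrative keys and values:

1> A = orddict:from_list([{k, [{type, primary}]}]).
2> B = orddict:from_list([{k, [{forward, n2}]}]).
3> orddict:merge(fun (_K, X, Y) -> X ++ Y end, A, B).
[{k,[{type,primary},{forward,n2}]}]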
update_never_started(Mod, Indices, State) ->
@@ -1005,7 +922,7 @@ update_never_started(Mod, Indices, State) ->
 ordsets:from_list(AlreadyStarted)),
 NeverStarted2 = [{Idx, Mod} || Idx <- NeverStarted],
 NeverStarted3 = NeverStarted2 ++
- State#state.never_started,
+ State#state.never_started,
 KnownModules = [Mod | State#state.known_modules],
 State#state{known_modules = KnownModules,
 never_started = NeverStarted3}.
@@ -1019,13 +936,13 @@ maybe_start_vnodes(State =
 #state{vnode_start_tokens = Tokens,
 never_started = NeverStarted}) ->
 case {Tokens, NeverStarted} of
- {0, _} -> State;
- {_, []} -> State;
- {_, [{Idx, Mod} | NeverStarted2]} ->
- _ = get_vnode(Idx, Mod, State),
- gen_server:cast(?MODULE, maybe_start_vnodes),
- State#state{vnode_start_tokens = Tokens - 1,
- never_started = NeverStarted2}
+ {0, _} -> State;
+ {_, []} -> State;
+ {_, [{Idx, Mod} | NeverStarted2]} ->
+ _ = get_vnode(Idx, Mod, State),
+ gen_server:cast(?MODULE, maybe_start_vnodes),
+ State#state{vnode_start_tokens = Tokens - 1,
+ never_started = NeverStarted2}
 end.

-spec check_repairs(repairs()) -> Repairs2 :: repairs().
@@ -1040,7 +957,7 @@ check_repairs(Repairs) ->
 MOX2 = maybe_retry(R, MO, MOX),
 POX2 = maybe_retry(R, PO, POX),
 if (?XFER_COMPLETE(MOX2)) andalso
- (?XFER_COMPLETE(POX2)) ->
+ (?XFER_COMPLETE(POX2)) ->
 Repairs2;
 true ->
 R2 = R#repair{minus_one_xfer = MOX2,
@@ -1056,15 +973,14 @@ check_repairs(Repairs) ->

maybe_retry(R, {SrcPartition, _} = Src, Xfer) ->
 case Xfer#xfer_status.status of
- complete -> Xfer;
- pending ->
- {Mod, _, Partition} = Xfer#xfer_status.mod_src_target,
- FilterModFun = R#repair.filter_mod_fun,
- riak_core_handoff_manager:xfer(Src,
- {Mod, Partition},
- FilterModFun),
- #xfer_status{status = pending,
- mod_src_target = {Mod, SrcPartition, Partition}}
+ complete -> Xfer;
+ pending ->
+ {Mod, _, Partition} = Xfer#xfer_status.mod_src_target,
+ FilterModFun = R#repair.filter_mod_fun,
+ riak_core_handoff_manager:xfer(Src, {Mod, Partition},
+ FilterModFun),
+ #xfer_status{status = pending,
+ mod_src_target = {Mod, SrcPartition, Partition}}
 end.

%% @private
@@ -1079,8 +995,8 @@ check_up(Pairs, UpNodes) ->
 || {_Partition, Owner} = Pair <- Pairs,
 not lists:member(Owner, UpNodes)],
 case Down of
- [] -> true;
- _ -> {false, Down}
+ [] -> true;
+ _ -> {false, Down}
 end.

%% @private
@@ -1097,8 +1013,7 @@ repair_pairs(Ring, Partition) ->
 [_, Before] = chash:predecessors(<<Partition:160/integer>>, CH, 2),
 [After] = chash:successors(<<Partition:160/integer>>,
- CH,
- 1),
+ CH, 1),
 [Before, {Partition, Owner}, After].

%% @private
@@ -1109,12 +1024,11 @@ repair_pairs(Ring, Partition) ->
 repairs()) -> repair() | none.

get_repair(ModPartition, Repairs) ->
- case lists:keyfind(ModPartition,
- #repair.mod_partition,
+ case lists:keyfind(ModPartition, #repair.mod_partition,
 Repairs)
 of
- false -> none;
- Val -> Val
+ false -> none;
+ Val -> Val
 end.

%% @private
@@ -1124,8 +1038,7 @@ get_repair(ModPartition, Repairs) ->

remove_repair(Repair, Repairs) ->
 lists:keydelete(Repair#repair.mod_partition,
- #repair.mod_partition,
- Repairs).
+ #repair.mod_partition, Repairs).

%% @private
%%
@@ -1134,9 +1047,7 @@ remove_repair(Repair, Repairs) ->

replace_repair(Repair, Repairs) ->
 lists:keyreplace(Repair#repair.mod_partition,
- #repair.mod_partition,
- Repairs,
- Repair).
+ #repair.mod_partition, Repairs, Repair).
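repair_pairs/2 above locates the repair partners as the owners just before and just after the partition in hash space; ring indices are 160-bit integers, which is why the partition is re-encoded as a binary key before calling chash. The same computation as a standalone helper (a sketch that assumes riak_core's chash module is on the code path):

-module(repair_pairs_sketch).

-export([neighbours/2]).

%% Given a consistent-hash ring and a partition index, return the owner
%% entry immediately before and immediately after it in hash space.
neighbours(CH, Partition) ->
    Key = <<Partition:160/integer>>,
    [_, Before] = chash:predecessors(Key, CH, 2),
    [After] = chash:successors(Key, CH, 1),
    {Before, After}.

For a quick check: CH = chash:fresh(8, node()), {Idx, _} = hd(chash:nodes(CH)), then repair_pairs_sketch:neighbours(CH, Idx).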
%% @private %% @@ -1177,15 +1088,12 @@ kill_repair(Repair, Reason) -> POModSrcTarget = POX#xfer_status.mod_src_target, %% Kill the remote senders riak_core_handoff_manager:kill_xfer(MOOwner, - MOModSrcTarget, - Reason), + MOModSrcTarget, Reason), riak_core_handoff_manager:kill_xfer(POOwner, - POModSrcTarget, - Reason), + POModSrcTarget, Reason), %% Kill the local receivers riak_core_handoff_manager:kill_xfer(node(), - {Mod, undefined, Partition}, - Reason). + {Mod, undefined, Partition}, Reason). register_vnode_stats(_Mod, _Index, _Pid) -> %% STATS diff --git a/src/riak_core_vnode_master.erl b/src/riak_core_vnode_master.erl index 6fd89dacb..ea307c34c 100644 --- a/src/riak_core_vnode_master.erl +++ b/src/riak_core_vnode_master.erl @@ -28,33 +28,17 @@ -behaviour(gen_server). --export([start_link/1, - start_link/2, - start_link/3, - get_vnode_pid/2, - start_vnode/2, - command/3, - command/4, - command_unreliable/3, - command_unreliable/4, - sync_command/3, - sync_command/4, - coverage/5, - command_return_vnode/4, - sync_spawn_command/3, - make_request/3, - make_coverage_request/4, - all_nodes/1, - reg_name/1]). - --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). - --record(state, {idxtab, sup_name, vnode_mod, legacy}). +-export([start_link/1, get_vnode_pid/2, start_vnode/2, + command/3, command/4, command_unreliable/3, + command_unreliable/4, sync_command/3, sync_command/4, + coverage/5, command_return_vnode/4, + sync_spawn_command/3, make_request/3, + make_coverage_request/4, all_nodes/1, reg_name/1]). + +-export([init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3]). + +-record(state, {idxtab, sup_name, vnode_mod}). -define(LONG_TIMEOUT, 120 * 1000). @@ -72,17 +56,10 @@ vmaster_to_vmod(VMaster) -> L = atom_to_list(VMaster), list_to_atom(lists:sublist(L, length(L) - 7)). -start_link(VNodeMod) -> start_link(VNodeMod, undefined). - -start_link(VNodeMod, LegacyMod) -> - start_link(VNodeMod, LegacyMod, undefined). - -start_link(VNodeMod, LegacyMod, Service) -> +start_link(VNodeMod) -> RegName = reg_name(VNodeMod), - gen_server:start_link({local, RegName}, - ?MODULE, - [Service, VNodeMod, LegacyMod, RegName], - []). + gen_server:start_link({local, RegName}, ?MODULE, + [VNodeMod, RegName], []). start_vnode(Index, VNodeMod) -> riak_core_vnode_manager:start_vnode(Index, VNodeMod). @@ -101,10 +78,7 @@ command(PrefListOrCmd, Msg, Sender, VMaster) -> command_unreliable(PrefListOrCmd, Msg, Sender, VMaster) -> - command2(PrefListOrCmd, - Msg, - Sender, - VMaster, + command2(PrefListOrCmd, Msg, Sender, VMaster, unreliable). 
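vmaster_to_vmod/1 above relies on the registration convention that a vnode master is named after its vnode module plus a "_master" suffix, so dropping the last seven characters recovers the module:

1> L = atom_to_list(riak_kv_vnode_master).
"riak_kv_vnode_master"
2> list_to_atom(lists:sublist(L, length(L) - 7)).
riak_kv_vnode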
%% Send the command to the preflist given with responses going to Sender @@ -124,8 +98,7 @@ command2([{Index, Pid} | Rest], Msg, Sender, VMaster, command2([{Index, Node} | Rest], Msg, Sender, VMaster, How) -> proxy_cast({VMaster, Node}, - make_request(Msg, Sender, Index), - How), + make_request(Msg, Sender, Index), How), command2(Rest, Msg, Sender, VMaster, How); command2(DestTuple, Msg, Sender, VMaster, How) when is_tuple(DestTuple) -> @@ -138,8 +111,7 @@ coverage(Msg, CoverageVNodes, Keyspaces, {Type, Ref, From}, VMaster) when is_list(CoverageVNodes) -> [proxy_cast({VMaster, Node}, - make_coverage_request(Msg, - Keyspaces, + make_coverage_request(Msg, Keyspaces, {Type, {Ref, {Index, Node}}, From}, Index)) || {Index, Node} <- CoverageVNodes]; @@ -154,8 +126,7 @@ command_return_vnode({Index, Node}, Msg, Sender, VMaster) -> Req = make_request(Msg, Sender, Index), Mod = vmaster_to_vmod(VMaster), - riak_core_vnode_proxy:command_return_vnode({Mod, - Index, + riak_core_vnode_proxy:command_return_vnode({Mod, Index, Node}, Req). @@ -169,13 +140,12 @@ sync_command({Index, Node}, Msg, VMaster, Timeout) -> %% the From for handle_call so that the {reply} return gets %% sent here. Request = make_request(Msg, - {server, undefined, undefined}, - Index), + {server, undefined, undefined}, Index), case gen_server:call({VMaster, Node}, Request, Timeout) of - {vnode_error, {Error, _Args}} -> error(Error); - {vnode_error, Error} -> error(Error); - Else -> Else + {vnode_error, {Error, _Args}} -> error(Error); + {vnode_error, Error} -> error(Error); + Else -> Else end. %% Send a synchronous spawned command to an individual Index/Node combination. @@ -183,15 +153,13 @@ sync_command({Index, Node}, Msg, VMaster, Timeout) -> %% continue to handle requests. sync_spawn_command({Index, Node}, Msg, VMaster) -> Request = make_request(Msg, - {server, undefined, undefined}, - Index), - case gen_server:call({VMaster, Node}, - {spawn, Request}, + {server, undefined, undefined}, Index), + case gen_server:call({VMaster, Node}, {spawn, Request}, infinity) of - {vnode_error, {Error, _Args}} -> error(Error); - {vnode_error, Error} -> error(Error); - Else -> Else + {vnode_error, {Error, _Args}} -> error(Error); + {vnode_error, Error} -> error(Error); + Else -> Else end. %% Make a request record - exported for use by legacy modules @@ -222,11 +190,8 @@ all_nodes(VNodeMod) -> [Pid || {_Mod, _Idx, Pid} <- VNodes]. %% @private -init([Service, VNodeMod, LegacyMod, _RegName]) -> - gen_server:cast(self(), {wait_for_service, Service}), - {ok, - #state{idxtab = undefined, vnode_mod = VNodeMod, - legacy = LegacyMod}}. +init([VNodeMod, _RegName]) -> + {ok, #state{idxtab = undefined, vnode_mod = VNodeMod}}. proxy_cast(Who, Req) -> proxy_cast(Who, Req, normal). 
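Note the error translation further down in sync_command/4 and sync_spawn_command/3: a {vnode_error, Reason} reply is re-raised in the caller with error/1. A hedged caller-side wrapper that converts the raised error back into a tagged tuple, for callers that prefer values (the module and function names are made up):

-module(sync_command_sketch).

-export([safe_sync_command/4]).

%% Call a vnode synchronously, turning the re-raised vnode error back
%% into an {error, Reason} return.
safe_sync_command(IdxNode, Msg, VMaster, Timeout) ->
    try
        riak_core_vnode_master:sync_command(IdxNode, Msg, VMaster, Timeout)
    catch
        error:Reason -> {error, Reason}
    end.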
@@ -253,10 +218,10 @@ send_an_event(Dest, Event, unreliable) -> handle_cast({wait_for_service, Service}, State) -> case Service of - undefined -> ok; - _ -> - logger:debug("Waiting for service: ~p", [Service]), - riak_core:wait_for_service(Service) + undefined -> ok; + _ -> + logger:debug("Waiting for service: ~p", [Service]), + riak_core:wait_for_service(Service) end, {noreply, State}; handle_cast(Req = #riak_vnode_req_v1{index = Idx}, @@ -268,21 +233,13 @@ handle_cast(Req = #riak_coverage_req_v1{index = Idx}, State = #state{vnode_mod = Mod}) -> Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, Req), - {noreply, State}; -handle_cast(Other, State = #state{legacy = Legacy}) - when Legacy =/= undefined -> - case catch Legacy:rewrite_cast(Other) of - {ok, #riak_vnode_req_v1{} = Req} -> - handle_cast(Req, State); - _ -> {noreply, State} - end. + {noreply, State}. handle_call({return_vnode, Req = #riak_vnode_req_v1{index = Idx}}, _From, State = #state{vnode_mod = Mod}) -> {ok, Pid} = - riak_core_vnode_proxy:command_return_vnode({Mod, - Idx, + riak_core_vnode_proxy:command_return_vnode({Mod, Idx, node()}, Req), {reply, {ok, Pid}, State}; @@ -292,8 +249,7 @@ handle_call(Req = #riak_vnode_req_v1{index = Idx, Proxy = riak_core_vnode_proxy:reg_name(Mod, Idx), riak_core_vnode:send_req(Proxy, Req#riak_vnode_req_v1{sender = - {server, - undefined, + {server, undefined, From}}), {noreply, State}; handle_call({spawn, @@ -308,15 +264,7 @@ handle_call({spawn, = Sender}) end), - {noreply, State}; -handle_call(Other, From, - State = #state{legacy = Legacy}) - when Legacy =/= undefined -> - case catch Legacy:rewrite_call(Other, From) of - {ok, #riak_vnode_req_v1{} = Req} -> - handle_call(Req, From, State); - _ -> {noreply, State} - end. + {noreply, State}. handle_info(_Info, State) -> {noreply, State}. diff --git a/src/riak_core_vnode_proxy.erl b/src/riak_core_vnode_proxy.erl index e9dff563d..1b67a8189 100644 --- a/src/riak_core_vnode_proxy.erl +++ b/src/riak_core_vnode_proxy.erl @@ -18,19 +18,11 @@ %% ------------------------------------------------------------------- -module(riak_core_vnode_proxy). --export([start_link/2, - init/1, - reg_name/2, - reg_name/3, - call/2, - call/3, - cast/2, - unregister_vnode/3, - command_return_vnode/2, - overloaded/1]). - --export([system_continue/3, - system_terminate/4, +-export([start_link/2, init/1, reg_name/2, reg_name/3, + call/2, call/3, cast/2, unregister_vnode/3, + command_return_vnode/2, overloaded/1]). + +-export([system_continue/3, system_terminate/4, system_code_change/4]). -include("riak_core_vnode.hrl"). @@ -42,23 +34,22 @@ -endif. -record(state, - {mod :: atom(), - index :: partition(), - vnode_pid :: pid() | undefined, - vnode_mref :: reference() | undefined, - check_mailbox :: non_neg_integer(), - check_threshold :: pos_integer() | undefined, - check_counter :: non_neg_integer(), - check_interval :: pos_integer(), - check_request_interval :: non_neg_integer(), - check_request :: undefined | sent | ignore}). + {mod :: atom(), index :: partition(), + vnode_pid :: pid() | undefined, + vnode_mref :: reference() | undefined, + check_mailbox :: non_neg_integer(), + check_threshold :: pos_integer() | undefined, + check_counter :: non_neg_integer(), + check_interval :: pos_integer(), + check_request_interval :: non_neg_integer(), + check_request :: undefined | sent | ignore}). -define(DEFAULT_CHECK_INTERVAL, 5000). -define(DEFAULT_OVERLOAD_THRESHOLD, 10000). 
-reg_name(Mod, Index) -> - ModBin = atom_to_binary(Mod, latin1), +reg_name(Module, Index) -> + ModBin = atom_to_binary(Module, latin1), IdxBin = list_to_binary(integer_to_list(Index)), AllBin = <<$p, $r, $o, $x, $y, $_, ModBin/binary, $_, IdxBin/binary>>, @@ -69,11 +60,10 @@ reg_name(Mod, Index, Node) -> start_link(Mod, Index) -> RegName = reg_name(Mod, Index), - proc_lib:start_link(?MODULE, - init, + proc_lib:start_link(?MODULE, init, [[self(), RegName, Mod, Index]]). -init([Parent, RegName, Mod, Index]) -> +init([Parent, RegName, Module, Index]) -> erlang:register(RegName, self()), proc_lib:init_ack(Parent, {ok, self()}), Interval = application:get_env(riak_core, @@ -86,26 +76,26 @@ init([Parent, RegName, Mod, Index]) -> vnode_overload_threshold, ?DEFAULT_OVERLOAD_THRESHOLD), SafeInterval = case Threshold == undefined orelse - Interval < Threshold + Interval < Threshold of - true -> Interval; - false -> - logger:warning("Setting riak_core/vnode_check_interval " - "to ~b", - [Threshold div 2]), - Threshold div 2 + true -> Interval; + false -> + logger:warning("Setting riak_core/vnode_check_interval " + "to ~b", + [Threshold div 2]), + Threshold div 2 end, SafeRequestInterval = case RequestInterval < - SafeInterval + SafeInterval of - true -> RequestInterval; - false -> - logger:warning("Setting riak_core/vnode_check_request_interva" - "l to ~b", - [SafeInterval div 2]), - SafeInterval div 2 + true -> RequestInterval; + false -> + logger:warning("Setting riak_core/vnode_check_request_interva" + "l to ~b", + [SafeInterval div 2]), + SafeInterval div 2 end, - State = #state{mod = Mod, index = Index, + State = #state{mod = Module, index = Index, check_mailbox = 0, check_counter = 0, check_threshold = Threshold, check_interval = SafeInterval, @@ -128,15 +118,12 @@ overloaded({Mod, Index, Node}) -> overloaded(Pid) -> call(Pid, overloaded). call(Name, Msg) -> - call_reply(catch gen:call(Name, - '$vnode_proxy_call', + call_reply(catch gen:call(Name, '$vnode_proxy_call', Msg)). call(Name, Msg, Timeout) -> - call_reply(catch gen:call(Name, - '$vnode_proxy_call', - Msg, - Timeout)). + call_reply(catch gen:call(Name, '$vnode_proxy_call', + Msg, Timeout)). -spec call_reply({atom(), term()}) -> term(). @@ -144,8 +131,7 @@ call_reply({ok, Res}) -> Res; call_reply({'EXIT', Reason}) -> {error, Reason}. cast(Name, Msg) -> - catch erlang:send(Name, {'$vnode_proxy_cast', Msg}), - ok. + catch erlang:send(Name, {'$vnode_proxy_cast', Msg}), ok. system_continue(Parent, _, State) -> loop(Parent, State). @@ -158,28 +144,22 @@ system_code_change(State, _, _, _) -> {ok, State}. 
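The two case expressions in init/1 above enforce an ordering invariant: the ping (request) interval must stay below the mailbox check interval, which in turn must stay below the overload threshold; a value that breaks the invariant is clamped to half the violated bound. The same logic as a pure function, minus the warnings, for readability:

-module(interval_clamp_sketch).

-export([safe_intervals/3]).

%% Clamp the check interval below the overload threshold, then the
%% request (ping) interval below the check interval.
safe_intervals(Interval, RequestInterval, Threshold) ->
    SafeInterval = case Threshold == undefined orelse Interval < Threshold of
                       true -> Interval;
                       false -> Threshold div 2
                   end,
    SafeRequestInterval = case RequestInterval < SafeInterval of
                              true -> RequestInterval;
                              false -> SafeInterval div 2
                          end,
    {SafeInterval, SafeRequestInterval}.

For example, safe_intervals(5000, 2500, 10000) returns {5000, 2500}, while an oversized check interval of 20000 is clamped: safe_intervals(20000, 2500, 10000) returns {5000, 2500}.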
%% @private loop(Parent, State) -> receive - {'$vnode_proxy_call', From, Msg} -> - {reply, Reply, NewState} = handle_call(Msg, - From, - State), - {_, Reply} = gen:reply(From, Reply), - loop(Parent, NewState); - {'$vnode_proxy_cast', Msg} -> - {noreply, NewState} = handle_cast(Msg, State), - loop(Parent, NewState); - {'DOWN', _Mref, process, _Pid, _} -> - NewState = forget_vnode(State), - loop(Parent, NewState); - {system, From, Msg} -> - sys:handle_system_msg(Msg, - From, - Parent, - ?MODULE, - [], - State); - Msg -> - {noreply, NewState} = handle_proxy(Msg, State), - loop(Parent, NewState) + {'$vnode_proxy_call', From, Msg} -> + {reply, Reply, NewState} = handle_call(Msg, From, + State), + {_, Reply} = gen:reply(From, Reply), + loop(Parent, NewState); + {'$vnode_proxy_cast', Msg} -> + {noreply, NewState} = handle_cast(Msg, State), + loop(Parent, NewState); + {'DOWN', _Mref, process, _Pid, _} -> + NewState = forget_vnode(State), loop(Parent, NewState); + {system, From, Msg} -> + sys:handle_system_msg(Msg, From, Parent, ?MODULE, [], + State); + Msg -> + {noreply, NewState} = handle_proxy(Msg, State), + loop(Parent, NewState) end. %% @private @@ -190,8 +170,7 @@ handle_call({return_vnode, Req}, _From, State) -> handle_call(overloaded, _From, State = #state{check_mailbox = Mailbox, check_threshold = Threshold}) -> - Result = Mailbox > Threshold, - {reply, Result, State}; + Result = Mailbox > Threshold, {reply, Result, State}; handle_call(_Msg, _From, State) -> {reply, ok, State}. %% @private @@ -206,11 +185,10 @@ handle_cast({vnode_proxy_pong, Ref, Msgs}, State = #state{check_request = RequestState, check_mailbox = Mailbox}) -> NewState = case Ref of - RequestState -> - State#state{check_mailbox = Mailbox - Msgs, - check_request = undefined, - check_counter = 0}; - _ -> State + RequestState -> + State#state{check_mailbox = Mailbox - Msgs, + check_request = undefined, check_counter = 0}; + _ -> State end, {noreply, NewState}; handle_cast(_Msg, State) -> {noreply, State}. @@ -244,51 +222,45 @@ handle_proxy(Msg, %% ensure unnecessary work is not being performed needlessly. %% case State#state.vnode_pid of - undefined -> {Pid, State2} = get_vnode_pid(State); - KnownPid -> - Pid = KnownPid, - State2 = State + undefined -> {Pid, State2} = get_vnode_pid(State); + KnownPid -> Pid = KnownPid, State2 = State end, Mailbox2 = case Mailbox =< Threshold of - true -> - Pid ! Msg, - Mailbox + 1; - false -> - handle_overload(Msg, State), - Mailbox + true -> Pid ! Msg, Mailbox + 1; + false -> handle_overload(Msg, State), Mailbox end, Counter2 = Counter + 1, case Counter2 of - RequestInterval -> - %% Ping the vnode in hopes that we get a pong back before hitting - %% the hard query interval and triggering an expensive process_info - %% call. A successful pong from the vnode means that all messages - %% sent before the ping have already been handled and therefore - %% we can adjust our mailbox estimate accordingly. - case RequestState of - undefined -> - RequestState2 = send_proxy_ping(Pid, Mailbox2); - _ -> RequestState2 = RequestState - end, - Mailbox3 = Mailbox2, - Counter3 = Counter2; - Interval -> - %% Time to directly check the mailbox size. This operation may - %% be extremely expensive. If the vnode is currently active, - %% the proxy will be descheduled until the vnode finishes - %% execution and becomes descheduled itself. 
- {_, L} = erlang:process_info(Pid, message_queue_len), - Counter3 = 0, - Mailbox3 = L + 1, - %% Send a new proxy ping so that if the new length is above the - %% threshold then the proxy will detect the work is completed, - %% rather than being stuck in overload state until the interval - %% counts are reached. - RequestState2 = send_proxy_ping(Pid, Mailbox3); - _ -> - Mailbox3 = Mailbox2, - Counter3 = Counter2, - RequestState2 = RequestState + RequestInterval -> + %% Ping the vnode in hopes that we get a pong back before hitting + %% the hard query interval and triggering an expensive process_info + %% call. A successful pong from the vnode means that all messages + %% sent before the ping have already been handled and therefore + %% we can adjust our mailbox estimate accordingly. + case RequestState of + undefined -> + RequestState2 = send_proxy_ping(Pid, Mailbox2); + _ -> RequestState2 = RequestState + end, + Mailbox3 = Mailbox2, + Counter3 = Counter2; + Interval -> + %% Time to directly check the mailbox size. This operation may + %% be extremely expensive. If the vnode is currently active, + %% the proxy will be descheduled until the vnode finishes + %% execution and becomes descheduled itself. + {_, L} = erlang:process_info(Pid, message_queue_len), + Counter3 = 0, + Mailbox3 = L + 1, + %% Send a new proxy ping so that if the new length is above the + %% threshold then the proxy will detect the work is completed, + %% rather than being stuck in overload state until the interval + %% counts are reached. + RequestState2 = send_proxy_ping(Pid, Mailbox3); + _ -> + Mailbox3 = Mailbox2, + Counter3 = Counter2, + RequestState2 = RequestState end, {noreply, State2#state{check_counter = Counter3, @@ -296,29 +268,26 @@ handle_proxy(Msg, check_request = RequestState2}}. handle_overload(Msg, - #state{mod = Mod, index = Index}) -> + #state{mod = Module, index = Index}) -> %% STATS %riak_core_stat:update(dropped_vnode_requests), case Msg of - {'$gen_event', - #riak_vnode_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, - Sender, - Index); - {'$gen_all_state_event', - #riak_vnode_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, - Sender, - Index); - {'$gen_event', - #riak_coverage_req_v1{sender = Sender, - request = Request}} -> - catch Mod:handle_overload_command(Request, - Sender, - Index); - _ -> catch Mod:handle_overload_info(Msg, Index) + {'$gen_event', + #riak_vnode_req_v1{sender = Sender, + request = Request}} -> + catch Module:handle_overload_command(Request, Sender, + Index); + {'$gen_all_state_event', + #riak_vnode_req_v1{sender = Sender, + request = Request}} -> + catch Module:handle_overload_command(Request, Sender, + Index); + {'$gen_event', + #riak_coverage_req_v1{sender = Sender, + request = Request}} -> + catch Module:handle_overload_command(Request, Sender, + Index); + _ -> catch Module:handle_overload_info(Msg, Index) end. %% @private @@ -328,10 +297,10 @@ forget_vnode(State) -> check_counter = 0, check_request = undefined}. 
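Stripped of the counter bookkeeping, the fast-path decision in handle_proxy/2 above is a threshold gate on the estimated mailbox size: forward the message and bump the estimate while at or below the threshold, otherwise divert to the overload handler. As a pure function (a simplification, not the proxy's actual state record):

-module(proxy_gate_sketch).

-export([route/3]).

%% Decide what to do with Msg given the current mailbox estimate.
route(Msg, Mailbox, Threshold) when Mailbox =< Threshold ->
    {forward, Msg, Mailbox + 1};
route(Msg, Mailbox, _Threshold) ->
    {drop_to_overload, Msg, Mailbox}.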
%% @private
-get_vnode_pid(State = #state{mod = Mod, index = Index,
- vnode_pid = undefined}) ->
+get_vnode_pid(State = #state{mod = Module,
+ index = Index, vnode_pid = undefined}) ->
 {ok, Pid} = riak_core_vnode_manager:get_vnode_pid(Index,
- Mod),
+ Module),
 Mref = erlang:monitor(process, Pid),
 NewState = State#state{vnode_pid = Pid,
 vnode_mref = Mref},
@@ -343,66 +312,56 @@ get_vnode_pid(State = #state{vnode_pid = Pid}) ->

send_proxy_ping(Pid, MailboxSizeAfterPing) ->
 Ref = make_ref(),
 Pid !
- {'$vnode_proxy_ping',
- self(),
- Ref,
- MailboxSizeAfterPing},
+ {'$vnode_proxy_ping', self(), Ref,
+ MailboxSizeAfterPing},
 Ref.

-ifdef(TEST).

update_msg_counter() ->
 Count = case erlang:get(count) of
- undefined -> 0;
- Val -> Val
+ undefined -> 0;
+ Val -> Val
 end,
 put(count, Count + 1).

fake_loop() ->
 receive
- block -> fake_loop_block();
- slow -> fake_loop_slow();
- {get_count, Pid} ->
- Pid ! {count, erlang:get(count)},
- fake_loop();
- %% Original tests do not expect replies - the
- %% results below expect the pings to be counted
- %% towards messages received. If you ever wanted
- %% to re-instance, uncomment below.
- %% {'$vnode_proxy_ping', ReplyTo, Ref, Msgs} ->
- %% ReplyTo ! {Ref, Msgs},
- %% fake_loop();
- _Msg ->
- update_msg_counter(),
- fake_loop()
+ block -> fake_loop_block();
+ slow -> fake_loop_slow();
+ {get_count, Pid} ->
+ Pid ! {count, erlang:get(count)}, fake_loop();
+ %% Original tests do not expect replies - the
+ %% results below expect the pings to be counted
+ %% towards messages received. If you ever wanted
+ %% to re-instate them, uncomment below.
+ %% {'$vnode_proxy_ping', ReplyTo, Ref, Msgs} ->
+ %% ReplyTo ! {Ref, Msgs},
+ %% fake_loop();
+ _Msg -> update_msg_counter(), fake_loop()
 end.

fake_loop_slow() ->
 timer:sleep(100),
 receive
- _Msg ->
- update_msg_counter(),
- fake_loop_slow()
+ _Msg -> update_msg_counter(), fake_loop_slow()
 end.

fake_loop_block() ->
 receive unblock -> fake_loop() end.

overload_test_() ->
- {timeout,
- 900,
+ {timeout, 900,
 {foreach,
 fun () ->
 VnodePid = spawn(fun fake_loop/0),
 meck:unload(),
 meck:new(riak_core_vnode_manager, [passthrough]),
- meck:expect(riak_core_vnode_manager,
- get_vnode_pid,
+ meck:expect(riak_core_vnode_manager, get_vnode_pid,
 fun (_Index, fakemod) -> {ok, VnodePid};
 (Index, Mod) -> meck:passthrough([Index, Mod])
 end),
 meck:new(fakemod, [non_strict]),
- meck:expect(fakemod,
- handle_overload_info,
+ meck:expect(fakemod, handle_overload_info,
 fun (hello, _Idx) -> ok end),
 {ok, ProxyPid} =
 riak_core_vnode_proxy:start_link(fakemod, 0),
@@ -416,76 +375,60 @@ overload_test_() ->
 exit(ProxyPid, kill)
 end,
 [fun ({_VnodePid, ProxyPid}) ->
- {"should not discard in normal operation",
- timeout,
- 60,
+ {"should not discard in normal operation", timeout, 60,
 fun () ->
 ToSend = (?DEFAULT_OVERLOAD_THRESHOLD),
 [ProxyPid ! hello || _ <- lists:seq(1, ToSend)],
 %% synchronize on the proxy and the mailbox
- {ok, ok} = gen:call(ProxyPid,
- '$vnode_proxy_call',
- sync,
+ {ok, ok} = gen:call(ProxyPid, '$vnode_proxy_call', sync,
 infinity),
 ProxyPid !
{get_count, self()},
 receive
- {count, Count} ->
- %% First will hit the request check interval,
- %% then will check message queue every interval
- %% (no new ping will be resubmitted after the first
- %% as the request will already have a reference)
- PingReqs = 1
- + % for first request intarval
- ToSend div
- (?DEFAULT_CHECK_INTERVAL),
- ?assertEqual((ToSend + PingReqs), Count)
+ {count, Count} ->
+ %% First will hit the request check interval,
+ %% then will check message queue every interval
+ %% (no new ping will be resubmitted after the first
+ %% as the request will already have a reference)
+ PingReqs = 1 +
+ % for first request interval
+ ToSend div (?DEFAULT_CHECK_INTERVAL),
+ ?assertEqual((ToSend + PingReqs), Count)
 end
 end}
 end,
 fun ({VnodePid, ProxyPid}) ->
- {"should discard during overflow",
- timeout,
- 60,
+ {"should discard during overflow", timeout, 60,
 fun () ->
 VnodePid ! block,
 [ProxyPid ! hello || _ <- lists:seq(1, 50000)],
 %% synchronize on the mailbox - no-op that hits msg catchall
- Reply = gen:call(ProxyPid,
- '$vnode_proxy_call',
- sync,
+ Reply = gen:call(ProxyPid, '$vnode_proxy_call', sync,
 infinity),
 ?assertEqual({ok, ok}, Reply),
 VnodePid ! unblock,
 VnodePid ! {get_count, self()},
 receive
- {count, Count} ->
- %% Threshold + 10 unanswered vnode_proxy_ping
- ?assertEqual(((?DEFAULT_OVERLOAD_THRESHOLD) +
- 10),
- Count)
+ {count, Count} ->
+ %% Threshold + 10 unanswered vnode_proxy_ping
+ ?assertEqual(((?DEFAULT_OVERLOAD_THRESHOLD) + 10),
+ Count)
 end
 end}
 end,
 fun ({VnodePid, ProxyPid}) ->
- {"should tolerate slow vnodes",
- timeout,
- 60,
+ {"should tolerate slow vnodes", timeout, 60,
 fun () ->
 VnodePid ! slow,
 [ProxyPid ! hello || _ <- lists:seq(1, 50000)],
 %% synchronize on the mailbox - no-op that hits msg catchall
- Reply = gen:call(ProxyPid,
- '$vnode_proxy_call',
- sync,
+ Reply = gen:call(ProxyPid, '$vnode_proxy_call', sync,
 infinity),
 ?assertEqual({ok, ok}, Reply),
- %% check that the outstanding message count is
- %% reasonable
+ %% check that the outstanding message count is reasonable
 {message_queue_len, L} =
 erlang:process_info(VnodePid, message_queue_len),
- %% Threshold + 2 unanswered vnode_proxy_ping (one
- %% for first ping, second after process_info check)
- ?assert((L =< (?DEFAULT_OVERLOAD_THRESHOLD) + 2))
+ %% Threshold + (at most) 10 unanswered vnode_proxy_ping
+ ?assert((L =< (?DEFAULT_OVERLOAD_THRESHOLD) + 10))
 end}
 end]}}.
diff --git a/src/riak_core_vnode_worker.erl b/src/riak_core_vnode_worker.erl
index 852a994d6..293a40605 100644
--- a/src/riak_core_vnode_worker.erl
+++ b/src/riak_core_vnode_worker.erl
@@ -23,12 +23,8 @@
 -include("riak_core_vnode.hrl").

 % gen_server callbacks
--export([init/1,
- handle_call/3,
- handle_cast/2,
- handle_info/2,
- terminate/2,
- code_change/3]).
+-export([init/1, handle_call/3, handle_cast/2,
+ handle_info/2, terminate/2, code_change/3]).

 % API
 -export([start_link/1, handle_work/3, handle_work/4]).
@@ -36,7 +32,7 @@
 -type mod_state() :: term().

 -record(state,
- {module :: atom(), modstate :: mod_state()}).
+ {module :: atom(), modstate :: mod_state()}).

 -callback init_worker(partition(), Args :: term(),
 Props :: [{atom(), term()}]) -> {ok, mod_state()}.
@@ -51,10 +47,7 @@ start_link(Args) ->
 [VNodeIndex, WorkerArgs, WorkerProps, Caller] =
 proplists:get_value(worker_args, Args),
 gen_server:start_link(?MODULE,
- [WorkerMod,
- VNodeIndex,
- WorkerArgs,
- WorkerProps,
+ [WorkerMod, VNodeIndex, WorkerArgs, WorkerProps,
 Caller],
 []).
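riak_core_vnode_worker dispatches to a callback module implementing init_worker/3 (declared above) and a handle_work/3 that returns {reply, Reply, NewState} or {noreply, NewState}, as the handle_cast clause that follows expects. A minimal, hypothetical callback module sketching the contract:

-module(demo_vnode_worker).

-behaviour(riak_core_vnode_worker).

-export([init_worker/3, handle_work/3]).

%% Hypothetical worker: keeps the partition index as its state and
%% evaluates zero-arity funs handed to it as work items.
init_worker(VNodeIndex, _WorkerArgs, _WorkerProps) ->
    {ok, VNodeIndex}.

handle_work(Fun, _From, Idx) when is_function(Fun, 0) ->
    {reply, Fun(), Idx}.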
@@ -64,14 +57,10 @@ handle_work(Worker, Work, From) -> handle_work(Worker, Work, From, Caller) -> gen_server:cast(Worker, {work, Work, From, Caller}). -init([Module, - VNodeIndex, - WorkerArgs, - WorkerProps, +init([Module, VNodeIndex, WorkerArgs, WorkerProps, Caller]) -> {ok, WorkerState} = Module:init_worker(VNodeIndex, - WorkerArgs, - WorkerProps), + WorkerArgs, WorkerProps), %% let the pool queue manager know there might be a worker to checkout riak_core_vnode_worker_pool:worker_started(Caller), {ok, #state{module = Module, modstate = WorkerState}}. @@ -83,15 +72,13 @@ handle_call(Event, _From, State) -> {reply, ok, State}. handle_cast({work, Work, WorkFrom, Caller}, - #state{module = Mod, modstate = ModState} = State) -> - NewModState = case Mod:handle_work(Work, - WorkFrom, - ModState) + #state{module = Module, modstate = ModState} = State) -> + NewModState = case Module:handle_work(Work, WorkFrom, + ModState) of - {reply, Reply, NS} -> - riak_core_vnode:reply(WorkFrom, Reply), - NS; - {noreply, NS} -> NS + {reply, Reply, NS} -> + riak_core_vnode:reply(WorkFrom, Reply), NS; + {noreply, NS} -> NS end, %% check the worker back into the pool riak_core_vnode_worker_pool:checkin_worker(Caller, diff --git a/test/pqc/bprops_eqc.erl b/test/pqc/bprops_eqc.erl deleted file mode 100644 index 73cc0b323..000000000 --- a/test/pqc/bprops_eqc.erl +++ /dev/null @@ -1,243 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- --module(bprops_eqc). - -%% -%% This module defines a collection of PROPER state_m commands, for -%% testing the riak_core_bucket module. In order to understand this -%% test, you should understand PROPER generally, and the PROPER state machine -%% testing framework and callback conventions. -%% -%% TODO This module currently tests a limited subset of the -%% riak_core_bucket module and makes little attempt to -%% do negative testing around malformed inputs, etc. -%% More attention needs to be spent on these tests! -%% - --ifdef(PROPER). --include_lib("proper/include/proper.hrl"). --include_lib("eunit/include/eunit.hrl"). - --compile(export_all). - --type bucket_name() :: binary(). --type orddict() :: orddict:orddict(). - --define(NAMES, [<<0>>, <<1>>, <<2>>, <<3>>]). --define(BPROP_KEYS, [foo, bar, tapas]). --define(DEFAULT_BPROPS, [{n_val, 3}]). --define(QC_OUT(P), - proper:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)). - - -%% -%% The state_m "Model". This invariant represents what properties -%% should be in which buckets between state transitions. -%% --record(state, { - buckets = orddict:new() :: orddict() -}). - -%% -%% Eunit entrypoints -%% - -bprops_test_() -> - {timeout,360, - ?_assert(proper:quickcheck(?QC_OUT(prop_buckets()), [{numtests, 5000}])) - }. 
- -%% -%% top level drivers (for testing by hand, typically) -%% - -run() -> - run(100). - -run(N) -> - proper:quickcheck(proper:numtests(N, prop_buckets())). - -rerun() -> - proper:check(proper:show_states(prop_buckets())). - -cover() -> - cover(100). - -cover(N) -> - cover:compile_beam(riak_core_bucket), - proper:quickcheck(proper:numtests(N, prop_buckets())), - cover:analyse_to_file(riak_core_bucket, [html]). - -%% -command(State) -> - oneof([{call, ?MODULE, set_bucket, set_bucket_args(State)}, - {call, ?MODULE, get_bucket, get_bucket_args(State)} - ]). - -%% -%% eqc_statem initial model -%% - --spec initial_state() -> proper:symbolic_state(). -initial_state() -> - #state{}. - -%% -%% set_bucket command -%% - -set_bucket_args(_S) -> - [bucket_name(), bucket_props()]. - -set_bucket(Bucket, BProps) -> - riak_core_bucket:set_bucket(Bucket, BProps). - -next_state(#state{buckets=Buckets} = S,_Res,{call,?MODULE, set_bucket, [Bucket, BProps]}) -> -%set_bucket_next(#state{buckets=Buckets} = S, _Res, [Bucket, BProps]) - %% - %% Get any previously defined properties from the model - %% - OldBProps = - case orddict:find(Bucket, Buckets) of - {ok, Props} -> Props; - error -> orddict:from_list(?DEFAULT_BPROPS) - end, - S#state{ - buckets = orddict:store( - Bucket, - %% add defaults and the bucket name; remove any duplicates - %% bprops takes precedence over defaults, and name is always set - %% to bucket - expected_properties( - Bucket, OldBProps, BProps - ), - Buckets - ) - }; -next_state(S,_Res,{call,?MODULE, get_bucket, [_Bucket]}) -> - S. - --spec expected_properties(bucket_name(), orddict(), orddict()) -> orddict(). -expected_properties(Bucket, OldProps, NewProps) -> - Props = riak_core_bucket_props:merge(NewProps, OldProps), - orddict:store(name, Bucket, Props). - -eq(A,B)-> - A=:=B. -%% -%% get_bucket command -%% - -get_bucket_args(_S) -> - [bucket_name()]. - -get_bucket(Bucket) -> - riak_core_bucket:get_bucket(Bucket). - -precondition(_S, {call, ?MODULE, _,_})-> - true. -%get_bucket_post(#state{buckets=Buckets}, [Bucket], Res) -postcondition(#state{buckets=Buckets},{call, ?MODULE, get_bucket, [Bucket]}, Res) -> - BPropsFind = orddict:find(Bucket, Buckets), - case {Res, BPropsFind} of - {error, _} -> - eq(Res, error); - {_, {ok, BProps}} -> - eq( - orddict:from_list(Res), - orddict:from_list(BProps) - ); - {_, error} -> - eq( - orddict:from_list(Res), - orddict:from_list(?DEFAULT_BPROPS ++ [{name, Bucket}]) - ) - end; - -postcondition(#state{buckets=Buckets},{call,?MODULE, set_bucket, [Bucket, _BProps]}, Res) -> -%set_bucket_post(#state{buckets=Buckets}, [Bucket, _BProps], Res) - case {Res, orddict:find(Bucket, Buckets)} of - %% first time bucket has been set - {ok, error} -> - true; - %% bucket has been set before - {ok, {ok, _OldBProps}} -> - true; - %% anything other than ok is a failure - %% TODO revisit, e.g., generate invalid inputs to force an error - _ -> - false - end. - - -%% TODO Add more commands here - -%% -%% generators -%% - -bucket_name() -> - proper_types:elements(?NAMES). - -bucket_props() -> - proper_types:list(bucket_prop()). - -bucket_prop() -> - oneof( - [ - {n_val, pos_int()}, - {bucket_prop_name(), bucket_prop_value()} - ] - ). - -pos_int() -> - ?LET(N, proper_types:nat(), N + 1). - -bucket_prop_name() -> - proper_types:elements(?BPROP_KEYS). - -bucket_prop_value() -> - proper_types:bool(). 
- - -%% -%% proper properties -%% - -prop_buckets() -> - ?FORALL(Cmds, commands(?MODULE), - aggregate(command_names(Cmds), - ?TRAPEXIT( - begin - {_H, _S, Res} = - bucket_eqc_utils:per_test_setup(?DEFAULT_BPROPS, - fun() -> - run_commands(?MODULE, Cmds) - end), - aggregate( - command_names(Cmds), - Res == ok - ) - end - ) - ) - ). - --endif. diff --git a/test/pqc/bucket_eqc_utils.erl b/test/pqc/bucket_eqc_utils.erl deleted file mode 100644 index 3a3d6459c..000000000 --- a/test/pqc/bucket_eqc_utils.erl +++ /dev/null @@ -1,49 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2007-2016 Basho Technologies, Inc. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(bucket_eqc_utils). - -%% API --export([per_test_setup/2]). - - -per_test_setup(DefaultBucketProps, TestFun) -> - try - os:cmd("rm -rf ./meta_temp"), - riak_core_test_util:stop_pid(whereis(riak_core_ring_events)), - riak_core_test_util:stop_pid(whereis(riak_core_ring_manager)), - application:set_env(riak_core, claimant_tick, 4294967295), - application:set_env(riak_core, cluster_name, "eqc_test"), - application:set_env(riak_core, default_bucket_props, DefaultBucketProps), - {ok, RingEvents} = riak_core_ring_events:start_link(), - {ok, RingMgr} = riak_core_ring_manager:start_link(test), - {ok, Claimant} = riak_core_claimant:start_link(), - - Results = TestFun(), - - riak_core_test_util:stop_pid(Claimant), - unlink(RingMgr), - riak_core_ring_manager:stop(), - riak_core_test_util:stop_pid(RingEvents), - Results - after - os:cmd("rm -rf ./meta_temp"), - meck:unload() - end. From 11a36809612d2b594ba00460d8fbd728ffff9a86 Mon Sep 17 00:00:00 2001 From: woelki Date: Mon, 5 Oct 2020 18:47:56 +0200 Subject: [PATCH 5/5] Insert the commit "Unused riak_core_bas64url module" --- src/riak_core_base64url.erl | 99 ------------------------------ src/riak_core_vnode.erl | 98 ++++++++++++++--------------- test/riak_core_base64url_tests.erl | 42 ------------- 3 files changed, 47 insertions(+), 192 deletions(-) delete mode 100644 src/riak_core_base64url.erl delete mode 100644 test/riak_core_base64url_tests.erl diff --git a/src/riak_core_base64url.erl b/src/riak_core_base64url.erl deleted file mode 100644 index 15c1261ff..000000000 --- a/src/riak_core_base64url.erl +++ /dev/null @@ -1,99 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2009-2010 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. 
-%% a copy of the License at
-%%
-%%   http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing,
-%% software distributed under the License is distributed on an
-%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-%% KIND, either express or implied. See the License for the
-%% specific language governing permissions and limitations
-%% under the License.
-%%
-%% -------------------------------------------------------------------
-
-%% @doc base64url is a wrapper around the base64 module to produce
-%% base64-compatible encodings that are URL safe.
-%% The / character in normal base64 encoding is replaced with
-%% the _ character, and + is replaced with -.
-%% This replacement scheme is named "base64url" by
-%% http://en.wikipedia.org/wiki/Base64
-
--module(riak_core_base64url).
-
--export([decode/1,
-         decode_to_string/1,
-         encode/1,
-         encode_to_string/1,
-         mime_decode/1,
-         mime_decode_to_string/1]).
-
--spec decode(iodata()) -> binary().
-
-decode(Base64url) ->
-    base64:decode(urldecode(Base64url)).
-
--spec decode_to_string(iodata()) -> string().
-
-decode_to_string(Base64url) ->
-    base64:decode_to_string(urldecode(Base64url)).
-
--spec mime_decode(iodata()) -> binary().
-
-mime_decode(Base64url) ->
-    base64:mime_decode(urldecode(Base64url)).
-
--spec mime_decode_to_string(iodata()) -> string().
-
-mime_decode_to_string(Base64url) ->
-    base64:mime_decode_to_string(urldecode(Base64url)).
-
--spec encode(iodata()) -> binary().
-
-encode(Data) -> urlencode(base64:encode(Data)).
-
--spec encode_to_string(iodata()) -> string().
-
-encode_to_string(Data) ->
-    urlencode(base64:encode_to_string(Data)).
-
-urlencode(Base64) when is_list(Base64) ->
-    Padded = [urlencode_digit(D) || D <- Base64],
-    string:strip(Padded, both, $=);
-urlencode(Base64) when is_binary(Base64) ->
-    Padded = << <<(urlencode_digit(D))>>
-                || <<D>> <= Base64 >>,
-    binary:replace(Padded, <<"=">>, <<"">>, [global]).
-
-urldecode(Base64url) when is_list(Base64url) ->
-    Prepad = [urldecode_digit(D) || D <- Base64url],
-    Padding = padding(Prepad),
-    Prepad ++ Padding;
-urldecode(Base64url) when is_binary(Base64url) ->
-    Prepad = << <<(urldecode_digit(D))>>
-                || <<D>> <= Base64url >>,
-    Padding = padding(Prepad),
-    <<Prepad/binary, Padding/binary>>.
-
-padding(Base64) when is_binary(Base64) ->
-    case byte_size(Base64) rem 4 of
-        2 -> <<"==">>;
-        3 -> <<"=">>;
-        _ -> <<"">>
-    end;
-padding(Base64) when is_list(Base64) ->
-    binary_to_list(padding(list_to_binary(Base64))).
-
-urlencode_digit($/) -> $_;
-urlencode_digit($+) -> $-;
-urlencode_digit(D) -> D.
-
-urldecode_digit($_) -> $/;
-urldecode_digit($-) -> $+;
-urldecode_digit(D) -> D.
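For context on the scheme the deleted module implemented: only the two URL-hostile alphabet characters are rewritten and the '=' padding is dropped, so decoding has to re-derive the padding from the length. A minimal worked example, checked by hand against the base64 alphabet rather than run against this tree:

    %% plain base64 of <<251,255>> is <<"+/8=">>; '+' and '/' are not URL safe
    1> base64:encode(<<251,255>>).
    <<"+/8=">>
    %% the wrapper maps '+' -> '-' and '/' -> '_' and strips the padding
    2> riak_core_base64url:encode(<<251,255>>).
    <<"-_8">>
    %% decode re-pads ("-_8" has length 3 rem 4 = 3, so one '=') before base64:decode
    3> riak_core_base64url:decode(<<"-_8">>).
    <<251,255>>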
diff --git a/src/riak_core_vnode.erl b/src/riak_core_vnode.erl
index 3e78ab22d..3a2770881 100644
--- a/src/riak_core_vnode.erl
+++ b/src/riak_core_vnode.erl
@@ -551,13 +551,13 @@ active({resize_transfer_complete, SeenIdxs},
        State = #state{mod = Module, modstate = ModState,
                       handoff_target = Target}) ->
     case Target of
-      none -> continue(State);
-      _ ->
-          %% TODO: refactor similarties w/ finish_handoff handle_event
-          {ok, NewModState} = Mod:handoff_finished(Target,
-                                                   ModState),
-          finish_handoff(SeenIdxs,
-                         State#state{modstate = NewModState})
+        none -> continue(State);
+        _ ->
+            %% TODO: refactor similarities w/ finish_handoff handle_event
+            {ok, NewModState} = Module:handoff_finished(Target,
+                                                        ModState),
+            finish_handoff(SeenIdxs,
+                           State#state{modstate = NewModState})
     end;
 active({handoff_error, _Err, _Reason}, State) ->
     State2 = start_manager_event_timer(handoff_error,
@@ -575,11 +575,11 @@ active({trigger_handoff, TargetIdx, TargetNode},
 active(trigger_delete,
        State = #state{mod = Module, modstate = ModState,
                       index = Idx}) ->
-    case mark_delete_complete(Idx, Mod) of
-      {ok, _NewRing} ->
-          {ok, NewModState} = Module:delete(ModState),
-          logger:debug("~p ~p vnode deleted", [Idx, Module]);
-      _ -> NewModState = ModState
+    case mark_delete_complete(Idx, Module) of
+        {ok, _NewRing} ->
+            {ok, NewModState} = Module:delete(ModState),
+            logger:debug("~p ~p vnode deleted", [Idx, Module]);
+        _ -> NewModState = ModState
     end,
     maybe_shutdown_pool(State),
     riak_core_vnode_manager:unregister_vnode(Idx, Module),
@@ -728,24 +728,21 @@ handle_sync_event(core_status, _From, StateName,
              _ -> undefined
           end,
     Status = [{index, Index}, {mod, Module}] ++
-              case FN of
-                undefined -> [];
-                _ -> [{forward, FN}]
+               case FN of
+                 undefined -> [];
+                 _ -> [{forward, FN}]
+               end
+               ++
+               case HT of
+                 none -> [];
+                 _ -> [{handoff_target, HT}]
                end
-              ++
-              case HT of
-                none -> [];
-                _ -> [{handoff_target, HT}]
-              end
-              ++
-              case ModState of
-                {deleted, _} -> [deleted];
-                _ -> []
-              end,
-    {reply,
-     {Mode, Status},
-     StateName,
-     State,
+               ++
+               case ModState of
+                 {deleted, _} -> [deleted];
+                 _ -> []
+               end,
+    {reply, {Mode, Status}, StateName, State,
     State#state.inactivity_timeout}.
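As a reading aid for the reindented core_status clause above (an editorial aside, not part of the patch): the reply stays {Mode, Status}, where Status is a proplist that carries only the optional entries that apply. An illustrative value for a plain active vnode, with a made-up index and module:

    %% no forward and no handoff target, so only the mandatory entries appear
    {active, [{index, 0}, {mod, riak_kv_vnode}]}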
 %%handle_info
@@ -763,22 +760,20 @@ handle_info({'EXIT', Pid, Reason}, _StateName,
             State = #state{mod = Module, index = Index,
                            pool_pid = Pid, pool_config = PoolConfig}) ->
     case Reason of
-      Reason when Reason == normal; Reason == shutdown ->
-          continue(State#state{pool_pid = undefined});
-      _ ->
-          logger:error("~p ~p worker pool crashed ~p\n",
-                       [Index, Module, Reason]),
-          {pool, WorkerModule, PoolSize, WorkerArgs} = PoolConfig,
-          logger:debug("starting worker pool ~p with size of "
-                       "~p for vnode ~p.",
-                       [WorkerModule, PoolSize, Index]),
-          {ok, NewPoolPid} =
-              riak_core_vnode_worker_pool:start_link(WorkerModule,
-                                                     PoolSize,
-                                                     Index,
-                                                     WorkerArgs,
-                                                     worker_props),
-          continue(State#state{pool_pid = NewPoolPid})
+        Reason when Reason == normal; Reason == shutdown ->
+            continue(State#state{pool_pid = undefined});
+        _ ->
+            logger:error("~p ~p worker pool crashed ~p\n",
+                         [Index, Module, Reason]),
+            {pool, WorkerModule, PoolSize, WorkerArgs} = PoolConfig,
+            logger:debug("starting worker pool ~p with size of "
+                         "~p for vnode ~p.",
+                         [WorkerModule, PoolSize, Index]),
+            {ok, NewPoolPid} =
+                riak_core_vnode_worker_pool:start_link(WorkerModule,
+                                                       PoolSize, Index,
+                                                       WorkerArgs, worker_props),
+            continue(State#state{pool_pid = NewPoolPid})
     end;
 handle_info({'DOWN', _Ref, process, _Pid, normal},
             _StateName, State = #state{modstate = {deleted, _}}) ->
@@ -846,7 +841,7 @@ do_init(State = #state{index = Index, mod = Module,
    _ ->
        PoolConfig = case lists:keyfind(pool, 1, Props) of
                       {pool, WorkerModule, PoolSize, WorkerArgs} =
-                          PoolConfig ->
+                          PoolCfg ->
                           logger:debug("starting worker pool ~p with size of "
                                        "~p~n",
                                        [WorkerModule, PoolSize]),
                           {ok, PoolPid} =
                               riak_core_vnode_worker_pool:start_link(WorkerModule,
                                                                      PoolSize,
                                                                      Index,
                                                                      WorkerArgs,
@@ -855,8 +850,9 @@ do_init(State = #state{index = Index, mod = Module,
-                                                                     worker_props);
-                      _ -> PoolPid = PoolConfig = undefined
+                                                                     worker_props),
+                          PoolCfg;
+                      _ -> PoolPid = undefined
                      end,
     riak_core_handoff_manager:remove_exclusion(Module, Index),
     Timeout = application:get_env(riak_core,
@@ -994,7 +990,7 @@ vnode_coverage(Sender, Request, KeySpaces,
                                                   {Index, NextOwner},
                                                   KeySpaces, Sender,
-                                                  riak_core_vnode_master:reg_name(Mod)),
+                                                  riak_core_vnode_master:reg_name(Module)),
           Action = continue
     end,
     case Action of
@@ -1015,7 +1011,7 @@ vnode_coverage(Sender, Request, KeySpaces,
 end.

 vnode_handoff_command(Sender, Request, ForwardTo,
-                      State = #state{module = Mod, modstate = ModState,
+                      State = #state{mod = Module, modstate = ModState,
                                      handoff_target = HOTarget,
                                      handoff_type = HOType, pool_pid = Pool}) ->
     case Module:handle_handoff_command(Request,
@@ -1426,7 +1422,7 @@ mod_set_forwarding(Forward,
                          Module:module_info(exports))
        of
      true ->
-         NewModState = Mod:set_vnode_forwarding(Forward,
+         NewModState = Module:set_vnode_forwarding(Forward,
                                                 ModState),
          State#state{modstate = NewModState};
      false -> State
diff --git a/test/riak_core_base64url_tests.erl b/test/riak_core_base64url_tests.erl
deleted file mode 100644
index b4ec62522..000000000
--- a/test/riak_core_base64url_tests.erl
+++ /dev/null
@@ -1,42 +0,0 @@
-%% -------------------------------------------------------------------
-%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved.
-%%
-%% This file is provided to you under the Apache License,
-%% Version 2.0 (the "License"); you may not use this file
-%% except in compliance with the License. You may obtain
-%% a copy of the License at
-%%
-%%   http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing,
-%% software distributed under the License is distributed on an
-%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-%% KIND, either express or implied. See the License for the
-%% specific language governing permissions and limitations
-%% under the License.
-%% -------------------------------------------------------------------
--module(riak_core_base64url_tests).
-
--include_lib("eunit/include/eunit.hrl").
-
--define(URL, "http://example.com/foo?query=thing").
-
-string_to_string_test() ->
-    Encoded = riak_core_base64url:encode_to_string(?URL),
-    Decoded = riak_core_base64url:decode_to_string(Encoded),
-    ?assertEqual(?URL, Decoded).
-
-string_to_binary_test() ->
-    Encoded = riak_core_base64url:encode(?URL),
-    Decoded = riak_core_base64url:decode(Encoded),
-    ?assertEqual(<<?URL>>, Decoded).
-
-binary_to_binary_test() ->
-    Encoded = riak_core_base64url:encode(<<?URL>>),
-    Decoded = riak_core_base64url:decode(Encoded),
-    ?assertEqual(<<?URL>>, Decoded).
-
-binary_to_string_test() ->
-    Encoded = riak_core_base64url:encode_to_string(<<?URL>>),
-    Decoded = riak_core_base64url:decode_to_string(Encoded),
-    ?assertEqual(?URL, Decoded).
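For reference, the removed suite was plain EUnit, so before this patch it could be run from a shell with the standard entry point (output abridged; assumes the test beam is on the code path):

    %% runs every *_test/0 in the module and returns ok when all pass
    1> eunit:test(riak_core_base64url_tests).
      All 4 tests passed.
    ok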