Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 104 additions & 51 deletions test/replicaset-luatest/vconnect_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ local verror = require('vshard.error')

-- Call options shared by the tests of this file.
-- A very short timeout (0.01).
local small_timeout_opts = {timeout = 0.01}
-- The default wait timeout of the test framework helpers.
local timeout_opts = {timeout = vtest.wait_timeout}
-- Options for replica:call() in the blocking (is_async = false) and in the
-- non-blocking (is_async = true) modes. The templates below are run once
-- with each of them.
local sync_opts = {timeout = 1, is_async = false}
local async_opts = {timeout = 1, is_async = true}

local test_group = t.group('vconnect')

Expand Down Expand Up @@ -40,30 +42,6 @@ test_group.after_all(function(g)
g.cluster:stop()
end)

--
-- conn_vconnect_wait is not able to get a correct result here, because
-- the storage has no _call function. The connection must end up closed.
--
test_group.test_vconnect_no_result = function(g)
    local _, replicaset = next(vreplicaset.buildall(global_cfg))
    local hide_call = function()
        rawset(_G, '_call', ivshard.storage._call)
        ivshard.storage._call = nil
    end
    local restore_call = function()
        ivshard.storage._call = _G._call
    end
    g.replica:exec(hide_call)

    -- Without a connection object the replicaset has to create a new one.
    replicaset.master.conn = nil
    local result, call_err = replicaset:callrw('get_uuid', {}, timeout_opts)
    t.assert_str_contains(call_err.message, "_call' is not defined")
    t.assert_equals(result, nil)
    -- The error is critical, hence the connection must be closed.
    t.assert_equals(replicaset.master.conn.state, 'closed')

    g.replica:exec(restore_call)
end

--
-- Test, that conn_vconnect_wait fails, when future is nil.
--
Expand Down Expand Up @@ -92,33 +70,6 @@ test_group.test_vconnect_no_future = function(g)
end)
end

--
-- conn_vconnect_check must fail when the result of the future is nil.
--
test_group.test_vconnect_check_no_future = function(g)
    local _, replicaset = next(vreplicaset.buildall(global_cfg))
    local hide_call = function()
        rawset(_G, '_call', ivshard.storage._call)
        ivshard.storage._call = nil
    end
    local restore_call = function()
        ivshard.storage._call = _G._call
    end
    g.replica:exec(hide_call)

    replicaset.master.conn = nil
    local async_timeout_opts = table.deepcopy(timeout_opts)
    async_timeout_opts.is_async = true
    local check_call_fails = function()
        -- While the future is not ready yet, the call may return
        -- VHANDSHAKE_NOT_COMPLETE. Eventually it must return the actual
        -- error, that is why the check is retried.
        local result, call_err = replicaset:callrw('get_uuid', {},
                                                   async_timeout_opts)
        t.assert_str_contains(call_err.message, "_call' is not defined")
        t.assert_equals(result, nil)
        t.assert_equals(replicaset.master.conn.state, 'closed')
    end
    t.helpers.retrying({}, check_call_fails)

    g.replica:exec(restore_call)
end

--
-- 1. Change name and stop replica.
-- 2. Wait for error_reconnect timeout.
Expand Down Expand Up @@ -320,3 +271,105 @@ test_group.test_conn_not_leaks_on_rebind = function(g)
ivshard.storage._call = _G.old_call
end)
end

--
-- gh-632: the connection must not be closed during the name check on the
-- initial connection, when a retryable error happens.
--
-- Template of such a test. It connects to the master, breaks the replica
-- with err_func, checks that calls fail with err_msg while the connection
-- becomes 'active', and finally repairs the replica with recovery_func.
--
-- @param g Test group context, g.replica is the instance being broken.
-- @param opts Options passed to replica:call() (sync_opts or async_opts).
-- @param err_msg Substring expected in the message of the returned error.
-- @param err_func Function executed on the replica to provoke the error.
-- @param recovery_func Function executed on the replica to undo err_func.
--
local function test_conn_with_retryable_error_template(g, opts, err_msg,
                                                       err_func, recovery_func)
    local _, rs = next(vreplicaset.buildall(global_cfg))
    t.assert_not_equals(rs:connect_master(), nil)
    t.assert_equals(rs.master.conn.state, 'initial')
    g.replica:exec(err_func)
    -- The checks are run under pcall, because the replica must be recovered
    -- even when they fail. Otherwise it stays broken for all the following
    -- tests of the group.
    local ok, failure = pcall(t.helpers.retrying, {}, function()
        local net_status, res, err = rs.replicas.replica:call('echo', {123},
                                                              opts)
        t.assert_not(net_status)
        t.assert_not(res)
        t.assert_str_contains(err.message, err_msg)
        t.assert_equals(rs.master.conn.state, 'active')
    end)
    if not ok then
        -- Best effort recovery: the original failure is the one to report,
        -- so a possible error of the recovery itself is ignored here.
        pcall(g.replica.exec, g.replica, recovery_func)
        -- Level 0 keeps the error object untouched.
        error(failure, 0)
    end
    g.replica:exec(recovery_func)
end

-- A disabled storage is a retryable error: the connection must survive it.
test_group.test_conn_not_closed_during_disabled_storage = function(g)
    local break_storage = function() ivshard.storage.disable() end
    local fix_storage = function() ivshard.storage.enable() end
    local expected_msg = 'Storage is disabled'

    -- The sync mode is checked first, the async one is checked second.
    for _, call_opts in ipairs({sync_opts, async_opts}) do
        test_conn_with_retryable_error_template(g, call_opts, expected_msg,
                                                break_storage, fix_storage)
    end
end

-- A missing vshard.storage._call is a retryable error: the connection must
-- survive it.
test_group.test_conn_not_closed_during_undefined_storage_func = function(g)
    local drop_call = function()
        rawset(_G, 'old_call', ivshard.storage._call)
        ivshard.storage._call = nil
    end
    local bring_call_back = function()
        ivshard.storage._call = _G.old_call
    end
    local expected_msg = "Procedure 'vshard.storage._call' is not defined"

    -- The sync mode is checked first, the async one is checked second.
    for _, call_opts in ipairs({sync_opts, async_opts}) do
        test_conn_with_retryable_error_template(g, call_opts, expected_msg,
                                                drop_call, bring_call_back)
    end
end

-- A denied access to vshard.storage._call is a retryable error: the
-- connection must survive it.
test_group.test_conn_not_closed_during_denial_of_access = function(g)
    local take_access_away = function()
        box.session.su('admin')
        box.schema.user.revoke('storage', 'super')
        box.schema.user.revoke('storage', 'execute', 'function',
                               'vshard.storage._call')
        box.session.su('guest')
    end
    local give_access_back = function()
        box.session.su('admin')
        box.schema.user.grant('storage', 'super')
        box.schema.user.grant('storage', 'execute', 'function',
                              'vshard.storage._call')
        box.session.su('guest')
    end
    local expected_msg = "Execute access to function " ..
                         "'vshard.storage._call' is denied"

    -- The sync mode is checked first, the async one is checked second.
    for _, call_opts in ipairs({sync_opts, async_opts}) do
        test_conn_with_retryable_error_template(g, call_opts, expected_msg,
                                                take_access_away,
                                                give_access_back)
    end
end

--
-- Template for the tests of non retryable errors. ivshard.storage._call of
-- the replica is replaced with a function raising a plain Lua error. Any
-- call through a freshly built replicaset must fail with that error and
-- leave the connection closed. The closing must also be visible in the
-- log of the replica.
--
-- @param g Test group context, g.replica is the instance under test.
-- @param opts Options for replica:call(). Its is_async flag also selects
--     the log pattern to look for.
--
local function test_conn_with_non_retryable_error_template(g, opts)
    g.replica:exec(function(global_cfg, opts)
        rawset(_G, '_call', ivshard.storage._call)
        ivshard.storage._call = function() error('Non retryable error') end
        -- The checks are run under pcall, so that _call is restored even
        -- when they fail. Otherwise the broken _call leaks into all the
        -- following tests.
        local ok, failure = pcall(function()
            -- We build a replicaset inside the replica so that we can grep
            -- logs of replicaset module.
            local lreplicaset = require('vshard.replicaset')
            local _, rs = next(lreplicaset.buildall(global_cfg))
            t.assert_not_equals(rs:connect_master(), nil)
            t.helpers.retrying({}, function()
                -- It may be VHANDSHAKE_NOT_COMPLETE error, when future
                -- is not ready. But at the end it must be the actual error.
                local status, res, err = rs.replicas.replica:call('echo',
                                                                  {123}, opts)
                t.assert_not(status)
                t.assert_not(res)
                t.assert_str_contains(err.message, 'Non retryable error')
                t.assert_equals(rs.master.conn.state, 'closed')
            end)
        end)
        ivshard.storage._call = _G._call
        if not ok then
            -- Level 0 keeps the error object untouched.
            error(failure, 0)
        end
    end, {global_cfg, opts})

    -- NOTE(review): the async pattern is a prefix of the sync one, so it
    -- presumably also matches a line left by an earlier sync run - confirm.
    if opts.is_async then
        t.assert(g.replica:grep_log('Closing the connection'))
    else
        t.assert(g.replica:grep_log('Closing the connection.*after waiting'))
    end
end

-- A non retryable error must close the connection in both call modes.
test_group.test_conn_close_with_non_retryable_error = function(g)
    -- The sync mode is checked first, the async one is checked second.
    for _, call_opts in ipairs({sync_opts, async_opts}) do
        test_conn_with_non_retryable_error_template(g, call_opts)
    end
end
83 changes: 83 additions & 0 deletions test/router-luatest/router_2_2_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1303,3 +1303,86 @@ g.test_info_disable_consistency = function(g)
end, {global_cfg})
vtest.drop_instance(g, router)
end

--
-- gh-632: Connection is closed during name check on initial connection,
-- when retryable error happens.
--
-- Template of a router test: err_func breaks replica_2_a before the router
-- reconnects to its replicaset, recovery_func repairs it afterwards.
--
local function test_router_alerts_on_initial_conn_template(g, err_func,
                                                           recovery_func)
    t.run_only_if(vutil.feature.persistent_names)
    -- Executed on the router: wait until it reports no alerts at all.
    local wait_no_alerts = function()
        ivshard.router.discovery_wakeup()
        t.helpers.retrying({}, function()
            t.assert_equals(ivshard.router.info().alerts, {})
        end)
    end
    -- Executed on the router: wait until the alert about the not optimal
    -- read replica of replicaset_2 shows up.
    local wait_suboptimal_alert = function()
        ivshard.router.discovery_wakeup()
        t.helpers.retrying({}, function()
            local expected = {{
                'SUBOPTIMAL_REPLICA',
                'A current read replica in replicaset ' ..
                'replicaset_2 is not optimal',
            }}
            t.assert_items_include(ivshard.router.info().alerts, expected)
        end)
    end

    -- The vconnect works only with the name_as_key identification mode,
    -- so the cluster is reconfigured to use it.
    local named_template = router_named_cfg_template()
    local named_cfg = vtest.config_new(named_template)
    local saved_rs_2 = table.deepcopy(named_template.sharding.replicaset_2)
    vtest.cluster_cfg(g, named_cfg)
    vtest.router_cfg(g.router, named_cfg)
    g.router:exec(wait_no_alerts)

    -- The vshard greeting (vconnect) is initiated by a reconnect. For that
    -- one replicaset is removed from the router and then added back, with
    -- the replica being broken in between. Right after the retryable error
    -- the router must have the alert.
    named_template.sharding.replicaset_2 = nil
    local cfg_without_rs_2 = vtest.config_new(named_template)
    vtest.router_cfg(g.router, cfg_without_rs_2)
    g.replica_2_a:exec(err_func)
    named_template.sharding.replicaset_2 = saved_rs_2
    local cfg_with_rs_2 = vtest.config_new(named_template)
    vtest.router_cfg(g.router, cfg_with_rs_2)
    g.router:exec(wait_suboptimal_alert)

    -- Finally the replica is recovered. The router must drop all its
    -- alerts, and the original configuration is applied back.
    g.replica_2_a:exec(recovery_func)
    g.router:exec(wait_no_alerts)
    vtest.cluster_cfg(g, global_cfg)
    vtest.router_cfg(g.router, global_cfg)
end

-- The retryable error is provoked by disabling the storage.
g.test_router_alerts_on_initial_conn_with_disabled_storage = function(g)
    test_router_alerts_on_initial_conn_template(g, function()
        ivshard.storage.disable()
    end, function()
        ivshard.storage.enable()
    end)
end

-- The retryable error is provoked by removing ivshard.storage._call.
g.test_router_alerts_on_initial_conn_with_undefined_storage_func = function(g)
    test_router_alerts_on_initial_conn_template(g, function()
        rawset(_G, 'old_call', ivshard.storage._call)
        ivshard.storage._call = nil
    end, function()
        ivshard.storage._call = _G.old_call
    end)
end

-- The retryable error is provoked by revoking the access of the 'storage'
-- user to vshard.storage._call.
g.test_router_alerts_on_initial_conn_with_denial_of_access = function(g)
    local take_access_away = function()
        box.session.su('admin')
        box.schema.user.revoke('storage', 'super')
        box.schema.user.revoke('storage', 'execute', 'function',
                               'vshard.storage._call')
        box.session.su('guest')
    end
    local give_access_back = function()
        box.session.su('admin')
        box.schema.user.grant('storage', 'super')
        box.schema.user.grant('storage', 'execute', 'function',
                              'vshard.storage._call')
        box.session.su('guest')
    end
    test_router_alerts_on_initial_conn_template(g, take_access_away,
                                                give_access_back)
end
32 changes: 32 additions & 0 deletions vshard/error.lua
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,36 @@ local function error_is_timeout(err)
err.message == 'Timeout exceeded') or err.type == 'TimedOut'
end

--
-- Check if the error says that vshard is not ready on the remote side: the
-- storage is disabled, or a 'vshard.' function is not defined, or the
-- execute access to it is denied.
--
-- @param e Error object with type, code and message fields.
-- @return true if the error is one of the listed above, false otherwise.
--
local function is_vshard_not_ready(e)
    local etype = e.type
    if etype == 'ShardingError' then
        return e.code == error_code.STORAGE_IS_DISABLED
    end
    -- Old Tarantool versions didn't keep the error type and sent
    -- ClientError for all errors. New versions preserve the original type.
    if etype ~= 'ClientError' and etype ~= 'AccessDeniedError' then
        return false
    end
    local code = e.code
    if code == box.error.ACCESS_DENIED then
        return e.message:startswith('Execute access to function \'vshard.')
    elseif code == box.error.NO_SUCH_PROC then
        return e.message:startswith('Procedure \'vshard.')
    end
    return false
end

--
-- Try to extract a vshard error out of an error received via network.
--
-- VShard functions can raise errors with error(). The network layer wraps
-- such an error into LuajitError with the original one stored in the
-- message as a string. The extraction is not always possible, since the
-- string representation could be truncated.
--
-- @param err Error object with a message field.
-- @return The error restored from the message, when it is a ShardingError.
--     The original err object otherwise.
--
local function unwrap_vshard_error(err)
    local restored = from_string(err.message)
    if not restored or restored.type ~= 'ShardingError' then
        -- Not a vshard error, the object is returned as is.
        return err
    end
    return restored
end

return {
code = error_code,
box = box_error,
Expand All @@ -342,4 +372,6 @@ return {
alert = make_alert,
timeout = make_timeout,
is_timeout = error_is_timeout,
is_vshard_not_ready = is_vshard_not_ready,
unwrap_vshard_error = unwrap_vshard_error,
}
Loading
Loading