Skip to content

Commit

Permalink
disable node disconnect on error and detention
Browse files Browse the repository at this point in the history
- Added `config.NODES_DISCONNECT_ON_ERROR` which is false by default.
- In `nodes_monitor. report_error_on_node_blocks`, disconnect the node only if config.NODES_DISCONNECT_ON_ERROR is true.
- Changed the default of NODE_IO_DETENTION_DISABLE to true. Most users usually have a single node in a pool, so putting a node into detention causes many issues.

Signed-off-by: Danny Zaken <[email protected]>
  • Loading branch information
dannyzaken committed Feb 26, 2025
1 parent bf6a967 commit e895e70
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
8 changes: 7 additions & 1 deletion config.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,13 @@ config.NODES_FREE_SPACE_RESERVE = 100 * (1024 ** 2);
// don't use agents with less than reserve + 5 GB
config.MINIMUM_AGENT_TOTAL_STORAGE = config.NODES_FREE_SPACE_RESERVE + (5 * (1024 ** 3));

config.NODE_IO_DETENTION_DISABLE = false;

// by default not disconnecting nodes on error. This caused more issues than benefits
config.NODES_DISCONNECT_ON_ERROR = false;

// by default not detaining nodes on io errors. This caused more issues than benefits
config.NODE_IO_DETENTION_DISABLE = true;

config.NODE_IO_DETENTION_THRESHOLD = 60 * 1000;
config.NODE_IO_DETENTION_RECENT_ISSUES = 5;
// Picked two because minimum of nodes per pool is three
Expand Down
24 changes: 14 additions & 10 deletions src/server/node_services/nodes_monitor.js
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ class NodesMonitor extends EventEmitter {
return P.resolve()
.then(() => this._run())
.then(() => {
// do nothing.
// do nothing.
});
}

Expand Down Expand Up @@ -1012,7 +1012,7 @@ class NodesMonitor extends EventEmitter {
})
.then(() => this._update_nodes_store('force'))
.then(() => {
// do nothing.
// do nothing.
});
}

Expand Down Expand Up @@ -1236,15 +1236,15 @@ class NodesMonitor extends EventEmitter {
if (item.node.deleted) return;
if (!item.connection) return;
if (!item.agent_info) return;
//The node should be set as enable if it is not decommissioned.
//The node should be set as enable if it is not decommissioned.
const should_enable = !item.node.decommissioned;
const item_pool = system_store.data.get_by_id(item.node.pool);
const location_info = {
node_id: String(item.node._id),
host_id: String(item.node.host_id),
pool_id: String(item.node.pool),
};
// We should only add region if it is defined.
// We should only add region if it is defined.
if (item_pool && !_.isUndefined(item_pool.region)) location_info.region = item_pool.region;
// We should change the service enable field if the field is not equal to the decommissioned decision.
const service_enabled_not_changed = (item.node.enabled === should_enable);
Expand Down Expand Up @@ -3373,12 +3373,16 @@ class NodesMonitor extends EventEmitter {
'node', item.node.name,
'issues_report', item.node.issues_report,
'block_report', block_report);
// disconnect from the node to force reconnect
// only disconnect if enough time passed since last disconnect to avoid amplification of errors in R\W flows
const DISCONNECT_GRACE_PERIOD = 2 * 60 * 1000; // 2 minutes grace before another disconnect
if (!item.disconnect_time || item.disconnect_time + DISCONNECT_GRACE_PERIOD < Date.now()) {
dbg.log0('disconnecting node to force reconnect. node:', item.node.name);
this._disconnect_node(item);


if (config.NODES_DISCONNECT_ON_ERROR) {
// disconnect from the node to force reconnect
// only disconnect if enough time passed since last disconnect to avoid amplification of errors in R\W flows
const DISCONNECT_GRACE_PERIOD = 2 * 60 * 1000; // 2 minutes grace before another disconnect
if (!item.disconnect_time || item.disconnect_time + DISCONNECT_GRACE_PERIOD < Date.now()) {
dbg.log0('disconnecting node to force reconnect. node:', item.node.name);
this._disconnect_node(item);
}
}
}
}
Expand Down

0 comments on commit e895e70

Please sign in to comment.