Skip to content

Commit c0e0129

Browse files
committed
Track message timeout ticks based on internal states
With the introduction of `has_pending_channel_update`, we can now determine whether any messages are owed to irrevocably commit HTLC updates based on the current channel state. We prefer using the channel state, over manually tracking as previously done, to have a single source of truth. We also gain the ability to expect to receive multiple messages at once, which will become relevant with the quiescence protocol, where we may be waiting on a counterparty `revoke_and_ack` and `stfu`.
1 parent 20877b3 commit c0e0129

File tree

2 files changed

+49
-48
lines changed

2 files changed

+49
-48
lines changed

lightning/src/ln/channel.rs

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,9 +1144,8 @@ pub(crate) const MIN_AFFORDABLE_HTLC_COUNT: usize = 4;
11441144
/// * `EXPIRE_PREV_CONFIG_TICKS` = convergence_delay / tick_interval
11451145
pub(crate) const EXPIRE_PREV_CONFIG_TICKS: usize = 5;
11461146

1147-
/// The number of ticks that may elapse while we're waiting for a response to a
1148-
/// [`msgs::RevokeAndACK`] or [`msgs::ChannelReestablish`] message before we attempt to disconnect
1149-
/// them.
1147+
/// The number of ticks that may elapse while we're waiting for a response before we attempt to
1148+
/// disconnect them.
11501149
///
11511150
/// See [`ChannelContext::sent_message_awaiting_response`] for more information.
11521151
pub(crate) const DISCONNECT_PEER_AWAITING_RESPONSE_TICKS: usize = 2;
@@ -1874,16 +1873,14 @@ pub(super) struct ChannelContext<SP: Deref> where SP::Target: SignerProvider {
18741873
pub workaround_lnd_bug_4006: Option<msgs::ChannelReady>,
18751874

18761875
/// An option set when we wish to track how many ticks have elapsed while waiting for a response
1877-
/// from our counterparty after sending a message. If the peer has yet to respond after reaching
1878-
/// `DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`, a reconnection should be attempted to try to
1879-
/// unblock the state machine.
1876+
/// from our counterparty after entering specific states. If the peer has yet to respond after
1877+
/// reaching `DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`, a reconnection should be attempted to
1878+
/// try to unblock the state machine.
18801879
///
1881-
/// This behavior is mostly motivated by a lnd bug in which we don't receive a message we expect
1882-
/// to in a timely manner, which may lead to channels becoming unusable and/or force-closed. An
1883-
/// example of such can be found at <https://github.com/lightningnetwork/lnd/issues/7682>.
1884-
///
1885-
/// This is currently only used when waiting for a [`msgs::ChannelReestablish`] or
1886-
/// [`msgs::RevokeAndACK`] message from the counterparty.
1880+
/// This behavior was initially motivated by a lnd bug in which we don't receive a message we
1881+
/// expect to in a timely manner, which may lead to channels becoming unusable and/or
1882+
/// force-closed. An example of such can be found at
1883+
/// <https://github.com/lightningnetwork/lnd/issues/7682>.
18871884
sent_message_awaiting_response: Option<usize>,
18881885

18891886
/// This channel's type, as negotiated during channel open
@@ -5929,7 +5926,7 @@ impl<SP: Deref> FundedChannel<SP> where
59295926
// OK, we step the channel here and *then* if the new generation fails we can fail the
59305927
// channel based on that, but stepping stuff here should be safe either way.
59315928
self.context.channel_state.clear_awaiting_remote_revoke();
5932-
self.context.sent_message_awaiting_response = None;
5929+
self.mark_response_received();
59335930
self.context.counterparty_prev_commitment_point = self.context.counterparty_cur_commitment_point;
59345931
self.context.counterparty_cur_commitment_point = Some(msg.next_per_commitment_point);
59355932
self.context.cur_counterparty_commitment_transaction_number -= 1;
@@ -6295,6 +6292,10 @@ impl<SP: Deref> FundedChannel<SP> where
62956292
return Err(())
62966293
}
62976294

6295+
// We only clear `peer_disconnected` if we were able to reestablish the channel. We always
6296+
// reset our awaiting response in case we failed reestablishment and are disconnecting.
6297+
self.context.sent_message_awaiting_response = None;
6298+
62986299
if self.context.channel_state.is_peer_disconnected() {
62996300
// While the below code should be idempotent, it's simpler to just return early, as
63006301
// redundant disconnect events can fire, though they should be rare.
@@ -6355,8 +6356,6 @@ impl<SP: Deref> FundedChannel<SP> where
63556356
}
63566357
}
63576358

6358-
self.context.sent_message_awaiting_response = None;
6359-
63606359
// Reset any quiescence-related state as it is implicitly terminated once disconnected.
63616360
if matches!(self.context.channel_state, ChannelState::ChannelReady(_)) {
63626361
self.context.channel_state.clear_awaiting_quiescence();
@@ -6481,10 +6480,6 @@ impl<SP: Deref> FundedChannel<SP> where
64816480
commitment_update = None;
64826481
}
64836482

6484-
if commitment_update.is_some() {
6485-
self.mark_awaiting_response();
6486-
}
6487-
64886483
self.context.monitor_pending_revoke_and_ack = false;
64896484
self.context.monitor_pending_commitment_signed = false;
64906485
let order = self.context.resend_order.clone();
@@ -6841,7 +6836,7 @@ impl<SP: Deref> FundedChannel<SP> where
68416836
// Go ahead and unmark PeerDisconnected as various calls we may make check for it (and all
68426837
// remaining cases either succeed or ErrorMessage-fail).
68436838
self.context.channel_state.clear_peer_disconnected();
6844-
self.context.sent_message_awaiting_response = None;
6839+
self.mark_response_received();
68456840

68466841
let shutdown_msg = self.get_outbound_shutdown();
68476842

@@ -6897,9 +6892,6 @@ impl<SP: Deref> FundedChannel<SP> where
68976892
// AwaitingRemoteRevoke set, which indicates we sent a commitment_signed but haven't gotten
68986893
// the corresponding revoke_and_ack back yet.
68996894
let is_awaiting_remote_revoke = self.context.channel_state.is_awaiting_remote_revoke();
6900-
if is_awaiting_remote_revoke && !self.is_awaiting_monitor_update() {
6901-
self.mark_awaiting_response();
6902-
}
69036895
let next_counterparty_commitment_number = INITIAL_COMMITMENT_NUMBER - self.context.cur_counterparty_commitment_transaction_number + if is_awaiting_remote_revoke { 1 } else { 0 };
69046896

69056897
let channel_ready = if msg.next_local_commitment_number == 1 && INITIAL_COMMITMENT_NUMBER - self.holder_commitment_point.transaction_number() == 1 {
@@ -7084,26 +7076,34 @@ impl<SP: Deref> FundedChannel<SP> where
70847076
Ok((closing_signed, None, None))
70857077
}
70867078

7087-
// Marks a channel as waiting for a response from the counterparty. If it's not received
7088-
// [`DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`] after sending our own to them, then we'll attempt
7089-
// a reconnection.
7090-
fn mark_awaiting_response(&mut self) {
7091-
self.context.sent_message_awaiting_response = Some(0);
7079+
fn mark_response_received(&mut self) {
7080+
self.context.sent_message_awaiting_response = None;
70927081
}
70937082

70947083
/// Determines whether we should disconnect the counterparty due to not receiving a response
70957084
/// within our expected timeframe.
70967085
///
7097-
/// This should be called on every [`super::channelmanager::ChannelManager::timer_tick_occurred`].
7086+
/// This should be called for peers with an active socket on every
7087+
/// [`super::channelmanager::ChannelManager::timer_tick_occurred`].
7088+
#[allow(clippy::assertions_on_constants)]
70987089
pub fn should_disconnect_peer_awaiting_response(&mut self) -> bool {
7099-
let ticks_elapsed = if let Some(ticks_elapsed) = self.context.sent_message_awaiting_response.as_mut() {
7100-
ticks_elapsed
7090+
if let Some(ticks_elapsed) = self.context.sent_message_awaiting_response.as_mut() {
7091+
*ticks_elapsed += 1;
7092+
*ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS
7093+
} else if
7094+
// Cleared upon receiving `channel_reestablish`.
7095+
self.context.channel_state.is_peer_disconnected()
7096+
// Cleared upon receiving `revoke_and_ack`.
7097+
|| self.context.has_pending_channel_update()
7098+
{
7099+
// This is the first tick we've seen after expecting to make forward progress.
7100+
self.context.sent_message_awaiting_response = Some(1);
7101+
debug_assert!(DISCONNECT_PEER_AWAITING_RESPONSE_TICKS > 1);
7102+
false
71017103
} else {
71027104
// Don't disconnect when we're not waiting on a response.
7103-
return false;
7104-
};
7105-
*ticks_elapsed += 1;
7106-
*ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS
7105+
false
7106+
}
71077107
}
71087108

71097109
pub fn shutdown(
@@ -8266,7 +8266,6 @@ impl<SP: Deref> FundedChannel<SP> where
82668266
log_info!(logger, "Sending a data_loss_protect with no previous remote per_commitment_secret for channel {}", &self.context.channel_id());
82678267
[0;32]
82688268
};
8269-
self.mark_awaiting_response();
82708269
msgs::ChannelReestablish {
82718270
channel_id: self.context.channel_id(),
82728271
// The protocol has two different commitment number concepts - the "commitment

lightning/src/ln/channelmanager.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6632,19 +6632,21 @@ where
66326632

66336633
funded_chan.context.maybe_expire_prev_config();
66346634

6635-
if funded_chan.should_disconnect_peer_awaiting_response() {
6636-
let logger = WithChannelContext::from(&self.logger, &funded_chan.context, None);
6637-
log_debug!(logger, "Disconnecting peer {} due to not making any progress on channel {}",
6638-
counterparty_node_id, chan_id);
6639-
pending_msg_events.push(MessageSendEvent::HandleError {
6640-
node_id: counterparty_node_id,
6641-
action: msgs::ErrorAction::DisconnectPeerWithWarning {
6642-
msg: msgs::WarningMessage {
6643-
channel_id: *chan_id,
6644-
data: "Disconnecting due to timeout awaiting response".to_owned(),
6635+
if peer_state.is_connected {
6636+
if funded_chan.should_disconnect_peer_awaiting_response() {
6637+
let logger = WithChannelContext::from(&self.logger, &funded_chan.context, None);
6638+
log_debug!(logger, "Disconnecting peer {} due to not making any progress on channel {}",
6639+
counterparty_node_id, chan_id);
6640+
pending_msg_events.push(MessageSendEvent::HandleError {
6641+
node_id: counterparty_node_id,
6642+
action: msgs::ErrorAction::DisconnectPeerWithWarning {
6643+
msg: msgs::WarningMessage {
6644+
channel_id: *chan_id,
6645+
data: "Disconnecting due to timeout awaiting response".to_owned(),
6646+
},
66456647
},
6646-
},
6647-
});
6648+
});
6649+
}
66486650
}
66496651

66506652
true

0 commit comments

Comments
 (0)