Skip to content

Commit 54114c9

Browse files
authored
Merge pull request #1202 from TheBlueMatt/2021-12-fix-retries-races
Fix payment retry races and inform users when a payment fails
2 parents 09714e6 + 05d7a33 commit 54114c9

File tree

10 files changed

+572
-122
lines changed

10 files changed

+572
-122
lines changed

lightning-invoice/src/payment.rs

+192-9
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
//! # fn retry_payment(
6666
//! # &self, route: &Route, payment_id: PaymentId
6767
//! # ) -> Result<(), PaymentSendFailure> { unimplemented!() }
68+
//! # fn abandon_payment(&self, payment_id: PaymentId) { unimplemented!() }
6869
//! # }
6970
//! #
7071
//! # struct FakeRouter {}
@@ -187,6 +188,9 @@ pub trait Payer {
187188

188189
/// Retries a failed payment path for the [`PaymentId`] using the given [`Route`].
189190
fn retry_payment(&self, route: &Route, payment_id: PaymentId) -> Result<(), PaymentSendFailure>;
191+
192+
/// Signals that no further retries for the given payment will occur.
193+
fn abandon_payment(&self, payment_id: PaymentId);
190194
}
191195

192196
/// A trait defining behavior for routing an [`Invoice`] payment.
@@ -462,26 +466,30 @@ where
462466
fn handle_event(&self, event: &Event) {
463467
match event {
464468
Event::PaymentPathFailed {
465-
all_paths_failed, payment_id, payment_hash, rejected_by_dest, path,
466-
short_channel_id, retry, ..
469+
payment_id, payment_hash, rejected_by_dest, path, short_channel_id, retry, ..
467470
} => {
468471
if let Some(short_channel_id) = short_channel_id {
469472
let path = path.iter().collect::<Vec<_>>();
470473
self.scorer.lock().payment_path_failed(&path, *short_channel_id);
471474
}
472475

473-
if *rejected_by_dest {
474-
log_trace!(self.logger, "Payment {} rejected by destination; not retrying", log_bytes!(payment_hash.0));
475-
} else if payment_id.is_none() {
476+
if payment_id.is_none() {
476477
log_trace!(self.logger, "Payment {} has no id; not retrying", log_bytes!(payment_hash.0));
478+
} else if *rejected_by_dest {
479+
log_trace!(self.logger, "Payment {} rejected by destination; not retrying", log_bytes!(payment_hash.0));
480+
self.payer.abandon_payment(payment_id.unwrap());
477481
} else if retry.is_none() {
478482
log_trace!(self.logger, "Payment {} missing retry params; not retrying", log_bytes!(payment_hash.0));
483+
self.payer.abandon_payment(payment_id.unwrap());
479484
} else if self.retry_payment(payment_id.unwrap(), *payment_hash, retry.as_ref().unwrap()).is_ok() {
480485
// We retried at least somewhat, don't provide the PaymentPathFailed event to the user.
481486
return;
487+
} else {
488+
self.payer.abandon_payment(payment_id.unwrap());
482489
}
483-
484-
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
490+
},
491+
Event::PaymentFailed { payment_hash, .. } => {
492+
self.remove_cached_payment(&payment_hash);
485493
},
486494
Event::PaymentPathSuccessful { path, .. } => {
487495
let path = path.iter().collect::<Vec<_>>();
@@ -511,12 +519,12 @@ mod tests {
511519
use lightning::ln::PaymentPreimage;
512520
use lightning::ln::features::{ChannelFeatures, NodeFeatures, InitFeatures};
513521
use lightning::ln::functional_test_utils::*;
514-
use lightning::ln::msgs::{ErrorAction, LightningError};
522+
use lightning::ln::msgs::{ChannelMessageHandler, ErrorAction, LightningError};
515523
use lightning::routing::network_graph::NodeId;
516524
use lightning::routing::router::{Payee, Route, RouteHop};
517525
use lightning::util::test_utils::TestLogger;
518526
use lightning::util::errors::APIError;
519-
use lightning::util::events::{Event, MessageSendEventsProvider};
527+
use lightning::util::events::{Event, EventsProvider, MessageSendEvent, MessageSendEventsProvider};
520528
use secp256k1::{SecretKey, PublicKey, Secp256k1};
521529
use std::cell::RefCell;
522530
use std::collections::VecDeque;
@@ -1447,6 +1455,8 @@ mod tests {
14471455
self.check_value_msats(Amount::OnRetry(route.get_total_amount()));
14481456
self.check_attempts().map(|_| ())
14491457
}
1458+
1459+
fn abandon_payment(&self, _payment_id: PaymentId) { }
14501460
}
14511461

14521462
// *** Full Featured Functional Tests with a Real ChannelManager ***
@@ -1569,4 +1579,177 @@ mod tests {
15691579
assert_eq!(htlc_msgs.len(), 2);
15701580
check_added_monitors!(nodes[0], 2);
15711581
}
1582+
1583+
#[test]
1584+
fn no_extra_retries_on_back_to_back_fail() {
1585+
// In a previous release, we had a race where we may exceed the payment retry count if we
1586+
// get two failures in a row with the second having `all_paths_failed` set.
1587+
// Generally, when we give up trying to retry a payment, we don't know for sure what the
1588+
// current state of the ChannelManager event queue is. Specifically, we cannot be sure that
1589+
// there are not multiple additional `PaymentPathFailed` or even `PaymentSent` events
1590+
// pending which we will see later. Thus, when we previously removed the retry tracking map
1591+
// entry after a `all_paths_failed` `PaymentPathFailed` event, we may have dropped the
1592+
// retry entry even though more events for the same payment were still pending. This led to
1593+
// us retrying a payment again even though we'd already given up on it.
1594+
//
1595+
// We now have a separate event - `PaymentFailed` which indicates no HTLCs remain and which
1596+
// is used to remove the payment retry counter entries instead. This tests for the specific
1597+
// excess-retry case while also testing `PaymentFailed` generation.
1598+
1599+
let chanmon_cfgs = create_chanmon_cfgs(3);
1600+
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
1601+
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
1602+
let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
1603+
1604+
let chan_1_scid = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 10_000_000, 0, InitFeatures::known(), InitFeatures::known()).0.contents.short_channel_id;
1605+
let chan_2_scid = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 10_000_000, 0, InitFeatures::known(), InitFeatures::known()).0.contents.short_channel_id;
1606+
1607+
let mut route = Route {
1608+
paths: vec![
1609+
vec![RouteHop {
1610+
pubkey: nodes[1].node.get_our_node_id(),
1611+
node_features: NodeFeatures::known(),
1612+
short_channel_id: chan_1_scid,
1613+
channel_features: ChannelFeatures::known(),
1614+
fee_msat: 0,
1615+
cltv_expiry_delta: 100,
1616+
}, RouteHop {
1617+
pubkey: nodes[2].node.get_our_node_id(),
1618+
node_features: NodeFeatures::known(),
1619+
short_channel_id: chan_2_scid,
1620+
channel_features: ChannelFeatures::known(),
1621+
fee_msat: 100_000_000,
1622+
cltv_expiry_delta: 100,
1623+
}],
1624+
vec![RouteHop {
1625+
pubkey: nodes[1].node.get_our_node_id(),
1626+
node_features: NodeFeatures::known(),
1627+
short_channel_id: chan_1_scid,
1628+
channel_features: ChannelFeatures::known(),
1629+
fee_msat: 0,
1630+
cltv_expiry_delta: 100,
1631+
}, RouteHop {
1632+
pubkey: nodes[2].node.get_our_node_id(),
1633+
node_features: NodeFeatures::known(),
1634+
short_channel_id: chan_2_scid,
1635+
channel_features: ChannelFeatures::known(),
1636+
fee_msat: 100_000_000,
1637+
cltv_expiry_delta: 100,
1638+
}]
1639+
],
1640+
payee: Some(Payee::from_node_id(nodes[2].node.get_our_node_id())),
1641+
};
1642+
let router = ManualRouter(RefCell::new(VecDeque::new()));
1643+
router.expect_find_route(Ok(route.clone()));
1644+
// On retry, we'll only be asked for one path
1645+
route.paths.remove(1);
1646+
router.expect_find_route(Ok(route.clone()));
1647+
1648+
let expected_events: RefCell<VecDeque<&dyn Fn(&Event)>> = RefCell::new(VecDeque::new());
1649+
let event_handler = |event: &Event| {
1650+
let event_checker = expected_events.borrow_mut().pop_front().unwrap();
1651+
event_checker(event);
1652+
};
1653+
let scorer = RefCell::new(TestScorer::new());
1654+
let invoice_payer = InvoicePayer::new(nodes[0].node, router, &scorer, nodes[0].logger, event_handler, RetryAttempts(1));
1655+
1656+
assert!(invoice_payer.pay_invoice(&create_invoice_from_channelmanager(
1657+
&nodes[1].node, nodes[1].keys_manager, Currency::Bitcoin, Some(100_010_000), "Invoice".to_string()).unwrap())
1658+
.is_ok());
1659+
let htlc_updates = SendEvent::from_node(&nodes[0]);
1660+
check_added_monitors!(nodes[0], 1);
1661+
assert_eq!(htlc_updates.msgs.len(), 1);
1662+
1663+
nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &htlc_updates.msgs[0]);
1664+
nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &htlc_updates.commitment_msg);
1665+
check_added_monitors!(nodes[1], 1);
1666+
let (bs_first_raa, bs_first_cs) = get_revoke_commit_msgs!(nodes[1], nodes[0].node.get_our_node_id());
1667+
1668+
nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_first_raa);
1669+
check_added_monitors!(nodes[0], 1);
1670+
let second_htlc_updates = SendEvent::from_node(&nodes[0]);
1671+
1672+
nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_first_cs);
1673+
check_added_monitors!(nodes[0], 1);
1674+
let as_first_raa = get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
1675+
1676+
nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &second_htlc_updates.msgs[0]);
1677+
nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &second_htlc_updates.commitment_msg);
1678+
check_added_monitors!(nodes[1], 1);
1679+
let bs_second_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
1680+
1681+
nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_first_raa);
1682+
check_added_monitors!(nodes[1], 1);
1683+
let bs_fail_update = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
1684+
1685+
nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_second_raa);
1686+
check_added_monitors!(nodes[0], 1);
1687+
1688+
nodes[0].node.handle_update_fail_htlc(&nodes[1].node.get_our_node_id(), &bs_fail_update.update_fail_htlcs[0]);
1689+
nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_fail_update.commitment_signed);
1690+
check_added_monitors!(nodes[0], 1);
1691+
let (as_second_raa, as_third_cs) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
1692+
1693+
nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_second_raa);
1694+
check_added_monitors!(nodes[1], 1);
1695+
let bs_second_fail_update = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
1696+
1697+
nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_third_cs);
1698+
check_added_monitors!(nodes[1], 1);
1699+
let bs_third_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
1700+
1701+
nodes[0].node.handle_update_fail_htlc(&nodes[1].node.get_our_node_id(), &bs_second_fail_update.update_fail_htlcs[0]);
1702+
nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_second_fail_update.commitment_signed);
1703+
check_added_monitors!(nodes[0], 1);
1704+
1705+
nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_third_raa);
1706+
check_added_monitors!(nodes[0], 1);
1707+
let (as_third_raa, as_fourth_cs) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
1708+
1709+
nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_third_raa);
1710+
check_added_monitors!(nodes[1], 1);
1711+
nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_fourth_cs);
1712+
check_added_monitors!(nodes[1], 1);
1713+
let bs_fourth_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
1714+
1715+
nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_fourth_raa);
1716+
check_added_monitors!(nodes[0], 1);
1717+
1718+
// At this point A has sent two HTLCs which both failed due to lack of fee. It now has two
1719+
// pending `PaymentPathFailed` events, one with `all_paths_failed` unset, and the second
1720+
// with it set. The first event will use up the only retry we are allowed, with the second
1721+
// `PaymentPathFailed` being passed up to the user (us, in this case). Previously, we'd
1722+
// treated this as "HTLC complete" and dropped the retry counter, causing us to retry again
1723+
// if the final HTLC failed.
1724+
expected_events.borrow_mut().push_back(&|ev: &Event| {
1725+
if let Event::PaymentPathFailed { rejected_by_dest, all_paths_failed, .. } = ev {
1726+
assert!(!rejected_by_dest);
1727+
assert!(all_paths_failed);
1728+
} else { panic!("Unexpected event"); }
1729+
});
1730+
nodes[0].node.process_pending_events(&invoice_payer);
1731+
assert!(expected_events.borrow().is_empty());
1732+
1733+
let retry_htlc_updates = SendEvent::from_node(&nodes[0]);
1734+
check_added_monitors!(nodes[0], 1);
1735+
1736+
nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &retry_htlc_updates.msgs[0]);
1737+
commitment_signed_dance!(nodes[1], nodes[0], &retry_htlc_updates.commitment_msg, false, true);
1738+
let bs_fail_update = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
1739+
nodes[0].node.handle_update_fail_htlc(&nodes[1].node.get_our_node_id(), &bs_fail_update.update_fail_htlcs[0]);
1740+
commitment_signed_dance!(nodes[0], nodes[1], &bs_fail_update.commitment_signed, false, true);
1741+
1742+
expected_events.borrow_mut().push_back(&|ev: &Event| {
1743+
if let Event::PaymentPathFailed { rejected_by_dest, all_paths_failed, .. } = ev {
1744+
assert!(!rejected_by_dest);
1745+
assert!(all_paths_failed);
1746+
} else { panic!("Unexpected event"); }
1747+
});
1748+
expected_events.borrow_mut().push_back(&|ev: &Event| {
1749+
if let Event::PaymentFailed { .. } = ev {
1750+
} else { panic!("Unexpected event"); }
1751+
});
1752+
nodes[0].node.process_pending_events(&invoice_payer);
1753+
assert!(expected_events.borrow().is_empty());
1754+
}
15721755
}

lightning-invoice/src/utils.rs

+4
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ where
152152
) -> Result<(), PaymentSendFailure> {
153153
self.retry_payment(route, payment_id)
154154
}
155+
156+
fn abandon_payment(&self, payment_id: PaymentId) {
157+
self.abandon_payment(payment_id)
158+
}
155159
}
156160

157161
#[cfg(test)]

0 commit comments

Comments
 (0)