@@ -862,3 +862,101 @@ TEST_F_CORO(raft_fixture, test_no_stepdown_on_append_entries_timeout) {
     ASSERT_EQ_CORO(term_before, new_leader_node.raft()->term());
     ASSERT_TRUE_CORO(new_leader_node.raft()->is_leader());
 }
+
+/**
+ * This synthetic test triggers a situation in which a follower receives an
+ * append entries request containing only batches that already match its
+ * log. In that case the follower should reply with success so that the
+ * leader can continue the recovery process.
+ *
+ * The test uses reply interception to trick the leader into sending append
+ * entries with batches that the follower already has.
+ */
+TEST_F_CORO(raft_fixture, test_redelivery_of_matching_logs) {
+    co_await create_simple_group(3);
+    auto leader_id = co_await wait_for_leader(10s);
+    model::node_id non_leader_id{0};
+    // Use a tiny recovery read size so recovery delivers batches in many
+    // small append entries requests.
+    for (auto& [id, n] : nodes()) {
+        n->set_default_recovery_read_size(1);
+    }
+
+    // Pick an arbitrary follower to block.
+    for (auto& [id, _] : nodes()) {
+        if (id != leader_id) {
+            non_leader_id = id;
+            break;
+        }
+    }
+    auto& leader_node = node(leader_id);
+    /**
+     * Replicate data to all nodes
+     */
+    auto r = co_await leader_node.raft()->replicate(
+      make_batches(200, 1, 10),
+      replicate_options(consistency_level::quorum_ack, 10s));
+    /**
+     * Prevent one node from receiving append entries
+     */
+    leader_node.on_dispatch(
+      [non_leader_id](model::node_id id, raft::msg_type t) {
+          if (t == raft::msg_type::append_entries && id == non_leader_id) {
+              throw std::runtime_error("error");
+          }
+          return ss::now();
+      });
+    /**
+     * Replicate data with one of the nodes being blocked
+     */
+    r = co_await leader_node.raft()->replicate(
+      make_batches(200, 1, 10),
+      replicate_options(consistency_level::quorum_ack, 10s));
+    ASSERT_FALSE_CORO(r.has_error());
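+    // The blocked follower misses the batches replicated above, so the
+    // leader will have to recover it once communication is restored.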
+
+    /**
+     * Append some batches to the blocked node with different term
+     */
+    auto log_ap = node(non_leader_id)
+                    .raft()
+                    ->log()
+                    ->make_appender(storage::log_append_config{
+                      .should_fsync = storage::log_append_config::fsync::no,
+                      .io_priority = ss::default_priority_class(),
+                    });
+    auto reader = model::make_fragmented_memory_record_batch_reader(
+      make_batches(200, 1, 10, model::term_id(2)));
+    co_await reader.for_each_ref(std::move(log_ap), model::no_timeout);
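+    // Writing term 2 batches directly into the follower's log makes its
+    // suffix diverge from the leader's, so during recovery the leader must
+    // probe backwards for the offset at which the two logs still match.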
+
+    auto term_1_match_offset = node(non_leader_id).raft()->dirty_offset();
+
+    /**
+     * Trick the leader right at the offset where the leader and follower
+     * logs would match
+     */
+    ss::condition_variable reply_intercepted;
+    size_t intercept_count = 0;
+    leader_node.set_reply_interceptor(
+      [&, term_1_match_offset](reply_variant reply, model::node_id) {
+          return ss::visit(
+            std::move(reply),
+            [&, term_1_match_offset](append_entries_reply& a_r) {
+                if (
+                  a_r.last_dirty_log_index
+                  == model::prev_offset(term_1_match_offset)) {
+                    a_r.result = reply_result::failure;
+                    intercept_count++;
+                    reply_intercepted.signal();
+                }
+                return ss::make_ready_future<reply_variant>(a_r);
+            },
+            [](auto& r) {
+                return ss::make_ready_future<reply_variant>(std::move(r));
+            });
+      });
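+    // Rewriting the reply into a failure at this offset makes the leader
+    // step its next index back, so the follow-up append entries requests
+    // carry batches the follower already has - the condition under test.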
+    /**
+     * Recover communication and wait for the intercept to trigger
+     */
+    leader_node.reset_on_dispatch();
+    co_await reply_intercepted.wait([&] { return intercept_count > 5; });
+    leader_node.reset_reply_interceptor();
+
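+    // If the follower did not acknowledge the re-delivered matching batches
+    // with success, recovery could not make progress and the committed
+    // offset would never catch up to the leader's dirty offset.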
+    co_await wait_for_committed_offset(leader_node.raft()->dirty_offset(), 5s);
+}