From caedb46f0e67f3466ca9995c95fe9e745e33e5a0 Mon Sep 17 00:00:00 2001 From: Mark Travis Date: Wed, 19 Feb 2025 18:56:53 -0800 Subject: [PATCH 1/2] Combine multiple related debug log data points into a single message. Allows quick correlation of events that previously were either not logged or, if logged, strewn across multiple lines, making correlation difficult. The Heartbeat Timer and consensus ledger accept processing each have this capability. Also guarantees that log entries will be written if the node is a validator, regardless of log severity level. Otherwise, the level of these messages is at INFO severity. --- include/xrpl/basics/Log.h | 12 + include/xrpl/beast/utility/Journal.h | 10 + include/xrpl/beast/utility/WrappedSink.h | 7 + src/libxrpl/basics/Log.cpp | 8 + .../beast/utility/src/beast_Journal.cpp | 5 + src/test/beast/beast_Journal_test.cpp | 6 + src/test/csf/Peer.h | 19 +- src/test/csf/Sim.h | 8 + src/test/jtx/CaptureLogs.h | 8 + src/test/jtx/CheckMessageLogs.h | 7 + src/test/server/ServerStatus_test.cpp | 4 +- src/test/server/Server_test.cpp | 7 + src/test/unit_test/SuiteJournal.h | 25 +- src/xrpld/app/consensus/RCLConsensus.cpp | 53 +++- src/xrpld/app/consensus/RCLConsensus.h | 43 ++- src/xrpld/app/main/Application.cpp | 2 +- src/xrpld/app/misc/NetworkOPs.cpp | 58 +++- src/xrpld/app/misc/NetworkOPs.h | 6 +- src/xrpld/consensus/Consensus.cpp | 98 ++++-- src/xrpld/consensus/Consensus.h | 295 +++++++++++++----- 20 files changed, 549 insertions(+), 132 deletions(-) diff --git a/include/xrpl/basics/Log.h b/include/xrpl/basics/Log.h index 0117f2f9aab..442270546e6 100644 --- a/include/xrpl/basics/Log.h +++ b/include/xrpl/basics/Log.h @@ -67,6 +67,10 @@ class Logs void write(beast::severities::Severity level, std::string const& text) override; + + void + writeAlways(beast::severities::Severity level, std::string const& text) + override; }; /** Manages a system file containing logged output. @@ -256,6 +260,14 @@ class Logs x #endif +#ifndef CLOG +#define CLOG(ss) \ + if (!ss) \ + ; \ + else \ + *ss +#endif + //------------------------------------------------------------------------------ // Debug logging: diff --git a/include/xrpl/beast/utility/Journal.h b/include/xrpl/beast/utility/Journal.h index 908738a2be2..41cea2a3a0b 100644 --- a/include/xrpl/beast/utility/Journal.h +++ b/include/xrpl/beast/utility/Journal.h @@ -112,6 +112,16 @@ class Journal virtual void write(Severity level, std::string const& text) = 0; + /** Bypass filter and write text to the sink at the specified severity. + * Always write the message, but maintain the same formatting as if + * it passed through a level filter. + * + * @param level Level to display in message. + * @param text Text to write to sink. + */ + virtual void + writeAlways(Severity level, std::string const& text) = 0; + private: Severity thresh_; bool m_console; diff --git a/include/xrpl/beast/utility/WrappedSink.h b/include/xrpl/beast/utility/WrappedSink.h index f22455a52c4..72dcf4e8f59 100644 --- a/include/xrpl/beast/utility/WrappedSink.h +++ b/include/xrpl/beast/utility/WrappedSink.h @@ -93,6 +93,13 @@ class WrappedSink : public beast::Journal::Sink using beast::Journal; sink_.write(level, prefix_ + text); } + + void + writeAlways(severities::Severity level, std::string const& text) override + { + using beast::Journal; + sink_.writeAlways(level, prefix_ + text); + } }; } // namespace beast diff --git a/src/libxrpl/basics/Log.cpp b/src/libxrpl/basics/Log.cpp index 873087bd217..ac42aa9ec43 100644 --- a/src/libxrpl/basics/Log.cpp +++ b/src/libxrpl/basics/Log.cpp @@ -47,6 +47,14 @@ Logs::Sink::write(beast::severities::Severity level, std::string const& text) logs_.write(level, partition_, text, console()); } +void +Logs::Sink::writeAlways( + beast::severities::Severity level, + std::string const& text) +{ + logs_.write(level, partition_, text, console()); +} + //------------------------------------------------------------------------------ Logs::File::File() : m_stream(nullptr) diff --git a/src/libxrpl/beast/utility/src/beast_Journal.cpp b/src/libxrpl/beast/utility/src/beast_Journal.cpp index 449f474739c..dd160a58708 100644 --- a/src/libxrpl/beast/utility/src/beast_Journal.cpp +++ b/src/libxrpl/beast/utility/src/beast_Journal.cpp @@ -66,6 +66,11 @@ class NullJournalSink : public Journal::Sink write(severities::Severity, std::string const&) override { } + + void + writeAlways(severities::Severity, std::string const&) override + { + } }; //------------------------------------------------------------------------------ diff --git a/src/test/beast/beast_Journal_test.cpp b/src/test/beast/beast_Journal_test.cpp index 6f1652381a0..99badc0fc43 100644 --- a/src/test/beast/beast_Journal_test.cpp +++ b/src/test/beast/beast_Journal_test.cpp @@ -53,6 +53,12 @@ class Journal_test : public unit_test::suite if (level >= threshold()) ++m_count; } + + void + writeAlways(severities::Severity level, std::string const&) override + { + ++m_count; + } }; void diff --git a/src/test/csf/Peer.h b/src/test/csf/Peer.h index e6bc7d24e08..dceafb96482 100644 --- a/src/test/csf/Peer.h +++ b/src/test/csf/Peer.h @@ -116,6 +116,10 @@ struct Peer } }; + class TestConsensusLogger + { + }; + /** Generic Validations adaptor that simply ignores recently stale * validations */ @@ -532,7 +536,8 @@ struct Peer closeResolution, rawCloseTimes, mode, - std::move(consensusJson)); + std::move(consensusJson), + validating()); } void @@ -542,7 +547,8 @@ struct Peer NetClock::duration const& closeResolution, ConsensusCloseTimes const& rawCloseTimes, ConsensusMode const& mode, - Json::Value&& consensusJson) + Json::Value&& consensusJson, + const bool validating) { schedule(delays.ledgerAccept, [=, this]() { const bool proposing = mode == ConsensusMode::proposing; @@ -877,6 +883,13 @@ struct Peer { } + bool + validating() const + { + // does not matter + return false; + } + //-------------------------------------------------------------------------- // A locally submitted transaction void @@ -917,7 +930,7 @@ struct Peer // Not yet modeling dynamic UNL. hash_set nowUntrusted; consensus.startRound( - now(), bestLCL, lastClosedLedger, nowUntrusted, runAsValidator); + now(), bestLCL, lastClosedLedger, nowUntrusted, runAsValidator, {}); } // Start the consensus process assuming it is not yet running diff --git a/src/test/csf/Sim.h b/src/test/csf/Sim.h index b9b203f1957..0cb052305ad 100644 --- a/src/test/csf/Sim.h +++ b/src/test/csf/Sim.h @@ -57,6 +57,14 @@ class BasicSink : public beast::Journal::Sink std::cout << clock_.now().time_since_epoch().count() << " " << text << std::endl; } + + void + writeAlways(beast::severities::Severity level, std::string const& text) + override + { + std::cout << clock_.now().time_since_epoch().count() << " " << text + << std::endl; + } }; class Sim diff --git a/src/test/jtx/CaptureLogs.h b/src/test/jtx/CaptureLogs.h index 62bf25bda90..ddb04cb3ff7 100644 --- a/src/test/jtx/CaptureLogs.h +++ b/src/test/jtx/CaptureLogs.h @@ -60,6 +60,14 @@ class CaptureLogs : public Logs std::lock_guard lock(strmMutex_); strm_ << text; } + + void + writeAlways(beast::severities::Severity level, std::string const& text) + override + { + std::lock_guard lock(strmMutex_); + strm_ << text; + } }; public: diff --git a/src/test/jtx/CheckMessageLogs.h b/src/test/jtx/CheckMessageLogs.h index dc3253278cd..e51b84b1c04 100644 --- a/src/test/jtx/CheckMessageLogs.h +++ b/src/test/jtx/CheckMessageLogs.h @@ -48,6 +48,13 @@ class CheckMessageLogs : public Logs if (text.find(owner_.msg_) != std::string::npos) *owner_.pFound_ = true; } + + void + writeAlways(beast::severities::Severity level, std::string const& text) + override + { + write(level, text); + } }; public: diff --git a/src/test/server/ServerStatus_test.cpp b/src/test/server/ServerStatus_test.cpp index 8aa7bf19f30..f15085d6efa 100644 --- a/src/test/server/ServerStatus_test.cpp +++ b/src/test/server/ServerStatus_test.cpp @@ -860,7 +860,7 @@ class ServerStatus_test : public beast::unit_test::suite, // mark the Network as having an Amendment Warning, but won't fail env.app().getOPs().setAmendmentWarned(); - env.app().getOPs().beginConsensus(env.closed()->info().hash); + env.app().getOPs().beginConsensus(env.closed()->info().hash, {}); // consensus doesn't change BEAST_EXPECT( @@ -991,7 +991,7 @@ class ServerStatus_test : public beast::unit_test::suite, // mark the Network as Amendment Blocked, but still won't fail until // ELB is enabled (next step) env.app().getOPs().setAmendmentBlocked(); - env.app().getOPs().beginConsensus(env.closed()->info().hash); + env.app().getOPs().beginConsensus(env.closed()->info().hash, {}); // consensus now sees validation disabled BEAST_EXPECT( diff --git a/src/test/server/Server_test.cpp b/src/test/server/Server_test.cpp index e0de7ddda56..c17c752ff12 100644 --- a/src/test/server/Server_test.cpp +++ b/src/test/server/Server_test.cpp @@ -96,6 +96,13 @@ class Server_test : public beast::unit_test::suite suite_.log << text << std::endl; } + + void + writeAlways(beast::severities::Severity level, std::string const& text) + override + { + suite_.log << text << std::endl; + } }; //-------------------------------------------------------------------------- diff --git a/src/test/unit_test/SuiteJournal.h b/src/test/unit_test/SuiteJournal.h index 211bec0e4d3..b5c59f3d29e 100644 --- a/src/test/unit_test/SuiteJournal.h +++ b/src/test/unit_test/SuiteJournal.h @@ -50,12 +50,26 @@ class SuiteJournalSink : public beast::Journal::Sink void write(beast::severities::Severity level, std::string const& text) override; + + void + writeAlways(beast::severities::Severity level, std::string const& text) + override; }; inline void SuiteJournalSink::write( beast::severities::Severity level, std::string const& text) +{ + // Only write the string if the level at least equals the threshold. + if (level >= threshold()) + writeAlways(level, text); +} + +inline void +SuiteJournalSink::writeAlways( + beast::severities::Severity level, + std::string const& text) { using namespace beast::severities; @@ -80,9 +94,7 @@ SuiteJournalSink::write( return "FTL:"; }(); - // Only write the string if the level at least equals the threshold. - if (level >= threshold()) - suite_.log << s << partition_ << text << std::endl; + suite_.log << s << partition_ << text << std::endl; } class SuiteJournal @@ -127,9 +139,16 @@ class StreamSink : public beast::Journal::Sink { if (level < threshold()) return; + writeAlways(level, text); + } + inline void + writeAlways(beast::severities::Severity level, std::string const& text) + override + { strm_ << text << std::endl; } + std::stringstream const& messages() const { diff --git a/src/xrpld/app/consensus/RCLConsensus.cpp b/src/xrpld/app/consensus/RCLConsensus.cpp index a746b30357d..91f1cff3ef4 100644 --- a/src/xrpld/app/consensus/RCLConsensus.cpp +++ b/src/xrpld/app/consensus/RCLConsensus.cpp @@ -45,6 +45,7 @@ #include #include +#include #include namespace ripple { @@ -434,7 +435,8 @@ RCLConsensus::Adaptor::onAccept( NetClock::duration const& closeResolution, ConsensusCloseTimes const& rawCloseTimes, ConsensusMode const& mode, - Json::Value&& consensusJson) + Json::Value&& consensusJson, + const bool validating) { app_.getJobQueue().addJob( jtACCEPT, @@ -445,6 +447,7 @@ RCLConsensus::Adaptor::onAccept( // is accepted, the consensus results and capture by reference state // will not change until startRound is called (which happens via // endConsensus). + RclConsensusLogger clog("onAccept", validating, j_); this->doAccept( result, prevLedger, @@ -452,7 +455,7 @@ RCLConsensus::Adaptor::onAccept( rawCloseTimes, mode, std::move(cj)); - this->app_.getOPs().endConsensus(); + this->app_.getOPs().endConsensus(clog.ss()); }); } @@ -935,17 +938,22 @@ RCLConsensus::getJson(bool full) const } void -RCLConsensus::timerEntry(NetClock::time_point const& now) +RCLConsensus::timerEntry( + NetClock::time_point const& now, + std::unique_ptr const& clog) { try { std::lock_guard _{mutex_}; - consensus_.timerEntry(now); + consensus_.timerEntry(now, clog); } catch (SHAMapMissingNode const& mn) { // This should never happen - JLOG(j_.error()) << "During consensus timerEntry: " << mn.what(); + std::stringstream ss; + ss << "During consensus timerEntry: " << mn.what(); + JLOG(j_.error()) << ss.str(); + CLOG(clog) << ss.str(); Rethrow(); } } @@ -1082,7 +1090,8 @@ RCLConsensus::startRound( RCLCxLedger::ID const& prevLgrId, RCLCxLedger const& prevLgr, hash_set const& nowUntrusted, - hash_set const& nowTrusted) + hash_set const& nowTrusted, + std::unique_ptr const& clog) { std::lock_guard _{mutex_}; consensus_.startRound( @@ -1090,6 +1099,36 @@ RCLConsensus::startRound( prevLgrId, prevLgr, nowUntrusted, - adaptor_.preStartRound(prevLgr, nowTrusted)); + adaptor_.preStartRound(prevLgr, nowTrusted), + clog); } + +RclConsensusLogger::RclConsensusLogger( + const char* label, + const bool validating, + beast::Journal j) + : j_(j) +{ + if (!validating && !j.info()) + return; + start_ = std::chrono::steady_clock::now(); + ss_ = std::make_unique(); + header_ = "ConsensusLogger "; + header_ += label; + header_ += ": "; +} + +RclConsensusLogger::~RclConsensusLogger() +{ + if (!ss_) + return; + auto const duration = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_); + std::stringstream outSs; + outSs << header_ << "duration " << (duration.count() / 1000) << '.' + << std::setw(3) << std::setfill('0') << (duration.count() % 1000) + << "s. " << ss_->str(); + j_.sink().writeAlways(beast::severities::kInfo, outSs.str()); +} + } // namespace ripple diff --git a/src/xrpld/app/consensus/RCLConsensus.h b/src/xrpld/app/consensus/RCLConsensus.h index 893e5cf0847..a29a1f33a46 100644 --- a/src/xrpld/app/consensus/RCLConsensus.h +++ b/src/xrpld/app/consensus/RCLConsensus.h @@ -36,8 +36,12 @@ #include #include #include +#include #include #include +#include +#include + namespace ripple { class InboundTransactions; @@ -324,7 +328,8 @@ class RCLConsensus NetClock::duration const& closeResolution, ConsensusCloseTimes const& rawCloseTimes, ConsensusMode const& mode, - Json::Value&& consensusJson); + Json::Value&& consensusJson, + const bool validating); /** Process the accepted ledger that was a result of simulation/force accept. @@ -484,11 +489,14 @@ class RCLConsensus RCLCxLedger::ID const& prevLgrId, RCLCxLedger const& prevLgr, hash_set const& nowUntrusted, - hash_set const& nowTrusted); + hash_set const& nowTrusted, + std::unique_ptr const& clog); //! @see Consensus::timerEntry void - timerEntry(NetClock::time_point const& now); + timerEntry( + NetClock::time_point const& now, + std::unique_ptr const& clog = {}); //! @see Consensus::gotTxSet void @@ -530,6 +538,35 @@ class RCLConsensus Consensus consensus_; beast::Journal const j_; }; + +/** Collects logging information. + * + * Eases correlating multiple data points together to + * help follow flow of a complex activity. If we + * are a validator, then always write output to + * debug log on destruction. Otherwise, INFO + * level. + */ +class RclConsensusLogger +{ + std::string header_; + beast::Journal j_; + std::unique_ptr ss_; + std::chrono::steady_clock::time_point start_; + +public: + explicit RclConsensusLogger( + const char* label, + bool validating, + beast::Journal j); + ~RclConsensusLogger(); + + std::unique_ptr const& + ss() + { + return ss_; + } +}; } // namespace ripple #endif diff --git a/src/xrpld/app/main/Application.cpp b/src/xrpld/app/main/Application.cpp index bcacdbc3757..3953b56d672 100644 --- a/src/xrpld/app/main/Application.cpp +++ b/src/xrpld/app/main/Application.cpp @@ -1407,7 +1407,7 @@ ApplicationImp::setup(boost::program_options::variables_map const& cmdline) // start first consensus round if (!m_networkOPs->beginConsensus( - m_ledgerMaster->getClosedLedger()->info().hash)) + m_ledgerMaster->getClosedLedger()->info().hash, {})) { JLOG(m_journal.fatal()) << "Unable to start consensus"; return false; diff --git a/src/xrpld/app/misc/NetworkOPs.cpp b/src/xrpld/app/misc/NetworkOPs.cpp index a5c8200ccbc..c4c5f962418 100644 --- a/src/xrpld/app/misc/NetworkOPs.cpp +++ b/src/xrpld/app/misc/NetworkOPs.cpp @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -381,9 +382,11 @@ class NetworkOPsImp final : public NetworkOPs public: bool - beginConsensus(uint256 const& networkClosed) override; + beginConsensus( + uint256 const& networkClosed, + std::unique_ptr const& clog) override; void - endConsensus() override; + endConsensus(std::unique_ptr const& clog) override; void setStandAlone() override; @@ -1008,6 +1011,8 @@ NetworkOPsImp::setAccountHistoryJobTimer(SubAccountHistoryInfoWeak subInfo) void NetworkOPsImp::processHeartbeatTimer() { + auto v = mConsensus.validating(); + RclConsensusLogger clog("Heartbeat Timer", v, m_journal); { std::unique_lock lock{app_.getMasterMutex()}; @@ -1023,9 +1028,17 @@ NetworkOPsImp::processHeartbeatTimer() if (mMode != OperatingMode::DISCONNECTED) { setMode(OperatingMode::DISCONNECTED); - JLOG(m_journal.warn()) - << "Node count (" << numPeers << ") has fallen " - << "below required minimum (" << minPeerCount_ << ")."; + std::stringstream ss; + ss << "Node count (" << numPeers << ") has fallen " + << "below required minimum (" << minPeerCount_ << ")."; + JLOG(m_journal.warn()) << ss.str(); + CLOG(clog.ss()) << "set mode to DISCONNECTED: " << ss.str(); + } + else + { + CLOG(clog.ss()) + << "already DISCONNECTED. too few peers (" << numPeers + << "), need at least " << minPeerCount_; } // MasterMutex lock need not be held to call setHeartbeatTimer() @@ -1033,6 +1046,7 @@ NetworkOPsImp::processHeartbeatTimer() // We do not call mConsensus.timerEntry until there are enough // peers providing meaningful inputs to consensus setHeartbeatTimer(); + return; } @@ -1041,24 +1055,38 @@ NetworkOPsImp::processHeartbeatTimer() setMode(OperatingMode::CONNECTED); JLOG(m_journal.info()) << "Node count (" << numPeers << ") is sufficient."; + CLOG(clog.ss()) << "setting mode to CONNECTED based on " << numPeers + << " peers. "; } // Check if the last validated ledger forces a change between these // states. + auto origMode = mMode.load(); + CLOG(clog.ss()) << "mode: " << strOperatingMode(origMode, true); if (mMode == OperatingMode::SYNCING) setMode(OperatingMode::SYNCING); else if (mMode == OperatingMode::CONNECTED) setMode(OperatingMode::CONNECTED); + auto newMode = mMode.load(); + if (origMode != newMode) + { + CLOG(clog.ss()) + << ", changing to " << strOperatingMode(newMode, true); + } + CLOG(clog.ss()) << ". "; } - mConsensus.timerEntry(app_.timeKeeper().closeTime()); + mConsensus.timerEntry(app_.timeKeeper().closeTime(), clog.ss()); + CLOG(clog.ss()) << "consensus phase " << to_string(mLastConsensusPhase); const ConsensusPhase currPhase = mConsensus.phase(); if (mLastConsensusPhase != currPhase) { reportConsensusStateChange(currPhase); mLastConsensusPhase = currPhase; + CLOG(clog.ss()) << " changed to " << to_string(mLastConsensusPhase); } + CLOG(clog.ss()) << ". "; setHeartbeatTimer(); } @@ -1815,7 +1843,9 @@ NetworkOPsImp::switchLastClosedLedger( } bool -NetworkOPsImp::beginConsensus(uint256 const& networkClosed) +NetworkOPsImp::beginConsensus( + uint256 const& networkClosed, + std::unique_ptr const& clog) { XRPL_ASSERT( networkClosed.isNonZero(), @@ -1835,8 +1865,10 @@ NetworkOPsImp::beginConsensus(uint256 const& networkClosed) { JLOG(m_journal.warn()) << "Don't have LCL, going to tracking"; setMode(OperatingMode::TRACKING); + CLOG(clog) << "beginConsensus Don't have LCL, going to tracking. "; } + CLOG(clog) << "beginConsensus no previous ledge. "; return false; } @@ -1871,7 +1903,8 @@ NetworkOPsImp::beginConsensus(uint256 const& networkClosed) networkClosed, prevLedger, changes.removed, - changes.added); + changes.added, + clog); const ConsensusPhase currPhase = mConsensus.phase(); if (mLastConsensusPhase != currPhase) @@ -1910,7 +1943,7 @@ NetworkOPsImp::mapComplete(std::shared_ptr const& map, bool fromAcquire) } void -NetworkOPsImp::endConsensus() +NetworkOPsImp::endConsensus(std::unique_ptr const& clog) { uint256 deadLedger = m_ledgerMaster.getClosedLedger()->info().parentHash; @@ -1928,7 +1961,10 @@ NetworkOPsImp::endConsensus() checkLastClosedLedger(app_.overlay().getActivePeers(), networkClosed); if (networkClosed.isZero()) + { + CLOG(clog) << "endConsensus last closed ledger is zero. "; return; + } // WRITEME: Unless we are in FULL and in the process of doing a consensus, // we must count how many nodes share our LCL, how many nodes disagree with @@ -1962,7 +1998,7 @@ NetworkOPsImp::endConsensus() } } - beginConsensus(networkClosed); + beginConsensus(networkClosed, clog); } void @@ -3890,7 +3926,7 @@ NetworkOPsImp::acceptLedger( // FIXME Could we improve on this and remove the need for a specialized // API in Consensus? - beginConsensus(m_ledgerMaster.getClosedLedger()->info().hash); + beginConsensus(m_ledgerMaster.getClosedLedger()->info().hash, {}); mConsensus.simulate(app_.timeKeeper().closeTime(), consensusDelay); return m_ledgerMaster.getCurrentLedger()->info().seq; } diff --git a/src/xrpld/app/misc/NetworkOPs.h b/src/xrpld/app/misc/NetworkOPs.h index 166b9e9e11f..be849531e2b 100644 --- a/src/xrpld/app/misc/NetworkOPs.h +++ b/src/xrpld/app/misc/NetworkOPs.h @@ -180,9 +180,11 @@ class NetworkOPs : public InfoSub::Source // network state machine virtual bool - beginConsensus(uint256 const& netLCL) = 0; + beginConsensus( + uint256 const& netLCL, + std::unique_ptr const& clog) = 0; virtual void - endConsensus() = 0; + endConsensus(std::unique_ptr const& clog) = 0; virtual void setStandAlone() = 0; virtual void diff --git a/src/xrpld/consensus/Consensus.cpp b/src/xrpld/consensus/Consensus.cpp index 01451c6a255..1ddba88abd1 100644 --- a/src/xrpld/consensus/Consensus.cpp +++ b/src/xrpld/consensus/Consensus.cpp @@ -34,18 +34,32 @@ shouldCloseLedger( std::chrono::milliseconds openTime, // Time waiting to close this ledger std::chrono::milliseconds idleInterval, ConsensusParms const& parms, - beast::Journal j) + beast::Journal j, + std::unique_ptr const& clog) { + CLOG(clog) << "shouldCloseLedger params anyTransactions: " + << anyTransactions << ", prevProposers: " << prevProposers + << ", proposersClosed: " << proposersClosed + << ", proposersValidated: " << proposersValidated + << ", prevRoundTime: " << prevRoundTime.count() << "ms" + << ", timeSincePrevClose: " << timeSincePrevClose.count() << "ms" + << ", openTime: " << openTime.count() << "ms" + << ", idleInterval: " << idleInterval.count() << "ms" + << ", ledgerMIN_CLOSE: " << parms.ledgerMIN_CLOSE.count() << "ms" + << ". "; using namespace std::chrono_literals; if ((prevRoundTime < -1s) || (prevRoundTime > 10min) || (timeSincePrevClose > 10min)) { // These are unexpected cases, we just close the ledger - JLOG(j.warn()) << "shouldCloseLedger Trans=" - << (anyTransactions ? "yes" : "no") - << " Prop: " << prevProposers << "/" << proposersClosed - << " Secs: " << timeSincePrevClose.count() - << " (last: " << prevRoundTime.count() << ")"; + std::stringstream ss; + ss << "shouldCloseLedger Trans=" << (anyTransactions ? "yes" : "no") + << " Prop: " << prevProposers << "/" << proposersClosed + << " Secs: " << timeSincePrevClose.count() + << " (last: " << prevRoundTime.count() << ")"; + + JLOG(j.warn()) << ss.str(); + CLOG(clog) << "closing ledger: " << ss.str() << ". "; return true; } @@ -53,12 +67,14 @@ shouldCloseLedger( { // If more than half of the network has closed, we close JLOG(j.trace()) << "Others have closed"; + CLOG(clog) << "closing ledger because enough others have already. "; return true; } if (!anyTransactions) { // Only close at the end of the idle interval + CLOG(clog) << "no transactions, returning. "; return timeSincePrevClose >= idleInterval; // normal idle } @@ -66,6 +82,7 @@ shouldCloseLedger( if (openTime < parms.ledgerMIN_CLOSE) { JLOG(j.debug()) << "Must wait minimum time before closing"; + CLOG(clog) << "not closing because under ledgerMIN_CLOSE. "; return false; } @@ -75,10 +92,12 @@ shouldCloseLedger( if (openTime < (prevRoundTime / 2)) { JLOG(j.debug()) << "Ledger has not been open long enough"; + CLOG(clog) << "not closing because not open long enough. "; return false; } // Close the ledger + CLOG(clog) << "no reason to not close. "; return true; } @@ -88,8 +107,14 @@ checkConsensusReached( std::size_t total, bool count_self, std::size_t minConsensusPct, - bool reachedMax) + bool reachedMax, + std::unique_ptr const& clog) { + CLOG(clog) << "checkConsensusReached params: agreeing: " << agreeing + << ", total: " << total << ", count_self: " << count_self + << ", minConsensusPct: " << minConsensusPct + << ", reachedMax: " << reachedMax << ". "; + // If we are alone for too long, we have consensus. // Delaying consensus like this avoids a circumstance where a peer // gets ahead of proposers insofar as it has not received any proposals. @@ -102,7 +127,13 @@ checkConsensusReached( if (total == 0) { if (reachedMax) + { + CLOG(clog) + << "Consensus reached because nobody shares our position and " + "maximum duration has passed."; return true; + } + CLOG(clog) << "Consensus not reached and nobody shares our position. "; return false; } @@ -110,11 +141,22 @@ checkConsensusReached( { ++agreeing; ++total; + CLOG(clog) << "agreeing and total adjusted: " << agreeing << ',' + << total << ". "; } std::size_t currentPercentage = (agreeing * 100) / total; - - return currentPercentage >= minConsensusPct; + CLOG(clog) << "currentPercentage: " << currentPercentage; + const bool ret = currentPercentage >= minConsensusPct; + if (currentPercentage >= minConsensusPct) + { + CLOG(clog) << ", consensus reached. "; + } + else + { + CLOG(clog) << ", consensus not reached. "; + } + return ret; } ConsensusState @@ -127,22 +169,26 @@ checkConsensus( std::chrono::milliseconds currentAgreeTime, ConsensusParms const& parms, bool proposing, - beast::Journal j) + beast::Journal j, + std::unique_ptr const& clog) { - JLOG(j.trace()) << "checkConsensus: prop=" << currentProposers << "/" - << prevProposers << " agree=" << currentAgree - << " validated=" << currentFinished - << " time=" << currentAgreeTime.count() << "/" - << previousAgreeTime.count() << " proposing? " << proposing - << " minimum duration to reach consensus: " - << parms.ledgerMIN_CONSENSUS.count() << "ms" - << " max consensus time " - << parms.ledgerMAX_CONSENSUS.count() << "s" - << " minimum consensus percentage: " - << parms.minCONSENSUS_PCT; + CLOG(clog) << "checkConsensus: prop=" << currentProposers << "/" + << prevProposers << " agree=" << currentAgree + << " validated=" << currentFinished + << " time=" << currentAgreeTime.count() << "/" + << previousAgreeTime.count() << " proposing? " << proposing + << " minimum duration to reach consensus: " + << parms.ledgerMIN_CONSENSUS.count() << "ms" + << " max consensus time " << parms.ledgerMAX_CONSENSUS.count() + << "s" + << " minimum consensus percentage: " << parms.minCONSENSUS_PCT + << ". "; if (currentAgreeTime <= parms.ledgerMIN_CONSENSUS) + { + CLOG(clog) << "Not reached. "; return ConsensusState::No; + } if (currentProposers < (prevProposers * 3 / 4)) { @@ -151,6 +197,7 @@ checkConsensus( if (currentAgreeTime < (previousAgreeTime + parms.ledgerMIN_CONSENSUS)) { JLOG(j.trace()) << "too fast, not enough proposers"; + CLOG(clog) << "Too fast, not enough proposers. Not reached. "; return ConsensusState::No; } } @@ -162,9 +209,11 @@ checkConsensus( currentProposers, proposing, parms.minCONSENSUS_PCT, - currentAgreeTime > parms.ledgerMAX_CONSENSUS)) + currentAgreeTime > parms.ledgerMAX_CONSENSUS, + clog)) { JLOG(j.debug()) << "normal consensus"; + CLOG(clog) << "reached. "; return ConsensusState::Yes; } @@ -175,14 +224,17 @@ checkConsensus( currentProposers, false, parms.minCONSENSUS_PCT, - currentAgreeTime > parms.ledgerMAX_CONSENSUS)) + currentAgreeTime > parms.ledgerMAX_CONSENSUS, + clog)) { JLOG(j.warn()) << "We see no consensus, but 80% of nodes have moved on"; + CLOG(clog) << "We see no consensus, but 80% of nodes have moved on"; return ConsensusState::MovedOn; } // no consensus yet JLOG(j.trace()) << "no consensus"; + CLOG(clog) << "No consensus. "; return ConsensusState::No; } diff --git a/src/xrpld/consensus/Consensus.h b/src/xrpld/consensus/Consensus.h index e340105737b..623aad25143 100644 --- a/src/xrpld/consensus/Consensus.h +++ b/src/xrpld/consensus/Consensus.h @@ -55,6 +55,7 @@ namespace ripple { @param idleInterval the network's desired idle interval @param parms Consensus constant parameters @param j journal for logging + @param clog log object to which to append */ bool shouldCloseLedger( @@ -67,7 +68,8 @@ shouldCloseLedger( std::chrono::milliseconds openTime, std::chrono::milliseconds idleInterval, ConsensusParms const& parms, - beast::Journal j); + beast::Journal j, + std::unique_ptr const& clog = {}); /** Determine whether the network reached consensus and whether we joined. @@ -82,6 +84,7 @@ shouldCloseLedger( @param parms Consensus constant parameters @param proposing whether we should count ourselves @param j journal for logging + @param clog log object to which to append */ ConsensusState checkConsensus( @@ -93,7 +96,8 @@ checkConsensus( std::chrono::milliseconds currentAgreeTime, ConsensusParms const& parms, bool proposing, - beast::Journal j); + beast::Journal j, + std::unique_ptr const& clog = {}); /** Generic implementation of consensus algorithm. @@ -340,11 +344,13 @@ class Consensus Called by the client code to start each round of consensus. - @param now The network adjusted time + @param now The network adjusted time @param prevLedgerID the ID of the last ledger - @param prevLedger The last ledger + @param prevLedger The last ledger @param nowUntrusted ID of nodes that are newly untrusted this round - @param proposing Whether we want to send proposals to peers this round. + @param proposing Whether we want to send proposals to peers this + round. + @param clog log object to which to append @note @b prevLedgerID is not required to the ID of @b prevLedger since the ID may be known locally before the contents of the ledger arrive @@ -355,7 +361,8 @@ class Consensus typename Ledger_t::ID const& prevLedgerID, Ledger_t prevLedger, hash_set const& nowUntrusted, - bool proposing); + bool proposing, + std::unique_ptr const& clog = {}); /** A peer has proposed a new position, adjust our tracking. @@ -370,10 +377,13 @@ class Consensus /** Call periodically to drive consensus forward. - @param now The network adjusted time + @param now The network adjusted time + @param clog log object to which to append */ void - timerEntry(NetClock::time_point const& now); + timerEntry( + NetClock::time_point const& now, + std::unique_ptr const& clog = {}); /** Process a transaction set acquired from the network @@ -439,11 +449,14 @@ class Consensus NetClock::time_point const& now, typename Ledger_t::ID const& prevLedgerID, Ledger_t const& prevLedger, - ConsensusMode mode); + ConsensusMode mode, + std::unique_ptr const& clog); // Change our view of the previous ledger void - handleWrongLedger(typename Ledger_t::ID const& lgrId); + handleWrongLedger( + typename Ledger_t::ID const& lgrId, + std::unique_ptr const& clog); /** Check if our previous ledger matches the network's. @@ -451,7 +464,7 @@ class Consensus the network and need to bow out/switch modes. */ void - checkLedger(); + checkLedger(std::unique_ptr const& clog); /** If we radically changed our consensus context for some reason, we need to replay recent proposals so that they're not lost. @@ -473,7 +486,7 @@ class Consensus switch to the establish phase and start the consensus process. */ void - phaseOpen(); + phaseOpen(std::unique_ptr const& clog); /** Handle establish phase. @@ -484,7 +497,8 @@ class Consensus If we have consensus, move to the accepted phase. */ void - phaseEstablish(); + phaseEstablish(std::unique_ptr const& clog); + ; /** Evaluate whether pausing increases likelihood of validation. * @@ -509,22 +523,24 @@ class Consensus * @return Whether to pause to wait for lagging proposers. */ bool - shouldPause() const; + shouldPause(std::unique_ptr const& clog) const; // Close the open ledger and establish initial position. void - closeLedger(); + closeLedger(std::unique_ptr const& clog); // Adjust our positions to try to agree with other validators. void - updateOurPositions(); + updateOurPositions(std::unique_ptr const& clog); bool - haveConsensus(); + haveConsensus(std::unique_ptr const& clog); // Create disputes between our position and the provided one. void - createDisputes(TxSet_t const& o); + createDisputes( + TxSet_t const& o, + std::unique_ptr const& clog = {}); // Update our disputes given that this node has adopted a new position. // Will call createDisputes as needed. @@ -534,7 +550,7 @@ class Consensus // Revoke our outstanding proposal, if any, and cease proposing // until this round ends. void - leaveConsensus(); + leaveConsensus(std::unique_ptr const& clog); // The rounded or effective close time estimate from a proposer NetClock::time_point @@ -621,7 +637,8 @@ Consensus::startRound( typename Ledger_t::ID const& prevLedgerID, Ledger_t prevLedger, hash_set const& nowUntrusted, - bool proposing) + bool proposing, + std::unique_ptr const& clog) { if (firstRound_) { @@ -658,7 +675,7 @@ Consensus::startRound( } } - startRoundInternal(now, prevLedgerID, prevLedger, startMode); + startRoundInternal(now, prevLedgerID, prevLedger, startMode, clog); } template void @@ -666,10 +683,14 @@ Consensus::startRoundInternal( NetClock::time_point const& now, typename Ledger_t::ID const& prevLedgerID, Ledger_t const& prevLedger, - ConsensusMode mode) + ConsensusMode mode, + std::unique_ptr const& clog) { phase_ = ConsensusPhase::open; - JLOG(j_.debug()) << "transitioned to ConsensusPhase::open"; + JLOG(j_.debug()) << "transitioned to ConsensusPhase::open "; + CLOG(clog) << "startRoundInternal transitioned to ConsensusPhase::open, " + "previous ledgerID: " + << prevLedgerID << ", seq: " << prevLedger.seq() << ". "; mode_.set(mode, adaptor_); now_ = now; prevLedgerID_ = prevLedgerID; @@ -690,11 +711,15 @@ Consensus::startRoundInternal( previousLedger_.seq() + typename Ledger_t::Seq{1}); playbackProposals(); + CLOG(clog) << "number of peer proposals,previous proposers: " + << currPeerPositions_.size() << ',' << prevProposers_ << ". "; + ; if (currPeerPositions_.size() > (prevProposers_ / 2)) { // We may be falling behind, don't wait for the timer // consider closing the ledger immediately - timerEntry(now_); + CLOG(clog) << "consider closing the ledger immediately. "; + timerEntry(now_, clog); } } @@ -816,25 +841,35 @@ Consensus::peerProposalInternal( template void -Consensus::timerEntry(NetClock::time_point const& now) +Consensus::timerEntry( + NetClock::time_point const& now, + std::unique_ptr const& clog) { + CLOG(clog) << "Consensus::::timerEntry. "; // Nothing to do if we are currently working on a ledger if (phase_ == ConsensusPhase::accepted) + { + CLOG(clog) << "Nothing to do during accepted phase. "; return; + } now_ = now; + CLOG(clog) << "Set network adjusted time to " << to_string(now) << ". "; // Check we are on the proper ledger (this may change phase_) - checkLedger(); - - if (phase_ == ConsensusPhase::open) + const auto phaseOrig = phase_; + CLOG(clog) << "Phase " << to_string(phaseOrig) << ". "; + checkLedger(clog); + if (const auto phaseNow = phase_; phaseOrig != phaseNow) { - phaseOpen(); + CLOG(clog) << "Changed phase to << " << to_string(phaseNow) << ". "; } + + if (phase_ == ConsensusPhase::open) + phaseOpen(clog); else if (phase_ == ConsensusPhase::establish) - { - phaseEstablish(); - } + phaseEstablish(clog); + CLOG(clog) << "timerEntry finishing in phase " << to_string(phase_) << ". "; } template @@ -894,7 +929,7 @@ Consensus::simulate( using namespace std::chrono_literals; JLOG(j_.info()) << "Simulating consensus"; now_ = now; - closeLedger(); + closeLedger({}); result_->roundTime.tick(consensusDelay.value_or(100ms)); result_->proposers = prevProposers_ = currPeerPositions_.size(); prevRoundTime_ = result_->roundTime.read(); @@ -1009,14 +1044,17 @@ Consensus::getJson(bool full) const // Handle a change in the prior ledger during a consensus round template void -Consensus::handleWrongLedger(typename Ledger_t::ID const& lgrId) +Consensus::handleWrongLedger( + typename Ledger_t::ID const& lgrId, + std::unique_ptr const& clog) { + CLOG(clog) << "handleWrongLedger. "; XRPL_ASSERT( lgrId != prevLedgerID_ || previousLedger_.id() != lgrId, "ripple::Consensus::handleWrongLedger : have wrong ledger"); // Stop proposing because we are out of sync - leaveConsensus(); + leaveConsensus(clog); // First time switching to this ledger if (prevLedgerID_ != lgrId) @@ -1039,41 +1077,63 @@ Consensus::handleWrongLedger(typename Ledger_t::ID const& lgrId) } if (previousLedger_.id() == prevLedgerID_) + { + CLOG(clog) << "previousLedger_.id() == prevLeverID_ " << prevLedgerID_ + << ". "; return; + } // we need to switch the ledger we're working from if (auto newLedger = adaptor_.acquireLedger(prevLedgerID_)) { JLOG(j_.info()) << "Have the consensus ledger " << prevLedgerID_; + CLOG(clog) << "Have the consensus ledger " << prevLedgerID_ << ". "; startRoundInternal( - now_, lgrId, *newLedger, ConsensusMode::switchedLedger); + now_, lgrId, *newLedger, ConsensusMode::switchedLedger, clog); } else { + CLOG(clog) << "Still on wrong ledger. "; mode_.set(ConsensusMode::wrongLedger, adaptor_); } } template void -Consensus::checkLedger() +Consensus::checkLedger(std::unique_ptr const& clog) { + CLOG(clog) << "checkLedger. "; + auto netLgr = adaptor_.getPrevLedger(prevLedgerID_, previousLedger_, mode_.get()); + CLOG(clog) << "network ledgerid " << netLgr << ", " << "previous ledger " + << prevLedgerID_ << ". "; if (netLgr != prevLedgerID_) { - JLOG(j_.warn()) << "View of consensus changed during " - << to_string(phase_) << " status=" << to_string(phase_) - << ", " << " mode=" << to_string(mode_.get()); - JLOG(j_.warn()) << prevLedgerID_ << " to " << netLgr; + std::stringstream ss; + ss << "View of consensus changed during " << to_string(phase_) + << " status=" << to_string(phase_) << ", " + << " mode=" << to_string(mode_.get()); + JLOG(j_.warn()) << ss.str(); + CLOG(clog) << ss.str(); + ss.clear(); + ss.str(""); + ss << prevLedgerID_ << " to " << netLgr; + JLOG(j_.warn()) << ss.str(); + CLOG(clog) << ss.str(); JLOG(j_.warn()) << Json::Compact{previousLedger_.getJson()}; - JLOG(j_.debug()) << "State on consensus change " - << Json::Compact{getJson(true)}; - handleWrongLedger(netLgr); + CLOG(clog) << "State on consensus change " + << Json::Compact{getJson(true)} << ". "; + handleWrongLedger(netLgr, clog); } else if (previousLedger_.id() != prevLedgerID_) - handleWrongLedger(netLgr); + { + CLOG(clog) << "previousLedger_.id() != prevLedgerID_: " + << previousLedger_.id() << ',' << to_string(prevLedgerID_) + << ". "; + handleWrongLedger(netLgr, clog); + } } template @@ -1095,8 +1155,9 @@ Consensus::playbackProposals() template void -Consensus::phaseOpen() +Consensus::phaseOpen(std::unique_ptr const& clog) { + CLOG(clog) << "phaseOpen. "; using namespace std::chrono; // it is shortly before ledger close time @@ -1109,25 +1170,44 @@ Consensus::phaseOpen() // This computes how long since last ledger's close time milliseconds sinceClose; { - bool previousCloseCorrect = - (mode_.get() != ConsensusMode::wrongLedger) && - previousLedger_.closeAgree() && - (previousLedger_.closeTime() != - (previousLedger_.parentCloseTime() + 1s)); + const auto mode = mode_.get(); + const bool closeAgree = previousLedger_.closeAgree(); + const auto prevCloseTime = previousLedger_.closeTime(); + const auto prevParentCloseTimePlus1 = + previousLedger_.parentCloseTime() + 1s; + bool previousCloseCorrect = (mode != ConsensusMode::wrongLedger) && + closeAgree && (prevCloseTime != prevParentCloseTimePlus1); auto lastCloseTime = previousCloseCorrect - ? previousLedger_.closeTime() // use consensus timing - : prevCloseTime_; // use the time we saw internally + ? prevCloseTime // use consensus timing + : prevCloseTime_; // use the time we saw internally if (now_ >= lastCloseTime) sinceClose = duration_cast(now_ - lastCloseTime); else sinceClose = -duration_cast(lastCloseTime - now_); + CLOG(clog) << "calculating how long since last ledger's close time " + "based on mode : " + << to_string(mode) + << ", previous closeAgree: " << closeAgree + << ", previous close time: " << to_string(prevCloseTime) + << ", previous parent close time + 1s: " + << to_string(prevParentCloseTimePlus1) + << ", previous close time seen internally: " + << to_string(prevCloseTime_) + << ", last close time: " << to_string(lastCloseTime) + << ", since close: " << sinceClose.count() << ". "; } auto const idleInterval = std::max( adaptor_.parms().ledgerIDLE_INTERVAL, 2 * previousLedger_.closeTimeResolution()); + CLOG(clog) << "idle interval set to " << idleInterval.count() + << "ms based on " + << "ledgerIDLE_INTERVAL: " + << adaptor_.parms().ledgerIDLE_INTERVAL.count() + << ", previous ledger close time resolution: " + << previousLedger_.closeTimeResolution().count() << "ms. "; // Decide if we should close the ledger if (shouldCloseLedger( @@ -1140,16 +1220,20 @@ Consensus::phaseOpen() openTime_.read(), idleInterval, adaptor_.parms(), - j_)) + j_, + clog)) { - closeLedger(); + CLOG(clog) << "closing ledger. "; + closeLedger(clog); } } template bool -Consensus::shouldPause() const +Consensus::shouldPause( + std::unique_ptr const& clog) const { + CLOG(clog) << "shouldPause? "; auto const& parms = adaptor_.parms(); std::uint32_t const ahead( previousLedger_.seq() - @@ -1176,6 +1260,7 @@ Consensus::shouldPause() const result_->roundTime.read() > parms.ledgerMAX_CONSENSUS) { j_.debug() << "not pausing (early)" << vars.str(); + CLOG(clog) << "Not pausing (early). "; return false; } @@ -1253,16 +1338,24 @@ Consensus::shouldPause() const } if (willPause) + { j_.warn() << "pausing" << vars.str(); + CLOG(clog) << "pausing " << vars.str() << ". "; + } else + { j_.debug() << "not pausing" << vars.str(); + CLOG(clog) << "not pausing. "; + } return willPause; } template void -Consensus::phaseEstablish() +Consensus::phaseEstablish( + std::unique_ptr const& clog) { + CLOG(clog) << "phaseEstablish. "; // can only establish consensus if we already took a stance XRPL_ASSERT(result_, "ripple::Consensus::phaseEstablish : result is set"); @@ -1274,25 +1367,39 @@ Consensus::phaseEstablish() convergePercent_ = result_->roundTime.read() * 100 / std::max(prevRoundTime_, parms.avMIN_CONSENSUS_TIME); + CLOG(clog) << "convergePercent_ " << convergePercent_ + << " is based on round duration so far: " + << result_->roundTime.read().count() << "ms, " + << "previous round duration: " << prevRoundTime_.count() + << "ms, " + << "avMIN_CONSENSUS_TIME: " << parms.avMIN_CONSENSUS_TIME.count() + << "ms. "; // Give everyone a chance to take an initial position if (result_->roundTime.read() < parms.ledgerMIN_CONSENSUS) + { + CLOG(clog) << "ledgerMIN_CONSENSUS not reached: " + << parms.ledgerMIN_CONSENSUS.count() << "ms. "; return; + } - updateOurPositions(); + updateOurPositions(clog); // Nothing to do if too many laggards or we don't have consensus. - if (shouldPause() || !haveConsensus()) + if (shouldPause(clog) || !haveConsensus(clog)) return; if (!haveCloseTimeConsensus_) { JLOG(j_.info()) << "We have TX consensus but not CT consensus"; + CLOG(clog) << "We have TX consensus but not CT consensus. "; return; } JLOG(j_.info()) << "Converge cutoff (" << currPeerPositions_.size() << " participants)"; + CLOG(clog) << "Converge cutoff (" << currPeerPositions_.size() + << " participants). Transitioned to ConsensusPhase::accepted. "; adaptor_.updateOperatingMode(currPeerPositions_.size()); prevProposers_ = currPeerPositions_.size(); prevRoundTime_ = result_->roundTime.read(); @@ -1304,12 +1411,13 @@ Consensus::phaseEstablish() closeResolution_, rawCloseTimes_, mode_.get(), - getJson(true)); + getJson(true), + adaptor_.validating()); } template void -Consensus::closeLedger() +Consensus::closeLedger(std::unique_ptr const& clog) { // We should not be closing if we already have a position XRPL_ASSERT(!result_, "ripple::Consensus::closeLedger : result is not set"); @@ -1325,7 +1433,12 @@ Consensus::closeLedger() if (acquired_.emplace(result_->txns.id(), result_->txns).second) adaptor_.share(result_->txns); - if (mode_.get() == ConsensusMode::proposing) + const auto mode = mode_.get(); + CLOG(clog) + << "closeLedger transitioned to ConsensusPhase::establish, mode: " + << to_string(mode) + << ", number of peer positions: " << currPeerPositions_.size() << ". "; + if (mode == ConsensusMode::proposing) adaptor_.propose(result_->position); // Create disputes with any peer positions we have transactions for @@ -1334,9 +1447,7 @@ Consensus::closeLedger() auto const& pos = pit.second.proposal().position(); auto const it = acquired_.find(pos); if (it != acquired_.end()) - { - createDisputes(it->second); - } + createDisputes(it->second, clog); } } @@ -1362,7 +1473,8 @@ participantsNeeded(int participants, int percent) template void -Consensus::updateOurPositions() +Consensus::updateOurPositions( + std::unique_ptr const& clog) { // We must have a position if we are updating it XRPL_ASSERT( @@ -1372,6 +1484,8 @@ Consensus::updateOurPositions() // Compute a cutoff time auto const peerCutoff = now_ - parms.proposeFRESHNESS; auto const ourCutoff = now_ - parms.proposeINTERVAL; + CLOG(clog) << "updateOurPositions. peerCutoff " << to_string(peerCutoff) + << ", ourCutoff " << to_string(ourCutoff) << ". "; // Verify freshness of peer positions and compute close times std::map closeTimeVotes; @@ -1454,6 +1568,7 @@ Consensus::updateOurPositions() neededWeight = parms.avLATE_CONSENSUS_PCT; else neededWeight = parms.avSTUCK_CONSENSUS_PCT; + CLOG(clog) << "neededWeight " << neededWeight << ". "; int participants = currPeerPositions_.size(); if (mode_.get() == ConsensusMode::proposing) @@ -1469,9 +1584,12 @@ Consensus::updateOurPositions() int const threshConsensus = participantsNeeded(participants, parms.avCT_CONSENSUS_PCT); - JLOG(j_.info()) << "Proposers:" << currPeerPositions_.size() - << " nw:" << neededWeight << " thrV:" << threshVote - << " thrC:" << threshConsensus; + std::stringstream ss; + ss << "Proposers:" << currPeerPositions_.size() + << " nw:" << neededWeight << " thrV:" << threshVote + << " thrC:" << threshConsensus; + JLOG(j_.info()) << ss.str(); + CLOG(clog) << ss.str(); for (auto const& [t, v] : closeTimeVotes) { @@ -1500,6 +1618,7 @@ Consensus::updateOurPositions() << " Mode:" << to_string(mode_.get()) << " Thresh:" << threshConsensus << " Pos:" << consensusCloseTime.time_since_epoch().count(); + CLOG(clog) << "No close time consensus. "; } } @@ -1517,9 +1636,11 @@ Consensus::updateOurPositions() result_->txns = std::move(*ourNewSet); - JLOG(j_.info()) << "Position change: CTime " - << consensusCloseTime.time_since_epoch().count() - << ", tx " << newID; + std::stringstream ss; + ss << "Position change: CTime " + << consensusCloseTime.time_since_epoch().count() << ", tx " << newID; + JLOG(j_.info()) << ss.str(); + CLOG(clog) << ss.str(); result_->position.changePosition(newID, consensusCloseTime, now_); @@ -1547,7 +1668,8 @@ Consensus::updateOurPositions() template bool -Consensus::haveConsensus() +Consensus::haveConsensus( + std::unique_ptr const& clog) { // Must have a stance if we are checking for consensus XRPL_ASSERT(result_, "ripple::Consensus::haveConsensus : has result"); @@ -1587,10 +1709,14 @@ Consensus::haveConsensus() result_->roundTime.read(), adaptor_.parms(), mode_.get() == ConsensusMode::proposing, - j_); + j_, + clog); if (result_->state == ConsensusState::No) + { + CLOG(clog) << "No consensus. "; return false; + } // There is consensus, but we need to track if the network moved on // without us. @@ -1598,14 +1724,18 @@ Consensus::haveConsensus() { JLOG(j_.error()) << "Unable to reach consensus"; JLOG(j_.error()) << Json::Compact{getJson(true)}; + CLOG(clog) << "Unable to reach consensus " + << Json::Compact{getJson(true)} << ". "; } + CLOG(clog) << "Consensus has been reached. "; return true; } template void -Consensus::leaveConsensus() +Consensus::leaveConsensus( + std::unique_ptr const& clog) { if (mode_.get() == ConsensusMode::proposing) { @@ -1617,24 +1747,34 @@ Consensus::leaveConsensus() mode_.set(ConsensusMode::observing, adaptor_); JLOG(j_.info()) << "Bowing out of consensus"; + CLOG(clog) << "Bowing out of consensus. "; } } template void -Consensus::createDisputes(TxSet_t const& o) +Consensus::createDisputes( + TxSet_t const& o, + std::unique_ptr const& clog) { // Cannot create disputes without our stance XRPL_ASSERT(result_, "ripple::Consensus::createDisputes : result is set"); // Only create disputes if this is a new set - if (!result_->compares.emplace(o.id()).second) + const auto emplaced = result_->compares.emplace(o.id()).second; + CLOG(clog) << "createDisputes: new set? " << !emplaced << ". "; + if (!emplaced) return; // Nothing to dispute if we agree if (result_->txns.id() == o.id()) + { + CLOG(clog) << "both sets are identical. "; return; + } + CLOG(clog) << "comparing existing with new set: " << result_->txns.id() + << ',' << o.id() << ". "; JLOG(j_.debug()) << "createDisputes " << result_->txns.id() << " to " << o.id(); @@ -1678,6 +1818,7 @@ Consensus::createDisputes(TxSet_t const& o) result_->disputes.emplace(txID, std::move(dtx)); } JLOG(j_.debug()) << dc << " differences found"; + CLOG(clog) << "disputes: " << dc << ". "; } template From 257a2deb28b53b770d6d5591e8cf531abab6b304 Mon Sep 17 00:00:00 2001 From: Mark Travis Date: Sat, 22 Feb 2025 14:54:43 -0800 Subject: [PATCH 2/2] Review fixes from Valentin. --- src/xrpld/app/consensus/RCLConsensus.h | 3 ++- src/xrpld/app/misc/NetworkOPs.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/xrpld/app/consensus/RCLConsensus.h b/src/xrpld/app/consensus/RCLConsensus.h index a29a1f33a46..0c48aec86b6 100644 --- a/src/xrpld/app/consensus/RCLConsensus.h +++ b/src/xrpld/app/consensus/RCLConsensus.h @@ -542,7 +542,8 @@ class RCLConsensus /** Collects logging information. * * Eases correlating multiple data points together to - * help follow flow of a complex activity. If we + * help follow flow of a complex activity, such as + * consensus heartbeat timer. If we * are a validator, then always write output to * debug log on destruction. Otherwise, INFO * level. diff --git a/src/xrpld/app/misc/NetworkOPs.cpp b/src/xrpld/app/misc/NetworkOPs.cpp index c4c5f962418..8a5872f1009 100644 --- a/src/xrpld/app/misc/NetworkOPs.cpp +++ b/src/xrpld/app/misc/NetworkOPs.cpp @@ -1868,7 +1868,7 @@ NetworkOPsImp::beginConsensus( CLOG(clog) << "beginConsensus Don't have LCL, going to tracking. "; } - CLOG(clog) << "beginConsensus no previous ledge. "; + CLOG(clog) << "beginConsensus no previous ledger. "; return false; }