From 9da32e01cc6cc7edf4179e2e3751ab37433ee6b8 Mon Sep 17 00:00:00 2001 From: Garand Tyson Date: Fri, 1 May 2026 01:16:56 -0700 Subject: [PATCH 1/3] Add experimental trigger timer --- docs/metrics.md | 1 + src/herder/HerderImpl.cpp | 162 +++++++++++++++++++++++++++++++-- src/herder/HerderImpl.h | 20 ++++ src/herder/HerderSCPDriver.cpp | 2 - src/herder/HerderSCPDriver.h | 7 +- src/main/Config.cpp | 3 + src/main/Config.h | 4 + 7 files changed, 185 insertions(+), 14 deletions(-) diff --git a/docs/metrics.md b/docs/metrics.md index 9ec61c2b9..448b3c8c4 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -199,6 +199,7 @@ scp.timing.externalized | timer | time spent in ballot pro scp.timing.first-to-self-externalize-lag | timer | delay between first externalize message and local node externalizing scp.timing.self-to-others-externalize-lag | timer | delay between local node externalizing and later externalize messages from other nodes scp.timing.ballot-blocked-on-txset | timer | time balloting was blocked waiting for a txset download (milliseconds) +scp.trigger.prepare-start-fallback | meter | experimental trigger timer fell back from the network-close-time anchor to the local prepare-start anchor scp.value.invalid | meter | SCP value is invalid scp.value.valid | meter | SCP value is valid scp.slot.values-referenced | histogram | number of values referenced per consensus round diff --git a/src/herder/HerderImpl.cpp b/src/herder/HerderImpl.cpp index d9da4bf21..26d5afd60 100644 --- a/src/herder/HerderImpl.cpp +++ b/src/herder/HerderImpl.cpp @@ -80,6 +80,8 @@ HerderImpl::SCPMetrics::SCPMetrics(Application& app) {"scp", "envelope", "validsig"}, "envelope")) , mEnvelopeInvalidSig(app.getMetrics().NewMeter( {"scp", "envelope", "invalidsig"}, "envelope")) + , mTriggerPrepareStartFallback(app.getMetrics().NewMeter( + {"scp", "trigger", "prepare-start-fallback"}, "trigger")) { } @@ -1265,6 +1267,151 @@ HerderImpl::lastClosedLedgerIncreased(bool latest, TxSetXDRFrameConstPtr txSet, } } +VirtualClock::time_point +HerderImpl::triggerAnchorFromPrepareStart( + uint64_t lastIndex, VirtualClock::time_point now, + std::chrono::milliseconds expectedClose) +{ + auto lastStart = mHerderSCPDriver.getPrepareStart(lastIndex); + if (lastStart) + { + return *lastStart; + } + // Pessimistic estimate: assume the previous ballot protocol started one + // full interval ago, so the next trigger should fire immediately. + return now - expectedClose; +} + +// Compute the anchor for the next trigger. Caller fires at +// `anchor + expectedClose`. Unlike the local ballot cadence trigger, this uses +// the externalized close time from the previous ledger, which is a timestamp +// coming from a different, potentially unsynced node. This is more accurate +// than just using local timers, as those skew late based on latency from the +// leader, but we need to be careful to account for drift. +// +// Constraints: +// 1. Track `expectedClose` on the network timeline as closely as possible. +// 2. Under drift, prefer a longer ledger to a shorter one to prevent fast +// ledgers causing too much strain on the network. +// 3. Never let a badly lagging local clock wedge the node (i.e. never schedule +// a trigger in the far future). +// +// Scenarios (assume last `closeTime` = N, target = 5s): +// 1. Our clock ahead of network: Check how much time our system clock says has +// elapsed since the externalized network ledger closed +// (timeSinceNetworkLedgerStart). If timeSinceNetworkLedgerStart > target, +// either the network itself is slow, or our clock is ahead. +// +// To get a better sense of which, we also take into account +// nomination time, so the check becomes timeSinceNetworkLedgerStart > target +// + nominationBudget, where nomination budget scales with timeouts. +// +// If we think we're drifting ahead after taking nomination into account, we +// fall back to prepare-start anchor, which is based on our local clock and +// can't drift, but is slower. +// +// Note that if nomination is quick, but apply takes a long time, it still +// appears like we're ahead and we fall back to prepare-start. This isn't a +// problem. The prepare-start timer starts before the apply stage, so a long +// apply is "baked in" to the time. If apply was the bottleneck, we'll +// probably trigger immediately even if we use the prepare-start timer because +// we're behind, so using prepare-start is identical to the network-based +// anchor from the perf standpoint if we really are in sync anyway. +// +// This is not true for nomination. The prepare-start timer starts after +// nomination, so it does not reflect a long nomination. This is why we need +// the nomination budget in our check. If nomination took a long time and we +// fall back to the conservative timer, this is much worse from a perf +// perspective, as we are waiting nomination time + target time before +// starting the next ledger. +// +// 2. Our clock behind network: Check whether our local timer says +// more time has elapsed since prepare-start vs how much time has +// elapsed since the externalized network ledger closed. +// +// If true, our system clock is lagging far enough that the network-based +// anchor would schedule the next trigger later than the conservative timer +// based on local ballot cadence. That delay can grow with drift and wedge the +// node if arbitrarily in the future, so we fall back to the prepare-start +// anchor. This anchor is conservative and local, but it gives us a bounded +// trigger time even when system time is badly behind. +// +// 3. Clocks synced, but nomination/apply was slow: With synced clocks, +// time since network close time is large because real time really passed, +// not because our system clock drifted. The goal is to avoid falling back to +// a conservative timer and snowballing the real delay. +// +// See scenario 1, but TL;DR we can look at the nomination timeouts and see +// if the network is slow vs. the node drifting. If nomination is slow, we +// can't fall back to the prepare-apply timer because it would compound the +// delay. If apply is slow, it doesn't matter which timer we use, they both +// will result in triggering immediately. +VirtualClock::time_point +HerderImpl::triggerAnchorFromConsensusCloseTime( + uint64_t lastIndex, VirtualClock::time_point now, + std::chrono::milliseconds expectedClose) +{ + auto fallbackToPrepareStart = [&]() { + mSCPMetrics.mTriggerPrepareStartFallback.Mark(); + return triggerAnchorFromPrepareStart(lastIndex, now, expectedClose); + }; + + auto consensusCloseTime = trackingConsensusCloseTime(); + if (consensusCloseTime == 0) + { + CLOG_WARNING(Herder, "Consensus close time is 0, falling back to " + "prepare-start anchor"); + return fallbackToPrepareStart(); + } + + // Compare elapsed time on the externalized closeTime timeline with elapsed + // time on our local prepare-start timeline. + // Relation, with drift > 0 meaning our clock is ahead of network time: + // + // timeSinceNetworkLedgerStart + // = nominationBudget + timeSinceLocalBallotStart + drift + // + // where nominationBudget is the slow-nomination allowance described above. + auto externalizedSystemTime = VirtualClock::from_time_t(consensusCloseTime); + auto currentSystemTime = mApp.getClock().system_now(); + auto timeSinceNetworkLedgerStart = + std::chrono::duration_cast( + currentSystemTime - externalizedSystemTime); + + auto localBallotStart = + triggerAnchorFromPrepareStart(lastIndex, now, expectedClose); + auto timeSinceLocalBallotStart = + std::chrono::duration_cast(now - + localBallotStart); + + // Scenario 2: if system time is behind the local prepare-start timer, the + // network-based anchor can wedge the node, so use the local fallback. + if (timeSinceLocalBallotStart > timeSinceNetworkLedgerStart) + { + return fallbackToPrepareStart(); + } + + // Scenario 1: widen the ahead-drift bound by the slow nomination we can + // explain from the previous slot's timeout count. + auto nominationTimeouts = + mHerderSCPDriver.getNominationTimeouts(lastIndex).value_or(0); + auto nominationBudget = std::chrono::milliseconds::zero(); + for (int64_t round = 1; round <= nominationTimeouts; ++round) + { + nominationBudget += mHerderSCPDriver.computeTimeout( + static_cast(round), /*isNomination=*/true); + } + + // Scenario 1: if elapsed system time exceeds target plus explainable + // nomination delay, treat it as clock-ahead drift and use the fallback. + if (timeSinceNetworkLedgerStart > expectedClose + nominationBudget) + { + return fallbackToPrepareStart(); + } + + return now - timeSinceNetworkLedgerStart; +} + void HerderImpl::setupTriggerNextLedger() { @@ -1291,19 +1438,16 @@ HerderImpl::setupTriggerNextLedger() std::chrono::milliseconds milliseconds = mLedgerManager.getExpectedLedgerCloseTime(); - // bootstrap with a pessimistic estimate of when - // the ballot protocol started last auto now = mApp.getClock().now(); - auto lastBallotStart = now - milliseconds; - auto lastStart = mHerderSCPDriver.getPrepareStart(lastIndex); - if (lastStart) - { - lastBallotStart = *lastStart; - } + + auto lastLedgerStartingPoint = + mApp.getConfig().EXPERIMENTAL_TRIGGER_TIMER + ? triggerAnchorFromConsensusCloseTime(lastIndex, now, milliseconds) + : triggerAnchorFromPrepareStart(lastIndex, now, milliseconds); // Adjust trigger time in case node's clock has drifted. // This ensures that next value to nominate is valid - auto triggerTime = lastBallotStart + milliseconds; + auto triggerTime = lastLedgerStartingPoint + milliseconds; if (triggerTime < now) { diff --git a/src/herder/HerderImpl.h b/src/herder/HerderImpl.h index 5b12e966f..ceb09b3d2 100644 --- a/src/herder/HerderImpl.h +++ b/src/herder/HerderImpl.h @@ -258,6 +258,22 @@ class HerderImpl : public Herder void setupTriggerNextLedger(); + // Compute the trigger-timer anchor point using the local node's + // prepare-start timestamp for the previous slot. Returns a pessimistic + // estimate (now - expectedClose) if no prepare-start is recorded. + VirtualClock::time_point + triggerAnchorFromPrepareStart(uint64_t lastIndex, + VirtualClock::time_point now, + std::chrono::milliseconds expectedClose); + + // Compute the trigger-timer anchor point using the network-agreed + // consensus close time on the system clock. Falls back to + // triggerAnchorFromPrepareStart if consensus close time is unavailable + // or if the local clock is significantly drifting from the network time. + VirtualClock::time_point triggerAnchorFromConsensusCloseTime( + uint64_t lastIndex, VirtualClock::time_point now, + std::chrono::milliseconds expectedClose); + void startOutOfSyncTimer(); void outOfSyncRecovery(); void broadcast(SCPEnvelope const& e); @@ -342,6 +358,10 @@ class HerderImpl : public Herder medida::Meter& mEnvelopeValidSig; medida::Meter& mEnvelopeInvalidSig; + // Marked when the experimental trigger timer falls back from the + // network-close-time anchor to the local prepare-start anchor. + medida::Meter& mTriggerPrepareStartFallback; + SCPMetrics(Application& app); }; diff --git a/src/herder/HerderSCPDriver.cpp b/src/herder/HerderSCPDriver.cpp index 4a66a5d35..f4e81b9d6 100644 --- a/src/herder/HerderSCPDriver.cpp +++ b/src/herder/HerderSCPDriver.cpp @@ -1959,7 +1959,6 @@ HerderSCPDriver::getNodeWeight(NodeID const& nodeID, SCPQuorumSet const& qset, return qualityWeightIt->second / homeDomainSizeIt->second; } -#ifdef BUILD_TESTS std::optional HerderSCPDriver::getNominationTimeouts(uint64_t slotIndex) const { @@ -1970,6 +1969,5 @@ HerderSCPDriver::getNominationTimeouts(uint64_t slotIndex) const } return std::nullopt; } -#endif } diff --git a/src/herder/HerderSCPDriver.h b/src/herder/HerderSCPDriver.h index 4094b3f81..be817c5da 100644 --- a/src/herder/HerderSCPDriver.h +++ b/src/herder/HerderSCPDriver.h @@ -209,15 +209,16 @@ class HerderSCPDriver : public SCPDriver void cacheValidTxSet(ApplicableTxSetFrame const& txSet, LedgerHeaderHistoryEntry const& lcl, uint64_t closeTimeOffset) const; + + // Get the number of nomination timeouts that occurred for a given slot + std::optional getNominationTimeouts(uint64_t slotIndex) const; + #ifdef BUILD_TESTS RandomEvictionCache& getTxSetValidityCache() { return mTxSetValidCache; } - - // Get the number of nomination timeouts that occurred for a given slot - std::optional getNominationTimeouts(uint64_t slotIndex) const; #endif private: diff --git a/src/main/Config.cpp b/src/main/Config.cpp index 987832e68..97b136aee 100644 --- a/src/main/Config.cpp +++ b/src/main/Config.cpp @@ -176,6 +176,7 @@ Config::Config() : NODE_SEED(SecretKey::random()) DISABLE_SOROBAN_METRICS_FOR_TESTING = false; DISABLE_TX_META_FOR_TESTING = false; BACKGROUND_TX_SIG_VERIFICATION = true; + EXPERIMENTAL_TRIGGER_TIMER = false; BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT = 14; // 2^14 == 16 kb BUCKETLIST_DB_INDEX_CUTOFF = 20; // 20 mb BUCKETLIST_DB_MEMORY_FOR_CACHING = 0; @@ -1209,6 +1210,8 @@ Config::processConfig(std::shared_ptr t) }}, {"BACKGROUND_TX_SIG_VERIFICATION", [&]() { BACKGROUND_TX_SIG_VERIFICATION = readBool(item); }}, + {"EXPERIMENTAL_TRIGGER_TIMER", + [&]() { EXPERIMENTAL_TRIGGER_TIMER = readBool(item); }}, {"ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING", [&]() { ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = diff --git a/src/main/Config.h b/src/main/Config.h index af4bee021..329a87184 100644 --- a/src/main/Config.h +++ b/src/main/Config.h @@ -542,6 +542,10 @@ class Config : public std::enable_shared_from_this // also enabled. bool BACKGROUND_TX_SIG_VERIFICATION; + // Experimental flag to use externalized close time for trigger timer + // calculation instead of prepare start time. + bool EXPERIMENTAL_TRIGGER_TIMER; + // When set to true, BucketListDB indexes are persisted on-disk so that the // BucketList does not need to be reindexed on startup. Defaults to true. // This should only be set to false for testing purposes From 2279d0fc77d25c27d5668cd37bbde5df56c45f46 Mon Sep 17 00:00:00 2001 From: Garand Tyson Date: Fri, 1 May 2026 02:16:13 -0700 Subject: [PATCH 2/3] Added tests for experimental timer --- src/herder/HerderImpl.cpp | 7 +- src/herder/HerderSCPDriver.cpp | 20 ++++ src/herder/HerderSCPDriver.h | 3 + src/herder/test/HerderTests.cpp | 164 ++++++++++++++++++++++++++++++++ src/main/ApplicationImpl.cpp | 11 +++ src/main/Config.cpp | 17 ++++ src/main/Config.h | 8 ++ src/scp/NominationProtocol.cpp | 25 +++++ src/scp/SCPDriver.h | 8 ++ src/scp/Slot.h | 8 +- src/test/test.cpp | 1 + src/util/Timer.cpp | 28 +++++- src/util/Timer.h | 22 +++++ 13 files changed, 314 insertions(+), 8 deletions(-) diff --git a/src/herder/HerderImpl.cpp b/src/herder/HerderImpl.cpp index 26d5afd60..6b538e5d0 100644 --- a/src/herder/HerderImpl.cpp +++ b/src/herder/HerderImpl.cpp @@ -1285,9 +1285,10 @@ HerderImpl::triggerAnchorFromPrepareStart( // Compute the anchor for the next trigger. Caller fires at // `anchor + expectedClose`. Unlike the local ballot cadence trigger, this uses // the externalized close time from the previous ledger, which is a timestamp -// coming from a different, potentially unsynced node. This is more accurate -// than just using local timers, as those skew late based on latency from the -// leader, but we need to be careful to account for drift. +// coming from a different node whose system clock may be drifted relative to +// ours. This is more accurate than just using local timers, as those skew late +// based on latency from the leader, but we need to be careful to account for +// drift. // // Constraints: // 1. Track `expectedClose` on the network timeline as closely as possible. diff --git a/src/herder/HerderSCPDriver.cpp b/src/herder/HerderSCPDriver.cpp index f4e81b9d6..d5551ccc5 100644 --- a/src/herder/HerderSCPDriver.cpp +++ b/src/herder/HerderSCPDriver.cpp @@ -725,6 +725,18 @@ void HerderSCPDriver::timerCallbackWrapper(uint64_t slotIndex, int timerID, std::function cb) { +#ifdef BUILD_TESTS + if (timerID == Slot::NOMINATION_EMIT_TIMER) + { + if (!mHerder.isTracking() || + mHerder.nextConsensusLedgerIndex() == slotIndex) + { + cb(); + } + return; + } +#endif + // reschedule timers for future slots when tracking if (mHerder.isTracking() && mHerder.nextConsensusLedgerIndex() != slotIndex) { @@ -850,6 +862,14 @@ HerderSCPDriver::computeTimeout(uint32 roundNumber, bool isNomination) return std::chrono::milliseconds(timeoutMS); } +#ifdef BUILD_TESTS +std::chrono::milliseconds +HerderSCPDriver::getNominationEmitDelayForTesting() const +{ + return mApp.getConfig().ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING; +} +#endif + // returns true if l < r // lh, rh are the hashes of l,h static bool diff --git a/src/herder/HerderSCPDriver.h b/src/herder/HerderSCPDriver.h index be817c5da..5c6a2c06a 100644 --- a/src/herder/HerderSCPDriver.h +++ b/src/herder/HerderSCPDriver.h @@ -105,6 +105,9 @@ class HerderSCPDriver : public SCPDriver void stopTimer(uint64 slotIndex, int timerID) override; std::chrono::milliseconds computeTimeout(uint32 roundNumber, bool isNomination) override; +#ifdef BUILD_TESTS + std::chrono::milliseconds getNominationEmitDelayForTesting() const override; +#endif // hashing support Hash getHashOf(std::vector> const& vals) const override; diff --git a/src/herder/test/HerderTests.cpp b/src/herder/test/HerderTests.cpp index 9868cf6e4..7d29b2839 100644 --- a/src/herder/test/HerderTests.cpp +++ b/src/herder/test/HerderTests.cpp @@ -8914,3 +8914,167 @@ TEST_CASE("network externalizes empty-tx-set on missing value", "[herder][tx]") txtest::captureLastClosedLedgerLcm(*app); } #endif // CAP_0083 + +TEST_CASE("experimental trigger timer", "[herder][!hide]") +{ + constexpr uint32_t LEDGERS_TO_RUN = 10; + constexpr int64_t MIN_DRIFT_FALLBACKS = (LEDGERS_TO_RUN + 1) / 2; + auto const driftOffset = std::chrono::seconds(4); + + struct RunResult + { + std::chrono::milliseconds elapsed; + int64_t totalFallbacks{0}; + int64_t driftedNodeFallbacks{0}; + int64_t otherNodeFallbacks{0}; + bool sawNominationTimeout{false}; + }; + + auto fallbackCount = [](Application::pointer const& app) { + auto const metrics = app->getMetrics().GetAllMetrics(); + auto const it = + metrics.find({"scp", "trigger", "prepare-start-fallback"}); + if (it == metrics.end()) + { + return int64_t{0}; + } + auto meter = dynamic_cast(it->second.get()); + releaseAssert(meter); + return static_cast(meter->count()); + }; + + auto runSimulation = + [&](bool experimentalTriggerTimer, + std::chrono::milliseconds nominationEmitDelay, + std::chrono::milliseconds triggerClockOffset = + std::chrono::milliseconds::zero()) -> RunResult { + auto networkID = sha256(getTestConfig().NETWORK_PASSPHRASE); + + auto simulation = Topologies::separateAllHighQuality( + 4, Simulation::OVER_TCP, networkID, [&](int i) { + auto cfg = getTestConfig(i, Config::TESTDB_DEFAULT); + cfg.ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING = false; + cfg.EXPERIMENTAL_TRIGGER_TIMER = experimentalTriggerTimer; + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::milliseconds(1000); + cfg.ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING = + nominationEmitDelay; + + // Drift one validator. Note: i == 0 is the Simulation's + // idle app (its config is generated first by the constructor), + // so the first real validator is i == 1. + if (i == 1) + { + cfg.ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING = + triggerClockOffset; + } + return cfg; + }); + + simulation->fullyConnectAllPending(); + simulation->startAllNodes(); + auto nodes = simulation->getNodes(); + auto const expectedClose = simulation->getExpectedLedgerCloseTime(); + REQUIRE(expectedClose == std::chrono::seconds(5)); + + std::vector fallbackCounts; + std::transform(nodes.begin(), nodes.end(), + std::back_inserter(fallbackCounts), fallbackCount); + + auto minLedger = [&]() { + return std::min_element(nodes.begin(), nodes.end(), + [](Application::pointer const& lhs, + Application::pointer const& rhs) { + return lhs->getLedgerManager() + .getLastClosedLedgerNum() < + rhs->getLedgerManager() + .getLastClosedLedgerNum(); + }) + ->get() + ->getLedgerManager() + .getLastClosedLedgerNum(); + }; + + auto const startLedger = minLedger(); + auto targetLedger = startLedger + LEDGERS_TO_RUN; + auto const startTime = nodes.front()->getClock().now(); + + simulation->crankUntil( + [&]() { return simulation->haveAllExternalized(targetLedger, 1); }, + 10 * (LEDGERS_TO_RUN + 1) * expectedClose, true); + + RunResult result; + result.elapsed = std::chrono::duration_cast( + nodes.front()->getClock().now() - startTime); + + for (size_t i = 0; i < nodes.size(); ++i) + { + auto const delta = fallbackCount(nodes[i]) - fallbackCounts.at(i); + result.totalFallbacks += delta; + + auto const isDriftedNode = + triggerClockOffset != std::chrono::milliseconds::zero() && + nodes[i]->getConfig() + .ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING == + triggerClockOffset; + if (isDriftedNode) + { + result.driftedNodeFallbacks = delta; + } + else + { + result.otherNodeFallbacks += delta; + } + + auto const& driver = + dynamic_cast(nodes[i]->getHerder()) + .getHerderSCPDriver(); + for (uint32_t ledger = startLedger + 1; ledger <= targetLedger; + ++ledger) + { + auto timeouts = driver.getNominationTimeouts(ledger); + result.sawNominationTimeout = + result.sawNominationTimeout || + (timeouts.has_value() && timeouts.value() > 0); + } + } + + return result; + }; + + // New timer is faster without drift. + { + auto const nominationDelay = std::chrono::milliseconds(1000); + auto const oldTimer = runSimulation(false, nominationDelay); + auto const newTimer = runSimulation(true, nominationDelay); + + REQUIRE(newTimer.elapsed < oldTimer.elapsed); + REQUIRE(newTimer.totalFallbacks == 0); + } + + // One node drifting ahead falls back. + { + auto const nodeAhead = + runSimulation(true, std::chrono::milliseconds::zero(), driftOffset); + REQUIRE(nodeAhead.driftedNodeFallbacks >= MIN_DRIFT_FALLBACKS); + // Fallback must be specific to the drifted node: in-sync nodes should + // stay on the network-close-time anchor. + REQUIRE(nodeAhead.otherNodeFallbacks == 0); + } + + // One node drifting behind falls back. + { + auto const nodeBehind = runSimulation( + true, std::chrono::milliseconds::zero(), -driftOffset); + REQUIRE(nodeBehind.driftedNodeFallbacks >= MIN_DRIFT_FALLBACKS); + REQUIRE(nodeBehind.otherNodeFallbacks == 0); + } + + // Long nomination does not cause timer fallback + { + auto const nominationDelay = std::chrono::milliseconds(5000); + auto const slowNomination = runSimulation(true, nominationDelay); + REQUIRE(slowNomination.sawNominationTimeout); + REQUIRE(slowNomination.totalFallbacks == 0); + } +} diff --git a/src/main/ApplicationImpl.cpp b/src/main/ApplicationImpl.cpp index 815ce2f21..8a4fe2c4e 100644 --- a/src/main/ApplicationImpl.cpp +++ b/src/main/ApplicationImpl.cpp @@ -148,6 +148,17 @@ ApplicationImpl::ApplicationImpl(VirtualClock& clock, Config const& cfg) homeStr += mConfig.NODE_HOME_DOMAIN; TracyAppInfo(homeStr.c_str(), homeStr.size()); +#ifdef BUILD_TESTS + if (mConfig.ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING != + std::chrono::milliseconds::zero()) + { + mVirtualClock.setSystemTimeOffset( + std::chrono::duration_cast( + mConfig.ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING)); + mStartedOn = clock.system_now(); + } +#endif + mStopSignals.async_wait([this](asio::error_code const& ec, int sig) { if (!ec) { diff --git a/src/main/Config.cpp b/src/main/Config.cpp index 97b136aee..4796588ba 100644 --- a/src/main/Config.cpp +++ b/src/main/Config.cpp @@ -68,6 +68,7 @@ static std::unordered_set const TESTING_ONLY_OPTIONS = { "ARTIFICIALLY_SET_SURVEY_PHASE_DURATION_FOR_TESTING", "ARTIFICIALLY_DELAY_BUCKET_APPLICATION_FOR_TESTING", "ARTIFICIALLY_SLEEP_MAIN_THREAD_FOR_TESTING", + "ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING", "ARTIFICIALLY_SKIP_CONNECTION_ADJUSTMENT_FOR_TESTING", "ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING", "SKIP_HIGH_CRITICAL_VALIDATOR_CHECKS_FOR_TESTING", @@ -361,6 +362,10 @@ Config::Config() : NODE_SEED(SecretKey::random()) CATCHUP_SKIP_KNOWN_RESULTS_FOR_TESTING = false; MODE_USES_IN_MEMORY_LEDGER = false; SKIP_HIGH_CRITICAL_VALIDATOR_CHECKS_FOR_TESTING = false; + ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING = + std::chrono::milliseconds::zero(); + ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING = + std::chrono::milliseconds::zero(); #endif #ifdef BEST_OFFER_DEBUGGING @@ -1928,6 +1933,18 @@ Config::processConfig(std::shared_ptr t) ARTIFICIALLY_SLEEP_MAIN_THREAD_FOR_TESTING = std::chrono::microseconds(readInt(item)); }}, +#ifdef BUILD_TESTS + {"ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING", + [&]() { + ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING = + std::chrono::milliseconds(readInt(item)); + }}, + {"ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING", + [&]() { + ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING = + std::chrono::milliseconds(readInt(item)); + }}, +#endif {"MAX_DEX_TX_OPERATIONS_IN_TX_SET", [&]() { auto value = readInt(item); diff --git a/src/main/Config.h b/src/main/Config.h index 329a87184..18183fd1d 100644 --- a/src/main/Config.h +++ b/src/main/Config.h @@ -925,6 +925,14 @@ class Config : public std::enable_shared_from_this // only flag. bool TESTING_NOMINATE_RANDOM_VALUES; + // Injects a signed wall-clock offset into the node's system clock for + // testing. Expressed in milliseconds. + std::chrono::milliseconds ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING; + + // Delay emission of updated nomination messages for testing nomination + // timeout behavior. Expressed in milliseconds. + std::chrono::milliseconds ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING; + // Set QUORUM_SET using automatic quorum set configuration based on // `validators`. void diff --git a/src/scp/NominationProtocol.cpp b/src/scp/NominationProtocol.cpp index 88fc170ed..9780afc7b 100644 --- a/src/scp/NominationProtocol.cpp +++ b/src/scp/NominationProtocol.cpp @@ -676,6 +676,31 @@ NominationProtocol::nominate(ValueWrapperPtr value, Value const& previousValue, slot->nominate(value, previousValue, true); }); +#ifdef BUILD_TESTS + // If a nomination-emit delay is configured, defer the emit by + // arming NOMINATION_EMIT_TIMER instead of broadcasting now. A subsequent + // nominate() will replace the timer so the eventual emit reflects the + // latest state. Skip the deferral once the SCP nomination timeout has + // fired (`timedout`) so retries don't compound latency, and cancel any + // still-armed timer in that case. + if (updated) + { + auto delay = mSlot.getSCPDriver().getNominationEmitDelayForTesting(); + if (delay > std::chrono::milliseconds::zero() && !timedout) + { + mSlot.getSCPDriver().setupTimer( + mSlot.getSlotIndex(), Slot::NOMINATION_EMIT_TIMER, delay, + [slot, this]() { emitNomination(); }); + return updated; + } + if (delay > std::chrono::milliseconds::zero()) + { + mSlot.getSCPDriver().stopTimer(mSlot.getSlotIndex(), + Slot::NOMINATION_EMIT_TIMER); + } + } +#endif + if (updated) { emitNomination(); diff --git a/src/scp/SCPDriver.h b/src/scp/SCPDriver.h index d97ab4221..659f2dd18 100644 --- a/src/scp/SCPDriver.h +++ b/src/scp/SCPDriver.h @@ -253,6 +253,14 @@ class SCPDriver virtual std::chrono::milliseconds computeTimeout(uint32 roundNumber, bool isNomination) = 0; +#ifdef BUILD_TESTS + virtual std::chrono::milliseconds + getNominationEmitDelayForTesting() const + { + return std::chrono::milliseconds::zero(); + } +#endif + // returns the weight of the node within the qset normalized between // 0-UINT64_MAX. If `nodeID` is the local node, then set `isLocalNode` to // `true`. diff --git a/src/scp/Slot.h b/src/scp/Slot.h index 20eca36d9..ecf1e8fee 100644 --- a/src/scp/Slot.h +++ b/src/scp/Slot.h @@ -197,7 +197,13 @@ class Slot : public std::enable_shared_from_this enum timerIDs { NOMINATION_TIMER = 0, - BALLOT_PROTOCOL_TIMER = 1 + BALLOT_PROTOCOL_TIMER = 1, +#ifdef BUILD_TESTS + // Test-only: defers broadcast of a nomination vote by + // ARTIFICIALLY_DELAY_NOMINATION_EMIT_FOR_TESTING. Used to simulate + // slow nominators without breaking the protocol. + NOMINATION_EMIT_TIMER = 2, +#endif }; // The number of times the timer has to expire before we consider the node diff --git a/src/test/test.cpp b/src/test/test.cpp index 4efae06e7..6a9852695 100644 --- a/src/test/test.cpp +++ b/src/test/test.cpp @@ -566,6 +566,7 @@ getTestConfig(int instanceNumber, Config::TestDbMode mode) thisConfig.MANUAL_CLOSE = true; thisConfig.TEST_CASES_ENABLED = true; + thisConfig.EXPERIMENTAL_TRIGGER_TIMER = true; thisConfig.PEER_PORT = static_cast(DEFAULT_PEER_PORT + instanceNumber * 2); diff --git a/src/util/Timer.cpp b/src/util/Timer.cpp index 2d776facc..3a90b6a74 100644 --- a/src/util/Timer.cpp +++ b/src/util/Timer.cpp @@ -43,21 +43,33 @@ VirtualClock::now() const noexcept } } -VirtualClock::system_time_point -VirtualClock::system_now() const noexcept +std::pair +VirtualClock::actual_and_fake_system_now() const noexcept { + system_time_point actual; if (mMode == REAL_TIME) { - return std::chrono::system_clock::now(); + actual = std::chrono::system_clock::now(); } else { LOCK_GUARD(mVirtualNowMutex, lock); auto offset = mVirtualNow.time_since_epoch(); - return std::chrono::system_clock::time_point( + actual = std::chrono::system_clock::time_point( std::chrono::duration_cast< std::chrono::system_clock::time_point::duration>(offset)); } + auto drifted = actual; +#ifdef BUILD_TESTS + drifted += mSystemTimeOffset.load(std::memory_order_relaxed); +#endif + return {actual, drifted}; +} + +VirtualClock::system_time_point +VirtualClock::system_now() const noexcept +{ + return actual_and_fake_system_now().second; } void @@ -288,6 +300,14 @@ VirtualClock::setCurrentVirtualTime(system_time_point t) setCurrentVirtualTime(time_point(offset)); } +#ifdef BUILD_TESTS +void +VirtualClock::setSystemTimeOffset(std::chrono::microseconds offset) +{ + mSystemTimeOffset.store(offset, std::memory_order_relaxed); +} +#endif + void VirtualClock::sleep_for(std::chrono::microseconds us) { diff --git a/src/util/Timer.h b/src/util/Timer.h index 3ae767591..089b486fe 100644 --- a/src/util/Timer.h +++ b/src/util/Timer.h @@ -12,11 +12,13 @@ #include "util/Scheduler.h" #include "util/ThreadAnnotations.h" +#include #include #include #include #include #include +#include namespace stellar { @@ -209,6 +211,15 @@ class VirtualClock RealSteadyTimer mRealTimer; mutable ANNOTATED_MUTEX(mVirtualNowMutex); +#ifdef BUILD_TESTS + // Offset applied to system_now() to simulate wall-clock drift without + // affecting steady_clock scheduling. Positive values make system_now() + // return a time in the future; negative values make it return a time in + // the past. + std::atomic mSystemTimeOffset{ + std::chrono::microseconds{0}}; +#endif + public: // A VirtualClock is instantiated in either real or virtual mode. In real // mode, crank() sleeps until the next event, either timer or IO; in virtual @@ -231,6 +242,11 @@ class VirtualClock // someone has set the time forward using setCurrentVirtualTime below). system_time_point system_now() const noexcept; + // Returns both the unshifted and drifted system time samples captured from + // the same underlying timestamp source. + std::pair + actual_and_fake_system_now() const noexcept; + void enqueue(std::shared_ptr ve); void flushCancelledEvents(); bool cancelAllEvents(); @@ -254,6 +270,12 @@ class VirtualClock size_t getActionQueueSize() const; bool actionQueueIsOverloaded() const; Scheduler::ActionType currentSchedulerActionType() const; + +#ifdef BUILD_TESTS + // Inject a wall-clock offset into system_now() to simulate clock drift. + // Does not affect steady_clock (now()) or event scheduling. + void setSystemTimeOffset(std::chrono::microseconds offset); +#endif }; class VirtualClockEvent : public NonMovableOrCopyable From ad88f371d62174be61aa3a914b85fec2facfb976 Mon Sep 17 00:00:00 2001 From: Garand Tyson Date: Thu, 11 Jun 2026 10:31:40 -0700 Subject: [PATCH 3/3] Add NTP probe check --- Cargo.lock | 7 ++ docs/metrics.md | 2 + docs/stellar-core_example.cfg | 10 ++ src/main/ApplicationImpl.cpp | 13 +++ src/main/ApplicationImpl.h | 2 + src/main/Config.cpp | 9 ++ src/main/Config.h | 11 ++ src/main/NtpProbe.cpp | 182 +++++++++++++++++++++++++++++++ src/main/NtpProbe.h | 65 +++++++++++ src/main/test/SelfCheckTests.cpp | 20 ++++ src/rust/Cargo.toml | 5 + src/rust/src/bridge.rs | 16 +++ src/rust/src/lib.rs | 1 + src/rust/src/ntp.rs | 46 ++++++++ src/test/test.cpp | 3 + src/util/Timer.cpp | 23 ++-- src/util/Timer.h | 6 - 17 files changed, 401 insertions(+), 20 deletions(-) create mode 100644 src/main/NtpProbe.cpp create mode 100644 src/main/NtpProbe.h create mode 100644 src/rust/src/ntp.rs diff --git a/Cargo.lock b/Cargo.lock index c0a11371f..45ecdbd1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1231,6 +1231,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "rsntp" +version = "4.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "474edb0b3a6ccf4fd97360150a27e50f2a4c6ca67870fde22e288269d8b9f38d" + [[package]] name = "rustc-demangle" version = "0.1.26" @@ -1973,6 +1979,7 @@ dependencies = [ "itertools 0.10.5", "log", "rand", + "rsntp", "rustc-simple-version", "soroban-env-host 21.2.2", "soroban-env-host 22.0.0", diff --git a/docs/metrics.md b/docs/metrics.md index 448b3c8c4..9c5270b66 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -55,6 +55,8 @@ bucketlistDB-cache.hit | meter | number of cache hits on bucketlistDB-cache.miss | meter | number of cache misses on Live BucketList Disk random eviction cache bucketlistDB.cache.entries | counter | number of entries currently in Live BucketList index cache bucketlistDB.cache.bytes | counter | estimated size in bytes of entries in Live BucketList index cache +clock.ntp.offset-ms | counter | last measured offset (ms) between the local clock and the configured NTP server (signed; positive means the local clock is behind true time). Requires NTP_DRIFT_CHECK_SERVER +clock.ntp.probe-failure | meter | NTP drift-check probe failed to reach NTP server crypto.verify.hit | meter | number of signature cache hits crypto.verify.miss | meter | number of signature cache misses crypto.verify.total | meter | sum of both hits and misses diff --git a/docs/stellar-core_example.cfg b/docs/stellar-core_example.cfg index 62b77032f..3c7ae13df 100644 --- a/docs/stellar-core_example.cfg +++ b/docs/stellar-core_example.cfg @@ -290,6 +290,16 @@ EXPERIMENTAL_PARALLEL_TX_SET_DOWNLOAD = false # also enabled. BACKGROUND_TX_SIG_VERIFICATION = true +# NTP_DRIFT_CHECK_SERVER (string) default "pool.ntp.org" +# Hostname of an NTP server that the node periodically queries (about every ten +# minutes) to detect and warn about drift of the local clock. This is detection +# only: stellar-core never adjusts the system clock -- keep an NTP daemon +# running for that. +# The check only runs on validators (NODE_IS_VALIDATOR=true), since clock drift +# only hurts nodes that participate in consensus. +# Set to the empty string ("") to disable the check entirely. +NTP_DRIFT_CHECK_SERVER = "pool.ntp.org" + # PREFERRED_PEERS (list of strings) default is empty # These are IP:port strings that this server will add to its DB of peers. # This server will try to always stay connected to the other peers on this list. diff --git a/src/main/ApplicationImpl.cpp b/src/main/ApplicationImpl.cpp index 8a4fe2c4e..c8acbfb0d 100644 --- a/src/main/ApplicationImpl.cpp +++ b/src/main/ApplicationImpl.cpp @@ -42,6 +42,7 @@ #include "main/AppConnector.h" #include "main/ApplicationUtils.h" #include "main/CommandHandler.h" +#include "main/NtpProbe.h" #include "main/StellarCoreVersion.h" #include "medida/counter.h" #include "medida/meter.h" @@ -790,6 +791,14 @@ ApplicationImpl::startServices() { mHerder->setUpgrades(mConfig); } + + // Start NTP-based clock-drift detection + if (mConfig.ntpDriftCheckEnabled() && + mVirtualClock.getMode() == VirtualClock::REAL_TIME) + { + mNtpProbe = NtpProbe::create(*this); + mNtpProbe->start(); + } } void @@ -858,6 +867,10 @@ ApplicationImpl::idempotentShutdown(bool forgetBuckets) mOverlayManager->shutdown(); } mSelfCheckTimer.cancel(); + if (mNtpProbe) + { + mNtpProbe->shutdown(); + } shutdownWorkScheduler(); if (mProcessManager) { diff --git a/src/main/ApplicationImpl.h b/src/main/ApplicationImpl.h index e7257b3c1..4deb340d4 100644 --- a/src/main/ApplicationImpl.h +++ b/src/main/ApplicationImpl.h @@ -37,6 +37,7 @@ class InMemoryLedgerTxn; class InMemoryLedgerTxnRoot; class LoadGenerator; class AppConnector; +class NtpProbe; class ApplicationImpl : public Application { @@ -190,6 +191,7 @@ class ApplicationImpl : public Application std::unique_ptr mInvariantManager; std::shared_ptr mProcessManager; std::shared_ptr mWorkScheduler; + std::shared_ptr mNtpProbe; std::unique_ptr mPersistentState; std::unique_ptr mBanManager; std::unique_ptr mBannedAccountsPersistor; diff --git a/src/main/Config.cpp b/src/main/Config.cpp index 4796588ba..7f10981d3 100644 --- a/src/main/Config.cpp +++ b/src/main/Config.cpp @@ -178,6 +178,7 @@ Config::Config() : NODE_SEED(SecretKey::random()) DISABLE_TX_META_FOR_TESTING = false; BACKGROUND_TX_SIG_VERIFICATION = true; EXPERIMENTAL_TRIGGER_TIMER = false; + NTP_DRIFT_CHECK_SERVER = "pool.ntp.org"; BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT = 14; // 2^14 == 16 kb BUCKETLIST_DB_INDEX_CUTOFF = 20; // 20 mb BUCKETLIST_DB_MEMORY_FOR_CACHING = 0; @@ -1217,6 +1218,8 @@ Config::processConfig(std::shared_ptr t) [&]() { BACKGROUND_TX_SIG_VERIFICATION = readBool(item); }}, {"EXPERIMENTAL_TRIGGER_TIMER", [&]() { EXPERIMENTAL_TRIGGER_TIMER = readBool(item); }}, + {"NTP_DRIFT_CHECK_SERVER", + [&]() { NTP_DRIFT_CHECK_SERVER = readString(item); }}, {"ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING", [&]() { ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = @@ -2620,6 +2623,12 @@ Config::parallelLedgerClose() const return PARALLEL_LEDGER_APPLY && DATABASE.value != "sqlite3://:memory:"; } +bool +Config::ntpDriftCheckEnabled() const +{ + return NODE_IS_VALIDATOR && !NTP_DRIFT_CHECK_SERVER.empty(); +} + void Config::setNoListen() { diff --git a/src/main/Config.h b/src/main/Config.h index 18183fd1d..f3dd0405b 100644 --- a/src/main/Config.h +++ b/src/main/Config.h @@ -546,6 +546,12 @@ class Config : public std::enable_shared_from_this // calculation instead of prepare start time. bool EXPERIMENTAL_TRIGGER_TIMER; + // Hostname of an NTP server to periodically query in order to detect drift + // of this node's local clock. This is detection only: core never adjusts + // the system clock. Defaults to a pool.ntp.org; set to the empty string to + // disable the check entirely. + std::string NTP_DRIFT_CHECK_SERVER; + // When set to true, BucketListDB indexes are persisted on-disk so that the // BucketList does not need to be reindexed on startup. Defaults to true. // This should only be set to false for testing purposes @@ -985,6 +991,11 @@ class Config : public std::enable_shared_from_this bool allBucketsInMemory() const; void logBasicInfo() const; bool parallelLedgerClose() const; + + // Returns true if this node should run the NTP clock-drift check: an NTP + // server is configured and the node is a validator (clock drift only hurts + // nodes that participate in consensus). + bool ntpDriftCheckEnabled() const; void setNoListen(); void setNoPublish(); diff --git a/src/main/NtpProbe.cpp b/src/main/NtpProbe.cpp new file mode 100644 index 000000000..a94cbdee6 --- /dev/null +++ b/src/main/NtpProbe.cpp @@ -0,0 +1,182 @@ +// Copyright 2026 Stellar Development Foundation and contributors. Licensed +// under the Apache License, Version 2.0. See the COPYING file at the root +// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 + +#include "main/NtpProbe.h" + +#include "main/Application.h" +#include "main/Config.h" +#include "main/ErrorMessages.h" +#include "rust/RustBridge.h" +#include "util/Logging.h" +#include "util/MetricsRegistry.h" + +#include "medida/counter.h" +#include "medida/meter.h" + +#include + +namespace stellar +{ + +namespace +{ +constexpr auto NTP_PROBE_INTERVAL = std::chrono::minutes(10); +// First probe fires a little after startup rather than immediately, to avoid +// racing the rest of node bring-up. +constexpr auto NTP_PROBE_INITIAL_DELAY = std::chrono::seconds(30); +constexpr uint64_t NTP_PROBE_TIMEOUT_SECONDS = 5; +constexpr auto NTP_OFFSET_WARN_THRESHOLD = std::chrono::milliseconds(500); + +// Make sure we don't crash if our probe fails, since it's just a warning +// diagnostic. Any caught exception is counted as a probe failure and logged. +template +void +guardProbeStep(char const* step, medida::Meter& failureMeter, F&& f) +{ + try + { + f(); + } + catch (std::exception const& e) + { + failureMeter.Mark(); + CLOG_WARNING(Herder, "NTP probe step '{}' failed: {}", step, e.what()); + } + catch (...) + { + failureMeter.Mark(); + CLOG_WARNING(Herder, + "NTP probe step '{}' failed with unknown exception", step); + } +} +} + +std::shared_ptr +NtpProbe::create(Application& app) +{ + return std::shared_ptr(new NtpProbe(app)); +} + +NtpProbe::NtpProbe(Application& app) + : mApp(app) + , mServer(app.getConfig().NTP_DRIFT_CHECK_SERVER) + , mProbeTimer(app) + , mOffsetMs(app.getMetrics().NewCounter({"clock", "ntp", "offset-ms"})) + , mProbeFailure( + app.getMetrics().NewMeter({"clock", "ntp", "probe-failure"}, "probe")) +{ +} + +void +NtpProbe::start() +{ + if (mServer.empty()) + { + return; + } + CLOG_INFO( + Herder, + "Clock-drift check enabled, querying NTP server '{}' every {} " + "minutes (detection only; does not adjust the clock)", + mServer, + std::chrono::duration_cast(NTP_PROBE_INTERVAL) + .count()); + scheduleNext(NTP_PROBE_INITIAL_DELAY); +} + +void +NtpProbe::shutdown() +{ + mShutdown = true; + mProbeTimer.cancel(); +} + +void +NtpProbe::scheduleNext(std::chrono::seconds delay) +{ + if (mShutdown) + { + return; + } + mProbeTimer.expires_from_now(delay); + auto self = shared_from_this(); + mProbeTimer.async_wait([self](asio::error_code const& ec) { + if (!ec && !self->mShutdown) + { + guardProbeStep("schedule", self->mProbeFailure, + [&]() { self->beginProbe(); }); + } + }); +} + +void +NtpProbe::beginProbe() +{ + if (mShutdown) + { + return; + } + + // The SNTP query blocks (network round-trip + timeout), so run it on a + // background thread and hand the result back to the main thread. + auto self = shared_from_this(); + std::string const server = mServer; + mApp.postOnBackgroundThread( + [self, server]() { + guardProbeStep("query", self->mProbeFailure, [&]() { + auto const result = rust_bridge::query_ntp_offset( + server, NTP_PROBE_TIMEOUT_SECONDS); + self->mApp.postOnMainThread( + [self, succeeded = result.succeeded, + offsetMs = result.offset_millis]() { + guardProbeStep("result", self->mProbeFailure, [&]() { + self->onResult(succeeded, offsetMs); + }); + }, + "NtpProbe::onResult"); + }); + }, + "NtpProbe::query"); +} + +void +NtpProbe::onResult(bool succeeded, int64_t offsetMs) +{ + if (mShutdown) + { + return; + } + + if (!succeeded) + { + mProbeFailure.Mark(); + CLOG_INFO(Herder, + "NTP probe to '{}' produced no usable measurement (server " + "unreachable, timed out, or egress blocked)", + mServer); + } + else + { + mOffsetMs.set_count(offsetMs); + if (std::chrono::abs(std::chrono::milliseconds(offsetMs)) > + NTP_OFFSET_WARN_THRESHOLD) + { + CLOG_WARNING(Herder, POSSIBLY_BAD_LOCAL_CLOCK); + CLOG_WARNING(Herder, + "Local clock is off by {} ms versus NTP server '{}' " + "(positive means the local clock is behind). This can " + "degrade ledger close performance; ensure NTP is " + "running and healthy.", + offsetMs, mServer); + } + else + { + CLOG_DEBUG(Herder, "NTP offset versus '{}' is {} ms", mServer, + offsetMs); + } + } + + scheduleNext(NTP_PROBE_INTERVAL); +} +} diff --git a/src/main/NtpProbe.h b/src/main/NtpProbe.h new file mode 100644 index 000000000..394b39b3f --- /dev/null +++ b/src/main/NtpProbe.h @@ -0,0 +1,65 @@ +#pragma once + +// Copyright 2026 Stellar Development Foundation and contributors. Licensed +// under the Apache License, Version 2.0. See the COPYING file at the root +// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 + +#include "util/Timer.h" + +#include +#include +#include +#include +#include + +namespace medida +{ +class Counter; +class Meter; +} + +namespace stellar +{ + +class Application; + +// Periodically queries an NTP server to estimate the offset between this node's +// wall clock and true time, and warns when the local clock looks significantly +// skewed. The actual SNTP exchange is delegated to a small Rust crate (rsntp) +// via the Rust bridge; the query runs on a background thread (it blocks) and +// only happens about every ten minutes. +class NtpProbe : public std::enable_shared_from_this +{ + public: + static std::shared_ptr create(Application& app); + + // Schedule the first probe. Subsequent probes reschedule themselves. + void start(); + + // Stop rescheduling. Any in-flight background query simply discards its + // result. + void shutdown(); + + private: + explicit NtpProbe(Application& app); + + void scheduleNext(std::chrono::seconds delay); + // Kick off a query on a background thread (the SNTP call blocks). + void beginProbe(); + // Handle a query result back on the main thread. + void onResult(bool succeeded, int64_t offsetMs); + + Application& mApp; + std::string const mServer; + VirtualTimer mProbeTimer; + + // Signed offset to add to the local clock to match true time, in + // milliseconds (positive => local clock is behind true time). + medida::Counter& mOffsetMs; + // Marked whenever a probe fails to produce a usable measurement (DNS + // failure, timeout, unreachable server). + medida::Meter& mProbeFailure; + + std::atomic mShutdown{false}; +}; +} diff --git a/src/main/test/SelfCheckTests.cpp b/src/main/test/SelfCheckTests.cpp index 7c17ee40b..357117faa 100644 --- a/src/main/test/SelfCheckTests.cpp +++ b/src/main/test/SelfCheckTests.cpp @@ -7,6 +7,7 @@ #include "main/Application.h" #include "main/ApplicationUtils.h" #include "main/Config.h" +#include "rust/RustBridge.h" #include "test/Catch2.h" #include "test/TestUtils.h" #include "test/test.h" @@ -64,3 +65,22 @@ TEST_CASE("online self-check runs on a schedule", "[selfcheck]") } REQUIRE(meter.count() == n); } + +// Smoke test for the NTP drift-check. Note this queries the actual default NTP +// server so it's hidden. +TEST_CASE("ntp probe smoke check", "[ntp][!hide]") +{ + std::string const server = "pool.ntp.org"; + auto const result = rust_bridge::query_ntp_offset(server, /*timeout*/ 5); + + REQUIRE(result.succeeded); + + LOG_INFO(DEFAULT_LOG, "Measured NTP offset versus {}: {} ms", server, + result.offset_millis); + + // Sanity check that we're within a very generous 5 minute bound of true + // time. + int64_t const offsetMs = result.offset_millis; + int64_t const absMs = offsetMs < 0 ? -offsetMs : offsetMs; + REQUIRE(absMs < 5LL * 60LL * 1000LL); +} diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 6042a462e..b67966797 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -24,6 +24,11 @@ backtrace = { version = "=0.3.76", features = [ "cpp_demangle" ] } # ed25519-dalek for faster signature verification ed25519-dalek = "2.1.1" +# rsntp: small blocking SNTP client used only to detect (and warn about) local +# clock drift. default-features disabled so we get the synchronous client +# without pulling in tokio/chrono/time. +rsntp = { version = "=4.1.1", default-features = false } + # NB: tracy is quite particular about version compatibility. There must only be # one _implementation_ of the C++ tracy symbols in the final binary (brought in # by the tracy-client-sys crate) and the version and all feature flags must diff --git a/src/rust/src/bridge.rs b/src/rust/src/bridge.rs index 27a7fb96f..9e694ed6d 100644 --- a/src/rust/src/bridge.rs +++ b/src/rust/src/bridge.rs @@ -21,6 +21,16 @@ pub(crate) mod rust_bridge { data: Vec, } + // Result of an NTP clock-offset probe (see ntp.rs). + struct NtpProbeResult { + // False if the query failed (DNS failure, timeout, unreachable + // server). When false, offset_millis is meaningless. + succeeded: bool, + // Signed offset to add to the local clock to match the NTP server, in + // milliseconds (positive => local clock is behind true time). + offset_millis: i64, + } + // We return these from get_xdr_hashes below. struct XDRFileHash { file: String, @@ -188,6 +198,11 @@ pub(crate) mod rust_bridge { extern "Rust" { fn to_base64(b: &CxxVector, mut s: Pin<&mut CxxString>); fn from_base64(s: &CxxString, mut b: Pin<&mut CxxVector>); + + // Query an NTP server for the local clock offset. Blocking; intended to + // be called from a background thread. Never panics: any failure is + // reported via NtpProbeResult::succeeded == false. + fn query_ntp_offset(server: &CxxString, timeout_seconds: u64) -> NtpProbeResult; fn check_sensible_soroban_config_for_protocol(core_max_proto: u32); // Ed25519 signature verification using dalek library. @@ -416,6 +431,7 @@ use crate::common::*; use crate::ed25519_verify::*; use crate::i128::*; use crate::log::*; +use crate::ntp::*; use crate::quorum_checker::*; use crate::soroban_fuzz::*; use crate::soroban_invoke::*; diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index d6c3e587a..5bcb10193 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -32,6 +32,7 @@ mod b64; mod ed25519_verify; mod i128; mod log; +mod ntp; mod quorum_checker; mod soroban_invoke; mod soroban_module_cache; diff --git a/src/rust/src/ntp.rs b/src/rust/src/ntp.rs new file mode 100644 index 000000000..7e1f6bbc7 --- /dev/null +++ b/src/rust/src/ntp.rs @@ -0,0 +1,46 @@ +// Copyright 2026 Stellar Development Foundation and contributors. Licensed +// under the Apache License, Version 2.0. See the COPYING file at the root +// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 + +use crate::rust_bridge::NtpProbeResult; +use cxx::CxxString; +use rsntp::LeapIndicator; +use rsntp::SntpClient; +use std::time::Duration; + +// Query `server` (an NTP host name, e.g. "pool.ntp.org") for the offset between +// the local clock and true time. Blocking; the caller runs this on a background +// thread. Never panics: any failure (bad UTF-8, DNS failure, timeout, +// unreachable server) is reported as `succeeded: false`. +// +// This is detection only -- it does not adjust the local clock. +pub(crate) fn query_ntp_offset(server: &CxxString, timeout_seconds: u64) -> NtpProbeResult { + let failure = NtpProbeResult { + succeeded: false, + offset_millis: 0, + }; + + let host = match server.to_str() { + Ok(h) => h, + Err(_) => return failure, + }; + + let mut client = SntpClient::new(); + client.set_timeout(Duration::from_secs(timeout_seconds)); + + match client.synchronize(host) { + Ok(result) + if (1..=15).contains(&result.stratum()) + && result.leap_indicator() != LeapIndicator::AlarmCondition => + { + // as_secs_f64 keeps the sign: positive means the local clock is + // behind the server (i.e. add this to the local clock to match). + let offset_secs = result.clock_offset().as_secs_f64(); + NtpProbeResult { + succeeded: true, + offset_millis: (offset_secs * 1000.0) as i64, + } + } + Ok(_) | Err(_) => failure, + } +} diff --git a/src/test/test.cpp b/src/test/test.cpp index 6a9852695..e87ac467f 100644 --- a/src/test/test.cpp +++ b/src/test/test.cpp @@ -568,6 +568,9 @@ getTestConfig(int instanceNumber, Config::TestDbMode mode) thisConfig.TEST_CASES_ENABLED = true; thisConfig.EXPERIMENTAL_TRIGGER_TIMER = true; + // Never perform real NTP network I/O from the test suite. + thisConfig.NTP_DRIFT_CHECK_SERVER = ""; + thisConfig.PEER_PORT = static_cast(DEFAULT_PEER_PORT + instanceNumber * 2); thisConfig.HTTP_PORT = static_cast( diff --git a/src/util/Timer.cpp b/src/util/Timer.cpp index 3a90b6a74..c08c62d81 100644 --- a/src/util/Timer.cpp +++ b/src/util/Timer.cpp @@ -43,33 +43,28 @@ VirtualClock::now() const noexcept } } -std::pair -VirtualClock::actual_and_fake_system_now() const noexcept +VirtualClock::system_time_point +VirtualClock::system_now() const noexcept { - system_time_point actual; + system_time_point t; if (mMode == REAL_TIME) { - actual = std::chrono::system_clock::now(); + t = std::chrono::system_clock::now(); } else { LOCK_GUARD(mVirtualNowMutex, lock); auto offset = mVirtualNow.time_since_epoch(); - actual = std::chrono::system_clock::time_point( + t = std::chrono::system_clock::time_point( std::chrono::duration_cast< std::chrono::system_clock::time_point::duration>(offset)); } - auto drifted = actual; #ifdef BUILD_TESTS - drifted += mSystemTimeOffset.load(std::memory_order_relaxed); + // Simulate wall-clock drift (test-only); does not affect steady_clock + // scheduling. + t += mSystemTimeOffset.load(std::memory_order_relaxed); #endif - return {actual, drifted}; -} - -VirtualClock::system_time_point -VirtualClock::system_now() const noexcept -{ - return actual_and_fake_system_now().second; + return t; } void diff --git a/src/util/Timer.h b/src/util/Timer.h index 089b486fe..b079af5a7 100644 --- a/src/util/Timer.h +++ b/src/util/Timer.h @@ -18,7 +18,6 @@ #include #include #include -#include namespace stellar { @@ -242,11 +241,6 @@ class VirtualClock // someone has set the time forward using setCurrentVirtualTime below). system_time_point system_now() const noexcept; - // Returns both the unshifted and drifted system time samples captured from - // the same underlying timestamp source. - std::pair - actual_and_fake_system_now() const noexcept; - void enqueue(std::shared_ptr ve); void flushCancelledEvents(); bool cancelAllEvents();