-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Speed up initial in-memory Soroban state population #5252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
e6d4a41
9b5bee7
c442d07
6309ae3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -708,6 +708,203 @@ SearchableLiveBucketListSnapshot::scanForEntriesOfType( | |
| loopAllBuckets(scanBucket); | ||
| } | ||
|
|
||
| namespace | ||
| { | ||
| // Iterator for `BucketEntry`s of a given type in a bucket. Expects the stream | ||
| // to be positioned at the start of the type range. This is basically the same | ||
| // as SearchableLiveBucketListSnapshot::scanForEntriesOfType's scanBucket except | ||
| // with more control over when iteration happens. | ||
| class BucketEntryIterator /* forward-only cursor: call advance() first, then read getEntry()/getKey() */ | ||
| { | ||
| BucketEntry mEntry; /* entry most recently read from mStream by advance() */ | ||
| LedgerKey mKey; /* ledger key extracted from mEntry by advance() */ | ||
| XDRInputFileStream& mStream; /* held by reference, so the stream must outlive this iterator */ | ||
| LedgerEntryType const mType; /* the only entry type for which advance() returns true */ | ||
|
|
||
| public: | ||
| BucketEntryIterator(XDRInputFileStream& stream, LedgerEntryType type) /* stores its arguments only; it neither reads nor seeks the stream */ | ||
| : mStream(stream), mType(type) | ||
| { | ||
| } | ||
|
|
||
| BucketEntry const& | ||
| getEntry() const /* meaningful only after advance() has returned true */ | ||
| { | ||
| return mEntry; | ||
| } | ||
| LedgerKey const& | ||
| getKey() const /* meaningful only after advance() has returned true */ | ||
| { | ||
| return mKey; | ||
| } | ||
| bool advance(); /* moves to the next entry of type mType; false when there is none */ | ||
| }; | ||
|
|
||
| bool | ||
| BucketEntryIterator::advance() /* reads forward to the next entry whose key type is mType; returns whether one was found */ | ||
| { | ||
| while (mStream.readOne(mEntry)) /* the loop also ends once readOne() returns false */ | ||
| { | ||
| if (isBucketMetaEntry<LiveBucket>(mEntry)) | ||
| { | ||
| continue; /* bucket metadata entries are skipped */ | ||
| } | ||
| mKey = getBucketLedgerKey(mEntry); | ||
| if (mKey.type() > mType) | ||
| { | ||
| break; /* read past the requested type, so stop; NOTE(review): relies on entries being ordered by type - confirm */ | ||
| } | ||
|
|
||
| if (mKey.type() == mType) /* keys of a smaller type fall through and are skipped by the next iteration */ | ||
| { | ||
| return true; /* mEntry and mKey now describe a matching entry */ | ||
| } | ||
| } | ||
| return false; /* mEntry and mKey may hold a non-matching entry at this point */ | ||
| } | ||
| } // namespace | ||
|
|
||
| void | ||
| SearchableLiveBucketListSnapshot::scanForLiveEntriesOfType( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this looks generally correct, but it's adding a good amount of complexity, so I'd like to see a unit test specifically for this scanning function. Before, it was pretty straightforward and indirectly tested, but given the k-way merge I think a more explicit test is warranted. Maybe we can test some of the loser tree edge cases, like a degenerate merge with just 1 bucket, 2 buckets, and some non-powers of two. It might also be a good idea to hook this into the randomized bucket testing infra in LedgerStateSnapshotTests.cpp or BucketIndexTests.cpp, where we just make sure we hit all the entries properly. |
||
| LedgerEntryType type, | ||
| std::function<void(LedgerEntry const&, LedgerKey const&)> callback) const | ||
| { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should probably have a |
||
| ZoneScoped; | ||
| // We implement this as a k-way merge over all buckets. We use a loser tree | ||
| // for this. The benefit over a heap is ~2x fewer comparisons. A loser tree | ||
| // is like a single-elimination tournament. The leaves of the tree are the | ||
| // iterators, and the internal nodes represent the loser of the comparison | ||
| // between the two children. This implementation represents the binary tree | ||
| // in an array, where the tournament tree is from indices [1, 2n) (leaves | ||
| // are [n, 2n)). Index 0 is used for keeping track of the overall winner. To | ||
| // update, we just need to advance the iterator for the winning node and | ||
| // then do the log(k) comparisons upward along the path to the root to | ||
| // update the losers. While loser trees often store the whole node value at | ||
| // intermediate nodes, we just store an index, since copying the XDR types | ||
| // is probably more expensive than the extra indirection. | ||
|
|
||
| std::vector<BucketEntryIterator> iterators; | ||
| loopAllBuckets([&iterators, type, | ||
| this](std::shared_ptr<LiveBucket const> const& bucket) { | ||
| if (bucket->isEmpty()) | ||
| { | ||
| return Loop::INCOMPLETE; | ||
| } | ||
|
|
||
| auto range = bucket->getRangeForType(type); | ||
| if (!range) | ||
| { | ||
| return Loop::INCOMPLETE; | ||
| } | ||
|
|
||
| auto& stream = getStream(bucket); | ||
| stream.seek(range->first); | ||
|
|
||
| iterators.emplace_back(stream, type); | ||
| return Loop::INCOMPLETE; | ||
| }); | ||
|
|
||
| if (iterators.empty()) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| size_t const numIterators = iterators.size(); | ||
|
|
||
| constexpr int exhausted = -1; | ||
| std::vector<int> tree; | ||
| tree.resize(numIterators * 2); | ||
| for (size_t i = 0; i < numIterators; ++i) | ||
| { | ||
| if (iterators[i].advance()) | ||
| { | ||
| tree[numIterators + i] = i; | ||
| } | ||
| else | ||
| { | ||
| tree[numIterators + i] = exhausted; | ||
| } | ||
| } | ||
|
|
||
| auto leftWins = [&iterators](int leftIndex, int rightIndex) -> bool { /* true when the iterator at leftIndex must be emitted before the one at rightIndex; both arguments are iterator indices or `exhausted` */ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. More comments here would be helpful, this indicates that the smaller, newer version index wins, right? |
||
| if (leftIndex == exhausted) | ||
| { | ||
| return false; /* an exhausted left side never wins, even against an exhausted right side */ | ||
| } | ||
| if (rightIndex == exhausted) | ||
| { | ||
| return true; /* a live iterator always beats an exhausted one */ | ||
| } | ||
| if (auto cmp = compareLedgerKeys(iterators[leftIndex].getKey(), | ||
| iterators[rightIndex].getKey()); | ||
| cmp != std::partial_ordering::equivalent) | ||
| { | ||
| releaseAssert(cmp != std::partial_ordering::unordered); | ||
| return cmp == std::partial_ordering::less; /* different keys: the smaller key wins */ | ||
| } | ||
| return leftIndex < rightIndex; /* equal keys: the lower iterator index wins, i.e. the bucket visited earlier by loopAllBuckets (presumably the newer one - confirm) */ | ||
| }; | ||
|
|
||
| // Play the match at index i; store the loser, return the winner | ||
| auto play = [&tree, &leftWins](auto& play, size_t index) -> int { /* the lambda receives itself as its first argument so that it can recurse */ | ||
| if (2 * index >= tree.size()) | ||
| { | ||
| return tree[index]; /* leaf (its children would lie outside the array): the stored iterator index is the winner */ | ||
| } | ||
| int left = play(play, 2 * index); /* winner of the left subtree */ | ||
| int right = play(play, 2 * index + 1); /* winner of the right subtree */ | ||
| if (leftWins(left, right)) | ||
| { | ||
| tree[index] = right; /* the loser stays at this node */ | ||
| return left; /* the winner moves up to the parent */ | ||
| } | ||
| else | ||
| { | ||
| tree[index] = left; /* the loser stays at this node */ | ||
| return right; /* the winner moves up to the parent */ | ||
| } | ||
| }; | ||
| tree[0] = play(play, 1); | ||
|
|
||
| bool first = true; /* true until the first key has been processed; `last` is not meaningful before then */ | ||
| LedgerKey last; /* most recently processed key, used to detect duplicates */ | ||
| while (tree[0] != exhausted) /* tree[0] is the overall winner; once it is `exhausted` every iterator is drained */ | ||
| { | ||
| int index = tree[0]; /* iterator that leftWins ranks first among all remaining ones */ | ||
| auto& iter = iterators[index]; | ||
| // Deduplicate entries with the same key across buckets | ||
| if (auto& key = iter.getKey(); first || key != last) | ||
| { | ||
| last = key; /* recorded even when no callback follows, so later duplicates of this key are still suppressed */ | ||
| auto& entry = iter.getEntry(); | ||
| if (entry.type() == LIVEENTRY || entry.type() == INITENTRY) /* any other entry type produces no callback */ | ||
| { | ||
| callback(entry.liveEntry(), key); | ||
| } | ||
| } | ||
| first = false; | ||
|
|
||
| if (!iter.advance()) /* step the winning iterator to its next entry */ | ||
| { | ||
| tree[index + numIterators] = exhausted; /* nothing left in this iterator: mark its leaf */ | ||
| } | ||
| int winner = tree[index + numIterators]; /* candidate carried upward: this iterator's index, or `exhausted` */ | ||
|
|
||
| // Update tournament up the tree to the root | ||
| int i = (index + numIterators) / 2; /* parent of this iterator's leaf */ | ||
| while (i > 0) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: This while could be a for loop, which to me reads a little cleaner. |
||
| { | ||
| if (leftWins(tree[i], winner)) /* the loser stored at this node beats the current candidate */ | ||
| { | ||
| std::swap(tree[i], winner); /* the stored loser becomes the candidate and the old candidate is stored as the loser */ | ||
| } | ||
| i /= 2; /* move to the parent node */ | ||
| } | ||
|
|
||
| tree[0] = winner; /* the survivor of the replayed path is the new overall winner */ | ||
|
Comment on lines
+869
to
+904
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please add some comments to this? It's a little hard to figure out what's going on here. This is the actual k-way merge, right?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, it's still a little opaque. |
||
| } | ||
| } | ||
|
|
||
| // Helper function to handle scan logic in a single bucket. | ||
| Loop | ||
| SearchableLiveBucketListSnapshot::scanForEvictionInBucket( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| // Copyright 2026 Stellar Development Foundation and contributors. Licensed | ||
| // under the Apache License, Version 2.0. See the COPYING file at the root | ||
| // of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| #include "bucket/LedgerCmp.h" | ||
|
|
||
| namespace | ||
| { | ||
| template <typename T> | ||
| std::partial_ordering | ||
| lexComparePartial(T&& lhs1, T&& rhs1) /* base case: three-way comparison of a single (lhs, rhs) pair */ | ||
| { | ||
| return lhs1 <=> rhs1; | ||
| } | ||
|
|
||
| template <typename T, typename... U> | ||
| std::partial_ordering | ||
| lexComparePartial(T&& lhs1, T&& rhs1, U&&... args) /* args holds further (lhs, rhs) pairs, consulted in order only while all earlier pairs compare equivalent */ | ||
| { | ||
| if (auto cmp = lhs1 <=> rhs1; cmp != std::partial_ordering::equivalent) | ||
| { | ||
| return cmp; /* the first non-equivalent pair decides the result */ | ||
| } | ||
| return lexComparePartial(std::forward<U>(args)...); /* recurse on the remaining pairs */ | ||
| } | ||
| } // namespace | ||
|
|
||
| namespace stellar | ||
| { | ||
| std::partial_ordering | ||
| compareLedgerKeys(LedgerKey const& a, LedgerKey const& b) /* three-way comparison of two ledger keys: entry type first, then the type-specific key fields */ | ||
| { | ||
| LedgerEntryType aty = a.type(); | ||
| LedgerEntryType bty = b.type(); | ||
|
|
||
| if (aty < bty) /* keys of different types are ordered by type alone */ | ||
| return std::partial_ordering::less; | ||
|
|
||
| if (aty > bty) | ||
| return std::partial_ordering::greater; | ||
|
|
||
| switch (aty) /* both keys have type aty from here on; each case compares that type's fields in the order listed */ | ||
| { | ||
| case ACCOUNT: | ||
| return a.account().accountID <=> b.account().accountID; | ||
| case TRUSTLINE: | ||
| return lexComparePartial(a.trustLine().accountID, | ||
| b.trustLine().accountID, a.trustLine().asset, | ||
| b.trustLine().asset); | ||
| case OFFER: | ||
| return lexComparePartial(a.offer().sellerID, b.offer().sellerID, | ||
| a.offer().offerID, b.offer().offerID); | ||
| case DATA: | ||
| return lexComparePartial(a.data().accountID, b.data().accountID, | ||
| a.data().dataName, b.data().dataName); | ||
| case CLAIMABLE_BALANCE: | ||
| return a.claimableBalance().balanceID <=> | ||
| b.claimableBalance().balanceID; | ||
| case LIQUIDITY_POOL: | ||
| return a.liquidityPool().liquidityPoolID <=> | ||
| b.liquidityPool().liquidityPoolID; | ||
| case CONTRACT_DATA: | ||
| { | ||
| return lexComparePartial( | ||
| a.contractData().contract, b.contractData().contract, | ||
| a.contractData().key, b.contractData().key, | ||
| a.contractData().durability, b.contractData().durability); | ||
| } | ||
| case CONTRACT_CODE: | ||
| return lexComparePartial(a.contractCode().hash, b.contractCode().hash); | ||
| case CONFIG_SETTING: | ||
| { | ||
| return a.configSetting().configSettingID <=> | ||
| b.configSetting().configSettingID; | ||
| } | ||
| case TTL: | ||
| return lexComparePartial(a.ttl().keyHash, b.ttl().keyHash); | ||
| } | ||
|
|
||
| return std::partial_ordering::unordered; /* reached only when aty matches none of the cases above */ | ||
| } | ||
| } // namespace stellar |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |
|
|
||
| #pragma once | ||
|
|
||
| #include <compare> | ||
| #include <type_traits> | ||
|
|
||
| #include "bucket/BucketUtils.h" | ||
|
|
@@ -123,6 +124,10 @@ struct LedgerEntryIdCmp | |
| } | ||
| }; | ||
|
|
||
| // Like LedgerEntryIdCmp, but only compares LedgerKeys, and does a 3-way | ||
| // comparison instead of a less-than. | ||
| std::partial_ordering compareLedgerKeys(LedgerKey const& a, LedgerKey const& b); | ||
|
Comment on lines
+127
to
+129
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why a partial ordering? Does a total ordering not exist for ledger keys?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The compare delegates to the |
||
|
|
||
| /** | ||
| * Compare two BucketEntries for identity by comparing their respective | ||
| * LedgerEntries (ignoring their hashes, as the LedgerEntryIdCmp ignores their | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Stylistically this is a little weird. Can we either inline the advance or just move the declaration to .h?