From 6d02e30d3b6d203f6799b8989daac5ce8c617251 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 21 May 2026 06:41:16 +0400 Subject: [PATCH 01/30] fix(snapshot): detect and recover validator vote snapshot inconsistencies - Add sanity check during export to warn if validators exist but validator votes are absent - Log warning about possible chainbase type-enum mismatch causing incomplete snapshot - Implement fallback during import to recover validator votes from legacy witness_vote key if validator_vote is empty - Improve snapshot integrity by handling potential silent corruption cases due to type enum shifts --- plugins/snapshot/plugin.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/plugins/snapshot/plugin.cpp b/plugins/snapshot/plugin.cpp index f89b3dc81b..d4e9177caf 100644 --- a/plugins/snapshot/plugin.cpp +++ b/plugins/snapshot/plugin.cpp @@ -975,6 +975,18 @@ fc::mutable_variant_object snapshot_plugin::plugin_impl::serialize_state() { EXPORT_INDEX(account_authority_index, account_authority_object, "account_authority") EXPORT_INDEX(validator_index, validator_object, "validator") EXPORT_INDEX(validator_vote_index, validator_vote_object, "validator_vote") + // Sanity: if validators exist but votes are absent, the chainbase type enum + // likely shifted (types added/removed before validator_vote_object_type). + // This would silently corrupt the snapshot. + { + auto n_validators = state["validator"].get_array().size(); + auto n_votes = state["validator_vote"].get_array().size(); + if (n_validators > 0 && n_votes == 0) + wlog("SNAPSHOT INTEGRITY: ${v} validators but 0 validator votes — " + "validator_vote_index may be empty due to chainbase type-enum mismatch. 
" + "Snapshot will be INCOMPLETE.", + ("v", n_validators)); + } EXPORT_INDEX(block_summary_index, block_summary_object, "block_summary") EXPORT_INDEX(content_index, content_object, "content") EXPORT_INDEX(content_vote_index, content_vote_object, "content_vote") @@ -1560,6 +1572,14 @@ void snapshot_plugin::plugin_impl::load_snapshot(const fc::path& input_path) { if (state.contains("validator_vote")) { auto n = detail::import_validator_votes(db, state["validator_vote"].get_array()); ilog(CLOG_ORANGE "Imported ${n} validator votes" CLOG_RESET, ("n", n)); + // Defensive fallback: validator_vote was present but empty; the snapshot may have + // been produced from a chainbase DB with a type-enum mismatch (see export warning). + // If an old witness_vote key also exists with data, use it to recover. + if (n == 0 && state.contains("witness_vote")) { + auto n2 = detail::import_validator_votes(db, state["witness_vote"].get_array()); + if (n2 > 0) + ilog(CLOG_ORANGE "Imported ${n} validator votes (recovered from witness_vote)" CLOG_RESET, ("n", n2)); + } } else if (state.contains("witness_vote")) { // backward compat: old snapshots used "witness_vote" key auto n = detail::import_validator_votes(db, state["witness_vote"].get_array()); From 9cd2143f330c2009d97928255e39559496ffc790 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 21 May 2026 10:32:12 +0400 Subject: [PATCH 02/30] docs(build): remove all references to low-memory node build flag and options - Deleted all mentions of `LOW_MEMORY_NODE` from build scripts, environment variables, and documentation - Removed low-memory node build instructions and flags from Linux, macOS, and Windows build guides - Updated CMake options and environment variables to exclude low-memory settings - Simplified Docker image CMake flags by removing `LOW_MEMORY_NODE` - Cleared low-memory related config references in node setup and getting started guides - Cleaned up example config files by removing deprecated plugins and options 
related to low-memory builds --- @l10n/ru/docs/development/building.md | 9 --------- @l10n/ru/docs/node/building.md | 9 --------- @l10n/ru/docs/node/docker.md | 8 ++++---- @l10n/ru/docs/node/getting-started.md | 3 +-- @l10n/zh-CN/docs/development/building.md | 9 --------- @l10n/zh-CN/docs/node/building.md | 9 --------- @l10n/zh-CN/docs/node/docker.md | 8 ++++---- @l10n/zh-CN/docs/node/getting-started.md | 1 - docs/development/building.md | 9 --------- docs/node/building.md | 9 --------- docs/node/docker.md | 8 ++++---- docs/node/getting-started.md | 1 - share/vizd/config/config.ini | 3 --- share/vizd/config/config_debug.ini | 8 +------- share/vizd/config/config_debug_mongo.ini | 13 +------------ share/vizd/config/config_mongo.ini | 13 +------------ share/vizd/config/config_stock_exchange.ini | 2 +- share/vizd/config/config_testnet.ini | 8 +------- 18 files changed, 18 insertions(+), 112 deletions(-) diff --git a/@l10n/ru/docs/development/building.md b/@l10n/ru/docs/development/building.md index 7ba29470f0..cdd27517d6 100644 --- a/@l10n/ru/docs/development/building.md +++ b/@l10n/ru/docs/development/building.md @@ -26,10 +26,8 @@ chmod +x build-linux.sh ```bash ./build-linux.sh # Release-сборка (по умолчанию) -./build-linux.sh -l # LOW_MEMORY_NODE (узлы-валидаторы) ./build-linux.sh -n # Testnet-сборка ./build-linux.sh -t Debug -j4 # Debug-сборка с 4 параллельными задачами -./build-linux.sh --skip-deps # Пропустить установку зависимостей ./build-linux.sh --install # Установить в систему после сборки # Пользовательские пути к зависимостям @@ -54,7 +52,6 @@ chmod +x build-mac.sh **Параметры:** ```bash -./build-mac.sh -l # Low-memory узел ./build-mac.sh -n # Testnet ./build-mac.sh --skip-deps # Пропустить установки Homebrew ./build-mac.sh --boost-root /opt/boost_1_74_0 @@ -77,7 +74,6 @@ build-mingw.bat | Переменная | По умолчанию | Описание | |-----------|-------------|---------| | `VIZ_BUILD_TYPE` | Release | Release или Debug | -| `VIZ_LOW_MEMORY` | OFF | Включить 
low-memory узел | | `VIZ_BUILD_TESTNET` | OFF | Testnet-сборка | | `VIZ_FULL_STATIC` | OFF | Полностью статический бинарник | | `VIZ_CMAKE_EXTRA` | — | Дополнительные флаги CMake | @@ -100,7 +96,6 @@ build-msvc.bat |-----------|-------------|---------| | `VIZ_VS_VERSION` | "Visual Studio 17 2022" | Генератор Visual Studio | | `VIZ_BUILD_TYPE` | Release | Тип сборки | -| `VIZ_LOW_MEMORY` | OFF | Low-memory узел | | `VIZ_BUILD_TESTNET` | OFF | Testnet-сборка | **Требования:** Visual Studio 2019+ с нагрузкой "Desktop development with C++", CMake 3.16+. @@ -125,7 +120,6 @@ build-msvc.bat | Параметр | По умолчанию | Описание | |---------|-------------|---------| | `BUILD_TESTNET` | OFF | Сборка для testnet | -| `LOW_MEMORY_NODE` | OFF | Исключить неконсенсусные данные (уменьшает RAM) | | `CHAINBASE_CHECK_LOCKING` | OFF | Включить проверку блокировок (только для разработки) | | `BUILD_SHARED_LIBRARIES` | OFF | Собирать разделяемые библиотеки | | `USE_PCH` | OFF | Включить предкомпилированные заголовки (ускоряет пересборку) | @@ -140,9 +134,6 @@ build-msvc.bat # Release-сборка python3 programs/build_helpers/configure_build.py --release --src ../.. 
-# Debug с low-memory -python3 programs/build_helpers/configure_build.py --debug --low-memory - # Кросс-компиляция для Windows с MinGW python3 programs/build_helpers/configure_build.py --win --release diff --git a/@l10n/ru/docs/node/building.md b/@l10n/ru/docs/node/building.md index 60f278784d..3c70eceb8a 100644 --- a/@l10n/ru/docs/node/building.md +++ b/@l10n/ru/docs/node/building.md @@ -46,9 +46,6 @@ chmod +x build-linux.sh ### Основные флаги сборки ```bash -# Низкопамятный узел (для валидаторов/сид-узлов — без плагинов индексирования истории) -./build-linux.sh -l - # Сборка для тестнета ./build-linux.sh -n @@ -58,9 +55,6 @@ chmod +x build-linux.sh # Параллельные задания ./build-linux.sh -j 8 -# Пропустить установку зависимостей (уже установлены) -./build-linux.sh --skip-deps - # Пользовательские пути к Boost / OpenSSL ./build-linux.sh --boost-root /opt/boost_1_74_0 --openssl-root /opt/openssl ``` @@ -99,7 +93,6 @@ build-mingw.bat | Переменная | По умолчанию | Описание | |------------|--------------|----------| | `VIZ_BUILD_TYPE` | `Release` | `Release` или `Debug` | -| `VIZ_LOW_MEMORY` | `OFF` | `ON` для низкопамятного узла | | `VIZ_BUILD_TESTNET` | `OFF` | `ON` для сборки тестнета | | `VIZ_FULL_STATIC` | `OFF` | `ON` для полностью статического бинарного файла | @@ -124,7 +117,6 @@ build-msvc.bat | Опция | По умолчанию | Описание | |-------|--------------|----------| | `BUILD_TESTNET` | `OFF` | Включить код для тестнета | -| `LOW_MEMORY_NODE` | `OFF` | Исключить плагины истории/индексирования | | `CHAINBASE_CHECK_LOCKING` | `OFF` | Включить проверки блокировок (debug) | | `BUILD_SHARED_LIBRARIES` | `OFF` | Собрать разделяемые библиотеки | | `USE_PCH` | `OFF` | Включить предкомпилированные заголовки (ускоряет пересборку) | @@ -134,7 +126,6 @@ build-msvc.bat ```bash mkdir build && cd build cmake -DCMAKE_BUILD_TYPE=Release \ - -DLOW_MEMORY_NODE=ON \ -DCMAKE_INSTALL_PREFIX=/usr/local \ .. 
make -j$(nproc) diff --git a/@l10n/ru/docs/node/docker.md b/@l10n/ru/docs/node/docker.md index 94ef93829d..4d51bf0eb1 100644 --- a/@l10n/ru/docs/node/docker.md +++ b/@l10n/ru/docs/node/docker.md @@ -129,10 +129,10 @@ docker build \ ### CMake-флаги для каждого образа -| Образ | `LOW_MEMORY_NODE` | `BUILD_TESTNET` | -|-------|:-----------------:|:---------------:| -| production | OFF | OFF | -| testnet | OFF | ON | +| Образ | `BUILD_TESTNET` | +|-------|:---------------:| +| production | OFF | +| testnet | ON | --- diff --git a/@l10n/ru/docs/node/getting-started.md b/@l10n/ru/docs/node/getting-started.md index 538b83ee05..2540d782c6 100644 --- a/@l10n/ru/docs/node/getting-started.md +++ b/@l10n/ru/docs/node/getting-started.md @@ -133,7 +133,7 @@ shared-file-size = 4G # Плагины (полный узел) plugin = chain p2p webserver json_rpc database_api network_broadcast_api -plugin = social_network tags follow account_history +plugin = account_history ``` Для узла-валидатора см. [Узел-валидатор](./validator-node.md). 
@@ -172,7 +172,6 @@ curl -s -X POST http://localhost:8090 \ | Полный узел | `config.ini` | Все плагины, публичные RPC-эндпоинты | | Валидатор | `config_witness.ini` | Производство блоков, RPC только на localhost | | Тестовая сеть | `config_testnet.ini` | Разработка и тестирование | -| Малая память | `config.ini` + флаг сборки `LOW_MEMORY_NODE` | Только консенсус, без индексов истории | --- diff --git a/@l10n/zh-CN/docs/development/building.md b/@l10n/zh-CN/docs/development/building.md index f7c1cb5125..8eb6279129 100644 --- a/@l10n/zh-CN/docs/development/building.md +++ b/@l10n/zh-CN/docs/development/building.md @@ -26,10 +26,8 @@ chmod +x build-linux.sh ```bash ./build-linux.sh # Release 构建(默认) -./build-linux.sh -l # LOW_MEMORY_NODE(验证者节点) ./build-linux.sh -n # Testnet 构建 ./build-linux.sh -t Debug -j4 # Debug 构建,4 个并行任务 -./build-linux.sh --skip-deps # 跳过依赖安装 ./build-linux.sh --install # 构建后安装到系统 # 自定义依赖路径 @@ -54,7 +52,6 @@ chmod +x build-mac.sh **选项:** ```bash -./build-mac.sh -l # 低内存节点 ./build-mac.sh -n # Testnet ./build-mac.sh --skip-deps # 跳过 Homebrew 安装 ./build-mac.sh --boost-root /opt/boost_1_74_0 @@ -77,7 +74,6 @@ build-mingw.bat | 变量 | 默认值 | 描述 | |------|--------|------| | `VIZ_BUILD_TYPE` | Release | Release 或 Debug | -| `VIZ_LOW_MEMORY` | OFF | 启用低内存节点 | | `VIZ_BUILD_TESTNET` | OFF | Testnet 构建 | | `VIZ_FULL_STATIC` | OFF | 完全静态二进制文件 | | `VIZ_CMAKE_EXTRA` | — | 附加 CMake 标志 | @@ -100,7 +96,6 @@ build-msvc.bat |------|--------|------| | `VIZ_VS_VERSION` | "Visual Studio 17 2022" | Visual Studio 生成器 | | `VIZ_BUILD_TYPE` | Release | 构建类型 | -| `VIZ_LOW_MEMORY` | OFF | 低内存节点 | | `VIZ_BUILD_TESTNET` | OFF | Testnet 构建 | **要求:** Visual Studio 2019+(带"Desktop development with C++"工作负载)、CMake 3.16+。 @@ -125,7 +120,6 @@ build-msvc.bat | 选项 | 默认值 | 描述 | |------|--------|------| | `BUILD_TESTNET` | OFF | 为 testnet 构建 | -| `LOW_MEMORY_NODE` | OFF | 排除非共识数据(减少 RAM) | | `CHAINBASE_CHECK_LOCKING` | OFF | 启用锁检查(仅用于开发) | | `BUILD_SHARED_LIBRARIES` | OFF | 构建共享库 | | 
`USE_PCH` | OFF | 启用预编译头文件(加速重新构建) | @@ -140,9 +134,6 @@ build-msvc.bat # Release 构建 python3 programs/build_helpers/configure_build.py --release --src ../.. -# 带低内存的 Debug -python3 programs/build_helpers/configure_build.py --debug --low-memory - # 使用 MinGW 交叉编译 Windows 版本 python3 programs/build_helpers/configure_build.py --win --release diff --git a/@l10n/zh-CN/docs/node/building.md b/@l10n/zh-CN/docs/node/building.md index 839144dc0c..f40637cbde 100644 --- a/@l10n/zh-CN/docs/node/building.md +++ b/@l10n/zh-CN/docs/node/building.md @@ -46,9 +46,6 @@ chmod +x build-linux.sh ### 常用构建标志 ```bash -# 低内存节点(验证者/种子节点 — 排除历史索引) -./build-linux.sh -l - # 测试网构建 ./build-linux.sh -n @@ -58,9 +55,6 @@ chmod +x build-linux.sh # 并行任务数 ./build-linux.sh -j 8 -# 跳过依赖安装(已安装) -./build-linux.sh --skip-deps - # 自定义 Boost / OpenSSL 路径 ./build-linux.sh --boost-root /opt/boost_1_74_0 --openssl-root /opt/openssl ``` @@ -99,7 +93,6 @@ build-mingw.bat | 变量 | 默认值 | 描述 | |------|-------|------| | `VIZ_BUILD_TYPE` | `Release` | `Release` 或 `Debug` | -| `VIZ_LOW_MEMORY` | `OFF` | `ON` 构建低内存节点 | | `VIZ_BUILD_TESTNET` | `OFF` | `ON` 用于测试网构建 | | `VIZ_FULL_STATIC` | `OFF` | `ON` 构建完全静态二进制文件 | @@ -124,7 +117,6 @@ build-msvc.bat | 选项 | 默认值 | 描述 | |------|-------|------| | `BUILD_TESTNET` | `OFF` | 启用测试网专用代码 | -| `LOW_MEMORY_NODE` | `OFF` | 排除历史/索引插件 | | `CHAINBASE_CHECK_LOCKING` | `OFF` | 启用锁断言检查(debug) | | `BUILD_SHARED_LIBRARIES` | `OFF` | 构建共享库 | | `USE_PCH` | `OFF` | 启用预编译头文件(加快重新构建) | @@ -134,7 +126,6 @@ build-msvc.bat ```bash mkdir build && cd build cmake -DCMAKE_BUILD_TYPE=Release \ - -DLOW_MEMORY_NODE=ON \ -DCMAKE_INSTALL_PREFIX=/usr/local \ .. 
make -j$(nproc) diff --git a/@l10n/zh-CN/docs/node/docker.md b/@l10n/zh-CN/docs/node/docker.md index e8e96a14a9..57ec5dbd62 100644 --- a/@l10n/zh-CN/docs/node/docker.md +++ b/@l10n/zh-CN/docs/node/docker.md @@ -129,10 +129,10 @@ docker build \ ### 各镜像的 CMake 标志 -| 镜像 | `LOW_MEMORY_NODE` | `BUILD_TESTNET` | -|------|:-----------------:|:---------------:| -| production | OFF | OFF | -| testnet | OFF | ON | +| 镜像 | `BUILD_TESTNET` | +|------|:---------------:| +| production | OFF | +| testnet | ON | --- diff --git a/@l10n/zh-CN/docs/node/getting-started.md b/@l10n/zh-CN/docs/node/getting-started.md index e499476b82..41961058c1 100644 --- a/@l10n/zh-CN/docs/node/getting-started.md +++ b/@l10n/zh-CN/docs/node/getting-started.md @@ -172,7 +172,6 @@ curl -s -X POST http://localhost:8090 \ | 全节点 | `config.ini` | 所有插件,公共 RPC 端点 | | 验证者 | `config_witness.ini` | 区块生产,RPC 仅限本地 | | 测试网 | `config_testnet.ini` | 开发和测试 | -| 低内存 | `config.ini` + `LOW_MEMORY_NODE` 构建标志 | 仅共识,无历史索引 | --- diff --git a/docs/development/building.md b/docs/development/building.md index f25d27de5f..60d74805fb 100644 --- a/docs/development/building.md +++ b/docs/development/building.md @@ -26,10 +26,8 @@ chmod +x build-linux.sh ```bash ./build-linux.sh # Release build (default) -./build-linux.sh -l # LOW_MEMORY_NODE (validator nodes) ./build-linux.sh -n # Testnet build ./build-linux.sh -t Debug -j4 # Debug build with 4 parallel jobs -./build-linux.sh --skip-deps # Skip dependency installation ./build-linux.sh --install # Install to system after build # Custom dependency paths @@ -54,7 +52,6 @@ Requires Xcode Command Line Tools and Homebrew. 
The script installs: `boost`, `c **Options:** ```bash -./build-mac.sh -l # Low-memory node ./build-mac.sh -n # Testnet ./build-mac.sh --skip-deps # Skip Homebrew installs ./build-mac.sh --boost-root /opt/boost_1_74_0 @@ -77,7 +74,6 @@ build-mingw.bat | Variable | Default | Description | |----------|---------|-------------| | `VIZ_BUILD_TYPE` | Release | Release or Debug | -| `VIZ_LOW_MEMORY` | OFF | Enable low-memory node | | `VIZ_BUILD_TESTNET` | OFF | Testnet build | | `VIZ_FULL_STATIC` | OFF | Fully static binary | | `VIZ_CMAKE_EXTRA` | — | Additional CMake flags | @@ -100,7 +96,6 @@ build-msvc.bat |----------|---------|-------------| | `VIZ_VS_VERSION` | "Visual Studio 17 2022" | Visual Studio generator | | `VIZ_BUILD_TYPE` | Release | Build type | -| `VIZ_LOW_MEMORY` | OFF | Low-memory node | | `VIZ_BUILD_TESTNET` | OFF | Testnet build | **Requirements:** Visual Studio 2019+ with "Desktop development with C++" workload, CMake 3.16+. @@ -125,7 +120,6 @@ All Dockerfiles use a two-stage build to minimize image size and use Boost 1.71 | Option | Default | Description | |--------|---------|-------------| | `BUILD_TESTNET` | OFF | Build for testnet | -| `LOW_MEMORY_NODE` | OFF | Exclude non-consensus data (reduces RAM) | | `CHAINBASE_CHECK_LOCKING` | OFF | Enable lock checking (development only) | | `BUILD_SHARED_LIBRARIES` | OFF | Build shared libraries | | `USE_PCH` | OFF | Enable precompiled headers (faster rebuilds) | @@ -140,9 +134,6 @@ Wraps CMake with sensible defaults and cross-compilation support: # Release build python3 programs/build_helpers/configure_build.py --release --src ../.. 
-# Debug with low-memory -python3 programs/build_helpers/configure_build.py --debug --low-memory - # Cross-compile for Windows with MinGW python3 programs/build_helpers/configure_build.py --win --release diff --git a/docs/node/building.md b/docs/node/building.md index 682aff4ed2..f7887aa811 100644 --- a/docs/node/building.md +++ b/docs/node/building.md @@ -46,9 +46,6 @@ Output binary: `build/programs/vizd/vizd` ### Common build flags ```bash -# Low-memory node (validators/seed nodes — excludes history indexing) -./build-linux.sh -l - # Testnet build ./build-linux.sh -n @@ -58,9 +55,6 @@ Output binary: `build/programs/vizd/vizd` # Parallel jobs ./build-linux.sh -j 8 -# Skip dependency installation (already installed) -./build-linux.sh --skip-deps - # Custom Boost / OpenSSL paths ./build-linux.sh --boost-root /opt/boost_1_74_0 --openssl-root /opt/openssl ``` @@ -99,7 +93,6 @@ Optional environment variables: | Variable | Default | Description | |----------|---------|-------------| | `VIZ_BUILD_TYPE` | `Release` | `Release` or `Debug` | -| `VIZ_LOW_MEMORY` | `OFF` | `ON` to build low-memory node | | `VIZ_BUILD_TESTNET` | `OFF` | `ON` for testnet build | | `VIZ_FULL_STATIC` | `OFF` | `ON` for fully static binary | @@ -124,7 +117,6 @@ For direct CMake usage (advanced): | Option | Default | Description | |--------|---------|-------------| | `BUILD_TESTNET` | `OFF` | Enable testnet-specific code | -| `LOW_MEMORY_NODE` | `OFF` | Exclude history/indexing plugins | | `CHAINBASE_CHECK_LOCKING` | `OFF` | Enable lock assertion checks (debug) | | `BUILD_SHARED_LIBRARIES` | `OFF` | Build shared libraries | | `USE_PCH` | `OFF` | Enable precompiled headers (faster rebuilds) | @@ -134,7 +126,6 @@ Example: ```bash mkdir build && cd build cmake -DCMAKE_BUILD_TYPE=Release \ - -DLOW_MEMORY_NODE=ON \ -DCMAKE_INSTALL_PREFIX=/usr/local \ .. 
make -j$(nproc) diff --git a/docs/node/docker.md b/docs/node/docker.md index 3205a19072..014ae11859 100644 --- a/docs/node/docker.md +++ b/docs/node/docker.md @@ -129,10 +129,10 @@ docker build \ ### CMake flags per image -| Image | `LOW_MEMORY_NODE` | `BUILD_TESTNET` | -|-------|:-----------------:|:---------------:| -| production | OFF | OFF | -| testnet | OFF | ON | +| Image | `BUILD_TESTNET` | +|-------|:---------------:| +| production | OFF | +| testnet | ON | --- diff --git a/docs/node/getting-started.md b/docs/node/getting-started.md index b7dae55713..5c8864e78b 100644 --- a/docs/node/getting-started.md +++ b/docs/node/getting-started.md @@ -171,7 +171,6 @@ Check `head_block_number` — it should increase every 3 seconds once synced. | Full node | `config.ini` | All plugins, public RPC endpoints | | Validator | `config_witness.ini` | Block production, RPC on localhost only | | Testnet | `config_testnet.ini` | Development and testing | -| Low-memory | `config.ini` + `LOW_MEMORY_NODE` build flag | Consensus only, no history indexes | --- diff --git a/share/vizd/config/config.ini b/share/vizd/config/config.ini index c29dd224b4..32139779ce 100644 --- a/share/vizd/config/config.ini +++ b/share/vizd/config/config.ini @@ -118,9 +118,6 @@ history-count-blocks = 57600 # Defines starting block from which recording stats by the account_history and operation_history plugin. history-start-block = 70000000 -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - # name of validator controlled by this node (e.g. initwitness ) # validator = # # validator = # DEPRECATED: use 'validator' diff --git a/share/vizd/config/config_debug.ini b/share/vizd/config/config_debug.ini index 25c31e30de..7d26c2f992 100644 --- a/share/vizd/config/config_debug.ini +++ b/share/vizd/config/config_debug.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 100M # and resizes. The optimal strategy is do checking of the free space, but not very often. 
block-num-check-free-size = 10 # each 30 seconds -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api private_message follow social_network tags account_by_key account_history operation_history block_info raw_block debug_node witness_api +plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api account_by_key account_history operation_history block_info raw_block debug_node # Remove votes before defined block, should increase performance clear-votes-before-block = 0 # don't clear votes @@ -93,12 +93,6 @@ skip-virtual-ops = false # Defines starting block from which recording stats by the account_history plugin. # history-start-block = -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = - # Enable block production, even if the chain is stale. enable-stale-production = true diff --git a/share/vizd/config/config_debug_mongo.ini b/share/vizd/config/config_debug_mongo.ini index 7377448471..ace73667c7 100644 --- a/share/vizd/config/config_debug_mongo.ini +++ b/share/vizd/config/config_debug_mongo.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 100M # and resizes. The optimal strategy is do checking of the free space, but not very often. 
block-num-check-free-size = 10 # each 30 seconds -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api private_message follow social_network tags market_history account_by_key account_history operation_history block_info raw_block debug_node validator_api mongo_db +plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api account_by_key account_history operation_history block_info raw_block debug_node validator_api mongo_db # For connect to mongodb which is running outside Docker (if vizd running inside) mongodb-uri = mongodb://172.17.0.1:27017/viz @@ -96,17 +96,6 @@ skip-virtual-ops = false # Defines starting block from which recording stats by the account_history plugin. # history-start-block = -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Track market history by grouping orders into buckets of equal size measured in seconds specified as a JSON array of numbers -bucket-size = [15,60,300,3600,86400] - -# How far back in time to track history for each bucket size, measured in the number of buckets (default: 5760) -history-per-size = 5760 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = # Enable block production, even if the chain is stale. enable-stale-production = true diff --git a/share/vizd/config/config_mongo.ini b/share/vizd/config/config_mongo.ini index a8ee8be8e6..8aae5f136d 100644 --- a/share/vizd/config/config_mongo.ini +++ b/share/vizd/config/config_mongo.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 2G # and resizes. The optimal strategy is do checking of the free space, but not very often. 
block-num-check-free-size = 1000 # each 3000 seconds -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api private_message follow social_network tags market_history account_by_key operation_history account_history block_info raw_block validator_api mongo_db +plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api account_by_key operation_history account_history block_info raw_block validator_api mongo_db # For connect to mongodb which is running outside Docker (if vizd running inside) mongodb-uri = mongodb://172.17.0.1:27017/viz @@ -96,17 +96,6 @@ skip-virtual-ops = false # Defines starting block from which recording stats by the account_history plugin. # history-start-block = 0 -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Track market history by grouping orders into buckets of equal size measured in seconds specified as a JSON array of numbers -bucket-size = [15,60,300,3600,86400] - -# How far back in time to track history for each bucket size, measured in the number of buckets (default: 5760) -history-per-size = 5760 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = # Enable block production, even if the chain is stale. enable-stale-production = false diff --git a/share/vizd/config/config_stock_exchange.ini b/share/vizd/config/config_stock_exchange.ini index 3a89b81b94..48a413df51 100644 --- a/share/vizd/config/config_stock_exchange.ini +++ b/share/vizd/config/config_stock_exchange.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 2G # and resizes. The optimal strategy is do checking of the free space, but not very often. 
block-num-check-free-size = 1000 # each 3000 seconds -plugin = chain p2p json_rpc webserver network_broadcast_api validator database_api block_info raw_block operation_history account_history witness_api +plugin = chain p2p json_rpc webserver network_broadcast_api validator database_api block_info raw_block operation_history account_history # Remove votes before defined block, should increase performance clear-votes-before-block = 0 # clear votes after each cashout diff --git a/share/vizd/config/config_testnet.ini b/share/vizd/config/config_testnet.ini index 7281d042b8..83e1ebaa6d 100644 --- a/share/vizd/config/config_testnet.ini +++ b/share/vizd/config/config_testnet.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 2G # and resizes. The optimal strategy is do checking of the free space, but not very often. block-num-check-free-size = 1000 # each 3000 seconds -plugin = validator witness_api +plugin = validator plugin = chain p2p json_rpc webserver network_broadcast_api database_api plugin = account_history operation_history plugin = committee_api invite_api paid_subscription_api custom_protocol_api @@ -97,12 +97,6 @@ skip-virtual-ops = false # Defines starting block from which recording stats by the account_history plugin. # history-start-block = 0 -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = - # Enable block production, even if the chain is stale. 
enable-stale-production = true From 74ff904d180bdc65f1c871cc31bbc0cfdcfedf6b Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 21 May 2026 10:35:37 +0400 Subject: [PATCH 03/30] chore(config): remove deprecated mongo config files - Delete config_debug_mongo.ini to clean up obsolete debug mongo configuration - Remove config_mongo.ini to eliminate outdated mongo production configuration - Simplify project configuration by removing unused or legacy mongo ini files --- share/vizd/config/config_debug_mongo.ini | 132 ----------------------- share/vizd/config/config_mongo.ini | 132 ----------------------- 2 files changed, 264 deletions(-) delete mode 100644 share/vizd/config/config_debug_mongo.ini delete mode 100644 share/vizd/config/config_mongo.ini diff --git a/share/vizd/config/config_debug_mongo.ini b/share/vizd/config/config_debug_mongo.ini deleted file mode 100644 index ace73667c7..0000000000 --- a/share/vizd/config/config_debug_mongo.ini +++ /dev/null @@ -1,132 +0,0 @@ -# Endpoint for P2P node to listen on -# p2p-endpoint = - -# Maxmimum number of incoming connections on P2P endpoint -# p2p-max-connections = - -# P2P nodes to connect to on startup (may specify multiple times) -# p2p-seed-node = - -# Enable stale sync detection: when no blocks are received for the configured timeout, -# the node resets sync from the last irreversible block and reconnects all seed peers. -# p2p-stale-sync-detection = false - -# Timeout in seconds before stale sync detection triggers recovery (default: 120 = 2 minutes). -# p2p-stale-sync-timeout-seconds = 120 - -# Pairs of [BLOCK_NUM,BLOCK_ID] that should be enforced as checkpoints. -# checkpoint = - -# Number of threads for rpc-clients. 
Optimal value `-1` -webserver-thread-pool-size = 2 - -# IP:PORT for HTTP connections -webserver-http-endpoint = 0.0.0.0:8090 - -# IP:PORT for WebSocket connections -webserver-ws-endpoint = 0.0.0.0:8091 - -# Maximum microseconds for trying to get read lock -read-wait-micro = 500000 - -# Maximum retries to get read lock. Each retry is read-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire READ lock'. -max-read-wait-retries = 2 - -# Maximum microseconds for trying to get write lock on broadcast transaction. -write-wait-micro = 500000 - -# Maximum retries to get write lock. Each retry is write-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire WRITE lock'. -max-write-wait-retries = 3 - -# Do all write operations (push_block/push_transaction) in the single thread. -# Write lock of database is very heavy. When many threads tries to lock database on writing, rpc-clients -# receive many errors 'Unable to acquire READ lock' ('Unable to acquire WRITE lock'). -# Enabling of this options can increase performance. -single-write-thread = true - -# Enable plugin notifications about operations in a pushed transaction, which should be included to the next generated -# block. Plugins doesn't validate data in operations, they only update its own indexes, so notifications can be -# disabled on push_transaction() without any side-effects. The option doesn't have effect on a pushing signed blocks, -# so it is safe. -# Disabling of this option can increase performance. -enable-plugins-on-push-transaction = true - -# A start size for shared memory file when it doesn't have any data. Possible cases: -# - If shared memory has data and the value is greater then the size of shared_memory.bin, -# the file will be grown to requested size. -# - If shared memory has data and the value is less then the size of shared_memory.bin, nothing happens. 
-# Changing of this parameter doesn't require the replaying. -shared-file-size = 100M - -# The minimum free space in the shared memory file. When free space reaches the following value, the size of the -# shared_memory.bin increases by the value of inc-shared-file-size. -min-free-shared-file-size = 50M - -# Step of increasing size of shared_memory.bin. When the free memory size reaches min-free-shared-file-size, -# the shared memory size increases by the following value. -inc-shared-file-size = 100M - -# How often do checking the free space in shared_memory.bin. A very frequent checking can decrease performance. -# It's not critical if the free size became very small, because the daemon catches the `bad_alloc` exception -# and resizes. The optimal strategy is do checking of the free space, but not very often. -block-num-check-free-size = 10 # each 30 seconds - -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api account_by_key account_history operation_history block_info raw_block debug_node validator_api mongo_db - -# For connect to mongodb which is running outside Docker (if vizd running inside) -mongodb-uri = mongodb://172.17.0.1:27017/viz - -# Remove votes before defined block, should increase performance -clear-votes-before-block = 0 # don't clear votes - -# Virtual operations will not be passed to the plugins, enabling of the option helps to save some memory. -skip-virtual-ops = false - -# Defines a range of accounts to track by the account_history plugin as a json pair ["from","to"] [from,to] -# track-account-range = - -# Defines a list of operations which will be explicitly logged by the account_history plugin. -# history-whitelist-ops = - -# Defines a list of operations which will be explicitly ignored by the account_history plugin. -# history-blacklist-ops = - -# Defines starting block from which recording stats by the account_history plugin. 
-# history-start-block = - - -# Enable block production, even if the chain is stale. -enable-stale-production = true - - -# Percent of validators (0-99) that must be participating in order to produce blocks -required-participation = 0 - -# name of validator controlled by this node (e.g. initwitness ) -validator = "viz" -# validator = "viz" # DEPRECATED: use 'validator' - -# WIF PRIVATE KEY to be used by one or more validators -private-key = 5JVFFWRLwz6JoP9kguuRFfytToGU6cLgBVTL9t6NB3D3BQLbUBS - -# declare an appender named "stderr" that writes messages to the console -[log.console_appender.stderr] -stream=std_error - -# declare an appender named "p2p" that writes messages to p2p.log -[log.file_appender.p2p] -filename=logs/p2p/p2p.log -# filename can be absolute or relative to this config file - -# route any messages logged to the default logger to the "stderr" logger we -# declared above, if they are info level are higher -[logger.default] -level=info -appenders=stderr - -# route messages sent to the "p2p" logger to stderr too -[logger.p2p] -level=info -appenders=stderr diff --git a/share/vizd/config/config_mongo.ini b/share/vizd/config/config_mongo.ini deleted file mode 100644 index 8aae5f136d..0000000000 --- a/share/vizd/config/config_mongo.ini +++ /dev/null @@ -1,132 +0,0 @@ -# Endpoint for P2P node to listen on -p2p-endpoint = 0.0.0.0:4243 - -# Maxmimum number of incoming connections on P2P endpoint -# p2p-max-connections = - -# P2P nodes to connect to on startup (may specify multiple times) -# p2p-seed-node = - -# Enable stale sync detection: when no blocks are received for the configured timeout, -# the node resets sync from the last irreversible block and reconnects all seed peers. -# p2p-stale-sync-detection = false - -# Timeout in seconds before stale sync detection triggers recovery (default: 120 = 2 minutes). -# p2p-stale-sync-timeout-seconds = 120 - -# Pairs of [BLOCK_NUM,BLOCK_ID] that should be enforced as checkpoints. 
-# checkpoint = - -# Number of threads for rpc-clients. The optimal value is `-1` -webserver-thread-pool-size = 2 - -# IP:PORT for HTTP connections -webserver-http-endpoint = 0.0.0.0:8090 - -# IP:PORT for WebSocket connections -webserver-ws-endpoint = 0.0.0.0:8091 - -# Maximum microseconds for trying to get read lock -read-wait-micro = 500000 - -# Maximum retries to get read lock. Each retry is read-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire READ lock'. -max-read-wait-retries = 2 - -# Maximum microseconds for trying to get write lock on broadcast transaction. -write-wait-micro = 500000 - -# Maximum retries to get write lock. Each retry is write-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire WRITE lock'. -max-write-wait-retries = 3 - -# Do all write operations (push_block/push_transaction) in the single thread. -# Write lock of database is very heavy. When many threads tries to lock database on writing, rpc-clients -# receive many errors 'Unable to acquire READ lock' ('Unable to acquire WRITE lock'). -# Enabling of this options can increase performance. -single-write-thread = true - -# Enable plugin notifications about operations in a pushed transaction, which should be included to the next generated -# block. Plugins doesn't validate data in operations, they only update its own indexes, so notifications can be -# disabled on push_transaction() without any side-effects. The option doesn't have effect on a pushing signed blocks, -# so it is safe. -# Disabling of this option can increase performance. -enable-plugins-on-push-transaction = false - -# A start size for shared memory file when it doesn't have any data. Possible cases: -# - If shared memory has data and the value is greater then the size of shared_memory.bin, -# the file will be grown to requested size. 
-# - If shared memory has data and the value is less then the size of shared_memory.bin, nothing happens. -# Changing of this parameter doesn't require the replaying. -shared-file-size = 2G - -# The minimum free space in the shared memory file. When free space reaches the following value, the size of the -# shared_memory.bin increases by the value of inc-shared-file-size. -min-free-shared-file-size = 500M - -# Step of increasing size of shared_memory.bin. When the free memory size reaches min-free-shared-file-size, -# the shared memory size increases by the following value. -inc-shared-file-size = 2G - -# How often do checking the free space in shared_memory.bin. A very frequent checking can decrease performance. -# It's not critical if the free size became very small, because the daemon catches the `bad_alloc` exception -# and resizes. The optimal strategy is do checking of the free space, but not very often. -block-num-check-free-size = 1000 # each 3000 seconds - -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api account_by_key operation_history account_history block_info raw_block validator_api mongo_db - -# For connect to mongodb which is running outside Docker (if vizd running inside) -mongodb-uri = mongodb://172.17.0.1:27017/viz - -# Remove votes before defined block, should increase performance -clear-votes-before-block = 0 # clear votes after each cashout - -# Virtual operations will not be passed to the plugins, enabling of the option helps to save some memory. -skip-virtual-ops = false - -# Defines a range of accounts to track by the account_history plugin as a json pair ["from","to"] [from,to] -# track-account-range = - -# Defines a list of operations which will be explicitly logged by the account_history plugin. 
-# history-whitelist-ops = account_create_operation account_update_operation content_operation delete_content_operation vote_operation author_reward_operation curation_reward_operation transfer_operation transfer_to_vesting_operation withdraw_vesting_operation witness_update_operation account_witness_vote_operation account_witness_proxy_operation fill_vesting_withdraw_operation shutdown_witness_operation custom_json_operation request_account_recovery_operation recover_account_operation change_recovery_account_operation escrow_transfer_operation escrow_approve_operation escrow_dispute_operation escrow_release_operation content_benefactor_reward_operation - -# Defines a list of operations which will be explicitly ignored by the account_history plugin. -# history-blacklist-ops = - -# Defines starting block from which recording stats by the account_history plugin. -# history-start-block = 0 - - -# Enable block production, even if the chain is stale. -enable-stale-production = false - - -# Percent of validators (0-99) that must be participating in order to produce blocks -required-participation = 0 - -# name of validator controlled by this node (e.g. 
initwitness ) -# validator = -# # validator = # DEPRECATED: use 'validator' - -# WIF PRIVATE KEY to be used by one or more validators -# private-key = - -# declare an appender named "stderr" that writes messages to the console -[log.console_appender.stderr] -stream=std_error - -# declare an appender named "p2p" that writes messages to p2p.log -[log.file_appender.p2p] -filename=logs/p2p/p2p.log -# filename can be absolute or relative to this config file - -# route any messages logged to the default logger to the "stderr" logger we -# declared above, if they are info level are higher -[logger.default] -level=debug -appenders=stderr - -# route messages sent to the "p2p" logger to stderr too -[logger.p2p] -level=error -appenders=stderr From fd8c6782f439be3520d2a7e39cf39b8a95709929 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 21 May 2026 19:22:45 +0400 Subject: [PATCH 04/30] fix(network): replace ilog with dlog for peer connection logging - Changed info-level logs (ilog) to debug-level logs (dlog) when connecting to peers and sending DLT hello messages - Updated rate-limit notification from ilog to dlog for peer exchange requests - Ensured logging reflects appropriate verbosity level for peer communication events --- libraries/network/dlt_p2p_node.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index 766dc495aa..5ebb49546d 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -285,7 +285,7 @@ void dlt_p2p_node::connect_to_peer(const fc::ip::endpoint& ep) { // Send hello send_message(pid, message(build_hello_message())); - ilog(DLT_LOG_GREEN "Connected to peer ${ep}, sent DLT hello" DLT_LOG_RESET, ("ep", ep)); + dlog(DLT_LOG_GREEN "Connected to peer ${ep}, sent DLT hello" DLT_LOG_RESET, ("ep", ep)); // Start read loop as a fiber on the p2p thread start_read_loop(pid); @@ -1770,7 +1770,7 @@ void 
dlt_p2p_node::on_dlt_peer_exchange_reply(peer_id peer, const dlt_peer_excha void dlt_p2p_node::on_dlt_peer_exchange_rate_limited(peer_id peer, const dlt_peer_exchange_rate_limited& msg) { auto it = _peer_states.find(peer); auto ep = (it != _peer_states.end()) ? std::string(it->second.endpoint) : std::to_string(peer); - ilog(DLT_LOG_DGRAY "Peer ${ep} rate-limited our exchange request, wait ${w}s" DLT_LOG_RESET, + dlog(DLT_LOG_DGRAY "Peer ${ep} rate-limited our exchange request, wait ${w}s" DLT_LOG_RESET, ("ep", ep)("w", msg.wait_seconds)); // Record the rate-limit locally so periodic_peer_exchange() stops From 40cd17f3036c2aabb146235efe98e85d68233488 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 21 May 2026 20:44:30 +0400 Subject: [PATCH 05/30] fix(webserver): add CORS headers and handle preflight OPTIONS requests - Handle CORS preflight by responding to OPTIONS method with proper headers - Append Access-Control-Allow-Origin header to all HTTP responses - Add Access-Control-Allow-Methods, Allow-Headers, and Max-Age headers for OPTIONS responses - Ensure CORS headers are included on error and success responses - Prevent CORS issues for cross-origin API calls through the webserver plugin --- plugins/webserver/webserver_plugin.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/plugins/webserver/webserver_plugin.cpp b/plugins/webserver/webserver_plugin.cpp index 3c4aadd63b..a37fd1d99d 100644 --- a/plugins/webserver/webserver_plugin.cpp +++ b/plugins/webserver/webserver_plugin.cpp @@ -421,10 +421,22 @@ namespace graphene { auto con = server->get_con_from_hdl(hdl); con->defer_http_response(); + // CORS preflight + if (con->get_request().get_method() == "OPTIONS") { + con->append_header("Access-Control-Allow-Origin", "*"); + con->append_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS"); + con->append_header("Access-Control-Allow-Headers", "Content-Type, Authorization"); + con->append_header("Access-Control-Max-Age", "86400"); + 
con->set_status(websocketpp::http::status_code::ok); + try { con->send_http_response(); } catch (...) {} + return; + } + thread_pool_ios.post([con, this]() { auto body = con->get_request_body(); if (body.empty()) { + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body("empty request body"); con->set_status(websocketpp::http::status_code::bad_request); try { con->send_http_response(); } catch (...) {} @@ -439,12 +451,14 @@ namespace graphene { // Invalid JSON — skip cache, let json_rpc handle the error try { api->call(body, [con](const std::string &data){ + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body(data); con->set_status(websocketpp::http::status_code::ok); con->send_http_response(); }); } catch (fc::exception &e) { edump((e)); + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body("Could not call API"); con->set_status(websocketpp::http::status_code::not_found); try { con->send_http_response(); } catch (...) {} @@ -466,6 +480,7 @@ namespace graphene { if (cached_response.valid()) { // Patch the id in cached response to match request std::string patched = patch_response_id(*cached_response, request_id); + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body(patched); con->set_status(websocketpp::http::status_code::ok); con->send_http_response(); @@ -477,6 +492,7 @@ namespace graphene { api->call(body, [con, this, request_hash, cacheable](const std::string &data){ // this lambda can be called from any thread in application // for example, when task was delegated ( see msg_pack(msg_pack&&) ) + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body(data); con->set_status(websocketpp::http::status_code::ok); con->send_http_response(); @@ -489,6 +505,7 @@ namespace graphene { } catch (fc::exception &e) { // this case happens if exception was thrown on parsing request edump((e)); + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body("Could not call API"); 
con->set_status(websocketpp::http::status_code::not_found); // this sending response can't be merged with sending response from try-block From 5ec792aa9fae12dd375f37c0ccc48872cfe85ab1 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Fri, 22 May 2026 06:59:24 +0400 Subject: [PATCH 06/30] fix(network): prevent log spamming during peer disconnect - Add check to skip logging if disconnect is already in progress for a peer - Avoid re-entrance in send_message calls during handle_disconnect coroutine - Prevent excessive log entries when send queue is at max depth and peer disconnects --- libraries/network/dlt_p2p_node.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index 5ebb49546d..e032328b8d 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -514,6 +514,10 @@ void dlt_p2p_node::send_message(peer_id peer, const message& msg) { ++state.send_queue_total; } else { // Queue is at max depth — peer can't consume data fast enough. + // Skip if disconnect is already in progress: handle_disconnect yields + // at cancel_and_wait, allowing other fibers to call send_message for + // the same peer, which would re-enter this branch and spam the log. + if (_disconnect_in_progress.count(peer)) return; // Capture info before handle_disconnect potentially erases the state. 
std::string ep = std::string(state.endpoint); uint32_t dropped = state.send_queue_dropped; From 35aef463ef6ef5df4587b0eec283b0f5e1ce892a Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Fri, 22 May 2026 07:09:22 +0400 Subject: [PATCH 07/30] fix(network): prevent deadlock by closing socket before cancelling read fiber - Close socket first to unblock pending I/O and avoid multi-second hangs - Erase connection after closing to prevent dangling shared_ptr references - Cancel read fiber only after socket is closed to ensure immediate exit - Retain reentrancy guard to keep peer state valid during disconnect handling - Adjust order of operations to fix deadlock when multiple peers disconnect simultaneously --- libraries/network/dlt_p2p_node.cpp | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index e032328b8d..2e11dddac2 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -342,23 +342,29 @@ void dlt_p2p_node::handle_disconnect(peer_id peer, const std::string& reason, bo } } - // Cancel read fiber — cancel_and_wait yields, allowing drain_send_queue - // to resume on this thread. The reentrancy guard above ensures that - // reentrant handle_disconnect call returns immediately without touching - // _peer_states, so state/it remain valid when we resume here. - auto fiber_it = _read_fibers.find(peer); - if (fiber_it != _read_fibers.end()) { - try { if (fiber_it->second.valid()) fiber_it->second.cancel_and_wait(__FUNCTION__); } catch (...) {} - _read_fibers.erase(fiber_it); - } - - // Close connection + // Close the socket FIRST — this immediately unblocks any pending readsome/writesome + // in the read fiber and drain_send_queue fiber, causing them to throw and exit. + // drain_send_queue holds sock by owning shared_ptr copy, so erasing _connections + // here does not leave it with a dangling reference. 
+ // If we closed AFTER cancel_and_wait, the fiber would be stuck waiting for network + // I/O that can never arrive on a dead peer — causing a multi-second hang per peer, + // and a full deadlock when N peers disconnect simultaneously (p82: silent reboot). auto conn_it = _connections.find(peer); if (conn_it != _connections.end()) { try { if (conn_it->second) conn_it->second->close(); } catch (...) {} _connections.erase(conn_it); } + // Cancel read fiber — cancel_and_wait yields, but the fiber exits immediately + // because its socket I/O is already unblocked by the close() above. + // The reentrancy guard above ensures that reentrant handle_disconnect calls + // return immediately without touching _peer_states, so state/it remain valid. + auto fiber_it = _read_fibers.find(peer); + if (fiber_it != _read_fibers.end()) { + try { if (fiber_it->second.valid()) fiber_it->second.cancel_and_wait(__FUNCTION__); } catch (...) {} + _read_fibers.erase(fiber_it); + } + // Clear send guard and drain any queued messages _peer_sending.erase(peer); state.send_queue.clear(); @@ -542,7 +548,7 @@ void dlt_p2p_node::drain_send_queue(peer_id peer, std::vector buf) { _peer_sending.erase(peer); return; } - auto& sock = conn_it->second; + auto sock = conn_it->second; // owning copy — handle_disconnect may erase _connections while we yield in writesome // Cache endpoint before entering the try block — handle_disconnect may // remove the peer from _peer_states before the catch block runs, making From 4fc71ec1a1e9697b1e37eb955214fd9eaecf3c21 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Fri, 22 May 2026 10:43:14 +0400 Subject: [PATCH 08/30] docs(introduction): add community symbol and display conventions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Introduced Ƶ as the short symbol for VIZ chosen by the community - Explained common practice of showing balances with 2 decimal places - Noted that even staked funds (SHARES) are displayed as 
Ƶ with staking notes - Clarified symbol usage in wallets, explorers, and applications docs(webserver): document native CORS support in webserver plugin - Detailed handling of browser cross-origin requests without reverse proxy - Specified preflight (OPTIONS) response headers and values - Confirmed all other responses include Access-Control-Allow-Origin: * - Mentioned compatibility with production setups using nginx proxy - Highlighted use cases for browser-based wallets and dApps calling JSON-RPC endpoints directly --- docs/introduction/key-concepts.md | 6 ++++++ docs/plugins/webserver.md | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/docs/introduction/key-concepts.md b/docs/introduction/key-concepts.md index 3dd3154150..e6b53d3e26 100644 --- a/docs/introduction/key-concepts.md +++ b/docs/introduction/key-concepts.md @@ -45,6 +45,12 @@ An authority is a multi-sig structure: `{ weight_threshold, account_auths[], key - Created by staking VIZ; withdrawn back to VIZ over 28 intervals (≈28 days) - Not directly transferable; can be delegated to other accounts +### Community Symbol: Ƶ + +The community has chosen **Ƶ** as the short symbol for VIZ. Most wallets, explorers, and applications display it instead of the full ticker. + +It is also common practice to show balances with **2 decimal places** regardless of the underlying token type. Even staked funds (SHARES) are often displayed as `Ƶ` with a note that they are staked in the account, rather than switching to the `SHARES` unit and its 6-decimal format. + --- ## Energy System diff --git a/docs/plugins/webserver.md b/docs/plugins/webserver.md index c8262af9f9..64f56709af 100644 --- a/docs/plugins/webserver.md +++ b/docs/plugins/webserver.md @@ -93,6 +93,25 @@ Subscriptions require a persistent WebSocket connection. They are not available --- +## CORS + +The webserver plugin handles browser cross-origin requests natively — no reverse proxy is required for local or development setups. 
+ +**Preflight requests** (`OPTIONS`) are answered immediately with: + +| Response header | Value | +|----------------|-------| +| `Access-Control-Allow-Origin` | `*` | +| `Access-Control-Allow-Methods` | `POST, GET, OPTIONS` | +| `Access-Control-Allow-Headers` | `Content-Type, Authorization` | +| `Access-Control-Max-Age` | `86400` | + +**All other HTTP responses** include `Access-Control-Allow-Origin: *`. + +This allows browser-based wallets and dApps to call the JSON-RPC endpoint directly. For production deployments behind nginx, CORS is handled at the proxy layer (see [Exposing the API via HTTPS](#exposing-the-api-via-https-nginx--certbot)) — both layers setting the header is harmless. + +--- + ## Security - **Bind to localhost** (`127.0.0.1`) and use a reverse proxy (nginx/Caddy) for public exposure. Binding to `0.0.0.0` exposes the RPC directly to the network. From 3e344695f3cc091c5a7ca37f2b793548f59125c3 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Sat, 23 May 2026 20:41:50 +0400 Subject: [PATCH 09/30] chain: clear currently_syncing after auto-recovery completes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After shared-memory corruption triggers attempt_auto_recovery(), the function sets currently_syncing=true so the validator plugin defers block production during the wipe / snapshot import / dlt_block_log replay sequence. Once the database is rebuilt and P2P is resumed, the flag was expected to self-clear on the next applied block via plugin_impl::accept_block(), which stores the caller-supplied sync_mode flag whenever a block is successfully pushed. That self-clearing path never runs on the DLT pipeline. The DLT P2P delegate (dlt_delegate::accept_block in plugins/p2p/p2p_plugin.cpp) calls chain.db().push_block() directly and bypasses plugin_impl::accept_block() entirely, so neither broadcast blocks nor gap-fill replies ever update currently_syncing. 
The only remaining clearer is transition_to_forward(), but a node that was in FORWARD mode at the moment of corruption stays in FORWARD throughout pause/resume — transition_to_forward() is never invoked, so the flag is permanently stuck at true. The validator gate at plugins/validator/validator.cpp checks chain().is_syncing() in DLT mode and returns not_synced, producing the observed indefinite "Block production deferred: not_synced (head=#X, catching_up=false)" loop where head keeps advancing via P2P but no local block is produced. Fix: explicitly clear currently_syncing immediately after do_snapshot_load(data_dir, true) returns successfully in attempt_auto_recovery(). Post-recovery catchup remains correctly gated by _catchup_after_pause in the P2P layer, which the periodic task clears once no peer is ahead of our head. --- plugins/chain/plugin.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/plugins/chain/plugin.cpp b/plugins/chain/plugin.cpp index 4ed946d25d..84d748692e 100644 --- a/plugins/chain/plugin.cpp +++ b/plugins/chain/plugin.cpp @@ -946,6 +946,19 @@ namespace chain { wlog("=== AUTO-RECOVERY COMPLETE: node resumed at block ${n} ===", ("n", my->db.head_block_num())); + // Recovery is complete: clear the syncing flag so the validator + // plugin can resume block production once the post-pause catchup + // window closes. The DLT P2P delegate calls db.push_block() + // directly and bypasses plugin_impl::accept_block(), so the + // flag-update path that would otherwise self-clear this on the + // next applied block never runs on the DLT path. Without this + // explicit reset, the flag set above stays true forever and + // is_syncing() permanently gates production with not_synced. + // The remaining catchup window is gated by _catchup_after_pause + // in the P2P layer, which clears itself once peers are no longer + // ahead of our head. + my->currently_syncing.store(false, std::memory_order_relaxed); + // 5. 
Resume P2P now that the database is fully rebuilt. // do_snapshot_load(is_recovery=true) already set LIB = head // so P2P will request blocks after the snapshot head. From f5d0d46d9bac97f9021c79caca7981f4923ef8a0 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Sat, 23 May 2026 21:53:57 +0400 Subject: [PATCH 10/30] fix(snapshot): defer wake-up until block strictly after validator slot The deferred-snapshot wake-up in on_applied_block previously used head_block_time() >= pending_snapshot_safe_after_time, which fires on the very block the local validator just produced. The applied_block signal is dispatched synchronously from _push_block inside db.generate_block(), and the validator only calls p2p().broadcast_block() after generate_block() returns. So firing the snapshot on the same block let the snapshot read-lock start before the produced block had been broadcast to peers. Change the condition to strictly greater than: the deferred snapshot now waits until a SUBSEQUENT block is applied. That block is built by another validator on top of ours, proving our block was produced, applied locally, and propagated through the network. Only then does the snapshot start reading state. Cost is ~one block interval of additional delay, and only on slots where the local validator was the deferral target. The non-producer path is unchanged: snapshots still fire immediately at the originating block when is_validator_producing_soon() is false. Also expanded the surrounding comment block and updated the wake-up log messages to reflect the new semantics. --- plugins/snapshot/plugin.cpp | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/plugins/snapshot/plugin.cpp b/plugins/snapshot/plugin.cpp index d4e9177caf..4082b54433 100644 --- a/plugins/snapshot/plugin.cpp +++ b/plugins/snapshot/plugin.cpp @@ -1973,26 +1973,38 @@ void snapshot_plugin::plugin_impl::on_applied_block(const graphene::protocol::si // deferred. 
We do NOT re-check is_validator_producing_soon() here to avoid an // infinite deferral loop where the validator is always scheduled soon. // - // Instead, we wait for the specific validator slot to be filled: the deferred - // snapshot only fires once head_block_time() >= pending_snapshot_safe_after_time, - // meaning the validator's block has been produced and applied (or the slot was - // missed and the chain moved past it). This prevents the snapshot from starting - // while the validator is about to produce. + // We wait for a block to be applied that is STRICTLY AFTER the validator slot + // we deferred for: head_block_time() > pending_snapshot_safe_after_time. + // + // Why strictly greater (not >=): + // The applied_block signal is dispatched synchronously inside _push_block, + // BEFORE generate_block() returns to the validator and BEFORE the validator + // calls p2p().broadcast_block(). If we fired the snapshot on the same block + // the local validator just produced, the snapshot read-lock could start + // before the produced block has been broadcast to peers. + // + // Requiring head_block_time > slot_time means we wait until a SUBSEQUENT + // block is applied. That block is necessarily produced by another validator + // on top of ours, which proves our block was successfully produced, applied + // locally, and propagated through the network. Only then is it safe to + // start the snapshot read pass. + // + // Cost: ~one block interval of additional delay, but only when the local + // validator was the deferral target. When we are not the producer, the + // snapshot fires immediately at the originating block (no deferral path). if (snapshot_pending && !is_syncing) { // If safe_after_time is epoch (lookup failed), fire immediately as fallback. - // If head_block_time has reached/passed the validator slot time, the block - // at that slot has been applied (or the slot was skipped by a gap). 
bool safe_to_fire = (pending_snapshot_safe_after_time == fc::time_point_sec()) || - (db.head_block_time() >= pending_snapshot_safe_after_time); + (db.head_block_time() > pending_snapshot_safe_after_time); if (safe_to_fire) { fc::path output(pending_snapshot_path); snapshot_pending = false; pending_snapshot_path.clear(); pending_snapshot_safe_after_time = fc::time_point_sec(); - ilog(CLOG_GREEN "Creating deferred snapshot now (validator slot passed): ${p}" CLOG_RESET, ("p", output.string())); + ilog(CLOG_GREEN "Creating deferred snapshot now (validator slot passed and block broadcast): ${p}" CLOG_RESET, ("p", output.string())); schedule_async_snapshot(output, "deferred"); } else { - dlog("Deferred snapshot waiting for validator slot at ${t} (head_block_time=${h})", + dlog("Deferred snapshot waiting for block strictly after validator slot ${t} (head_block_time=${h})", ("t", pending_snapshot_safe_after_time)("h", db.head_block_time())); } } From f31326667b6a7700266126e6aed7218f6e90b0d2 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Mon, 25 May 2026 17:23:53 +0400 Subject: [PATCH 11/30] fix: show correct next scheduled validator in missed block log Replace hardcoded b.validator with get_scheduled_validator(i + 2) so each missed block line shows the validator scheduled for the slot immediately after the miss, instead of repeating the current block producer for every line. 
--- libraries/chain/database.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index 10fafcb33b..4f75234642 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -5500,7 +5500,7 @@ namespace graphene { namespace chain { ("w", validator_missed.owner) ("n", head_block_num() + i + 1) ("t", get_slot_time(i + 1)) - ("next", b.validator)); + ("next", get_scheduled_validator(i + 2))); } modify(validator_missed, [&](validator_object &w) { From bf42081070eee882f8b0adbeaaa13d1812658295 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Mon, 25 May 2026 17:43:05 +0400 Subject: [PATCH 12/30] fix(auto-recovery): reset recovery_in_progress flag and prevent SYNC/FORWARD oscillation The static atomic recovery_in_progress flag in attempt_auto_recovery() was never reset to false after successful recovery, making any subsequent corruption event permanently unrecoverable ("already in progress, skipping duplicate attempt"). Reset it after P2P resume so the node can recover from future corruption events. Add a consecutive recovery counter (max 3 within 5 minutes) to prevent infinite recovery loops when the snapshot or block log is itself corrupted. In request_gap_fill(), remove the SYNC transition and peer request loop from the "no peer available" fallback path. When no peer has a higher head, transition_to_sync() followed by request_blocks_from_peer() immediately detects all peers as "caught up" and calls transition_to_forward(), producing rapid SYNC->FORWARD oscillation every 5 seconds. Instead, just log and let the periodic task retry when new peers connect. 
--- libraries/network/dlt_p2p_node.cpp | 21 +++++++----------- plugins/chain/plugin.cpp | 35 +++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index 2e11dddac2..55462bcebc 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -2131,21 +2131,16 @@ void dlt_p2p_node::request_gap_fill() { ("ep", peer_state.endpoint)("ph", peer_state.peer_head_num)("ex", peer_state.exchange_enabled)); send_message(any_active_peer, message(req)); } else { - // P39 fix: No peer at all with a higher head — gap fill - // can't help. Transition to SYNC immediately instead of - // waiting for stagnation detection. - wlog("Gap fill: no peer available — transitioning to SYNC"); + // No peer with a higher head is available for gap fill. + // Do NOT transition to SYNC — without a peer ahead of us, + // request_blocks_from_peer() would immediately see all peers + // as "caught up" and call transition_to_forward(), producing + // rapid SYNC→FORWARD oscillation. Instead, just log and let + // the periodic task retry when new peers connect or existing + // peers advance their head. 
+ wlog("Gap fill: no peer available with higher head — waiting for peers"); _gap_fill_in_progress = false; _gap_fill_start_time = fc::time_point(); - transition_to_sync(); - // Request blocks from all active peers - for (const auto& _pi : _peer_states) { - const auto& state = _pi.second; - if (state.lifecycle_state == DLT_PEER_LIFECYCLE_ACTIVE || - state.lifecycle_state == DLT_PEER_LIFECYCLE_SYNCING) { - request_blocks_from_peer(_pi.first); - } - } } } diff --git a/plugins/chain/plugin.cpp b/plugins/chain/plugin.cpp index 84d748692e..06ba79df5a 100644 --- a/plugins/chain/plugin.cpp +++ b/plugins/chain/plugin.cpp @@ -884,13 +884,40 @@ namespace chain { void plugin::attempt_auto_recovery() { static std::atomic<bool> recovery_in_progress{false}; + static constexpr int MAX_CONSECUTIVE_RECOVERIES = 3; + static constexpr int RECOVERY_COOLDOWN_SEC = 300; // 5 minutes + static int consecutive_recoveries = 0; + static fc::time_point last_recovery_time; + bool expected = false; if (!recovery_in_progress.compare_exchange_strong(expected, true)) { wlog("Auto-recovery already in progress, skipping duplicate attempt"); return; } - wlog("=== IMMEDIATE AUTO-RECOVERY: shared memory corruption detected ==="); + // Guard against infinite recovery loops: if the same block keeps + // failing after recovery, the snapshot or block log may be corrupted. + // Reset the counter after a cooldown period to allow eventual retry. + auto now = fc::time_point::now(); + if (last_recovery_time != fc::time_point() && + (now - last_recovery_time).to_seconds() > RECOVERY_COOLDOWN_SEC) { + consecutive_recoveries = 0; + } + consecutive_recoveries++; + last_recovery_time = now; + + if (consecutive_recoveries > MAX_CONSECUTIVE_RECOVERIES) { + elog("Auto-recovery limit reached: ${n} consecutive attempts within ${c}s cooldown. " + "The snapshot or block log may be corrupted — manual intervention required. 
" + "Try a fresh snapshot or delete the block log.", + ("n", consecutive_recoveries)("c", RECOVERY_COOLDOWN_SEC)); + recovery_in_progress.store(false, std::memory_order_release); + appbase::app().quit(); + return; + } + + wlog("=== IMMEDIATE AUTO-RECOVERY: shared memory corruption detected (attempt ${n}/${max}) ===", + ("n", consecutive_recoveries)("max", MAX_CONSECUTIVE_RECOVERIES)); // 1. Find latest snapshot fc::path snap = my->find_latest_snapshot(); @@ -971,6 +998,12 @@ namespace chain { } catch (...) { wlog("Auto-recovery: failed to resume P2P"); } + + // Allow future recovery attempts. Without this reset the + // static atomic stays true forever and any subsequent + // corruption event is silently discarded, leaving the node + // permanently stuck. + recovery_in_progress.store(false, std::memory_order_release); } catch (const fc::exception& e) { elog("Auto-recovery FAILED during snapshot load: ${e}", ("e", e.to_detail_string())); appbase::app().quit(); From a5d8e61ee46616bed280f0f1951f93d7feb9e5d6 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Mon, 25 May 2026 17:49:28 +0400 Subject: [PATCH 13/30] Add diagnostic logs to database::open() startup path Node crashes silently between DLT block log open and "Done opening block log" with no error output. Add step-by-step ilog() calls to every major operation in the critical path so the exact failing step is visible in the next crash log: - block_log and dlt_block_log head after open - Before/after undo_all() with revision values - Revision mismatch detection with values - Before reading head block from block_log - fork_db seeding start in both normal and DLT modes - Before/after init_hardforks() (second call) - Before validator schedule integrity check Also add db.open() success log in chain plugin_startup. 
--- libraries/chain/database.cpp | 20 ++++++++++++++++---- plugins/chain/plugin.cpp | 2 ++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index 4f75234642..3f39241148 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -253,9 +253,12 @@ namespace graphene { namespace chain { } _block_log.open(data_dir / "block_log"); + ilog("block_log opened, head=${h}", ("h", _block_log.head() ? std::to_string(_block_log.head()->block_num()) : std::string("none"))); _dlt_block_log.open(data_dir / "dlt_block_log"); + ilog("dlt_block_log opened, head=${h}", ("h", _dlt_block_log.head() ? std::to_string(_dlt_block_log.head_block_num()) : std::string("none"))); // Rewind all undo state. This should return us to the state at the last irreversible block. + ilog("Calling undo_all()..."); // Wrap in a try-catch for boost::interprocess::lock_exception: // After a hard crash, the previous process may have died while holding // shared-memory internal mutexes (e.g., inside managed_mapped_file allocator). @@ -275,8 +278,12 @@ namespace graphene { namespace chain { "Shared memory lock corrupted (previous crash): ${what}", ("what", e.what())); } + ilog("undo_all() completed, revision=${rev} head_block_num=${hbn}", + ("rev", revision())("hbn", head_block_num())); if (revision() != head_block_num()) { + ilog("Revision mismatch: revision=${rev} != head_block_num=${hbn}, calling init_hardforks()", + ("rev", revision())("hbn", head_block_num())); with_strong_read_lock([&]() { init_hardforks(); // Writes to local state, but reads from db }); @@ -290,6 +297,7 @@ namespace graphene { namespace chain { } if (head_block_num()) { + ilog("Validating block log consistency, head_block_num=${h}", ("h", head_block_num())); // Validate DLT block log consistency before seeding fork_db. // After a crash, the DLT block log index/data files can become // truncated (e.g., only 1 block when database has thousands). 
@@ -303,14 +311,16 @@ namespace graphene { namespace chain { _dlt_block_log.reset(); } + ilog("Reading head block #${n} from block_log", ("n", head_block_num())); auto head_block = _block_log.read_block_by_num(head_block_num()); if (head_block.valid()) { // Block_log has the head block FC_ASSERT(head_block->id() == head_block_id(), "Chain state does not match block log. Please reindex blockchain."); + ilog("Head block found in block_log, starting fork_db and seeding"); _fork_db.start_block(*head_block); - // P22 fix: Seed fork_db with recent blocks (up to 100) + // Seed fork_db with recent blocks (up to 100) // so that incoming sync blocks from peers near our head // can find their parent chain. After restart, fork_db only // has the head block; if peers send blocks a few behind @@ -343,9 +353,8 @@ namespace graphene { namespace chain { } else { // DLT mode: block_log is empty but chainbase has state (loaded from snapshot). set_dlt_mode(true); - wlog("DLT mode detected: block log is empty but database has state at block ${n}. 
" - "Skipping block log validation.", - ("n", head_block_num())); + ilog("DLT mode: block_log empty, seeding fork_db from DLT log for head_block_num=${h}", + ("h", head_block_num())); // Seed fork_db bottom-up from the oldest available DLT block // within a seeding window so that all blocks from oldest to @@ -424,11 +433,14 @@ namespace graphene { namespace chain { wlog("Done opening block log, elapsed time ${t} sec", ("t", double((end - start).count()) / 1000000.0)); } + ilog("Block log open complete, calling init_hardforks()"); with_strong_read_lock([&]() { init_hardforks(); // Writes to local state, but reads from db }); + ilog("init_hardforks() completed"); // === HARDFORK 12: EMERGENCY SCHEDULE RECOVERY === + ilog("Checking validator schedule integrity at head_block_num=${h}", ("h", head_block_num())); // If the node shut down (or crashed) during emergency mode while // update_validator_schedule() had zeroed the schedule but before the // hybrid override could fill it with committee, the schedule may diff --git a/plugins/chain/plugin.cpp b/plugins/chain/plugin.cpp index 06ba79df5a..c90ec87233 100644 --- a/plugins/chain/plugin.cpp +++ b/plugins/chain/plugin.cpp @@ -631,7 +631,9 @@ namespace chain { try { ilog("Opening shared memory from ${path}", ("path", my->shared_memory_dir.generic_string())); my->db.open(data_dir, my->shared_memory_dir, CHAIN_INIT_SUPPLY, my->shared_memory_size, chainbase::database::read_write/*, my->validate_invariants*/ ); + ilog("db.open() completed successfully, head_block_num=${h}", ("h", my->db.head_block_num())); auto head_block_log = my->db.get_block_log().head(); + ilog("block_log head=${h}", ("h", head_block_log ? 
std::to_string(head_block_log->block_num()) : std::string("none"))); my->replay |= head_block_log && my->db.revision() != head_block_log->block_num(); if (my->replay) { From 1a2d8880f692bd647d9470200363ecbd0d4912c1 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Mon, 25 May 2026 19:17:50 +0400 Subject: [PATCH 14/30] fix(memory-order): upgrade currently_syncing atomics to release/acquire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All reads and writes to the currently_syncing atomic flag used relaxed ordering, which does not guarantee cross-thread visibility on non-x86 architectures. The recovery thread writes currently_syncing=false after rebuilding the database, and the validator production thread reads it to decide whether to produce blocks. Upgrade to release/acquire ordering to ensure the store is visible to the reader on all platforms. store → memory_order_release (3 sites) load → memory_order_acquire (1 site) exchange → memory_order_acq_rel (1 site) --- plugins/chain/plugin.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/plugins/chain/plugin.cpp b/plugins/chain/plugin.cpp index c90ec87233..1cdfebb000 100644 --- a/plugins/chain/plugin.cpp +++ b/plugins/chain/plugin.cpp @@ -139,7 +139,7 @@ namespace chain { // particularly when the emergency master receives blocks from a // competing fork that have gap=0 but previous != head_block_id. 
if (block_applied) { - currently_syncing.store(currently_syncing_flag, std::memory_order_relaxed); + currently_syncing.store(currently_syncing_flag, std::memory_order_release); if (currently_syncing_flag) { if (!sync_start_logged) { ilog("\033[92m>>> Syncing Blockchain started from block #${n} (head: ${head})\033[0m", @@ -280,11 +280,11 @@ namespace chain { } bool plugin::is_syncing() const { - return my->currently_syncing.load(std::memory_order_relaxed); + return my->currently_syncing.load(std::memory_order_acquire); } void plugin::clear_syncing() { - if (my->currently_syncing.exchange(false, std::memory_order_relaxed)) { + if (my->currently_syncing.exchange(false, std::memory_order_acq_rel)) { ilog("Sync complete: cleared currently_syncing flag (validator block production may resume)"); my->sync_start_logged = false; } @@ -955,7 +955,7 @@ namespace chain { } // Mark syncing so witness plugin defers block production during recovery. - my->currently_syncing.store(true, std::memory_order_relaxed); + my->currently_syncing.store(true, std::memory_order_release); wlog("Auto-recovery: closing database and recovering from snapshot ${p}...", ("p", snap.string())); @@ -986,7 +986,7 @@ namespace chain { // The remaining catchup window is gated by _catchup_after_pause // in the P2P layer, which clears itself once peers are no longer // ahead of our head. - my->currently_syncing.store(false, std::memory_order_relaxed); + my->currently_syncing.store(false, std::memory_order_release); // 5. Resume P2P now that the database is fully rebuilt. 
// do_snapshot_load(is_recovery=true) already set LIB = head From 6c6d7a45d9f45177970240d14b828a20e9581e04 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Mon, 25 May 2026 19:24:43 +0400 Subject: [PATCH 15/30] Add crash detection for undo_all() using marker file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit undo_all() in database::open() causes a silent SIGSEGV when shared memory is corrupted after a hard crash. Since segfaults bypass all C++ exception handlers, the node enters an infinite restart loop in Docker without ever reaching the recovery path. Introduce a marker file (state/undo_all_in_progress) that is created before undo_all() and removed after it completes. If the process crashes inside undo_all(), the marker survives and triggers database_revision_exception on the next startup, which activates the existing snapshot recovery path. Marker cleanup is added to: - database::open() — removed after successful undo_all() - database::open_from_snapshot() — cleaned before snapshot import - database::wipe() — cleaned during shared memory wipe --- libraries/chain/database.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index 3f39241148..cd8652d3ed 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -258,6 +258,25 @@ namespace graphene { namespace chain { ilog("dlt_block_log opened, head=${h}", ("h", _dlt_block_log.head() ? std::to_string(_dlt_block_log.head_block_num()) : std::string("none"))); // Rewind all undo state. This should return us to the state at the last irreversible block. + // + // Crash guard: undo_all() walks shared-memory data structures that may + // be corrupted after a hard crash (SIGSEGV). Since a segfault kills the + // process instantly, no C++ exception handler can catch it. 
We use a + // marker file to detect that a previous run died inside undo_all() and + // throw database_revision_exception to trigger the recovery path instead. + auto undo_marker = shared_mem_dir / "undo_all_in_progress"; + if (boost::filesystem::exists(undo_marker)) { + wlog("Detected incomplete undo_all from previous startup. " + "Shared memory is likely corrupted. " + "Throwing revision mismatch to trigger recovery."); + FC_THROW_EXCEPTION(database_revision_exception, + "Shared memory corrupted: previous undo_all() crashed (marker detected)"); + } + + // Write marker — will be removed after undo_all() succeeds. + // If the process crashes inside undo_all(), the marker survives + // and triggers recovery on the next startup. + { std::ofstream f(undo_marker.string()); } ilog("Calling undo_all()..."); // Wrap in a try-catch for boost::interprocess::lock_exception: // After a hard crash, the previous process may have died while holding @@ -278,6 +297,8 @@ namespace graphene { namespace chain { "Shared memory lock corrupted (previous crash): ${what}", ("what", e.what())); } + // undo_all() completed successfully — remove the crash marker. + boost::filesystem::remove(undo_marker); ilog("undo_all() completed, revision=${rev} head_block_num=${hbn}", ("rev", revision())("hbn", head_block_num())); @@ -513,6 +534,15 @@ namespace graphene { namespace chain { _dlt_mode = true; // Set before init_genesis so all subsequent code sees DLT mode + // Clean up undo_all crash marker if present from a previous failed startup. + // chainbase::database::wipe() only removes shared_memory.bin, not other files + // in the directory, so we must do this explicitly. + auto undo_marker = shared_mem_dir / "undo_all_in_progress"; + if (boost::filesystem::exists(undo_marker)) { + wlog("Removing stale undo_all crash marker before snapshot import"); + boost::filesystem::remove(undo_marker); + } + // Always wipe shared memory before snapshot import to ensure clean state. 
// This prevents conflicts if: // - A previous snapshot import attempt failed mid-way @@ -909,6 +939,11 @@ namespace graphene { namespace chain { void database::wipe(const fc::path &data_dir, const fc::path &shared_mem_dir, bool include_blocks) { close(); chainbase::database::wipe(shared_mem_dir); + // Remove undo_all crash marker if present (chainbase::wipe only removes shared_memory.bin) + auto undo_marker = shared_mem_dir / "undo_all_in_progress"; + if (boost::filesystem::exists(undo_marker)) { + boost::filesystem::remove(undo_marker); + } if (include_blocks) { fc::remove_all(data_dir / "block_log"); fc::remove_all(data_dir / "block_log.index"); From 3af3ebad8e9489cc54723de4ea8593f80457bf81 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Mon, 25 May 2026 21:29:33 +0400 Subject: [PATCH 16/30] docs: remove CORS from nginx examples --- @l10n/ru/docs/plugins/webserver.md | 16 ---------------- @l10n/zh-CN/docs/plugins/webserver.md | 16 ---------------- docs/plugins/webserver.md | 16 ---------------- 3 files changed, 48 deletions(-) diff --git a/@l10n/ru/docs/plugins/webserver.md b/@l10n/ru/docs/plugins/webserver.md index 3284e98243..4ec83ecd93 100644 --- a/@l10n/ru/docs/plugins/webserver.md +++ b/@l10n/ru/docs/plugins/webserver.md @@ -137,22 +137,6 @@ server { } location / { - # CORS — разрешить любой источник (публичный API) - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; - - if ($request_method = 'OPTIONS') { - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 
'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Max-Age' 1728000; - add_header 'Content-Type' 'text/plain charset=UTF-8'; - add_header 'Content-Length' 0; - return 204; - } - proxy_pass http://127.0.0.1:8090; proxy_http_version 1.1; diff --git a/@l10n/zh-CN/docs/plugins/webserver.md b/@l10n/zh-CN/docs/plugins/webserver.md index 584d58faa1..e21c91c5d1 100644 --- a/@l10n/zh-CN/docs/plugins/webserver.md +++ b/@l10n/zh-CN/docs/plugins/webserver.md @@ -138,22 +138,6 @@ server { } location / { - # CORS — 允许任意来源(公开 API) - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; - - if ($request_method = 'OPTIONS') { - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Max-Age' 1728000; - add_header 'Content-Type' 'text/plain charset=UTF-8'; - add_header 'Content-Length' 0; - return 204; - } - proxy_pass http://127.0.0.1:8090; proxy_http_version 1.1; diff --git a/docs/plugins/webserver.md b/docs/plugins/webserver.md index 64f56709af..f18e5a4b9a 100644 --- a/docs/plugins/webserver.md +++ b/docs/plugins/webserver.md @@ -156,22 +156,6 @@ server { } location / { - # CORS — allow any origin (public API) - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 
'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; - - if ($request_method = 'OPTIONS') { - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Max-Age' 1728000; - add_header 'Content-Type' 'text/plain charset=UTF-8'; - add_header 'Content-Length' 0; - return 204; - } - proxy_pass http://127.0.0.1:8090; proxy_http_version 1.1; From f4ae63ea1d035801573e6a3cb66a62445e13c2cd Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Wed, 27 May 2026 10:02:30 +0400 Subject: [PATCH 17/30] fix(chain): make shared memory auto-grow (inc-shared-file-size) safer - Detect resize_in_progress crash marker on startup, throw database_revision_exception to trigger recovery path - Add post-resize validation: verify max_memory() increased and dynamic_global_property_object survived the remap in both _resize() (immediate) and apply_pending_resize() (deferred) - Fix bad_alloc -> std::terminate in _push_block: heap-allocate undo session and explicitly destroy before exception unwinding - Clean up resize crash markers in database::wipe() - Update shared-memory.md docs and RU/ZH-CN translations with safety mechanisms, updated startup sequence, and recovery scenarios --- @l10n/ru/docs/storage/shared-memory.md | 34 +++++++--- @l10n/zh-CN/docs/storage/shared-memory.md | 32 ++++++--- docs/storage/shared-memory.md | 32 ++++++--- libraries/chain/database.cpp | 83 ++++++++++++++++++++++- thirdparty/chainbase | 2 +- 5 files changed, 151 insertions(+), 32 deletions(-) diff --git a/@l10n/ru/docs/storage/shared-memory.md b/@l10n/ru/docs/storage/shared-memory.md index dc752c7272..4989cd765f 100644 
--- a/@l10n/ru/docs/storage/shared-memory.md +++ b/@l10n/ru/docs/storage/shared-memory.md @@ -92,10 +92,21 @@ skip-virtual-ops = true База данных автоматически увеличивается, когда свободное место падает ниже `min-free-shared-file-size`. При каждом изменении размера: -1. Приостанавливаются все операции (включая производство блоков и API-запросы). -2. Уничтожается текущее отображение памяти. -3. Файл увеличивается на `inc-shared-file-size`. -4. Файл заново отображается, пересчитываются все указатели индексов. +1. Записывается маркер сбоя `resize_in_progress`. +2. Все грязные страницы сбрасываются на диск (`flush()`). +3. Приостанавливаются все операции (включая производство блоков и API-запросы). +4. Уничтожается текущее отображение памяти. +5. Файл увеличивается на `inc-shared-file-size`. +6. Файл заново отображается, пересчитываются все указатели индексов. +7. Проверяется, что ключевые объекты (например, `dynamic_global_property_object`) пережили перераспределение. +8. Маркер сбоя удаляется. + +### Механизмы безопасности + +- **Сброс перед изменением размера:** Грязные страницы записываются на диск до уничтожения отображения, что гарантирует согласованность файла на диске в случае сбоя во время увеличения. +- **Маркер сбоя:** Файл `resize_in_progress` записывается перед деструктивным перераспределением и удаляется после успешного завершения. Если процесс аварийно завершается во время изменения размера, маркер сохраняется и запускает автоматическое восстановление при следующем запуске. +- **Проверка после изменения размера:** После перераспределения узел проверяет, что `max_memory()` соответствует ожидаемому размеру и что критические объекты (например, `dynamic_global_property_object`) не повреждены. Повреждение обнаруживается рано, а не приводит к запутанным сбоям позже. 
+- **Безопасность при bad_alloc:** Если разделяемая память исчерпана во время применения блока, сессия отмены безопасно отбрасывается (вместо попытки обречённой отмены, которая привела бы к краху процесса через `std::terminate`). Отложенное изменение размера планируется на следующий блок. Выделяйте `shared-file-size` с запасом, чтобы минимизировать частоту изменений размера. Каждое изменение вызывает скачок задержки. @@ -121,11 +132,12 @@ skip-virtual-ops = true ``` 1. Открыть shared_memory.bin (увеличить, если shared-file-size больше) 2. Захватить эксклюзивную блокировку файла -3. Инициализировать индексы -4. Если отсутствует genesis → init_genesis() -5. Открыть block_log или dlt_block_log -6. undo_all() → откатиться к последнему необратимому блоку -7. Проверить совпадение head блока с block log +3. Проверить маркер сбоя resize_in_progress → запустить восстановление при обнаружении +4. Инициализировать индексы +5. Если отсутствует genesis → init_genesis() +6. Открыть block_log или dlt_block_log +7. undo_all() → откатиться к последнему необратимому блоку +8. 
Проверить совпадение head блока с block log ``` --- @@ -133,10 +145,12 @@ skip-virtual-ops = true ## Восстановление | Симптом | Действие | -|---------|---------| +|---------|--------| | `CRITICAL: validator X account object MISSING` | Повреждение — использовать `--replay-from-snapshot --snapshot-auto-latest` | | `Could not modify object, uniqueness constraint violated` | Повреждение — использовать `--replay-from-snapshot --snapshot-auto-latest` | | `Unable to acquire READ lock` | Конкуренция за блокировку — увеличить `read-wait-micro` / включить `single-write-thread` | +| `Shared memory corrupted: previous resize() crashed` | Прерванное изменение размера — использовать `--replay-from-snapshot --snapshot-auto-latest` | +| `dynamic_global_property_object missing after resize` | Повреждение после изменения размера — использовать `--replay-from-snapshot --snapshot-auto-latest` | | Узел зацикливается при запуске | Повреждённый файл — `--replay-from-snapshot --snapshot-auto-latest` | Варианты восстановления: diff --git a/@l10n/zh-CN/docs/storage/shared-memory.md b/@l10n/zh-CN/docs/storage/shared-memory.md index c1acd37775..01fdb92abd 100644 --- a/@l10n/zh-CN/docs/storage/shared-memory.md +++ b/@l10n/zh-CN/docs/storage/shared-memory.md @@ -92,10 +92,21 @@ skip-virtual-ops = true 当空闲空间降至 `min-free-shared-file-size` 以下时,数据库自动增长。每次调整大小时: -1. 暂停所有操作(包括区块生产和 API 请求)。 -2. 销毁当前内存映射。 -3. 按 `inc-shared-file-size` 扩展文件。 -4. 重新映射文件并重建所有索引指针。 +1. 写入 `resize_in_progress` 崩溃标记文件。 +2. 将所有脏页刷新到磁盘(`flush()`)。 +3. 暂停所有操作(包括区块生产和 API 请求)。 +4. 销毁当前内存映射。 +5. 按 `inc-shared-file-size` 扩展文件。 +6. 重新映射文件并重建所有索引指针。 +7. 验证关键对象(如 `dynamic_global_property_object`)在重映射后完好无损。 +8. 
删除崩溃标记。 + +### 安全机制 + +- **调整前刷新:** 在销毁映射之前将脏页写入磁盘,确保在增长过程中发生任何故障时磁盘上的文件保持一致。 +- **崩溃标记:** 在破坏性重映射之前写入 `resize_in_progress` 文件,成功后删除。如果进程在调整大小期间崩溃,标记会保留并在下次启动时触发自动恢复。 +- **调整后验证:** 重映射后,节点验证 `max_memory()` 是否与预期大小匹配,以及关键对象(如 `dynamic_global_property_object`)是否完好。损坏会被及早发现,而不是导致后续令人困惑的故障。 +- **bad_alloc 安全:** 如果在区块应用期间共享内存耗尽,撤销会话会被安全丢弃(而不是尝试注定失败的撤销,这将通过 `std::terminate` 导致进程崩溃)。延迟调整大小将安排在下一个区块进行。 预先充裕地分配 `shared-file-size` 以最小化调整大小频率。每次调整大小都会导致延迟峰值。 @@ -121,11 +132,12 @@ VIZ 主网全节点的大致使用量: ``` 1. 打开 shared_memory.bin(若 shared-file-size 更大则扩展) 2. 获取独占文件锁 -3. 初始化索引 -4. 若缺少 genesis → init_genesis() -5. 打开 block_log 或 dlt_block_log -6. undo_all() → 回滚到最后一个不可逆区块 -7. 验证头区块与区块日志匹配 +3. 检查 resize_in_progress 崩溃标记 → 若发现则触发恢复 +4. 初始化索引 +5. 若缺少 genesis → init_genesis() +6. 打开 block_log 或 dlt_block_log +7. undo_all() → 回滚到最后一个不可逆区块 +8. 验证头区块与区块日志匹配 ``` --- @@ -137,6 +149,8 @@ VIZ 主网全节点的大致使用量: | `CRITICAL: validator X account object MISSING` | 损坏 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | | `Could not modify object, uniqueness constraint violated` | 损坏 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | | `Unable to acquire READ lock` | 锁竞争 — 增大 `read-wait-micro` / 启用 `single-write-thread` | +| `Shared memory corrupted: previous resize() crashed` | 中断的调整大小 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | +| `dynamic_global_property_object missing after resize` | 调整后损坏 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | | 节点启动时循环崩溃 | 文件损坏 — `--replay-from-snapshot --snapshot-auto-latest` | 恢复选项: diff --git a/docs/storage/shared-memory.md b/docs/storage/shared-memory.md index a8e1de74b2..9699b9791b 100644 --- a/docs/storage/shared-memory.md +++ b/docs/storage/shared-memory.md @@ -92,10 +92,21 @@ skip-virtual-ops = true The database auto-grows when free space drops below `min-free-shared-file-size`. Each resize: -1. Pauses all operations (including block production and API requests). -2. Destroys the current memory mapping. -3. 
Extends the file by `inc-shared-file-size`. -4. Re-maps the file and rebuilds all index pointers. +1. Writes a `resize_in_progress` crash marker file. +2. Flushes all dirty pages to disk (`flush()`). +3. Pauses all operations (including block production and API requests). +4. Destroys the current memory mapping. +5. Extends the file by `inc-shared-file-size`. +6. Re-maps the file and rebuilds all index pointers. +7. Validates key objects (e.g., `dynamic_global_property_object`) survived the remap. +8. Removes the crash marker. + +### Safety Mechanisms + +- **Flush-before-resize:** Dirty pages are written to disk before the mapping is destroyed, ensuring the on-disk file is consistent if anything fails during grow. +- **Crash marker:** A `resize_in_progress` file is written before the destructive remap and removed after success. If the process crashes mid-resize, the marker survives and triggers automatic recovery on the next startup. +- **Post-resize validation:** After the remap, the node verifies that `max_memory()` matches the expected size and that critical objects (e.g., `dynamic_global_property_object`) are intact. Corruption is detected early instead of causing confusing downstream failures. +- **bad_alloc safety:** If shared memory is exhausted during block application, the undo session is safely discarded (rather than attempting a doomed undo that would crash the process via `std::terminate`). A deferred resize is scheduled for the next block. Pre-allocate `shared-file-size` generously to minimize resize frequency. Each resize causes a latency spike. @@ -121,11 +132,12 @@ Approximate usage for a VIZ mainnet full node: ``` 1. Open shared_memory.bin (grow if shared-file-size is larger) 2. Acquire exclusive file lock -3. Initialize indices -4. If genesis missing → init_genesis() -5. Open block_log or dlt_block_log -6. undo_all() → rewind to last irreversible block -7. Verify head block matches block log +3. 
Check for resize_in_progress crash marker → trigger recovery if found +4. Initialize indices +5. If genesis missing → init_genesis() +6. Open block_log or dlt_block_log +7. undo_all() → rewind to last irreversible block +8. Verify head block matches block log ``` --- @@ -137,6 +149,8 @@ Approximate usage for a VIZ mainnet full node: | `CRITICAL: validator X account object MISSING` | Corruption — use `--replay-from-snapshot --snapshot-auto-latest` | | `Could not modify object, uniqueness constraint violated` | Corruption — use `--replay-from-snapshot --snapshot-auto-latest` | | `Unable to acquire READ lock` | Lock contention — increase `read-wait-micro` / enable `single-write-thread` | +| `Shared memory corrupted: previous resize() crashed` | Interrupted resize — use `--replay-from-snapshot --snapshot-auto-latest` | +| `dynamic_global_property_object missing after resize` | Resize corruption — use `--replay-from-snapshot --snapshot-auto-latest` | | Node crashes in a loop on startup | Corrupted file — `--replay-from-snapshot --snapshot-auto-latest` | Recovery options: diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index cd8652d3ed..0b1a423495 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -259,6 +259,21 @@ namespace graphene { namespace chain { // Rewind all undo state. This should return us to the state at the last irreversible block. // + // Crash guard: detect incomplete resize from a previous run. + // resize() writes a marker before the destructive grow/remap + // and removes it after success. If the marker survived, the + // shared memory file may be in an inconsistent state (file was + // grown but the mapping was never rebuilt). Treat this the + // same as undo_all corruption and trigger recovery. + auto resize_marker = shared_mem_dir / "resize_in_progress"; + if (boost::filesystem::exists(resize_marker)) { + wlog("Detected incomplete resize from previous startup. 
" + "Shared memory is likely corrupted. " + "Throwing revision mismatch to trigger recovery."); + FC_THROW_EXCEPTION(database_revision_exception, + "Shared memory corrupted: previous resize() crashed (marker detected)"); + } + // Crash guard: undo_all() walks shared-memory data structures that may // be corrupted after a hard crash (SIGSEGV). Since a segfault kills the // process instantly, no C++ exception handler can catch it. We use a @@ -837,6 +852,25 @@ namespace graphene { namespace chain { ("used_before", used_mem_before / (1024 * 1024))("max_before", max_mem / (1024 * 1024))); resize(new_max); + // Post-resize validation: verify key objects survived the remap. + // A silent grow failure (file unchanged but open succeeds with + // old size) or corrupted segment metadata would cause later + // operations to fail in confusing ways. Catch it early here. + if (max_memory() < new_max) { + elog("CRITICAL: shared memory resize did not increase capacity! " + "expected=${exp} actual=${act}. File may be corrupted.", + ("exp", new_max)("act", max_memory())); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "Resize failed: capacity ${act} < expected ${exp}", + ("act", max_memory())("exp", new_max)); + } + if (!find<dynamic_global_property_object>()) { + elog("CRITICAL: dynamic_global_property_object MISSING after resize. " + "Shared memory is corrupted."); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "dynamic_global_property_object missing after resize"); + } + uint64_t free_mem = free_memory(); uint64_t reserved_mem = reserved_memory(); uint64_t used_mem_after = new_max - free_mem; @@ -880,6 +914,22 @@ namespace graphene { namespace chain { ("mem", target / (1024 * 1024))); resize(target); + // Post-resize validation: verify key objects survived the remap. + if (max_memory() < target) { + elog("CRITICAL: deferred shared memory resize did not increase capacity! " + "expected=${exp} actual=${act}. 
File may be corrupted.", + ("exp", target)("act", max_memory())); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "Deferred resize failed: capacity ${act} < expected ${exp}", + ("act", max_memory())("exp", target)); + } + if (!find<dynamic_global_property_object>()) { + elog("CRITICAL: dynamic_global_property_object MISSING after deferred resize. " + "Shared memory is corrupted."); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "dynamic_global_property_object missing after resize"); + } + uint64_t free_mem = free_memory(); uint64_t reserved_mem = reserved_memory(); uint64_t used_mem_after = target - free_mem; @@ -944,6 +994,11 @@ namespace graphene { namespace chain { if (boost::filesystem::exists(undo_marker)) { boost::filesystem::remove(undo_marker); } + // Remove resize crash marker if present + auto resize_marker = shared_mem_dir / "resize_in_progress"; + if (boost::filesystem::exists(resize_marker)) { + boost::filesystem::remove(resize_marker); + } if (include_blocks) { fc::remove_all(data_dir / "block_log"); fc::remove_all(data_dir / "block_log.index"); @@ -2089,9 +2144,31 @@ namespace graphene { namespace chain { } try { - auto session = start_undo_session(); - apply_block(new_block, skip); - session.push(); + // Heap-allocate the undo session so we can explicitly + // destroy it before exception unwinding reaches it. + // If bad_alloc fires inside apply_block(), the session + // destructor would call undo() which writes to shared + // memory. With memory exhausted, undo() throws another + // bad_alloc during stack unwinding -> double exception + // -> std::terminate. By resetting the unique_ptr in + // our catch block, the session is destroyed cleanly + // before the exception propagates further. + auto session = std::unique_ptr<chainbase::database::session>( + new chainbase::database::session(start_undo_session())); + try { + apply_block(new_block, skip); + } catch (const std::exception& e) { + // Attempt explicit undo before rethrowing. 
If undo() + // throws (shared memory exhausted), suppress it — the + // chainbase session destructor's uncaught_exceptions() + // guard will NOT fire here (we're in a catch block, not + // in stack unwinding), so we must protect manually. + // The original exception is preserved and rethrown. + try { session.reset(); } catch (...) {} + throw; + } + session->push(); + session.reset(); } catch (const wrong_scheduled_validator_exception &e) { // Schedule mismatch: keep the block in fork_db as a diff --git a/thirdparty/chainbase b/thirdparty/chainbase index 3d02090982..989845a6a3 160000 --- a/thirdparty/chainbase +++ b/thirdparty/chainbase @@ -1 +1 @@ -Subproject commit 3d02090982d7df8ea2b796d58964ec430c26b506 +Subproject commit 989845a6a3c9b86c0951ae465bd4550638269037 From be747e0f0c82ac0d5a408899cd036d7b3b2c83b9 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Wed, 27 May 2026 10:08:11 +0400 Subject: [PATCH 18/30] update chainbase --- thirdparty/chainbase | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/chainbase b/thirdparty/chainbase index 989845a6a3..a05eee3e83 160000 --- a/thirdparty/chainbase +++ b/thirdparty/chainbase @@ -1 +1 @@ -Subproject commit 989845a6a3c9b86c0951ae465bd4550638269037 +Subproject commit a05eee3e83d3e5e39b9c5a126b01d8f8d7d3350b From 602e3f36540177a0692655e3a34e2481db21e8e5 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Wed, 27 May 2026 17:57:15 +0400 Subject: [PATCH 19/30] add json api spec for plugins and json rpc methods --- .qoder/docs/jsonrpc-api-spec.json | 1353 +++++++++++++++++++++++++++++ 1 file changed, 1353 insertions(+) create mode 100644 .qoder/docs/jsonrpc-api-spec.json diff --git a/.qoder/docs/jsonrpc-api-spec.json b/.qoder/docs/jsonrpc-api-spec.json new file mode 100644 index 0000000000..4d821d5bdf --- /dev/null +++ b/.qoder/docs/jsonrpc-api-spec.json @@ -0,0 +1,1353 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "VIZ Blockchain JSON-RPC API Specification", + 
"description": "Complete specification of all JSON-RPC methods exposed by VIZ node plugins. Intended as a machine-readable spec for API explorer generation.", + "version": "1.0.0", + "plugins": [ + { + "name": "validator_api", + "description": "Provides read-only access to validator (witness) data: schedules, votes, and validator registration info.", + "methods": [ + { + "method": "get_active_validators", + "description": "Returns the list of currently active validator account names that are participating in block production.", + "aliases": ["get_active_witnesses"], + "params": [], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of account names of currently active validators." + } + }, + { + "method": "get_validator_schedule", + "description": "Returns the current validator schedule object, including the shuffled list of validators and their timeshares.", + "aliases": ["get_witness_schedule"], + "params": [], + "returns": { + "type": "object", + "description": "The validator_schedule_object containing current_shuffled_validators, timeshare, and related scheduling data." + } + }, + { + "method": "get_validators", + "description": "Returns a list of validator objects by their database IDs. For each ID, returns either the validator_api_object or null if not found.", + "aliases": ["get_witnesses"], + "params": [ + { + "name": "validator_ids", + "caption": "Validator IDs", + "description": "Array of validator object database IDs to look up.", + "type": "array", + "items": { "type": "integer" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of optional validator_api_object entries, one per requested ID." 
+ } + }, + { + "method": "get_validator_by_account", + "description": "Returns the validator object registered under a specific account name, or null if the account is not a validator.", + "aliases": ["get_witness_by_account"], + "params": [ + { + "name": "account_name", + "caption": "Account Name", + "description": "The account name to look up as a validator.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The validator_api_object for the account, or null if not a validator.", + "nullable": true + } + }, + { + "method": "get_validators_by_vote", + "description": "Returns validators sorted by total votes (descending). Starts from a given account name. Only returns validators with votes > 0. Maximum 100 results.", + "aliases": ["get_witnesses_by_vote"], + "params": [ + { + "name": "from", + "caption": "From Account", + "description": "The account name to start from. Use empty string to start from the top.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 100.", + "type": "integer", + "required": true, + "maximum": 100 + } + ], + "returns": { + "type": "array", + "description": "Array of validator_api_object entries sorted by vote count." + } + }, + { + "method": "get_validators_by_counted_vote", + "description": "Returns validators sorted by counted votes (descending). Starts from a given account name. Only returns validators with counted_votes > 0. Maximum 100 results.", + "aliases": ["get_witnesses_by_counted_vote"], + "params": [ + { + "name": "from", + "caption": "From Account", + "description": "The account name to start from. Use empty string to start from the top.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. 
Must not exceed 100.", + "type": "integer", + "required": true, + "maximum": 100 + } + ], + "returns": { + "type": "array", + "description": "Array of validator_api_object entries sorted by counted vote." + } + }, + { + "method": "get_validator_count", + "description": "Returns the total number of registered validators on the blockchain.", + "aliases": ["get_witness_count"], + "params": [], + "returns": { + "type": "integer", + "description": "Total count of registered validators." + } + }, + { + "method": "lookup_validator_accounts", + "description": "Looks up validator account names starting from a lower bound. Returns up to 1000 results alphabetically.", + "aliases": ["lookup_witness_accounts"], + "params": [ + { + "name": "lower_bound_name", + "caption": "Lower Bound Name", + "description": "The lower bound of the first account name to return. Use empty string to start from the beginning.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of validator account names matching the query." + } + } + ] + }, + { + "name": "account_history", + "description": "Tracks operations by account and provides per-account operation history queries.", + "methods": [ + { + "method": "get_account_history", + "description": "Returns a map of operations for a given account in the sequence range [from-limit, from]. Each account operation has a sequence number starting from 0. 
Use from=-1 (4294967295) to get the most recent operations.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose operation history to retrieve.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From Sequence", + "description": "The absolute sequence number. Use -1 (4294967295) for the most recent operation.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of operations to return. Must be between 1 and 1000. Must be less than 'from' unless from is -1.", + "type": "integer", + "required": true, + "minimum": 1, + "maximum": 1000 + } + ], + "returns": { + "type": "object", + "description": "Map of sequence number to applied_operation objects for the account." + } + } + ] + }, + { + "name": "operation_history", + "description": "Tracks all blockchain operations and provides block-level and transaction-level operation queries.", + "methods": [ + { + "method": "get_ops_in_block", + "description": "Returns the sequence of operations included or generated within a particular block. Virtual operations are generated by the blockchain (e.g. rewards) as opposed to user-submitted operations.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block whose operations should be returned.", + "type": "integer", + "required": true + }, + { + "name": "only_virtual", + "caption": "Only Virtual", + "description": "Whether to only include virtual operations in the returned results.", + "type": "boolean", + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of applied_operation objects from the specified block." 
+ } + }, + { + "method": "get_transaction", + "description": "Returns a transaction by its ID, including block number and transaction index within the block.", + "params": [ + { + "name": "id", + "caption": "Transaction ID", + "description": "The hash (SHA-256 / ripemd160) of the transaction to retrieve.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "annotated_signed_transaction with block_num and transaction_num fields added." + } + } + ] + }, + { + "name": "database_api", + "description": "The core read-only API for the blockchain database. Provides access to blocks, accounts, chain properties, authority validation, vesting delegations, and more.", + "methods": [ + { + "method": "get_block_header", + "description": "Retrieves a block header by block number.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block whose header should be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The block header, or null if no matching block was found.", + "nullable": true + } + }, + { + "method": "get_block", + "description": "Retrieves a full, signed block by block number.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block to be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The full signed block, or null if no matching block was found.", + "nullable": true + } + }, + { + "method": "get_irreversible_block_header", + "description": "Retrieves a block header only if the block is irreversible. 
Returns null if the block has not yet been finalized.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block whose header should be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The block header if the block is irreversible, or null.", + "nullable": true + } + }, + { + "method": "get_irreversible_block", + "description": "Retrieves a full, signed block only if it is irreversible. Returns null if the block has not yet been finalized.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block to be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The full signed block if irreversible, or null.", + "nullable": true + } + }, + { + "method": "set_block_applied_callback", + "description": "Sets a callback function that is triggered on each newly generated block. Used for real-time block notifications via WebSocket.", + "params": [ + { + "name": "callback", + "caption": "Callback", + "description": "Callback function to invoke when a new block is applied.", + "type": "function", + "required": true + } + ], + "returns": { + "type": "null", + "description": "No return value (callback-based)." + } + }, + { + "method": "get_config", + "description": "Retrieves compile-time constants and configuration values of the blockchain (e.g., chain ID, symbol, precision).", + "params": [], + "returns": { + "type": "object", + "description": "Object containing blockchain compile-time configuration constants." 
+ } + }, + { + "method": "get_dynamic_global_properties", + "description": "Retrieves the current dynamic global properties object, which contains real-time chain state such as head block number, total supply, and other dynamic metrics.", + "params": [], + "returns": { + "type": "object", + "description": "The dynamic_global_property_api_object with current chain state." + } + }, + { + "method": "get_chain_properties", + "description": "Retrieves the chain properties as set by the median validator schedule (chain-wide constraints like account creation fee, maximum block size, etc.).", + "params": [], + "returns": { + "type": "object", + "description": "chain_api_properties object with median chain parameters." + } + }, + { + "method": "get_hardfork_version", + "description": "Returns the current hardfork version of the blockchain.", + "params": [], + "returns": { + "type": "string", + "description": "The current hardfork version string (e.g. '0.23.0')." + } + }, + { + "method": "get_next_scheduled_hardfork", + "description": "Returns the next scheduled hardfork version and the time it is planned to go live.", + "params": [], + "returns": { + "type": "object", + "description": "Object with hf_version (string) and live_time (ISO timestamp)." + } + }, + { + "method": "get_accounts", + "description": "Returns full account objects for a list of account names. Includes balances, vesting, authority, and validator votes.", + "params": [ + { + "name": "names", + "caption": "Account Names", + "description": "Array of account names to look up.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of account_api_object entries. Only accounts that exist are returned." + } + }, + { + "method": "lookup_account_names", + "description": "Looks up accounts by their names. 
Returns an optional account object for each name; null if the account does not exist.", + "params": [ + { + "name": "account_names", + "caption": "Account Names", + "description": "Array of account names to look up.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of optional account_api_object entries. Each element may be null." + } + }, + { + "method": "lookup_accounts", + "description": "Looks up account names starting from a lower bound. Returns a set of account names in alphabetical order.", + "params": [ + { + "name": "lower_bound_name", + "caption": "Lower Bound Name", + "description": "The lower bound of the first account name to return.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of account names matching the query." + } + }, + { + "method": "get_account_count", + "description": "Returns the total number of accounts registered on the blockchain.", + "params": [], + "returns": { + "type": "integer", + "description": "Total number of registered accounts." + } + }, + { + "method": "get_master_history", + "description": "Returns the master authority change history for a given account, useful for account recovery audits.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose master authority history to retrieve.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of master_authority_history_api_object entries." 
+ } + }, + { + "method": "get_recovery_request", + "description": "Returns the current account recovery request for an account, if one exists.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose recovery request to check.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The account_recovery_request_api_object, or null if no request exists.", + "nullable": true + } + }, + { + "method": "get_escrow", + "description": "Returns the escrow object for a given sender and escrow ID.", + "params": [ + { + "name": "from", + "caption": "From Account", + "description": "The account name of the escrow sender.", + "type": "string", + "required": true + }, + { + "name": "escrow_id", + "caption": "Escrow ID", + "description": "The numeric escrow ID to look up.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The escrow_api_object, or null if not found.", + "nullable": true + } + }, + { + "method": "get_withdraw_routes", + "description": "Returns vesting withdrawal routes for a given account. Can filter by direction (incoming, outgoing, or all).", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose withdrawal routes to retrieve.", + "type": "string", + "required": true + }, + { + "name": "type", + "caption": "Route Type", + "description": "Filter direction: 'incoming', 'outgoing', or 'all'.", + "type": "string", + "enum": ["incoming", "outgoing", "all"], + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of withdraw_route objects with from_account, to_account, percent, auto_vest." + } + }, + { + "method": "get_vesting_delegations", + "description": "Returns vesting delegation objects for a given account. 
Supports pagination and filtering by delegated or received.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The delegator or delegatee account name.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From", + "description": "The account name to start from for pagination.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results. Defaults to 100. Must not exceed 1000.", + "type": "integer", + "required": false, + "default": 100, + "maximum": 1000 + }, + { + "name": "type", + "caption": "Delegation Type", + "description": "Filter type: 'delegated' (sent) or 'received'. Defaults to 'delegated'.", + "type": "string", + "enum": ["delegated", "received"], + "required": false, + "default": "delegated" + } + ], + "returns": { + "type": "array", + "description": "Array of vesting_delegation_api_object entries." + } + }, + { + "method": "get_expiring_vesting_delegations", + "description": "Returns expiring vesting delegation objects for a given account, starting from a given date.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The delegator account name.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From Date", + "description": "Start date/time for expiration lookup (ISO timestamp).", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results. Defaults to 100. Must not exceed 1000.", + "type": "integer", + "required": false, + "default": 100, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of vesting_delegation_expiration_api_object entries." 
+ } + }, + { + "method": "get_transaction_hex", + "description": "Returns a hexadecimal dump of the serialized binary form of a transaction.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction object to serialize.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "string", + "description": "Hex-encoded serialized transaction." + } + }, + { + "method": "get_required_signatures", + "description": "Given a partially signed transaction and a set of available public keys, returns the minimal subset of public keys that should add signatures to authorize the transaction.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to analyze.", + "type": "object", + "required": true + }, + { + "name": "available_keys", + "caption": "Available Keys", + "description": "Array/set of public keys that the caller can sign with.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of public keys that are required to sign the transaction." + } + }, + { + "method": "get_potential_signatures", + "description": "Returns the set of all public keys that could possibly sign for a given transaction. Useful for wallets to filter their key set before calling get_required_signatures.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to analyze.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of all public keys that could potentially authorize the transaction." + } + }, + { + "method": "verify_authority", + "description": "Verifies that a transaction has all of the required signatures. 
Returns true if valid, otherwise throws an exception.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to verify.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "boolean", + "description": "true if the transaction has all required signatures." + } + }, + { + "method": "verify_account_authority", + "description": "Verifies that a set of public keys has sufficient authority to authorize actions on behalf of an account.", + "params": [ + { + "name": "name_or_id", + "caption": "Account Name", + "description": "The account name to check authority for.", + "type": "string", + "required": true + }, + { + "name": "signers", + "caption": "Signer Keys", + "description": "Array/set of public keys to verify against the account's authority.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "boolean", + "description": "true if the signers have enough authority to authorize the account." + } + }, + { + "method": "get_database_info", + "description": "Returns database shared memory usage information including total size, free size, reserved size, used size, and per-index record counts.", + "params": [], + "returns": { + "type": "object", + "description": "Object with total_size, free_size, reserved_size, used_size, and index_list (array of {name, record_count})." 
+ } + }, + { + "method": "get_proposed_transactions", + "description": "Returns proposed transactions (proposals) associated with a given account, both authored and requiring approval.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose proposals to retrieve.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From Offset", + "description": "Offset for pagination (number of results to skip).", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of proposals to return. Must not exceed 100.", + "type": "integer", + "required": true, + "maximum": 100 + } + ], + "returns": { + "type": "array", + "description": "Array of proposal_api_object entries." + } + }, + { + "method": "get_accounts_on_sale", + "description": "Returns a list of accounts currently on sale (direct sale, not auction). Only accounts whose sale start time has passed are included.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of account_on_sale_api_object entries." + } + }, + { + "method": "get_accounts_on_auction", + "description": "Returns a list of accounts currently on auction (no target buyer set). Only accounts whose sale start time has passed are included.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. 
Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of account_on_sale_api_object entries for auction listings." + } + }, + { + "method": "get_subaccounts_on_sale", + "description": "Returns a list of subaccounts currently on sale.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of subaccount_on_sale_api_object entries." + } + } + ] + }, + { + "name": "account_by_key", + "description": "Provides a lookup from public keys to the accounts that reference those keys in their authority.", + "methods": [ + { + "method": "get_key_references", + "description": "Returns all account names that reference the given public keys in their master, active, or regular authority.", + "params": [ + { + "name": "keys", + "caption": "Public Keys", + "description": "Array of public keys to look up.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of arrays of account names. Each inner array corresponds to one input key and contains all accounts referencing that key." + } + } + ] + }, + { + "name": "network_broadcast_api", + "description": "Provides transaction and block broadcasting capabilities. This is the write API for submitting transactions to the network.", + "methods": [ + { + "method": "broadcast_transaction", + "description": "Broadcasts a signed transaction to the network. The transaction is accepted into the pending pool and propagated to P2P peers. 
Optionally checks that the blockchain is not too far behind.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to broadcast.", + "type": "object", + "required": true + }, + { + "name": "max_block_age", + "caption": "Max Block Age", + "description": "Optional. Maximum allowed age of the head block in seconds. If the blockchain is behind by more than this, the call will fail. Use -1 to disable.", + "type": "integer", + "required": false + } + ], + "returns": { + "type": "null", + "description": "No return value on success." + } + }, + { + "method": "broadcast_transaction_synchronous", + "description": "Broadcasts a signed transaction and waits for confirmation. Returns the transaction ID, block number, and transaction index once included in a block. The callback includes whether the transaction expired.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to broadcast.", + "type": "object", + "required": true + }, + { + "name": "max_block_age", + "caption": "Max Block Age", + "description": "Optional. Maximum allowed age of the head block in seconds. Use -1 to disable.", + "type": "integer", + "required": false + } + ], + "returns": { + "type": "object", + "description": "Object with id (transaction hash), block_num, trx_num, and expired fields." + } + }, + { + "method": "broadcast_block", + "description": "Broadcasts a signed block to the network. Typically used by validators to propagate newly produced blocks.", + "params": [ + { + "name": "block", + "caption": "Block", + "description": "The signed block to broadcast.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "null", + "description": "No return value on success." + } + }, + { + "method": "broadcast_transaction_with_callback", + "description": "Broadcasts a signed transaction with a confirmation callback. The first argument is the callback, followed by the transaction. 
Similar to broadcast_transaction_synchronous but with custom callback handling.", + "params": [ + { + "name": "callback", + "caption": "Callback", + "description": "Confirmation callback function.", + "type": "function", + "required": true + }, + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to broadcast.", + "type": "object", + "required": true + }, + { + "name": "max_block_age", + "caption": "Max Block Age", + "description": "Optional. Maximum allowed age of the head block in seconds. Use -1 to disable.", + "type": "integer", + "required": false + } + ], + "returns": { + "type": "null", + "description": "No direct return; result delivered via callback." + } + } + ] + }, + { + "name": "committee_api", + "description": "Provides access to committee worker proposal requests and their voting state.", + "methods": [ + { + "method": "get_committee_request", + "description": "Returns a committee request by its ID, optionally including votes.", + "params": [ + { + "name": "request_id", + "caption": "Request ID", + "description": "The numeric ID of the committee request to retrieve.", + "type": "integer", + "required": true + }, + { + "name": "votes_count", + "caption": "Votes Count", + "description": "Number of votes to include. Use 0 for no votes, -1 for all votes, or a positive number to limit.", + "type": "integer", + "required": false, + "default": 0 + } + ], + "returns": { + "type": "object", + "description": "committee_api_object with optional embedded votes array." + } + }, + { + "method": "get_committee_request_votes", + "description": "Returns all votes for a specific committee request.", + "params": [ + { + "name": "request_id", + "caption": "Request ID", + "description": "The numeric ID of the committee request whose votes to retrieve.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of committee_vote_state objects." 
+ } + }, + { + "method": "get_committee_requests_list", + "description": "Returns a list of committee request IDs filtered by status.", + "params": [ + { + "name": "status", + "caption": "Status", + "description": "The status code to filter by (e.g. 0=pending, 1=approved, etc.).", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "integer" }, + "description": "Array of committee request IDs matching the given status." + } + } + ] + }, + { + "name": "invite_api", + "description": "Provides access to invite objects used for account registration via invite keys.", + "methods": [ + { + "method": "get_invites_list", + "description": "Returns a list of invite IDs filtered by status.", + "params": [ + { + "name": "status", + "caption": "Status", + "description": "The status code to filter invites by.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "integer" }, + "description": "Array of invite database IDs matching the given status." + } + }, + { + "method": "get_invite_by_id", + "description": "Returns an invite object by its database ID.", + "params": [ + { + "name": "id", + "caption": "Invite ID", + "description": "The database ID of the invite to retrieve.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "invite_api_object with invite details (key, creator, balance, etc.)." + } + }, + { + "method": "get_invite_by_key", + "description": "Returns an invite object by its public key.", + "params": [ + { + "name": "key", + "caption": "Invite Key", + "description": "The public key associated with the invite.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "invite_api_object matching the given key." 
+ } + } + ] + }, + { + "name": "paid_subscription_api", + "description": "Provides access to paid subscription data: subscription options set by content creators, subscription status of subscribers, and active/inactive subscription lists.", + "methods": [ + { + "method": "get_paid_subscription_options", + "description": "Returns the paid subscription settings for a given account (creator).", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name of the subscription creator.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "paid_subscription_state with subscription details (price, period, etc.)." + } + }, + { + "method": "get_paid_subscriptions", + "description": "Returns a paginated list of all paid subscription objects.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of paid_subscription_object entries." + } + }, + { + "method": "get_paid_subscription_status", + "description": "Returns the subscription status of a specific subscriber for a given creator account.", + "params": [ + { + "name": "subscriber", + "caption": "Subscriber", + "description": "The account name of the subscriber.", + "type": "string", + "required": true + }, + { + "name": "account", + "caption": "Creator Account", + "description": "The account name of the subscription creator.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "paid_subscribe_state with subscription status details." 
+ } + }, + { + "method": "get_active_paid_subscriptions", + "description": "Returns a list of creator account names that a given subscriber has active subscriptions to.", + "params": [ + { + "name": "subscriber", + "caption": "Subscriber", + "description": "The account name of the subscriber.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of creator account names with active subscriptions." + } + }, + { + "method": "get_inactive_paid_subscriptions", + "description": "Returns a list of creator account names that a given subscriber has inactive (expired) subscriptions to.", + "params": [ + { + "name": "subscriber", + "caption": "Subscriber", + "description": "The account name of the subscriber.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of creator account names with inactive subscriptions." + } + } + ] + }, + { + "name": "custom_protocol_api", + "description": "Provides access to account data enriched with custom protocol sequence information. Custom protocols allow third-party applications to track per-account custom operations.", + "methods": [ + { + "method": "get_account", + "description": "Returns an account object enriched with custom protocol sequence data for a specific custom protocol ID. Populates custom_sequence and custom_sequence_block_num fields.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name to look up.", + "type": "string", + "required": true + }, + { + "name": "custom_protocol_id", + "caption": "Custom Protocol ID", + "description": "The custom protocol ID string to retrieve the sequence for. 
Use empty string to skip custom protocol lookup.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "account_api_object with custom_sequence and custom_sequence_block_num populated for the given protocol." + } + } + ] + }, + { + "name": "auth_util", + "description": "Provides utility methods for verifying account authority signatures against arbitrary data digests.", + "methods": [ + { + "method": "check_authority_signature", + "description": "Verifies that the provided signatures are valid for the given account's authority at a specified level (master, active, or regular). Returns the public keys derived from the signatures.", + "params": [ + { + "name": "account_name", + "caption": "Account Name", + "description": "The account name whose authority to check.", + "type": "string", + "required": true + }, + { + "name": "level", + "caption": "Authority Level", + "description": "The authority level to verify against: 'master' (or 'm'), 'active' (or 'a'), 'regular' (or 'r'). Empty string defaults to 'active'.", + "type": "string", + "required": true + }, + { + "name": "dig", + "caption": "Digest", + "description": "The SHA-256 hash of the data that was signed.", + "type": "string", + "required": true + }, + { + "name": "sigs", + "caption": "Signatures", + "description": "Array of signatures to verify.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of public keys recovered from the valid signatures." 
+ } + } + ] + }, + { + "name": "block_info", + "description": "Tracks block metadata (size, average block size, slot info) and provides queries to retrieve this information for ranges of blocks.", + "methods": [ + { + "method": "get_block_info", + "description": "Returns block metadata (block_id, block_size, average_block_size, aslot, last_irreversible_block_num) for a range of blocks starting from start_block_num.", + "params": [ + { + "name": "start_block_num", + "caption": "Start Block Number", + "description": "The first block number to return info for. Must be greater than 0.", + "type": "integer", + "required": true, + "minimum": 1 + }, + { + "name": "count", + "caption": "Count", + "description": "Number of blocks to return info for. Must not exceed 10000.", + "type": "integer", + "required": true, + "maximum": 10000 + } + ], + "returns": { + "type": "array", + "description": "Array of block_info objects. Entries may be empty if no info is stored (e.g. blocks before snapshot)." + } + }, + { + "method": "get_blocks_with_info", + "description": "Returns full signed blocks with attached metadata for a range. Limits total response size to 8 MB. Stops early if no info is stored for a block.", + "params": [ + { + "name": "start_block_num", + "caption": "Start Block Number", + "description": "The first block number to return. Must be greater than 0.", + "type": "integer", + "required": true, + "minimum": 1 + }, + { + "name": "count", + "caption": "Count", + "description": "Maximum number of blocks to return. Must not exceed 10000. Response is capped at 8 MB total.", + "type": "integer", + "required": true, + "maximum": 10000 + } + ], + "returns": { + "type": "array", + "description": "Array of block_with_info objects, each containing a signed block and its block_info metadata." 
+ } + } + ] + }, + { + "name": "raw_block", + "description": "Provides access to raw (base64-encoded) serialized block data for low-level block inspection or re-import.", + "methods": [ + { + "method": "get_raw_block", + "description": "Returns a raw block by block number, including the base64-encoded serialized binary, block ID, previous block ID, and timestamp.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block to retrieve in raw form.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "Object with block_id, previous, timestamp, and raw_block (base64-encoded string) fields." + } + } + ] + } + ] +} From 44cd2ebc387c04608c9d46b3d74265be68def856 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 08:46:09 +0400 Subject: [PATCH 20/30] update info about account_history plugin in docs + fix purge --- @l10n/ru/docs/plugins/overview.md | 18 +++++++++++++ @l10n/zh-CN/docs/plugins/overview.md | 17 ++++++++++++ docs/plugins/overview.md | 39 ++++++++++++++++++++-------- plugins/account_history/plugin.cpp | 16 +++++++++++- 4 files changed, 78 insertions(+), 12 deletions(-) diff --git a/@l10n/ru/docs/plugins/overview.md b/@l10n/ru/docs/plugins/overview.md index e07d477e00..b829a4b162 100644 --- a/@l10n/ru/docs/plugins/overview.md +++ b/@l10n/ru/docs/plugins/overview.md @@ -222,6 +222,24 @@ DLT P2P-сетевое взаимодействие — распростране - `track-account-range` — диапазон имён аккаунтов для индексирования (по умолчанию: все аккаунты) - `history-count-blocks` — сохранять историю за N блоков +> **Зависимость:** `account_history` **требует** `operation_history` как родительский плагин +> (`APPBASE_PLUGIN_REQUIRES`). Нода не запустится при отсутствии `operation_history`. 
+> `account_history` хранит ссылки `operation_id_type` (внешние ключи) на строки `operation_object`, +> которыми управляет `operation_history`; при запросе `get_account_history` разрешает их через +> `database.get(itr->op)`. +> +> **Всегда включать оба плагина вместе:** +> ```ini +> plugin = operation_history +> plugin = account_history +> ``` +> +> **Координация очистки:** Оба плагина читают один и тот же ключ `history-count-blocks` из +> `config.ini` — разделения по плагинам нет. Одно значение применяется к обоим одновременно. +> Внутри `account_history` дополнительно вызывает `operation_history::get_min_keep_block()` +> при каждом блоке как защитную проверку, гарантируя, что его записи никогда не будут ссылаться +> на уже удалённый `operation_object`. + --- ### `operation_history` diff --git a/@l10n/zh-CN/docs/plugins/overview.md b/@l10n/zh-CN/docs/plugins/overview.md index b394601f0e..b9d59cf482 100644 --- a/@l10n/zh-CN/docs/plugins/overview.md +++ b/@l10n/zh-CN/docs/plugins/overview.md @@ -222,6 +222,23 @@ DLT P2P 网络——区块和交易传播、节点管理、少数派 fork 恢复 - `track-account-range` — 索引的账户名范围(默认:所有账户) - `history-count-blocks` — 保留 N 个区块的历史 +> **依赖关系:** `account_history` **需要** `operation_history` 作为父插件 +> (`APPBASE_PLUGIN_REQUIRES`)。若缺少 `operation_history`,节点将无法启动。 +> `account_history` 存储指向 `operation_object` 行的 `operation_id_type` 引用(外键), +> 这些行由 `operation_history` 管理;查询时 `get_account_history` 通过 +> `database.get(itr->op)` 解析这些引用。 +> +> **始终同时启用两个插件:** +> ```ini +> plugin = operation_history +> plugin = account_history +> ``` +> +> **清理协调:** 两个插件从 `config.ini` 读取同一个 `history-count-blocks` 键—— +> 不存在按插件分别设置的机制。设置一次即同时作用于两个插件。 +> 内部实现上,`account_history` 还在每个区块调用 `operation_history::get_min_keep_block()` +> 作为安全检查,确保其条目永远不会引用已被删除的 `operation_object`。 + --- ### `operation_history` diff --git a/docs/plugins/overview.md b/docs/plugins/overview.md index 3213aa4869..36c40c15c3 100644 --- a/docs/plugins/overview.md +++ b/docs/plugins/overview.md @@ -210,34 +210,51 @@ Reverse-lookup 
accounts by public key. --- -### `account_history` +### `operation_history` -Per-account operation history, paginated. +All-operations index for block-level and transaction queries. | Method | Description | |--------|-------------| -| `get_account_history(account, from, limit)` | Get operations; `from=-1` returns newest; max 1000 per call | +| `get_ops_in_block(block_num, virtual_ops)` | Operations in a block; `virtual_ops=true` includes virtual ops | +| `get_transaction(tx_id)` | Transaction by ID | **Config options:** -- `track-account-range` — account name range to index (default: all accounts) +- `history-whitelist-ops` / `history-blacklist-ops` — filter which op types are stored +- `history-start-block` — start indexing from this block number - `history-count-blocks` — retain N blocks of history --- -### `operation_history` +### `account_history` -All-operations index for block-level and transaction queries. +Per-account operation history, paginated. | Method | Description | |--------|-------------| -| `get_ops_in_block(block_num, virtual_ops)` | Operations in a block; `virtual_ops=true` includes virtual ops | -| `get_transaction(tx_id)` | Transaction by ID | +| `get_account_history(account, from, limit)` | Get operations; `from=-1` returns newest; max 1000 per call | **Config options:** -- `history-whitelist-ops` / `history-blacklist-ops` — filter which op types are stored -- `history-start-block` — start indexing from this block number +- `track-account-range` — account name range to index (default: all accounts) - `history-count-blocks` — retain N blocks of history +> **Dependency:** `account_history` **requires** `operation_history` as a parent plugin +> (`APPBASE_PLUGIN_REQUIRES`). The node will not start if `operation_history` is absent. +> `account_history` stores `operation_id_type` references (foreign keys) to `operation_object` rows +> managed by `operation_history`; at query time `get_account_history` resolves them via +> `database.get(itr->op)`. 
+> +> **Always enable both plugins together:** +> ```ini +> plugin = operation_history +> plugin = account_history +> ``` +> +> **Purge coordination:** Both plugins read the same `history-count-blocks` key from `config.ini` — +> there is no per-plugin separation. Setting it once applies to both simultaneously. Internally, +> `account_history` also calls `operation_history::get_min_keep_block()` on every block as a safety +> check, ensuring its entries never reference a purged `operation_object`. + --- ### `committee_api` @@ -325,8 +342,8 @@ plugin = database_api plugin = network_broadcast_api plugin = validator_api plugin = account_by_key -plugin = account_history plugin = operation_history +plugin = account_history plugin = committee_api plugin = invite_api plugin = paid_subscription_api diff --git a/plugins/account_history/plugin.cpp b/plugins/account_history/plugin.cpp index a6108530a5..07258c9f09 100644 --- a/plugins/account_history/plugin.cpp +++ b/plugins/account_history/plugin.cpp @@ -228,7 +228,15 @@ if( options.count(name) ) { \ std::map result; for (; itr != end; ++itr) { - result[itr->sequence] = database.get(itr->op); + // Guard against dangling operation_object references: + // operation_history may purge operation_objects before account_history + // purges the corresponding account_history_objects (e.g., different history-count-blocks). + // If the referenced object no longer exists, skip it silently. + auto op_obj = database.find(itr->op); + if (!op_obj) { + continue; + } + result[itr->sequence] = applied_operation(*op_obj); } return result; } @@ -529,6 +537,12 @@ if( options.count(name) ) { \ boost::program_options::value>()->composing()->multitoken(), "Defines a range of accounts to track as a json pair [\"from\",\"to\"] [from,to]. " "Can be specified multiple times" + )( + "history-count-blocks", + boost::program_options::value(), + "Defines depth of history for recording account history (same as operation_history's history-count-blocks). 
" + "If set here, account_history will purge at least as aggressively as this value, " + "and will also coordinate with operation_history's setting." ); cfg.add(cli); } From ee85cb354495a4352afc11faf757cde316046969 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 09:27:27 +0400 Subject: [PATCH 21/30] fix: disconnect signal handlers in plugin_shutdown() across 6 plugins validator, webserver, network_broadcast_api, database_api, account_by_key, and custom_protocol_api all connected to database signals (applied_block, pre/post_apply_operation) but never stored or disconnected the connection handles. On shutdown, callbacks into partially-destroyed plugin state could cause use-after-free crashes. Each plugin now stores boost::signals2::connection members and calls .disconnect() in plugin_shutdown() before releasing owned resources. validator additionally disconnects before stopping the production io_service to prevent the callback racing the thread join. --- plugins/account_by_key/account_by_key_plugin.cpp | 7 ++++--- .../plugins/account_by_key/account_by_key_plugin.hpp | 3 +++ plugins/custom_protocol_api/custom_protocol_api.cpp | 6 +++++- plugins/database_api/api.cpp | 10 +++++++++- .../include/graphene/plugins/database_api/plugin.hpp | 2 +- .../network_broadcast_api/network_broadcast_api.cpp | 1 + plugins/validator/validator.cpp | 5 ++++- plugins/webserver/webserver_plugin.cpp | 2 ++ 8 files changed, 29 insertions(+), 7 deletions(-) diff --git a/plugins/account_by_key/account_by_key_plugin.cpp b/plugins/account_by_key/account_by_key_plugin.cpp index 8ffbac4191..89dbc6272c 100644 --- a/plugins/account_by_key/account_by_key_plugin.cpp +++ b/plugins/account_by_key/account_by_key_plugin.cpp @@ -200,8 +200,8 @@ namespace graphene { namespace plugins { namespace account_by_key { my.reset(new account_by_key_plugin_impl(*this)); graphene::chain::database &db = appbase::app().get_plugin().db(); - db.pre_apply_operation.connect([&](operation_notification 
&o) { my->pre_operation(o); }); - db.post_apply_operation.connect([&](const operation_notification &o) { my->post_operation(o); }); + my->_pre_op_conn = db.pre_apply_operation.connect([&](operation_notification &o) { my->pre_operation(o); }); + my->_post_op_conn = db.post_apply_operation.connect([&](const operation_notification &o) { my->post_operation(o); }); add_plugin_index(db); JSON_RPC_REGISTER_API ( name() ) ; @@ -217,7 +217,8 @@ namespace graphene { namespace plugins { namespace account_by_key { void account_by_key_plugin::plugin_shutdown() { ilog("account_by_key plugin: plugin_shutdown() begin"); - + my->_pre_op_conn.disconnect(); + my->_post_op_conn.disconnect(); ilog("account_by_key plugin: plugin_shutdown() end"); } diff --git a/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp b/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp index 0d15169ff1..27664079d5 100644 --- a/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp +++ b/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp @@ -71,6 +71,9 @@ namespace graphene { account_by_key_plugin &_self; graphene::chain::database &_db; + + boost::signals2::connection _pre_op_conn; + boost::signals2::connection _post_op_conn; }; std::unique_ptr my; diff --git a/plugins/custom_protocol_api/custom_protocol_api.cpp b/plugins/custom_protocol_api/custom_protocol_api.cpp index 693a305e9f..c475c5c1e0 100644 --- a/plugins/custom_protocol_api/custom_protocol_api.cpp +++ b/plugins/custom_protocol_api/custom_protocol_api.cpp @@ -58,6 +58,7 @@ namespace graphene { namespace plugins { namespace custom_protocol_api { } uint8_t custom_protocol_store_size = 10; + boost::signals2::connection _post_op_conn; private: graphene::chain::database& database_; @@ -70,6 +71,9 @@ namespace graphene { namespace plugins { namespace custom_protocol_api { void 
custom_protocol_api_plugin::plugin_shutdown() { wlog("custom_protocol_api plugin: plugin_shutdown()"); + if (pimpl) { + pimpl->_post_op_conn.disconnect(); + } } const std::string& custom_protocol_api_plugin::name() { @@ -93,7 +97,7 @@ namespace graphene { namespace plugins { namespace custom_protocol_api { void custom_protocol_api_plugin::plugin_initialize(const boost::program_options::variables_map& options) { pimpl = std::make_unique(); auto& db = pimpl->database(); - db.post_apply_operation.connect([&](const operation_notification& note) { + pimpl->_post_op_conn = db.post_apply_operation.connect([&](const operation_notification& note) { pimpl->on_operation(note); }); add_plugin_index(db); diff --git a/plugins/database_api/api.cpp b/plugins/database_api/api.cpp index cecf8dc128..778fdb2c8d 100755 --- a/plugins/database_api/api.cpp +++ b/plugins/database_api/api.cpp @@ -130,6 +130,8 @@ struct plugin::api_impl final { block_applied_callback_info::cont active_block_applied_callback; block_applied_callback_info::cont free_block_applied_callback; + boost::signals2::connection _applied_block_conn; + private: graphene::chain::database &_db; @@ -912,7 +914,7 @@ void plugin::plugin_initialize(const boost::program_options::variables_map &opti ilog("database_api plugin: plugin_initialize() begin"); my = std::make_unique(); JSON_RPC_REGISTER_API(plugin_name) - my->database().applied_block.connect([this](const protocol::signed_block &) { + my->_applied_block_conn = my->database().applied_block.connect([this](const protocol::signed_block &) { this->clear_block_applied_callback(); }); ilog("database_api plugin: plugin_initialize() end"); @@ -922,4 +924,10 @@ void plugin::plugin_startup() { my->startup(); } +void plugin::plugin_shutdown() { + if (my) { + my->_applied_block_conn.disconnect(); + } +} + } } } // graphene::plugins::database_api diff --git a/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp 
b/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp index 654e266ca0..b2f5c8a126 100755 --- a/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp +++ b/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp @@ -196,7 +196,7 @@ class plugin final : public appbase::plugin { void plugin_startup() override; - void plugin_shutdown() override{} + void plugin_shutdown() override; plugin(); diff --git a/plugins/network_broadcast_api/network_broadcast_api.cpp b/plugins/network_broadcast_api/network_broadcast_api.cpp index e41f05d0b7..43a1e96ad2 100644 --- a/plugins/network_broadcast_api/network_broadcast_api.cpp +++ b/plugins/network_broadcast_api/network_broadcast_api.cpp @@ -181,6 +181,7 @@ namespace graphene { } void network_broadcast_api_plugin::plugin_shutdown() { + on_applied_block_connection.disconnect(); } void network_broadcast_api_plugin::on_applied_block(const signed_block &b) { try { diff --git a/plugins/validator/validator.cpp b/plugins/validator/validator.cpp index ea87e41ee5..eda908f894 100644 --- a/plugins/validator/validator.cpp +++ b/plugins/validator/validator.cpp @@ -208,6 +208,8 @@ namespace graphene { // Updated in the applied_block signal handler. uint64_t _last_applied_block_num = 0; + boost::signals2::connection _applied_block_connection; + // Protects cross-thread diagnostic fields shared between // production_io_thread_ and the P2P thread (on_block_applied / // get_production_diagnostics). Never held during database() calls. @@ -368,7 +370,7 @@ namespace graphene { // Connect to applied_block signal to detect missed slots // that belong to our validators and log diagnostic state. 
pimpl->_last_applied_block_num = d.head_block_num(); - d.applied_block.connect([this](const graphene::chain::signed_block &block) { + pimpl->_applied_block_connection = d.applied_block.connect([this](const graphene::chain::signed_block &block) { pimpl->on_block_applied(block); }); @@ -387,6 +389,7 @@ namespace graphene { void validator_plugin::plugin_shutdown() { graphene::time::shutdown_ntp_time(); if (!pimpl->_validators.empty()) { + pimpl->_applied_block_connection.disconnect(); ilog("shutting downing production timer"); // Stop the dedicated io_service so the production thread exits. // io_service::stop() is thread-safe; it causes run() to return diff --git a/plugins/webserver/webserver_plugin.cpp b/plugins/webserver/webserver_plugin.cpp index a37fd1d99d..6a28f94f79 100644 --- a/plugins/webserver/webserver_plugin.cpp +++ b/plugins/webserver/webserver_plugin.cpp @@ -630,6 +630,8 @@ namespace graphene { } void webserver_plugin::plugin_shutdown() { + my->chain_sync_con.disconnect(); + my->applied_block_conn.disconnect(); my->stop_webserver(); } From e162c93fc8f504600092e9497ac0758effc3ba05 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 09:44:37 +0400 Subject: [PATCH 22/30] update docker log rotation recommendations --- @l10n/ru/docs/node/docker.md | 54 +++++++++++++++++++++++++ @l10n/zh-CN/docs/node/docker.md | 54 +++++++++++++++++++++++++ docs/node/docker.md | 54 +++++++++++++++++++++++++ share/vizd/docker/Dockerfile-production | 19 +++++++++ share/vizd/docker/Dockerfile-testnet | 19 +++++++++ 5 files changed, 200 insertions(+) diff --git a/@l10n/ru/docs/node/docker.md b/@l10n/ru/docs/node/docker.md index 4d51bf0eb1..48cb630638 100644 --- a/@l10n/ru/docs/node/docker.md +++ b/@l10n/ru/docs/node/docker.md @@ -167,6 +167,59 @@ shared-file-size = 4G --- +## Ротация логов + +vizd пишет весь вывод в stdout/stderr. 
Дефолтный драйвер `json-file` в Docker **не имеет ограничений по размеру** — цикл краша или буря ошибок может заполнить диск хоста за считанные минуты (в продакшне наблюдалось 35 ГБ+). + +Вместо этого используйте драйвер `local`. Он хранит логи в компактном бинарном формате и автоматически ротирует файлы. + +**Глобальная конфигурация (рекомендуется — защищает все контейнеры на хосте):** + +`/etc/docker/daemon.json`: +```json +{ + "log-driver": "local", + "log-opts": { + "max-size": "100m", + "max-file": "5" + } +} +``` + +Применить: + +```bash +sudo systemctl restart docker +``` + +**Для конкретного контейнера (`docker run`):** + +```bash +docker run -d \ + --log-driver=local \ + --log-opt max-size=100m \ + --log-opt max-file=5 \ + --name vizd \ + vizblockchain/vizd:latest +``` + +**Для конкретного контейнера (docker-compose):** + +```yaml +services: + vizd: + image: vizblockchain/vizd:latest + logging: + driver: local + options: + max-size: "100m" + max-file: "5" +``` + +> При `max-file: 5` и `max-size: 100m` Docker хранит не более 500 МБ логов на контейнер и автоматически удаляет старейший файл при ротации. + +--- + ## Устранение неполадок | Симптом | Причина | Решение | @@ -176,3 +229,4 @@ shared-file-size = 4G | Нет пиров | Файрвол блокирует порт 2001 | Откройте порт 2001 TCP входящий | | Медленная синхронизация | Снимок не загружен | Предоставьте снимок в томе перед первым запуском | | `Permission denied` на `/var/lib/vizd` | Несоответствие владельца тома | `chown -R 1000:1000 /data/vizd` | +| Диск заполняется логами Docker | Драйвер `json-file` не имеет ограничения по размеру | Настройте драйвер `local` с `max-size`/`max-file` — см.
[Ротация логов](#ротация-логов) | diff --git a/@l10n/zh-CN/docs/node/docker.md b/@l10n/zh-CN/docs/node/docker.md index 57ec5dbd62..97ee609668 100644 --- a/@l10n/zh-CN/docs/node/docker.md +++ b/@l10n/zh-CN/docs/node/docker.md @@ -167,6 +167,59 @@ shared-file-size = 4G --- +## 日志轮转 + +vizd 将所有输出写入 stdout/stderr。Docker 默认的 `json-file` 日志驱动**没有大小限制**——崩溃循环或断言风暴可在数分钟内填满宿主机磁盘(生产环境中曾观察到 35 GB+)。 + +建议改用 `local` 驱动。它以紧凑的二进制格式存储日志并自动轮转。 + +**全局配置(推荐——保护宿主机上的所有容器):** + +`/etc/docker/daemon.json`: +```json +{ + "log-driver": "local", + "log-opts": { + "max-size": "100m", + "max-file": "5" + } +} +``` + +应用配置: + +```bash +sudo systemctl restart docker +``` + +**单容器配置(`docker run`):** + +```bash +docker run -d \ + --log-driver=local \ + --log-opt max-size=100m \ + --log-opt max-file=5 \ + --name vizd \ + vizblockchain/vizd:latest +``` + +**单容器配置(docker-compose):** + +```yaml +services: + vizd: + image: vizblockchain/vizd:latest + logging: + driver: local + options: + max-size: "100m" + max-file: "5" +``` + +> 设置 `max-file: 5` 和 `max-size: 100m` 后,Docker 每个容器最多保留 500 MB 日志,轮转时自动删除最旧的文件。 + +--- + ## 故障排除 | 症状 | 原因 | 解决方案 | @@ -176,3 +229,4 @@ shared-file-size = 4G | 无对等节点 | 防火墙阻止端口 2001 | 开放 2001 TCP 入站 | | 同步缓慢 | 未加载快照 | 首次启动前在卷中提供快照 | | `/var/lib/vizd` 权限拒绝 | 卷所有权不匹配 | `chown -R 1000:1000 /data/vizd` | +| Docker 日志填满磁盘 | `json-file` 驱动没有大小限制 | 配置带 `max-size`/`max-file` 的 `local` 驱动——参见[日志轮转](#日志轮转) | diff --git a/docs/node/docker.md b/docs/node/docker.md index 014ae11859..2a68a11c94 100644 --- a/docs/node/docker.md +++ b/docs/node/docker.md @@ -167,6 +167,59 @@ shared-file-size = 4G --- +## Log Rotation + +vizd writes all output to stdout/stderr. Docker's default `json-file` log driver has **no size limit** — a crash loop or assertion storm can fill the host disk in minutes (35 GB+ observed in production). + +Use the `local` driver instead. It uses a compact binary format and rotates automatically.
+ +**Global config (recommended — protects all containers on the host):** + +`/etc/docker/daemon.json`: +```json +{ + "log-driver": "local", + "log-opts": { + "max-size": "100m", + "max-file": "5" + } +} +``` + +Apply with: + +```bash +sudo systemctl restart docker +``` + +**Per-container (`docker run`):** + +```bash +docker run -d \ + --log-driver=local \ + --log-opt max-size=100m \ + --log-opt max-file=5 \ + --name vizd \ + vizblockchain/vizd:latest +``` + +**Per-container (docker-compose):** + +```yaml +services: + vizd: + image: vizblockchain/vizd:latest + logging: + driver: local + options: + max-size: "100m" + max-file: "5" +``` + +> With `max-file: 5` and `max-size: 100m` Docker keeps at most 500 MB of logs per container and automatically deletes the oldest file when rotating. + +--- + ## Troubleshooting | Symptom | Cause | Fix | @@ -176,3 +229,4 @@ shared-file-size = 4G | No peers | Firewall blocking port 2001 | Open port 2001 TCP inbound | | Slow sync | No snapshot loaded | Provide snapshot in volume before first start | | `Permission denied` on `/var/lib/vizd` | Volume ownership mismatch | `chown -R 1000:1000 /data/vizd` | +| Disk fills up with Docker logs | `json-file` driver has no size limit | Configure `local` driver with `max-size`/`max-file` — see [Log Rotation](#log-rotation) | diff --git a/share/vizd/docker/Dockerfile-production b/share/vizd/docker/Dockerfile-production index d32d7eec23..9eaf7869e9 100644 --- a/share/vizd/docker/Dockerfile-production +++ b/share/vizd/docker/Dockerfile-production @@ -131,6 +131,25 @@ COPY share/vizd/vizd.sh /etc/service/vizd/run COPY share/vizd/snapshot.json /var/lib/vizd COPY share/vizd/config/config.ini /etc/vizd/config.ini +# Log rotation — IMPORTANT for production deployments. +# +# vizd writes all output to stdout/stderr. Docker's default json-file log driver +# has no size limit, so a log flood (crash loop, assertion storm, etc.) can fill +# the host disk in minutes.
Use the `local` driver, which is more space-efficient +# and supports automatic rotation. +# +# Recommended: set globally on the Docker host so every container is protected: +# +# /etc/docker/daemon.json: +# { +# "log-driver": "local", +# "log-opts": { "max-size": "100m", "max-file": "5" } +# } +# then: systemctl restart docker +# +# Per-container override (docker run): +# --log-driver=local --log-opt max-size=100m --log-opt max-file=5 +# # rpc services: # http EXPOSE 8090 diff --git a/share/vizd/docker/Dockerfile-testnet b/share/vizd/docker/Dockerfile-testnet index f4ba3cc300..a4f93f4192 100644 --- a/share/vizd/docker/Dockerfile-testnet +++ b/share/vizd/docker/Dockerfile-testnet @@ -132,6 +132,25 @@ COPY share/vizd/vizd.sh /etc/service/vizd/run COPY share/vizd/snapshot-testnet.json /var/lib/vizd/snapshot.json COPY share/vizd/config/config_testnet.ini /etc/vizd/config.ini +# Log rotation — IMPORTANT for production deployments. +# +# vizd writes all output to stdout/stderr. Docker's default json-file log driver +# has no size limit, so a log flood (crash loop, assertion storm, etc.) can fill +# the host disk in minutes. Use the `local` driver, which is more space-efficient +# and supports automatic rotation. 
+# +# Recommended: set globally on the Docker host so every container is protected: +# +# /etc/docker/daemon.json: +# { +# "log-driver": "local", +# "log-opts": { "max-size": "100m", "max-file": "5" } +# } +# then: systemctl restart docker +# +# Per-container override (docker run): +# --log-driver=local --log-opt max-size=100m --log-opt max-file=5 +# # rpc services: # http EXPOSE 8090 From 21d38d9184a23df360d3fad66af58cd9562c9ff7 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 09:48:00 +0400 Subject: [PATCH 23/30] add try-catch sections to limit log spam --- libraries/network/dlt_p2p_node.cpp | 145 ++++++++++++++++++----------- 1 file changed, 90 insertions(+), 55 deletions(-) diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index 55462bcebc..30a0d6942d 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -143,13 +143,30 @@ void dlt_p2p_node::start() { // Start periodic task fiber if (_thread) { _periodic_fiber = _thread->async([this]() { + uint32_t consecutive_errors = 0; + fc::time_point last_error_log; while (_running) { try { fc::usleep(fc::seconds(5)); if (!_running) break; periodic_task(); + consecutive_errors = 0; } catch (const fc::exception& e) { - elog("Error in DLT P2P periodic task: ${e}", ("e", e.to_detail_string())); + consecutive_errors++; + auto now = fc::time_point::now(); + if (consecutive_errors == 1 || (now - last_error_log).count() > 60 * 1000000LL) { + elog("Error in DLT P2P periodic task (#${n}): ${e}", + ("n", consecutive_errors)("e", e.to_detail_string())); + last_error_log = now; + } + } catch (const std::exception& e) { + consecutive_errors++; + auto now = fc::time_point::now(); + if (consecutive_errors == 1 || (now - last_error_log).count() > 60 * 1000000LL) { + elog("Error in DLT P2P periodic task (#${n}): ${e}", + ("n", consecutive_errors)("e", std::string(e.what()))); + last_error_log = now; + } } } }, "dlt periodic_task"); @@ -3363,10 
+3380,14 @@ void dlt_p2p_node::block_validation_timeout() { void dlt_p2p_node::periodic_task() { // Non-DB-access housekeeping always runs. - periodic_reconnect_check(); - periodic_lifecycle_timeout_check(); - block_validation_timeout(); - periodic_mempool_cleanup(); + try { periodic_reconnect_check(); } + catch (const std::exception& e) { wlog("periodic_reconnect_check: ${e}", ("e", std::string(e.what()))); } + try { periodic_lifecycle_timeout_check(); } + catch (const std::exception& e) { wlog("periodic_lifecycle_timeout_check: ${e}", ("e", std::string(e.what()))); } + try { block_validation_timeout(); } + catch (const std::exception& e) { wlog("block_validation_timeout: ${e}", ("e", std::string(e.what()))); } + try { periodic_mempool_cleanup(); } + catch (const std::exception& e) { wlog("periodic_mempool_cleanup: ${e}", ("e", std::string(e.what()))); } // When block processing is paused (snapshot creation in progress), // skip periodic operations that need database read locks. The snapshot @@ -3374,85 +3395,99 @@ void dlt_p2p_node::periodic_task() { // lock from this fiber would time out and cascade into peer disconnections. if (_block_processing_paused) { // Still check banned peers for unban -- no DB access needed. - for (auto& _peer_item : _peer_states) { - auto& state = _peer_item.second; - if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { - auto ban_dur = (state.ban_duration_sec > 0) ? 
state.ban_duration_sec : BAN_DURATION_SEC; - auto elapsed = fc::time_point::now() - state.state_entered_time; - if (elapsed.count() > ban_dur * 1000000) { - state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; - state.disconnected_since = fc::time_point::now(); - state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); - ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + try { + for (auto& _peer_item : _peer_states) { + auto& state = _peer_item.second; + if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { + auto ban_dur = (state.ban_duration_sec > 0) ? state.ban_duration_sec : BAN_DURATION_SEC; + auto elapsed = fc::time_point::now() - state.state_entered_time; + if (elapsed.count() > ban_dur * 1000000) { + state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; + state.disconnected_since = fc::time_point::now(); + state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); + ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + } } } - } + } catch (const std::exception& e) { wlog("unban_check(paused): ${e}", ("e", std::string(e.what()))); } return; } // Normal path: all periodic operations run. 
- sync_stagnation_check(); - check_sync_catchup(); // P26 fix: periodic catch-up detection - check_forward_behind(); // P27 fix: detect falling behind in FORWARD mode - check_forward_stagnation(); // P37 fix: detect head stuck in FORWARD mode - request_gap_fill(); // P36 fix: fill gaps via exchange-enabled peers - periodic_peer_exchange(); + try { sync_stagnation_check(); } + catch (const std::exception& e) { wlog("sync_stagnation_check: ${e}", ("e", std::string(e.what()))); } + try { check_sync_catchup(); } // P26 fix: periodic catch-up detection + catch (const std::exception& e) { wlog("check_sync_catchup: ${e}", ("e", std::string(e.what()))); } + try { check_forward_behind(); } // P27 fix: detect falling behind in FORWARD mode + catch (const std::exception& e) { wlog("check_forward_behind: ${e}", ("e", std::string(e.what()))); } + try { check_forward_stagnation(); } // P37 fix: detect head stuck in FORWARD mode + catch (const std::exception& e) { wlog("check_forward_stagnation: ${e}", ("e", std::string(e.what()))); } + try { request_gap_fill(); } // P36 fix: fill gaps via exchange-enabled peers + catch (const std::exception& e) { wlog("request_gap_fill: ${e}", ("e", std::string(e.what()))); } + try { periodic_peer_exchange(); } + catch (const std::exception& e) { wlog("periodic_peer_exchange: ${e}", ("e", std::string(e.what()))); } // Post-pause catchup: drain queued blocks and/or clear the flag // when caught up. 
if (_catchup_after_pause && _delegate) { - // If there are still queued blocks, drain them first - if (!_paused_block_queue.empty()) { - drain_paused_block_queue(); - } + try { + // If there are still queued blocks, drain them first + if (!_paused_block_queue.empty()) { + drain_paused_block_queue(); + } - // After drain (or if queue was empty), check if we're still behind - uint32_t our_head = _delegate->get_head_block_num(); - bool any_ahead = false; - for (const auto& _pi : _peer_states) { - const auto& s = _pi.second; - if ((s.lifecycle_state == DLT_PEER_LIFECYCLE_ACTIVE || - s.lifecycle_state == DLT_PEER_LIFECYCLE_SYNCING) && - s.peer_head_num > our_head) { - any_ahead = true; - break; + // After drain (or if queue was empty), check if we're still behind + uint32_t our_head = _delegate->get_head_block_num(); + bool any_ahead = false; + for (const auto& _pi : _peer_states) { + const auto& s = _pi.second; + if ((s.lifecycle_state == DLT_PEER_LIFECYCLE_ACTIVE || + s.lifecycle_state == DLT_PEER_LIFECYCLE_SYNCING) && + s.peer_head_num > our_head) { + any_ahead = true; + break; + } } - } - if (!any_ahead) { - _catchup_after_pause = false; - ilog(DLT_LOG_GREEN "Post-pause catchup complete, no gap remaining (head=#${h})" DLT_LOG_RESET, - ("h", our_head)); - } + if (!any_ahead) { + _catchup_after_pause = false; + ilog(DLT_LOG_GREEN "Post-pause catchup complete, no gap remaining (head=#${h})" DLT_LOG_RESET, + ("h", our_head)); + } + } catch (const std::exception& e) { wlog("catchup_after_pause: ${e}", ("e", std::string(e.what()))); } } // Log node status every 1 minute (12 cycles at 5s) _status_log_counter++; if (_status_log_counter >= 12) { _status_log_counter = 0; - log_node_status(); + try { log_node_status(); } + catch (const std::exception& e) { wlog("log_node_status: ${e}", ("e", std::string(e.what()))); } } // Log peer stats at configured interval (counter tracks seconds, ticks are 5s) _stats_log_counter += 5; if (_stats_log_counter >= _stats_log_interval_sec) { 
_stats_log_counter = 0; - log_peer_stats(); + try { log_peer_stats(); } + catch (const std::exception& e) { wlog("log_peer_stats: ${e}", ("e", std::string(e.what()))); } } // Check banned peers for unban - for (auto& _peer_item : _peer_states) { - auto& state = _peer_item.second; - if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { - auto ban_dur = (state.ban_duration_sec > 0) ? state.ban_duration_sec : BAN_DURATION_SEC; - auto elapsed = fc::time_point::now() - state.state_entered_time; - if (elapsed.count() > ban_dur * 1000000) { - state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; - state.disconnected_since = fc::time_point::now(); - state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); - ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + try { + for (auto& _peer_item : _peer_states) { + auto& state = _peer_item.second; + if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { + auto ban_dur = (state.ban_duration_sec > 0) ? state.ban_duration_sec : BAN_DURATION_SEC; + auto elapsed = fc::time_point::now() - state.state_entered_time; + if (elapsed.count() > ban_dur * 1000000) { + state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; + state.disconnected_since = fc::time_point::now(); + state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); + ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + } } } - } + } catch (const std::exception& e) { wlog("unban_check: ${e}", ("e", std::string(e.what()))); } } // ── Accept loop ───────────────────────────────────────────────── From cfc5dbf43d8196ff24c09cc291849e6ff3b3adfa Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 09:55:11 +0400 Subject: [PATCH 24/30] fix: remove duplicate history-count-blocks option registration in account_history Both account_history and operation_history registered the same option name in the shared appbase cli options_description, causing boost to throw "option is ambiguous" on startup. 
account_history already reads the value registered by operation_history, so re-registration is unnecessary. --- plugins/account_history/plugin.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/plugins/account_history/plugin.cpp b/plugins/account_history/plugin.cpp index 07258c9f09..0d0c60b5cd 100644 --- a/plugins/account_history/plugin.cpp +++ b/plugins/account_history/plugin.cpp @@ -537,13 +537,9 @@ if( options.count(name) ) { \ boost::program_options::value>()->composing()->multitoken(), "Defines a range of accounts to track as a json pair [\"from\",\"to\"] [from,to]. " "Can be specified multiple times" - )( - "history-count-blocks", - boost::program_options::value(), - "Defines depth of history for recording account history (same as operation_history's history-count-blocks). " - "If set here, account_history will purge at least as aggressively as this value, " - "and will also coordinate with operation_history's setting." ); + // history-count-blocks is registered by operation_history plugin and shared; + // account_history reads it in plugin_initialize() without re-registering. cfg.add(cli); } From db07bd59303bd92d2397f88f50d3981a6c68131f Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 22:32:29 +0400 Subject: [PATCH 25/30] fix: prevent double-resize race corrupting shared memory segment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apply_pending_resize() is called from two threads before their respective write locks: the P2P thread (push_block) and the validator thread (generate_block). Both can see _pending_resize==true simultaneously, both pass begin_resize_barrier() (which releases its internal mutex on return), and both call resize() concurrently — resulting in simultaneous _segment.reset()+open()+add_index() on the same database object, corrupting the chainbase B-tree indices. 
Add _apply_resize_mutex with a double-check pattern so only one thread performs the resize; the second thread exits early after seeing _pending_resize already cleared. This is the root cause of periodic shared memory corruption (~20-30h intervals) in DLT mode: account_history pruning (history-count-blocks) creates constant alloc/free cycles in the boost::interprocess heap, accumulating fragmentation until free_memory() drops below the resize threshold. More frequent resizes increase the probability of the race hitting. In classic mode without pruning, resizes are rare enough that the race is practically unreachable. --- libraries/chain/database.cpp | 11 +++++++++++ libraries/chain/include/graphene/chain/database.hpp | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index 0b1a423495..59f1c7811e 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -891,6 +891,17 @@ namespace graphene { namespace chain { return; } + // Serialize concurrent resize attempts: the P2P thread (push_block) + // and the validator thread (generate_block) both call this before + // acquiring their respective write locks. Without this mutex both + // can see _pending_resize==true simultaneously, both pass + // begin_resize_barrier(), and both call resize() concurrently — + // corrupting the chainbase segment (double-resize race). + std::unique_lock resize_entry_guard(_apply_resize_mutex); + if (!_pending_resize) { + return; // another thread completed the resize while we waited + } + // Use the resize barrier to pause ALL database operations. // This is stronger than with_strong_write_lock: it also blocks // lockless reads (e.g. 
get_slot_at_time, get_scheduled_validator, diff --git a/libraries/chain/include/graphene/chain/database.hpp b/libraries/chain/include/graphene/chain/database.hpp index ae58e89bd8..0a4aa18d75 100644 --- a/libraries/chain/include/graphene/chain/database.hpp +++ b/libraries/chain/include/graphene/chain/database.hpp @@ -13,6 +13,7 @@ #include #include +#include namespace graphene { namespace chain { @@ -679,6 +680,12 @@ namespace graphene { namespace chain { bool _pending_resize = false; size_t _pending_resize_target = 0; + // Serializes concurrent apply_pending_resize() calls from the + // validator thread and the P2P thread. Both call it before their + // respective write locks, so without this mutex both threads can + // see _pending_resize==true simultaneously and double-resize, + // corrupting the chainbase segment. + std::mutex _apply_resize_mutex; bool _skip_virtual_ops = false; bool _enable_plugins_on_push_transaction = false; From 80e2884ba523cd57dc3536b040bff214bd1ea167 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 22:42:14 +0400 Subject: [PATCH 26/30] fix: clear soft-bans on auto-recovery to allow majority fork reconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After snapshot-based auto-recovery, soft-bans accumulated before the corruption were kept alive (up to 3600s), including bans on peers that carry the majority fork. The node would then gap-fill from the only available (minority-fork) peer and get stuck. Add reset_peers_after_recovery() that calls emergency_peer_reset() on the P2P thread: clears all banned→disconnected and resets reconnect backoff to zero so majority-fork peers reconnect immediately. Call it in attempt_auto_recover() before resume_block_processing(). 
--- libraries/network/dlt_p2p_node.cpp | 8 ++++++++ .../network/include/graphene/network/dlt_p2p_node.hpp | 1 + plugins/chain/plugin.cpp | 4 ++++ plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp | 6 ++++++ plugins/p2p/p2p_plugin.cpp | 6 ++++++ 5 files changed, 25 insertions(+) diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index 30a0d6942d..d69563ad8b 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -2408,6 +2408,14 @@ void dlt_p2p_node::resume_block_processing() { run_resume_on_p2p_thread(); } +void dlt_p2p_node::reset_peers_after_recovery() { + // Called from the P2P thread after auto-recovery completes. + // Clears all soft-bans so peers that were banned before the + // corruption (and may carry the majority fork) can reconnect + // and serve blocks immediately. + emergency_peer_reset(); +} + bool dlt_p2p_node::is_on_majority_fork() const { return _fork_status != DLT_FORK_STATUS_MINORITY; } diff --git a/libraries/network/include/graphene/network/dlt_p2p_node.hpp b/libraries/network/include/graphene/network/dlt_p2p_node.hpp index 02ed289789..6580c2919e 100644 --- a/libraries/network/include/graphene/network/dlt_p2p_node.hpp +++ b/libraries/network/include/graphene/network/dlt_p2p_node.hpp @@ -143,6 +143,7 @@ class dlt_p2p_node { void reconnect_seeds(); void pause_block_processing(); void resume_block_processing(); + void reset_peers_after_recovery(); // ── Our node state ─────────────────────────────────────────── dlt_node_status get_node_status() const { return _node_status; } diff --git a/plugins/chain/plugin.cpp b/plugins/chain/plugin.cpp index 1cdfebb000..959a4447ba 100644 --- a/plugins/chain/plugin.cpp +++ b/plugins/chain/plugin.cpp @@ -994,6 +994,10 @@ namespace chain { try { auto* p2p_plug = appbase::app().find_plugin(); if (p2p_plug && p2p_plug->get_state() == appbase::abstract_plugin::started) { + // Clear soft-bans BEFORE resuming so that peers banned + // before the 
corruption (which may carry the majority fork) + // can reconnect and serve blocks immediately after recovery. + p2p_plug->reset_peers_after_recovery(); p2p_plug->resume_block_processing(); wlog("Auto-recovery: P2P block processing resumed"); } diff --git a/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp b/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp index 4099b58c2e..c6956f1129 100644 --- a/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp +++ b/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp @@ -136,6 +136,12 @@ namespace graphene { */ void clear_catchup_flag(); + /** + * Clear all soft-bans after auto-recovery so peers that + * carry the majority fork can reconnect immediately. + */ + void reset_peers_after_recovery(); + private: std::unique_ptr my; }; diff --git a/plugins/p2p/p2p_plugin.cpp b/plugins/p2p/p2p_plugin.cpp index 942c2dd3e2..632918072c 100644 --- a/plugins/p2p/p2p_plugin.cpp +++ b/plugins/p2p/p2p_plugin.cpp @@ -747,6 +747,12 @@ void p2p_plugin::clear_catchup_flag() { if (my->node) my->node->clear_catchup_after_pause(); } +void p2p_plugin::reset_peers_after_recovery() { + my->p2p_thread.async([this]() { + if (my->node) my->node->reset_peers_after_recovery(); + }); +} + } // namespace p2p } // namespace plugins } // namespace graphene From f9de06eafbbb06799eaf0a3b087db7652f25a493 Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 22:45:53 +0400 Subject: [PATCH 27/30] fix: start gap fill from LIB when minority fork detected to trigger fork switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When on_dlt_gap_fill_reply detects a dead-fork block from a peer that is ahead (our fork is losing), the old code immediately called transition_to_sync() + request_blocks_from_peer() which fell back to LIB only if gap > FORWARD_FALLBEHIND_THRESHOLD. 
For small gaps the request started from our_head (wrong fork), the peer returned its version of our_head as a dead-fork block again — infinite 5-second loop. Fix: on "our fork is losing" detection, set _gap_fill_fork_override_start = our_lib. The next request_gap_fill() call uses LIB as the start instead of our_head. Blocks between LIB and the divergence point are ALREADY_KNOWN; blocks after the divergence point land in fork_db as FORK_DB_ONLY (majority chain). Once the majority chain accumulates sufficient height, the normal fork switch fires. Override is one-shot: cleared to 0 after use so subsequent gap fills resume from our_head. --- libraries/network/dlt_p2p_node.cpp | 26 +++++++++++++++++-- .../include/graphene/network/dlt_p2p_node.hpp | 7 +++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index d69563ad8b..4eb2dd8d46 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -1976,9 +1976,18 @@ void dlt_p2p_node::on_dlt_gap_fill_reply(peer_id peer, const dlt_gap_fill_reply& uint32_t our_head_now = _delegate->get_head_block_num(); uint32_t peer_latest = std::max(it->second.peer_dlt_latest, it->second.peer_head_num); if (peer_latest > our_head_now) { + uint32_t our_lib = _delegate->get_lib_block_num(); wlog(DLT_LOG_ORANGE "Gap fill: dead-fork block #${n} from peer ${ep} (peer=#${p} > our head #${h})" - " — our fork is losing, re-syncing from LIB instead of banning" DLT_LOG_RESET, - ("n", block.block_num())("ep", it->second.endpoint)("p", peer_latest)("h", our_head_now)); + " — our fork is losing, next gap fill will start from LIB #${lib}" DLT_LOG_RESET, + ("n", block.block_num())("ep", it->second.endpoint)("p", peer_latest) + ("h", our_head_now)("lib", our_lib)); + // Set override so request_gap_fill() starts from LIB on the + // next call: blocks from LIB include the divergence point and + // land in fork_db as FORK_DB_ONLY, allowing a 
fork switch once + // the majority chain reaches sufficient length. + if (our_lib > 0 && our_lib < our_head_now) { + _gap_fill_fork_override_start = our_lib; + } transition_to_sync(); request_blocks_from_peer(peer); } else { @@ -2055,6 +2064,19 @@ void dlt_p2p_node::request_gap_fill() { uint32_t our_head = _delegate->get_head_block_num(); if (our_head == 0) return; + // Fork-losing override: when on_dlt_gap_fill_reply detected that our fork + // is losing (dead-fork block, peer is ahead), it sets this to our LIB. + // Starting from LIB instead of our_head ensures the request covers the + // divergence point; the majority-chain blocks land in fork_db as + // FORK_DB_ONLY and eventually trigger a fork switch. One-shot: cleared + // after use so normal gap fill resumes from our_head on subsequent calls. + if (_gap_fill_fork_override_start > 0 && _gap_fill_fork_override_start < our_head) { + ilog(DLT_LOG_ORANGE "Gap fill fork override: starting from LIB #${lib} instead of head #${h} to find majority chain divergence" DLT_LOG_RESET, + ("lib", _gap_fill_fork_override_start)("h", our_head)); + our_head = _gap_fill_fork_override_start; + } + _gap_fill_fork_override_start = 0; + // Gap fill works in both FORWARD and SYNC modes.
// In SYNC mode, when request_blocks_from_peer() can't bridge a gap // (blocks below the syncing peer's DLT range), gap fill provides an diff --git a/libraries/network/include/graphene/network/dlt_p2p_node.hpp b/libraries/network/include/graphene/network/dlt_p2p_node.hpp index 6580c2919e..1af47924b8 100644 --- a/libraries/network/include/graphene/network/dlt_p2p_node.hpp +++ b/libraries/network/include/graphene/network/dlt_p2p_node.hpp @@ -388,6 +388,13 @@ class dlt_p2p_node { static constexpr uint32_t GAP_FILL_TIMEOUT_SEC = 15; ///< Max seconds to wait for gap fill reply uint32_t _highest_seen_block_num = 0; ///< Highest block num seen from any source + // When "our fork is losing" is detected in gap fill reply, this is set to + // our LIB so the next request_gap_fill() starts from LIB instead of + // our_head. Blocks from LIB onward include the divergence point, giving + // fork_db the majority chain blocks it needs to trigger a fork switch. + // Reset to 0 after use (one-shot). + uint32_t _gap_fill_fork_override_start = 0; + // ── Gap fill rejection tracking ────────────────────────────── uint32_t _gap_rejected_block_num = 0; ///< Last block num rejected by gap fill uint32_t _gap_rejected_count = 0; ///< How many times that block was rejected From fd99ad30fbe2cd31b14c984a55c105a5b8b866cf Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Thu, 28 May 2026 23:29:57 +0400 Subject: [PATCH 28/30] fix: replace dangling dgp ref with guarded bool copy in maybe_validate_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_dynamic_global_properties() returns a const& directly into the shared memory segment. Between line 1280 (where the ref was obtained) and line 1629 (where op_guard is created), _active_operations can be 0. 
A concurrent P2P push_block calling apply_pending_resize() would see _active_operations==0, pass begin_resize_barrier(), call _segment.reset() and remap the segment — leaving the dangling ref to produce a SIGSEGV on the next access to dgp.emergency_consensus_active. Read emergency_consensus_active into a local bool under with_weak_read_lock (which acquires an op_guard internally) before any other shared memory access. Replace all six uses of dgp.emergency_consensus_active in maybe_validate_block with the local copy. --- plugins/validator/validator.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/plugins/validator/validator.cpp b/plugins/validator/validator.cpp index eda908f894..e53664bd6a 100644 --- a/plugins/validator/validator.cpp +++ b/plugins/validator/validator.cpp @@ -1277,8 +1277,17 @@ namespace graphene { // - No blocks arrive to clear the syncing flag // - The production loop is the only path to advance the chain if (db._debug_block_production) ilog("DEBUG_CRASH: getting dgp"); - const auto &dgp = db.get_dynamic_global_properties(); - if (db._debug_block_production) ilog("DEBUG_CRASH: dgp ok, head=${h} emergency=${e}", ("h", dgp.head_block_number)("e", dgp.emergency_consensus_active)); + // Copy emergency_consensus_active under a read lock so the local bool + // stays valid even if a concurrent P2P resize remaps the segment between + // here and the op_guard created below at line ~1629. Keeping a raw + // const& into shared memory without an op_guard is a dangling-reference + // risk: begin_resize_barrier() can complete while _active_operations==0 + // and unmap the old segment before we dereference the field. 
+ bool emergency_active = false; + db.with_weak_read_lock([&]() { + emergency_active = db.get_dynamic_global_properties().emergency_consensus_active; + }); + if (db._debug_block_production) ilog("DEBUG_CRASH: dgp ok, head=${h} emergency=${e}", ("h", db.head_block_num())("e", emergency_active)); // === DLT MODE: DEFER PRODUCTION DURING ACTIVE SYNC === // In DLT mode, the validator must not produce blocks while the @@ -1304,7 +1313,7 @@ namespace graphene { // while the network is catching up. if (db._dlt_mode && chain().is_syncing()) { bool we_are_emergency_master = - dgp.emergency_consensus_active && + emergency_active && _validators.find(CHAIN_EMERGENCY_VALIDATOR_ACCOUNT) != _validators.end(); if (!we_are_emergency_master) { return block_validation_condition::not_synced; @@ -1350,7 +1359,7 @@ namespace graphene { if (db._debug_block_production) ilog("DEBUG_CRASH: checking hardfork12 and emergency path"); if (db.has_hardfork(CHAIN_HARDFORK_12)) { - if (dgp.emergency_consensus_active) { + if (emergency_active) { // EMERGENCY MODE: auto-bypass both stale and participation checks // for the emergency master only. The master holds the // emergency-private-key and MUST produce to avoid deadlock. @@ -1501,7 +1510,7 @@ namespace graphene { // continue producing (bootstrap / testnet / recovery scenario). // With enable-stale-production=false (default): we're on the wrong fork, // pop back to LIB and resync from the P2P network. - if (!dgp.emergency_consensus_active) { + if (!emergency_active) { auto fork_head = db.get_fork_db().head(); if (fork_head) { bool all_ours = true; @@ -1564,7 +1573,7 @@ namespace graphene { // "ours" is expected — other nodes sync from us. Skip minority fork // detection entirely to avoid false positives and the production // deadlock that would otherwise occur. - if (dgp.emergency_consensus_active && db._dlt_mode) { + if (emergency_active && db._dlt_mode) { // If committee is in the schedule and we have its key, WE are the // emergency master. 
All blocks being "ours" is expected -- other // nodes sync from us. Skip minority fork detection to prevent @@ -1852,7 +1861,7 @@ namespace graphene { bool has_competing_block = false; graphene::chain::item_ptr competing_block; - if (dgp.emergency_consensus_active) { + if (emergency_active) { // During emergency mode: ANY block at this height is competing. // Multiple nodes with the emergency key may have produced. // Defer to the deterministic hash-based resolution in fork_db. @@ -2010,7 +2019,7 @@ namespace graphene { // Roll back to LIB and resync from P2P network. elog("unlinkable_block_exception during block generation: fork_db broken. " "Rolling back to LIB and resyncing from P2P network."); - p2p().resync_from_lib(dgp.emergency_consensus_active /*force_emergency*/); + p2p().resync_from_lib(emergency_active /*force_emergency*/); _minority_fork_recovering = true; _minority_fork_recovery_start = fc::time_point::now(); return block_validation_condition::minority_fork; From da03b666be8da036b0fd0a16528b69cde8055ccd Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Sat, 30 May 2026 08:58:58 +0400 Subject: [PATCH 29/30] fix(chain): detect and handle missing validator account corruption - Added diagnostic fields copied from validator_obj before releasing op_guard scope - Implemented lockless check for validator account existence with saved diagnostics - Added re-verification under read lock to confirm missing account and avoid deadlock - Logged detailed critical error if validator account missing after re-check - Threw shared_memory_corruption_exception on confirmed shared memory corruption - Preserved existing validator signature assertion outside lock scope --- libraries/chain/database.cpp | 61 ++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index 59f1c7811e..6b1144436a 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ 
-2385,6 +2385,15 @@ namespace graphene { namespace chain { // while we hold raw pointers/references into the mapped segment. // The guard is scoped so it is released before with_strong_write_lock // (which acquires its own operation guard internally). + // Diagnostic fields saved from validator_obj for the locked re-check below. + // validator_obj is a shared-memory reference valid only inside the op_guard scope; + // copy before the scope ends so we can log them if the re-check confirms missing. + bool lockless_validator_account_missing = false; + public_key_type diag_signing_key; + uint32_t diag_total_missed = 0; + uint16_t diag_penalty = 0; + uint32_t diag_last_confirmed = 0; + size_t diag_account_index_size = 0; { auto op_guard = make_operation_guard(); @@ -2395,30 +2404,48 @@ namespace graphene { namespace chain { const auto &validator_obj = get_validator(validator_owner); - // Pre-check: ensure the validator account exists before generating the block. - // If the account is missing from the database (shared memory corruption), - // the block will be produced but fail to apply internally (process_funds - // calls get_account which would throw "unknown key"). - const auto* validator_acct = find_account(validator_owner); - if (!validator_acct) { - auto& acc_idx = get_index().indices().get(); + if (!(skip & skip_validator_signature)) + FC_ASSERT(validator_obj.signing_key == + block_signing_private_key.get_public_key()); + + // Lockless hint: does the validator account exist? + // op_guard coordinates with resize only — it does NOT exclude writers. + // A concurrent P2P writer rebalancing the by_name red-black tree can make + // this lookup transiently return null for an existing key. 
Save the hint + // and re-verify under a read lock AFTER this scope (op_guard released), + // to avoid a deadlock: with_strong_read_lock() nests its own op_guard, and + // if resize set _resize_in_progress between now and then, the nested + // enter_operation() would wait for resize while resize waits for us → deadlock. + if (!find_account(validator_owner)) { + lockless_validator_account_missing = true; + diag_signing_key = validator_obj.signing_key; + diag_total_missed = validator_obj.total_missed; + diag_penalty = validator_obj.penalty_percent; + diag_last_confirmed = validator_obj.last_confirmed_block_num; + diag_account_index_size = + get_index().indices().get().size(); + } + } // op_guard released here + + // Re-verify suspected missing account under a read lock, which blocks writers + // and yields a consistent index traversal. If still missing — real corruption. + if (lockless_validator_account_missing) { + with_strong_read_lock([&]() { + if (find_account(validator_owner)) { + return; // false alarm: lockless read raced a concurrent P2P writer + } elog("CRITICAL: Validator ${w} account object MISSING from database! " "This is impossible state - shared memory may be corrupted. " "signing_key=${k} total_missed=${m} penalty=${p} last_confirmed=${lc} " "account_index_size=${idx_size}", - ("w", validator_owner)("k", validator_obj.signing_key) - ("m", validator_obj.total_missed)("p", validator_obj.penalty_percent) - ("lc", validator_obj.last_confirmed_block_num) - ("idx_size", acc_idx.size())); + ("w", validator_owner)("k", diag_signing_key) + ("m", diag_total_missed)("p", diag_penalty) + ("lc", diag_last_confirmed)("idx_size", diag_account_index_size)); FC_THROW_EXCEPTION(shared_memory_corruption_exception, "CRITICAL: Validator ${w} account not found in database! 
Shared memory corruption suspected.", ("w", validator_owner)); - } - - if (!(skip & skip_validator_signature)) - FC_ASSERT(validator_obj.signing_key == - block_signing_private_key.get_public_key()); - } // op_guard released here + }); + } // Second operation guard covers all remaining lockless reads // in this function: get_dynamic_global_properties(), head_block_id(), From 5e5c2a46d61c4ba4613f2acbf9d2f92865117c7e Mon Sep 17 00:00:00 2001 From: Anatoly Piskunov Date: Sat, 30 May 2026 09:02:00 +0400 Subject: [PATCH 30/30] fix(chain): add detailed logging during shared memory resize operations - Add debug logs before flushing and remapping shared memory segments - Log after remapping to indicate completion and start of validation - Provide memory size details in megabytes in all resize logs - Improve traceability for deferred shared memory resizing process --- libraries/chain/database.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index 6b1144436a..f717a1ef7e 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -850,7 +850,10 @@ namespace graphene { namespace chain { "\033[33mShared memory growing on block ${block}: actual data ${used_before}M / current ${max_before}M -> new ${mem}M\033[0m", ("block", current_block_num)("mem", new_max / (1024 * 1024)) ("used_before", used_mem_before / (1024 * 1024))("max_before", max_mem / (1024 * 1024))); + dlog("Shared memory resize: flushing segment and remapping ${cur}M -> ${new}M", + ("cur", max_mem / (1024 * 1024))("new", new_max / (1024 * 1024))); resize(new_max); + dlog("Shared memory resize: remap complete, validating segment"); // Post-resize validation: verify key objects survived the remap. 
// A silent grow failure (file unchanged but open succeeds with @@ -923,7 +926,10 @@ namespace graphene { namespace chain { ilog("\033[33mApplying deferred shared memory resize: actual data ${used_before}M / current ${max_before}M -> new ${mem}M\033[0m", ("used_before", used_mem_before / (1024 * 1024))("max_before", max_mem_before / (1024 * 1024)) ("mem", target / (1024 * 1024))); + dlog("Shared memory resize: flushing segment and remapping ${cur}M -> ${new}M", + ("cur", max_mem_before / (1024 * 1024))("new", target / (1024 * 1024))); resize(target); + dlog("Shared memory resize: remap complete, validating segment"); // Post-resize validation: verify key objects survived the remap. if (max_memory() < target) {