diff --git a/rebar.config b/rebar.config index 83c8910f5..d60449bb1 100644 --- a/rebar.config +++ b/rebar.config @@ -176,7 +176,7 @@ ]}. {deps, [ - {elmdb, {git, "https://github.com/permaweb/elmdb-rs.git", {ref, "06ccf937abc250cb22c782d568efcaa39f5452ff"}}}, + {elmdb, {git, "https://github.com/permaweb/elmdb-rs.git", {ref, "68316484a5bd45bcc4b180a60883db7272c5dbde"}}}, {b64rs, {git, "https://github.com/permaweb/b64rs.git", {ref, "94b7d8e51d9a44f3bd12b7d138dd0d2cb74c169f"}}}, {base32, "1.0.0"}, {cowlib, "2.16.0"}, diff --git a/rebar.lock b/rebar.lock index 479c45556..0125bfea9 100644 --- a/rebar.lock +++ b/rebar.lock @@ -11,7 +11,7 @@ {<<"ddskerl">>,{pkg,<<"ddskerl">>,<<"0.4.2">>},1}, {<<"elmdb">>, {git,"https://github.com/permaweb/elmdb-rs.git", - {ref,"06ccf937abc250cb22c782d568efcaa39f5452ff"}}, + {ref,"68316484a5bd45bcc4b180a60883db7272c5dbde"}}, 0}, {<<"eqwalizer_support">>, {git_subdir,"https://github.com/whatsapp/eqwalizer.git", diff --git a/src/core/include/hb_store_arweave.hrl b/src/core/include/hb_store_arweave.hrl new file mode 100644 index 000000000..6bd62ae9b --- /dev/null +++ b/src/core/include/hb_store_arweave.hrl @@ -0,0 +1,3 @@ +-define(SCOPE_PARENT, <<"parent">>). +-define(SCOPE_OFFSET, <<"offset">>). + diff --git a/src/core/monitor/hb_event.erl b/src/core/monitor/hb_event.erl index 7f157440a..36e343eb2 100644 --- a/src/core/monitor/hb_event.erl +++ b/src/core/monitor/hb_event.erl @@ -405,18 +405,8 @@ check_overload(Last, N) -> case erlang:process_info(self(), message_queue_len) of {message_queue_len, Len} when Len > ?OVERLOAD_QUEUE_LENGTH -> {memory, MemorySize} = erlang:process_info(self(), memory), - case rand:uniform(max(1000, Len - ?OVERLOAD_QUEUE_LENGTH)) of - 1 -> - ?debug_print( - {warning, - prometheus_event_queue_overloading, - {queue, Len}, - {last_event, Last}, - {memory_bytes, MemorySize} - } - ); - _ -> ignored - end, + % If the size of this process is too large, exit such that + % we can be restarted by the next caller. 
case MemorySize of MemorySize when MemorySize > ?MAX_MEMORY -> ?debug_print( @@ -683,4 +673,4 @@ wait_drain_loop(Pid, Deadline) -> undefined -> error(event_server_dead) end. --endif. +-endif. \ No newline at end of file diff --git a/src/core/monitor/hb_prometheus.erl b/src/core/monitor/hb_prometheus.erl index f4917da77..9f8bb4383 100644 --- a/src/core/monitor/hb_prometheus.erl +++ b/src/core/monitor/hb_prometheus.erl @@ -1,7 +1,7 @@ %%% @doc HyperBEAM wrapper for Prometheus metrics. -module(hb_prometheus). -export([ensure_started/0, declare/2, measure_and_report/2, measure_and_report/3]). --export([observe/2, observe/3, inc/2, inc/3, inc/4, dec/2, dec/3, dec/4]). +-export([observe/2, observe/3, inc/2, inc/3, inc/4, dec/2, dec/3, dec/4, set/4]). -define(STARTED_CACHE_KEY, {?MODULE, started}). %% @doc Ensure the Prometheus application has been started. Caches startup @@ -118,4 +118,13 @@ dec(Type, Metrics, Labels, Value) -> end. do_dec(gauge, Name, Labels, Value) -> - prometheus_gauge:dec(Name, Labels, Value). \ No newline at end of file + prometheus_gauge:dec(Name, Labels, Value). + +set(gauge, Name, Labels, Value) -> + case ensure_started() of + ok -> + try prometheus_gauge:set(Name, Labels, Value) + catch error:mfa_already_exists -> ok + end; + _ -> ok + end. \ No newline at end of file diff --git a/src/core/resolver/hb_opts.erl b/src/core/resolver/hb_opts.erl index 6d271c518..3504aa9ef 100644 --- a/src/core/resolver/hb_opts.erl +++ b/src/core/resolver/hb_opts.erl @@ -20,6 +20,7 @@ -include("include/hb.hrl"). -include("include/hb_opts.hrl"). -include("include/hb_arweave_nodes.hrl"). +-include("include/hb_store_arweave.hrl"). -include("../../_build/hb_preloaded_index.hrl"). -ifndef(PRELOADED_DEVICES_INDEX_MESSAGE_ID). @@ -283,6 +284,12 @@ raw_default_message() -> <<"relay-http-client">> => httpc, % The default codec to use for commitment signatures. <<"commitment-device">> => <<"httpsig@1.0">>, + % Copycat-specific options. 
+ <<"copycat-memory-cap">> => 6 * 1024 * 1024 * 1024, + <<"copycat-memory-budget">> => 6 * 1024 * 1024 * 1024, + <<"copycat-depth-recursion-cap">> => 6, % 2x the deepest we've seen to date + <<"arweave-block-workers">> => 3, + <<"copycat-scope">> => [?SCOPE_OFFSET, ?SCOPE_PARENT], % Dev options <<"mode">> => debug, <<"profiling">> => true, @@ -1258,4 +1265,4 @@ ensure_node_history_test() -> ] }, ?assertEqual({error, invalid_values}, ensure_node_history(InvalidItems, RequiredOpts)). --endif. +-endif. \ No newline at end of file diff --git a/src/core/store/hb_store.erl b/src/core/store/hb_store.erl index 1ad92d85b..07897b7aa 100644 --- a/src/core/store/hb_store.erl +++ b/src/core/store/hb_store.erl @@ -561,8 +561,6 @@ start_one(Store = #{ <<"store-module">> := Mod }, Req, Opts) -> end. call_store_start(Mod, Store, Req, Opts) -> - %% function_exported doesn't load the module. We need to call ensure_loaded - %% here since is the first time we call a function to load the module. code:ensure_loaded(Mod), case erlang:function_exported(Mod, start, 3) of true -> Mod:start(Store, Req, Opts); diff --git a/src/core/store/hb_store_arweave.erl b/src/core/store/hb_store_arweave.erl index 722438bee..7758be94a 100644 --- a/src/core/store/hb_store_arweave.erl +++ b/src/core/store/hb_store_arweave.erl @@ -6,8 +6,15 @@ %%% Unused Store API: -export([resolve/3, write/3, link/3, group/3]). %%% Indexing API: --export([store_from_opts/1, write_offset/5, read_offset/3, read_chunks/3]). +-export([store_from_opts/1, write_offset/6, write_parent/5, read_offset/3, read_parent/3, decode_parent_entries/1, read_chunks/3]). +-export([block_indexed_path/1, block_items_path/2]). +-export([read_block_item_counts/2, read_block_item_ids/2]). +-export([is_tx_indexed/2 ]). +-export([write_block_item_ids/4, read_block_marker_depth/2]). +-export([decode_item_ids/1, is_block_indexed/3, mark_block_indexed/3 ]). +-export([root_offset/3]). -include("include/hb.hrl"). 
+-include("core/include/hb_store_arweave.hrl"). -include_lib("eunit/include/eunit.hrl"). -define(PARTITION_SIZE, 3_600_000_000_000). @@ -16,7 +23,7 @@ %% for the `arweave_index_store' option, and if not found, searches the main %% `store' list for the first Arweave store with an index. store_from_opts(Opts) -> - case hb_opts:get(arweave_index_store, no_store, Opts) of + case hb_opts:get(<<"arweave-index-store">>, no_store, Opts) of no_store -> first_arweave_store(hb_opts:get(store, [], Opts)); IndexStoreOpts -> IndexStoreOpts end. @@ -31,9 +38,9 @@ first_arweave_store( first_arweave_store([_ | Rest]) -> first_arweave_store(Rest). %% @doc Start the Arweave store, and the downstream associated index store. -start(#{<<"index-store">> := IndexStore}, _Req, _Opts) -> +start(#{<<"index-store">> := IndexStore}, Req, Opts) -> init_prometheus(), - hb_store:start(IndexStore). + hb_store:start(IndexStore, Req, Opts). %% @doc Although the index is local, loading an item via the index will make %% requests to a remote node, so we define the scope as remote. @@ -71,22 +78,21 @@ type(_Store, #{ <<"type">> := _ID }, _NodeOpts) -> {error, not_found}. %% @doc Read the offset of the data at the given key. 
-read_offset(StoreOpts = #{ <<"index-store">> := IndexStore }, ID, Opts) -> +read_offset(#{ <<"index-store">> := IndexStore }, ID, Opts) -> ReadRes = hb_prometheus:measure_and_report( fun() -> - hb_store:read(IndexStore, hb_store_arweave_offset:path(ID), StoreOpts) + hb_store:read(IndexStore, hb_store_arweave_offset:path(ID), Opts) end, hb_store_arweave_index_check_duration_seconds ), case ReadRes of {ok, OffsetBinary} -> - {Version, CodecName, StartOffset, Length} = + {CodecName, Offset, Length} = hb_store_arweave_offset:decode(OffsetBinary), {ok, #{ - <<"version">> => Version, <<"codec-device">> => CodecName, - <<"start-offset">> => StartOffset, + <<"start-offset">> => Offset, <<"length">> => Length }}; _ -> @@ -94,14 +100,65 @@ read_offset(StoreOpts = #{ <<"index-store">> := IndexStore }, ID, Opts) -> end; read_offset(_, _, _) -> not_found. +%% @doc Read the parent entries for an item from the index store. +read_parent(#{ <<"index-store">> := IndexStore }, ID, Opts) -> + NormalizedID = hb_util:native_id(ID), + ParentPath = <<"parent/", NormalizedID/binary>>, + case hb_store:read(IndexStore, ParentPath, Opts) of + {ok, Bin} -> + case decode_parent_entries(Bin) of + {error, _} = Err -> Err; + Entries -> {ok, Entries} + end; + _ -> + not_found + end; +read_parent(_, _, _) -> not_found. + +decode_parent_entries(<<>>) -> []; +decode_parent_entries(<<0, Height:64/big-unsigned, Rest/binary>>) -> + case decode_parent_entries(Rest) of + {error, _} = Err -> Err; + Tail -> [{Height, block} | Tail] + end; +decode_parent_entries(<<1, ParentID:32/binary, Rest/binary>>) -> + case decode_parent_entries(Rest) of + {error, _} = Err -> Err; + Tail -> [{ParentID, bundle} | Tail] + end; +decode_parent_entries(_Corrupt) -> + {error, corrupt_parent_data}. + + +%% @doc Return the store path for a parent index entry. +parent_path(ItemID) when byte_size(ItemID) =:= 32 -> + <<"parent/", ItemID/binary>>. + +%% @doc Encode a parent entry for storage. 
+encode_parent_entry(Height, block) when is_integer(Height) -> + <<0, Height:64/big-unsigned>>; +encode_parent_entry(ParentID, bundle) when byte_size(ParentID) =:= 32 -> + <<1, ParentID:32/binary>>. + +%% Block Information Index + +%% @doc Return the store path for a block completion marker. +block_indexed_path(Height) -> + <<"block/", (hb_util:bin(Height))/binary, "/depth">>. + +%% @doc Return the store path for a per-block item index at a given depth. +block_items_path(Height, Depth) -> + <<"block/", (hb_util:bin(Height))/binary, + "/items/", (hb_util:bin(Depth))/binary>>. + %% @doc Read the data at the given key, reading the `local-store' first if %% available. -read(StoreOpts, #{ <<"read">> := ID }, _NodeOpts) when ?IS_ID(ID) -> +read(StoreOpts, #{ <<"read">> := ID }, NodeOpts) when ?IS_ID(ID) -> case hb_store_remote_node:read_local_cache(StoreOpts, ID, StoreOpts) of {ok, Message} -> {ok, Message}; _ -> - case do_read(StoreOpts, ID, StoreOpts) of + case do_read(StoreOpts, ID, NodeOpts) of not_found -> {error, not_found}; Result -> Result end @@ -116,16 +173,19 @@ do_read(StoreOpts, ID, Opts) -> case read_offset(StoreOpts, ID, Opts) of {ok, #{ - <<"version">> := Version, - <<"codec-device">> := CodecName, - <<"start-offset">> := StartOffset, + <<"codec-device">> := Codec, + <<"start-offset">> := Offset, <<"length">> := Length - }} -> + } + } -> Loaded = - case CodecName of - <<"ans104@1.0">> -> load_item(ID, StartOffset, Length, Opts); - <<"tx@1.0">> -> load_tx(ID, StartOffset, Length, Opts) - end, + load_message( + Codec, + ID, + root_offset(Offset, StoreOpts, Opts), + Length, + Opts + ), case Loaded of {ok, Message} -> hb_store_remote_node:maybe_cache(StoreOpts, Message), @@ -133,66 +193,116 @@ do_read(StoreOpts, ID, Opts) -> arweave_offsets, {read_ok, {id, {string, ID}}, - {format_version, Version}, - {type, CodecName}, - {start_offset, StartOffset}, + {codec, Codec}, + {offset, Offset}, {length, Length} } ), - record_partition_metric(StartOffset, ok, Opts), 
+ record_partition_metric(Offset, ok, StoreOpts), Loaded; {error, Reason} -> ?event( arweave_offsets, {read_chunks_not_found, {id, {string, ID}}, - {format_version, Version}, - {type, CodecName}, - {start_offset, StartOffset}, + {codec, Codec}, + {offset, Offset}, {length, Length}, {reason, Reason} } ), - record_partition_metric(StartOffset, not_found, Opts), + record_partition_metric(Offset, not_found, StoreOpts), if Reason =:= not_found -> not_found; true -> {error, Reason} end end; not_found -> - ?event( - arweave_offsets, - {miss, {id, {explicit, ID}}} - ), + ?event(arweave_offsets, {miss, {id, {explicit, ID}}}), not_found end. +%% @doc Takes a `read_offset/2' result and returns it, normalized to the +%% outer-most root that is known: Either the mempool or a global byte offset. +root_offset(relative, _Store, _Opts) -> relative; +root_offset(GlobalOffset, _Store, _Opts) when is_integer(GlobalOffset) -> GlobalOffset; +root_offset(Offset, Store, Opts) -> root_offset(Offset, 0, Store, Opts). +root_offset(#{ <<"relative">> := P, <<"offset">> := Off }, Acc, Store, Opts) -> + case read_offset(Store, P, Opts) of + {ok, #{ <<"start-offset">> := Next = #{ <<"relative">> := _, <<"offset">> := _ } }} -> + % We have another relative offset. Continue. + root_offset(Next, Acc + Off, Store, Opts); + {ok, #{ <<"start-offset">> := relative }} -> + % We have reached an unconfirmed TX as the root of the relative offset + % chain, so we return an offset against that. + #{ <<"relative">> => P, <<"offset">> => Acc + Off }; + {ok, #{ <<"start-offset">> := GlobalOffset }} when is_integer(GlobalOffset) -> + % We have reached a confirmed TX as the root of the relative offset + % chain, so we return a global offset. + GlobalOffset + Acc + Off; + _ -> + % The result was unknown, so we total accumulator and current offset + % and return it with the `relative` key intact. + #{ <<"relative">> => P, <<"offset">> => Acc + Off } + end; +root_offset(Other, _, _, _) -> Other. 
+ +%% @doc Load a TX from Arweave. Supports either confirmed or pending TXs. +load_message(<<"tx@1.0">>, ID, Type, _Length, Opts) -> + % Determine the correct path to hit to load the TX. Confirmed TXs require + % `tx=ID`, while pending TXs require `pending=ID`. + PathKeys = + if Type =:= relative -> #{ <<"path">> => <<"pending">>, <<"pending">> => ID }; + true -> #{ <<"path">> => <<"tx">>, <<"tx">> => ID } + end, + hb_prometheus:measure_and_report( + fun() -> + hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + PathKeys#{ <<"exclude-data">> => false }, + Opts + ) + end, + hb_store_arweave_chunk_fetch_duration_seconds, + [load_tx] + ); %% @doc Load an ANS-104 item from the given start offset and length. -%% Returns an `ok' tuple with the deserialized item, or an `error' tuple with -%% the reason. The `StartOffset` is the precise starting byte of the item _header_, +%% The `StartOffset` is the precise starting byte of the item _header_, %% not the data segment. The `Length` covers the full size of the item, including %% header. The `ExpectedID` is verified against the deserialized item's ID to %% guard against stale offsets (e.g. after a reorg). 
-load_item(ExpectedID, StartOffset, Length, Opts) -> +load_message(<<"ans104@1.0">>, ID, Offset, Length, Opts) -> hb_prometheus:measure_and_report( fun() -> - case read_chunks(StartOffset, Length, Opts) of + case read_chunks(Offset, Length, Opts) of {ok, SerializedItem} -> - Item = - ar_bundles:deserialize(SerializedItem), - case hb_util:encode(Item#tx.id) of - ExpectedID -> - {ok, hb_message:convert( - Item, - <<"structured@1.0">>, - <<"ans104@1.0">>, - Opts - )}; - ActualID -> - {error, - {id_mismatch, - ExpectedID, ActualID}} + try + Item = + ar_bundles:deserialize(SerializedItem), + case hb_util:encode(Item#tx.id) of + ID -> + {ok, hb_message:convert( + Item, + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + )}; + ActualID -> + ?event(error, {load_item, {id_mismatch}}), + {error, {id_mismatch, ID, ActualID}} + end + catch _:Reason:Stacktrace -> + %% Due to malformed encoding, attempt to deserialize + %% can throw. + ?event(error, + {load_item, + {expected_id, ID}, + {reason, Reason}, + {stacktrace, Stacktrace} + }), + {error, Reason} end; {error, Reason} -> + ?event(error, {load_item, Reason}), {error, Reason} end end, @@ -200,91 +310,205 @@ load_item(ExpectedID, StartOffset, Length, Opts) -> [load_item] ). -%% @doc Load a TX from the given start offset and length. The `StartOffset' is -%% the start of the first chunk of the data and runs for the length of the data -%% segment, ignoring header size. 
-load_tx(ID, StartOffset, Length, Opts) -> - hb_prometheus:measure_and_report( - fun() -> - {ok, StructuredTXHeader} = hb_ao:resolve( - #{ <<"device">> => <<"arweave@2.9">> }, - #{ - <<"path">> => <<"tx">>, - <<"tx">> => ID, - <<"exclude-data">> => true - }, - Opts - ), - TXHeader = - hb_message:convert( - StructuredTXHeader, - <<"tx@1.0">>, - <<"structured@1.0">>, - Opts - ), - case Length of - 0 -> - {ok, hb_message:convert( - TXHeader, - <<"structured@1.0">>, - <<"tx@1.0">>, - Opts)}; - _ -> - case read_chunks(StartOffset, Length, Opts) of - {ok, Data} -> - {ok, hb_message:convert( - TXHeader#tx{data = Data}, - <<"structured@1.0">>, - <<"tx@1.0">>, - Opts - )}; - {error, Reason} -> - {error, Reason} - end - end - end, - hb_store_arweave_chunk_fetch_duration_seconds, - [load_tx] - ). - %% @doc Read the chunks from the given start offset and length using the %% `~arweave@2.9` device. -read_chunks(StartOffset, Length, Opts) -> +read_chunks(Offset, Length, Opts) -> hb_ao:resolve( #{ <<"device">> => <<"arweave@2.9">> }, #{ <<"path">> => <<"chunk">>, - <<"offset">> => StartOffset + 1, + <<"offset">> => + % TODO: The rationale for this seems to be that Arweave offsets + % start at the last byte of the previous chunk. It is unclear + % whether it is wise to apply this offset here, or perhaps it + % should be applied in the device key itself. + if is_integer(Offset) -> Offset + 1; + true -> Offset + end, <<"length">> => Length }, Opts ). +%% @doc Write a parent entry for an item to the index store. +write_parent(ItemID, ParentData, Type, Store, Opts) -> + case + lists:member( + ?SCOPE_PARENT, + hb_opts:get(<<"copycat-scope">>, [], Opts) + ) of + true -> + Entry = encode_parent_entry(ParentData, Type), + hb_store:write(Store, #{parent_path(ItemID) => Entry}, Opts); + false -> + ok + end. + %% @doc Write offset information to the index store. 
write_offset( - StoreOpts = #{ <<"index-store">> := IndexStore }, + #{ <<"index-store">> := IndexStore }, ID, CodecName, StartOffset, - Length + Length, + Opts ) -> - Value = hb_store_arweave_offset:encode(CodecName, StartOffset, Length), - ?event( - debug_store_arweave, - {writing_offset, - {id, {explicit, ID}}, - {type, CodecName}, - {start_offset, StartOffset}, - {length, Length}, - {value, {explicit, Value}} - } + case + lists:member( + ?SCOPE_OFFSET, + hb_opts:get(<<"copycat-scope">>, [], Opts) + ) of + true -> + Value = hb_store_arweave_offset:encode(CodecName, StartOffset, Length), + ?event( + debug_store_arweave, + {writing_offset, + {id, {explicit, ID}}, + {type, CodecName}, + {start_offset, StartOffset}, + {length, Length}, + {value, {explicit, Value}} + } + ), + hb_store:write( + IndexStore, + #{ hb_store_arweave_offset:path(ID) => Value }, + Opts + ); + false -> + ok + end. + +%% @doc Probe item entries upward from depth 1, applying TransformFun to each. +probe_block_items(Height, Opts, TransformFun) -> + case store_from_opts(Opts) of + no_store -> + erlang:display({no_store, Opts}), + #{}; + #{ <<"index-store">> := Store } -> + probe_block_items(Height, Store, 1, #{}, TransformFun, Opts) + end. + +probe_block_items(Height, Store, Depth, Acc, TransformFun, Opts) -> + case hb_store:read(Store, block_items_path(Height, Depth), Opts) of + {ok, Bin} -> + Key = hb_util:bin(Depth), + probe_block_items( + Height, Store, Depth + 1, + Acc#{Key => TransformFun(Bin)}, TransformFun, Opts); + {error, not_found} -> + Acc + end. + +count_ids(Bin) when byte_size(Bin) rem 32 =:= 0 -> + byte_size(Bin) div 32; +count_ids(_) -> <<"corrupt">>. + +decode_and_encode_ids(Bin) -> + case decode_item_ids(Bin) of + {error, _} -> <<"corrupt">>; + List -> [hb_util:encode(ID) || ID <- List] + end. + +read_block_item_counts(Height, Opts) -> + probe_block_items(Height, Opts, fun count_ids/1). 
+ +read_block_item_ids(Height, Opts) -> + probe_block_items(Height, Opts, fun decode_and_encode_ids/1). + +%% @doc Write per-depth item ID lists for a block. +%% Writes an entry for every depth from 1 through AchievedDepth (empty if +%% no items at that level), plus any partial depths beyond AchievedDepth +%% that were collected during indexing. +write_block_item_ids(Height, AchievedDepth, ItemIDs, Opts) -> + Store = get_index_store(Opts), + MaxStoredDepth = case maps:keys(ItemIDs) of + [] -> AchievedDepth; + Keys -> max(AchievedDepth, lists:max(Keys)) + end, + Results = lists:map( + fun(D) -> + IDs = maps:get(D, ItemIDs, []), + Bin = encode_item_ids(IDs), + hb_store:write( + Store, + #{block_items_path(Height, D) => Bin}, + Opts + ) + end, + lists:seq(1, MaxStoredDepth) ), + case lists:all(fun(R) -> R =:= ok end, Results) of + true -> ok; + false -> + ?event(copycat_short, + {block_item_ids_write_failed, + {height, Height}}), + {error, item_ids_write_failed} + end. + +%% @doc Encode a list of 32-byte raw IDs into a single binary. +encode_item_ids(IDs) -> + << <> || ID <- IDs >>. + +%% @doc Decode a binary of concatenated 32-byte IDs into a list. +%% Rejects binaries whose size is not a multiple of 32. +decode_item_ids(<<>>) -> []; +decode_item_ids(Bin) when byte_size(Bin) rem 32 =/= 0 -> + {error, invalid_item_ids_binary}; +decode_item_ids(Bin) -> + decode_item_ids_acc(Bin, []). + +decode_item_ids_acc(<<>>, Acc) -> lists:reverse(Acc); +decode_item_ids_acc(<>, Acc) -> + decode_item_ids_acc(Rest, [ID | Acc]). + +%% @doc Read the stored marker depth for a block, or undefined if none. +read_block_marker_depth(Height, Opts) -> + case store_from_opts(Opts) of + no_store -> undefined; + #{ <<"index-store">> := Store } -> + case hb_store:read(Store, block_indexed_path(Height), Opts) of + {ok, Bin} -> + try binary_to_integer(Bin) + catch _:_ -> undefined + end; + {error, not_found} -> undefined + end + end. 
+ +%% @doc Check if a block has been indexed at the given depth or deeper. +is_block_indexed(undefined, _TargetDepth, _Opts) -> + false; +is_block_indexed(Height, TargetDepth, Opts) -> + case read_block_marker_depth(Height, Opts) of + undefined -> false; + StoredDepth -> StoredDepth >= TargetDepth + end. + +%% @doc Write a block completion marker with the achieved depth. +mark_block_indexed(Height, Depth, Opts) -> + Store = get_index_store(Opts), hb_store:write( - IndexStore, - #{ hb_store_arweave_offset:path(ID) => Value }, - StoreOpts + Store, + #{block_indexed_path(Height) => integer_to_binary(Depth)}, + Opts ). +%% @doc Check if a transaction ID is indexed in the arweave index store. +is_tx_indexed(TXID, Opts) -> + Store = get_index_store(Opts), + case hb_store:read(Store, hb_store_arweave_offset:path(TXID), Opts) of + {ok, _} -> true; + {error, not_found} -> false + end. + +get_index_store(Opts) -> + case store_from_opts(Opts) of + #{ <<"index-store">> := Store } -> Store; + _ -> throw(no_index_store_available) + end. + %% @doc Record the partition that data is found in when it is requested. record_partition_metric(Offset, Result, StoreOpts) when is_integer(Offset) -> case hb_opts:get(prometheus, not hb_features:test(), StoreOpts) of @@ -299,7 +523,8 @@ record_partition_metric(Offset, Result, StoreOpts) when is_integer(Offset) -> end); false -> ok - end. + end; +record_partition_metric(_, _, _) -> ok. %% @doc Initialize the Prometheus metrics for the Arweave store. Executed on %% `start/1' of the store. @@ -335,17 +560,24 @@ init_prometheus() -> %%% Tests +setup_test_store() -> + IndexStore = [hb_test_utils:test_store()], + ArweaveStore = + #{ + <<"store-module">> => hb_store_arweave, + <<"index-store">> => IndexStore + }, + Opts = #{<<"store">> => [ArweaveStore]}, + {IndexStore, ArweaveStore, Opts}. 
+ write_read_tx_test() -> - Store = [hb_test_utils:test_store()], - Opts = #{ - <<"index-store">> => Store - }, + {_, ArweaveStoreOpts, Opts} = setup_test_store(), ID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, EndOffset = 363524457284025, Size = 8387, StartOffset = EndOffset - Size, - ok = write_offset(Opts, ID, <<"tx@1.0">>, StartOffset, Size), - {ok, Bundle} = read(Opts, #{ <<"read">> => ID }, Opts), + ok = write_offset(ArweaveStoreOpts, ID, <<"tx@1.0">>, StartOffset, Size, Opts), + {ok, Bundle} = read(ArweaveStoreOpts, #{ <<"read">> => ID }, Opts), ?assert(hb_message:verify(Bundle, all, #{})), {ok, Child} = hb_ao:resolve( @@ -379,26 +611,92 @@ write_read_tx_test() -> %% @doc Stale ANS-104 offset: fake ID pointing to a known bundle TX's %% data range. The deserialized item's ID won't match the fake ID. stale_ans104_offset_returns_error_test() -> - Store = [hb_test_utils:test_store()], - Opts = #{<<"index-store">> => Store}, + {_, ArweaveStoreOpts, Opts} = setup_test_store(), FakeID = <<"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA">>, RealEndOffset = 363524457284025, RealSize = 8387, RealStartOffset = RealEndOffset - RealSize, - ok = write_offset(Opts, FakeID, <<"ans104@1.0">>, RealStartOffset, RealSize), - Result = read(Opts, #{ <<"read">> => FakeID }, Opts), + ok = write_offset(ArweaveStoreOpts, FakeID, <<"ans104@1.0">>, RealStartOffset, RealSize, Opts), + Result = read(ArweaveStoreOpts, #{ <<"read">> => FakeID }, Opts), ?assertMatch({error, {id_mismatch, _, _}}, Result). %% @doc The L1 TX has bundle tags, but data is not a valid bundle. 
write_read_fake_bundle_tx_test() -> - Store = [hb_test_utils:test_store()], - Opts = #{ - <<"index-store">> => Store - }, + {_, ArweaveStoreOpts, Opts} = setup_test_store(), ID = <<"cGNURX2IUt98VKVIeXSfYe6eulNwPEqijaQfvatzd_o">>, Size = 2, StartOffset = 155309918167286, - ok = write_offset(Opts, ID, <<"tx@1.0">>, StartOffset, Size), - {ok, TX} = read(Opts, #{ <<"read">> => ID }, Opts), + ok = write_offset(ArweaveStoreOpts, ID, <<"tx@1.0">>, StartOffset, Size, Opts), + {ok, TX} = read(ArweaveStoreOpts, #{ <<"read">> => ID }, Opts), ?assert(hb_message:verify(TX, all, #{})), ok. + +%% @doc Interior Arweave offset returns bytes that are not a valid ANS-104 item, +%% so ar_bundles:deserialize/1 throws. The catch in load_item/4 must convert +%% that throw into {error, _} rather than crashing. +load_item_deserialize_throws_test() -> + {_, ArweaveStoreOpts, Opts} = setup_test_store(), + FakeID = <<"BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB">>, + %% Same interior offset used in dev_arweave bundle_header_garbage_guard test: + %% the bytes at ProbeOffset are mid-TX application data, not an ANS-104 header. + ProbeOffset = 376836336327208, + Size = 4096, + ok = write_offset(ArweaveStoreOpts, FakeID, <<"ans104@1.0">>, ProbeOffset - 1, Size, Opts), + ?assertMatch({error, _}, read(ArweaveStoreOpts, #{ <<"read">> => FakeID }, Opts)). + +root_offset_confirmed_parent_test() -> + {_, ArweaveStoreOpts, Opts} = setup_test_store(), + ParentID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, + ok = write_offset(ArweaveStoreOpts, ParentID, <<"tx@1.0">>, 12345, 99, Opts), + ?assertEqual( + 12352, + root_offset( + #{ <<"relative">> => ParentID, <<"offset">> => 7 }, + ArweaveStoreOpts, + Opts + ) + ). 
+ +corrupt_item_ids_read_test() -> + {IndexStore, _StoreOpts, Opts} = setup_test_store(), + Height = 99999999, + ok = hb_store:write(IndexStore, #{block_indexed_path(Height) => <<"2">>}, Opts), + ok = hb_store:write(IndexStore, #{block_items_path(Height, 1) => <<0:256>>}, Opts), + ok = hb_store:write(IndexStore, #{block_items_path(Height, 2) => <<0:240>>}, Opts), + Counts = read_block_item_counts(Height, Opts), + erlang:display({counts, Counts}), + ?assertEqual(1, maps:get(<<"1">>, Counts)), + ?assertEqual(<<"corrupt">>, maps:get(<<"2">>, Counts)), + IDs = read_block_item_ids(Height, Opts), + ?assertEqual(1, length(maps:get(<<"1">>, IDs))), + ?assertEqual(<<"corrupt">>, maps:get(<<"2">>, IDs)), + ok. + +parent_encode_decode_test() -> + BlockEntry = encode_parent_entry(12345, block), + ?assertEqual(<<0, 12345:64/big-unsigned>>, BlockEntry), + BundleID = crypto:strong_rand_bytes(32), + BundleEntry = encode_parent_entry(BundleID, bundle), + ?assertEqual(<<1, BundleID:32/binary>>, BundleEntry), + Combined = <>, + Decoded = decode_parent_entries(Combined), + ?assertEqual([{12345, block}, {BundleID, bundle}], Decoded), + ok. + +parent_not_found_test() -> + {_IndexStore, ArweaveStoreOpts, Opts} = setup_test_store(), + UnknownID = crypto:strong_rand_bytes(32), + ?assertEqual( + not_found, + hb_store_arweave:read_parent(ArweaveStoreOpts, UnknownID, Opts), + Opts + ), + ok. + +decode_item_ids_validation_test() -> + ?assertEqual([], decode_item_ids(<<>>)), + GoodBin = <<0:256, 1:256>>, + ?assertEqual(2, length(decode_item_ids(GoodBin))), + BadBin = <<0:240>>, + ?assertEqual({error, invalid_item_ids_binary}, decode_item_ids(BadBin)), + ok. diff --git a/src/core/store/hb_store_arweave_offset.erl b/src/core/store/hb_store_arweave_offset.erl index 7b4d5a914..e030e8769 100644 --- a/src/core/store/hb_store_arweave_offset.erl +++ b/src/core/store/hb_store_arweave_offset.erl @@ -1,92 +1,168 @@ %%% @doc Succinct encoding and decoding for Arweave data offset indexing. 
%%% Arweave data items are extremely numerous (>25,000,000,000 as of Feb 2026), and %%% as such small optimizations to the encoding of their offsets have a significant -%%% effect. For exampple, a single byte sized in the encoding at time of writing +%%% effect. For example, a single byte sized in the encoding at time of writing %%% saves ~25 GB of storage. -%%% -%%% The encoding is as follows: -%%% << Version:4, Codec:4, StartOffset:64, Length/binary >> +%%% +%%% Version 1 of the encoding is as follows: +%%% Encoded ::= MempoolTX | RelativeRef | ConfirmedMessage +%%% MempoolTX ::= << Version:4, 0:4 >> +%%% RelativeRef ::= << Version:4, Codec:4, RELATIVE:64, ParentID:256, Range >> +%%% ConfirmedMessage ::= << Version:4, Codec:4, Range >> +%%% Range ::= << Offset:64, Length:unsigned-variable-length-integer >> %%% where: %%% - Version: 4-bit unsigned integer. Max: 15. Current: version `1`. -%%% - Codec: 4-bit unsigned integer. Max: 15. -%%% - StartOffset: 64-bit uint. Max: 2^64-1. -%%% - Length: unsigned variable-length integer. -%%% -%%% Codecs: +%%% - Codec: 4-bit unsigned integer. Max: 15. Registry included below. +%%% - Offset: 64-bit uint. Max: 2^64-1. +%%% - RELATIVE: An atom, expressing that the offset is relative to the start +%%% of another transaction, rather than the start of the Arweave global +%%% address space. Always expressed as 2^64-1. +%%% - ParentID: The ID of a parent message for a relative offset, 256-bit uint. +%%% - Length: big-endian unsigned variable-length integer. +%%% - MempoolTX: Always << 1:4, 0: 4>>, indicating the version and that the +%%% key refers to an Arweave transaction that is not yet confirmed. +%%% - RelativeRef: A reference to an offset inside an unconfirmed Arweave +%%% transaction, yet to receive a global offset. +%%% - ConfirmedMessage: A message (any codec) that has been confirmed and has +%%% received a global offset. +%%% +%%% Codec Registry: %%% - 0: `tx@1.0`: An Arweave transaction. 
-%%% - 1: [Reserved for ANS-102: The initial JSON data item format.] +%%% - 1: `ans102@1.0`: The initial JSON data item format. %%% - 2: `~ans104@1.0`: Binary data items. -%%% - 3: [Reserved for `~httpsig@1.0`: RFC-9421 compatible HTTP signed messages.] -%%% +%%% - 3: `~httpsig@1.0`: RFC-9421 compatible HTTP signed messages. +%%% %%% Codec indexes should, in general, be sorted by the time of their first write %%% to Arweave: Arweave TXs as 0, ANS-102 as 1, ANS-104 as 2, etc. -%%% -%%% All `length` values are read by decoding all of the remaining bytes in the +%%% +%%% All `length` values are read by decoding all of the remaining bytes in the %%% offset encoding as an unsigned big-endian integer. This allows the length %%% to contract to only the number of bytes actually necessary to represent it. -%%% -module(hb_store_arweave_offset). --export([encode/3, decode/1, path/1]). +-export([encode/3, decode/1, path/1, mismatch_path/1]). -include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). -%% @doc Determine if a value is within a given unsigned bit range. --define(IN_BIT_RANGE(X, Bits), (X >= 0 andalso X < (1 bsl Bits))). +-define(IN_BIT_RANGE(X, Bits), (is_integer(X) andalso X >= 0 andalso X < (1 bsl Bits))). --define(OFFSET_SZ, (8*8)). % 64-bit uint. Max: 2^64-1. --define(FORMAT_VERSION, 1). % 4-bit uint. Max: 15. +-define(OFFSET_SZ, (8*8)). +-define(OFFSET_MAX, ((1 bsl ?OFFSET_SZ) - 1)). +-define(FORMAT_VERSION, 1). +-define(MEMPOOL_TX, <>). -%% @doc Reserved for future use. At the present time, store containing offsets are -%% expected to be utilized only as sub-stores to a `hb_store_arweave' store. As -%% as consequence, the path is simply the ID of the data item, with the prefix -%% of `~arweave@2.9/offset/` implied. path(ID) when ?IS_ID(ID) -> hb_util:native_id(ID); path(ID) -> throw({cannot_encode_path, ID}). -%% @doc Encode the offset of the data if it is valid. Throws `cannot_encode_offset' -%% if invalid. 
-encode(Type, StartOffset, Length) - when - (Type == true orelse Type == false orelse is_binary(Type)) - andalso ?IN_BIT_RANGE(StartOffset, ?OFFSET_SZ*8) - andalso is_integer(Length) andalso Length >= 0 - -> +mismatch_path(ID) when ?IS_ID(ID) -> + <<"mismatch/", (hb_util:native_id(ID))/binary>>; +mismatch_path(ID) -> throw({cannot_encode_mismatch_path, ID}). + +%% @doc Encode an offset entry. +%% MempoolTX: a single byte when the key refers to an unconfirmed TX. +encode(<<"tx@1.0">>, relative, _Length) -> + ?MEMPOOL_TX; +%% RelativeRef: sentinel offset + parent ID + range. +encode(Codec, #{ <<"relative">> := ParentID, <<"offset">> := RelOffset }, Length) + when is_binary(Codec) andalso ?IS_ID(ParentID) + andalso ?IN_BIT_RANGE(RelOffset, ?OFFSET_SZ) + andalso is_integer(Length) andalso Length >= 0 -> << - (encode_format(Type))/binary, + (encode_format(Codec))/binary, + ?OFFSET_MAX:?OFFSET_SZ, + (hb_util:native_id(ParentID))/binary, + RelOffset:?OFFSET_SZ, + (binary:encode_unsigned(Length))/binary + >>; +%% ConfirmedMessage: global offset + length. +encode(Codec, StartOffset, Length) + when is_binary(Codec) + andalso is_integer(StartOffset) + andalso ?IN_BIT_RANGE(StartOffset, ?OFFSET_SZ) + andalso is_integer(Length) andalso Length >= 0 -> + << + (encode_format(Codec))/binary, StartOffset:?OFFSET_SZ, (binary:encode_unsigned(Length))/binary >>; -encode(IsTX, StartOffset, Length) -> - throw({cannot_encode_offset, {IsTX, StartOffset, Length}}). +encode(Codec, Offset, Length) -> + throw({cannot_encode_offset, {Codec, Offset, Length}}). -decode(<>) -> - {Version, CodecName} = decode_format(Format), - {Version, CodecName, StartOffset, binary:decode_unsigned(Length)}; +%% @doc Decode an offset entry. +decode(?MEMPOOL_TX) -> + % MempoolTX: exactly one byte, version 1, codec tx@1.0. + {<<"tx@1.0">>, relative, 0}; +decode(<>) -> + % RelativeRef: `RELATIVE` atom in the offset field signals a parent-relative ref. 
+ {_, Codec} = decode_format(Fmt), + { + Codec, + #{ + <<"relative">> => hb_util:encode(ParentID), + <<"offset">> => RelOffset + }, + binary:decode_unsigned(Length) + }; +decode(<>) -> + % ConfirmedMessage: global offset. + {_, Codec} = decode_format(Fmt), + {Codec, Offset, binary:decode_unsigned(Length)}; decode(Binary) -> throw({cannot_decode_offset, Binary}). -%% @doc Encode the type of the data. -encode_type(<<"tx@1.0">>) -> 0; -encode_type(<<"ans102@1.0">>) -> 1; -encode_type(<<"ans104@1.0">>) -> 2; -encode_type(<<"httpsig@1.0">>) -> 3; -encode_type(Type) -> throw({cannot_encode_type, Type}). +encode_codec(<<"tx@1.0">>) -> 0; +encode_codec(<<"ans102@1.0">>) -> 1; +encode_codec(<<"ans104@1.0">>) -> 2; +encode_codec(<<"httpsig@1.0">>) -> 3; +encode_codec(Codec) -> throw({cannot_encode_codec, Codec}). -%% @doc Decode the type of the data to a binary codec name. -decode_type(0) -> <<"tx@1.0">>; -decode_type(1) -> <<"ans102@1.0">>; -decode_type(2) -> <<"ans104@1.0">>; -decode_type(3) -> <<"httpsig@1.0">>; -decode_type(Type) -> throw({cannot_decode_type, Type}). +decode_codec(0) -> <<"tx@1.0">>; +decode_codec(1) -> <<"ans102@1.0">>; +decode_codec(2) -> <<"ans104@1.0">>; +decode_codec(3) -> <<"httpsig@1.0">>; +decode_codec(Codec) -> throw({cannot_decode_codec, Codec}). -%% @doc Encode the format of the offset. See the module documentation for the -%% present index of supported codecs. -encode_format(CodecName) -> - << ?FORMAT_VERSION:4, (encode_type(CodecName)):4 >>; encode_format(CodecName) -> - throw({cannot_encode_format, CodecName}). + <>. -%% @doc Decode the format of the offset. -decode_format(<>) -> - {FormatVersion, decode_type(CodecName)}; +decode_format(<<_Version:4, CodecName:4>>) -> + {?FORMAT_VERSION, decode_codec(CodecName)}; decode_format(Binary) -> - throw({cannot_decode_format, Binary}). \ No newline at end of file + throw({cannot_decode_format, Binary}). 
+ +%%% Tests + +confirmed_round_trip_test() -> + Encoded = encode(<<"tx@1.0">>, 12345, 678), + ?assertEqual({<<"tx@1.0">>, 12345, 678}, decode(Encoded)). + +mempool_tx_round_trip_test() -> + Encoded = encode(<<"tx@1.0">>, relative, 0), + ?assertEqual(1, byte_size(Encoded)), + ?assertEqual({<<"tx@1.0">>, relative, 0}, decode(Encoded)). + +relative_ref_round_trip_test() -> + ParentID = hb_util:encode(crypto:strong_rand_bytes(32)), + Encoded = + encode(<<"ans104@1.0">>, + #{ <<"relative">> => ParentID, <<"offset">> => 321 }, + 654 + ), + ?assertEqual( + { + <<"ans104@1.0">>, + #{ <<"relative">> => ParentID, <<"offset">> => 321 }, + 654 + }, + decode(Encoded) + ). + +relative_ref_zero_offset_round_trip_test() -> + ParentID = hb_util:encode(crypto:strong_rand_bytes(32)), + Encoded = + encode( + <<"ans104@1.0">>, + #{ <<"relative">> => ParentID, <<"offset">> => 0 }, + 100 + ), + ?assertMatch({<<"ans104@1.0">>, #{ <<"offset">> := 0 }, 100}, decode(Encoded)). \ No newline at end of file diff --git a/src/core/store/hb_store_lmdb.erl b/src/core/store/hb_store_lmdb.erl index a5b649ffc..8f3ad4cbd 100644 --- a/src/core/store/hb_store_lmdb.erl +++ b/src/core/store/hb_store_lmdb.erl @@ -23,6 +23,7 @@ -export([start/3, stop/3, scope/0, scope/1, reset/3]). -export([read/3, write/3, list/3, match/3]). -export([group/3, link/3, type/3, resolve/3]). +-export([overlay_count/1]). %% Test framework and project includes -include_lib("eunit/include/eunit.hrl"). 
@@ -62,9 +63,12 @@ start(Opts = #{ <<"name">> := DataDir }, _Req, _NodeOpts) -> batch_size, hb_util:int(maps:get(<<"batch-size">>, Opts, ?DEFAULT_BATCH_SIZE)) }, - no_mem_init, - no_sync + no_mem_init ] ++ + case maps:get(<<"sync">>, Opts, false) of + true -> []; + false -> [no_sync] + end ++ case maps:get(<<"read-ahead">>, Opts, true) of true -> []; false -> [no_readahead] @@ -84,7 +88,11 @@ start(Opts = #{ <<"name">> := DataDir }, _Req, _NodeOpts) -> % Create the LMDB environment with specified size limit {ok, Env} = elmdb:env_open(DataDirPath, EnvOpts), {ok, DBInstance} = elmdb:db_open(Env, [create]), - {ok, #{ <<"env">> => Env, <<"db">> => DBInstance }}; + SyncInterval = hb_util:int(maps:get(<<"sync-interval">>, Opts, 0)), + MonitorPid = spawn(fun() -> + overlay_monitor_loop(Env, DBInstance, DataDir, SyncInterval, 0) + end), + {ok, #{ <<"env">> => Env, <<"db">> => DBInstance, <<"monitor">> => MonitorPid }}; start(_Store, _Req, _NodeOpts) -> {error, {badarg, <<"StoreOpts must be a map">>}}. @@ -547,8 +555,27 @@ resolve(Opts, #{ <<"resolve">> := Path }, _NodeOpts) -> %% @doc Retrieve or create the LMDB environment handle for a database. find_env(Opts) -> hb_store:find(Opts). +%% @doc Return the number of writes currently pending in the elmdb overlay. +%% Safe to call on any live database — does not trigger any I/O. +-spec overlay_count(map()) -> non_neg_integer(). +overlay_count(Opts) -> + #{ <<"db">> := DB } = find_env(Opts), + elmdb:overlay_count(DB). 
+ %% Shutdown LMDB environment and cleanup resources -stop(#{ <<"store-module">> := ?MODULE, <<"name">> := DataDir }, _Req, _Opts) -> +stop(#{ <<"store-module">> := ?MODULE, <<"name">> := DataDir } = StoreOpts, _Req, _Opts) -> + case maps:get(<<"monitor">>, StoreOpts, undefined) of + undefined -> ok; + Pid -> + Ref = erlang:monitor(process, Pid), + exit(Pid, shutdown), + receive + {'DOWN', Ref, process, Pid, _Reason} -> ok + after 5000 -> + erlang:demonitor(Ref, [flush]), + ok + end + end, % Soft-close by name; refs stay valid and reopen lazily on next access. catch elmdb:env_close_by_name(hb_util:list(DataDir)), ok; @@ -593,6 +620,26 @@ sample_metrics(Name, StartTime, Type) -> miss -> ok end. +%% @doc Periodically samples overlay_count and reports it to Prometheus. +%% When sync-interval > 0, also calls env_sync every that many seconds, +%% decoupling durability from the per-commit flush worker path. +overlay_monitor_loop(Env, DBInstance, StoreName, SyncInterval, SecondsSinceSync) -> + receive + stop -> ok + after 1000 -> + Count = elmdb:overlay_count(DBInstance), + hb_prometheus:set(gauge, hb_store_lmdb_overlay_count, [StoreName], Count), + NextSecondsSinceSync = + case SyncInterval > 0 andalso SecondsSinceSync + 1 >= SyncInterval of + true -> + elmdb:env_sync(Env), + 0; + false -> + SecondsSinceSync + 1 + end, + overlay_monitor_loop(Env, DBInstance, StoreName, SyncInterval, NextSecondsSinceSync) + end. + init_prometheus() -> hb_prometheus:declare(histogram, [ {name, hb_store_lmdb_duration_seconds}, @@ -605,6 +652,11 @@ init_prometheus() -> {labels, [name]}, {help, "LMDB name requested"} ]), + hb_prometheus:declare(gauge, [ + {name, hb_store_lmdb_overlay_count}, + {labels, [store_name]}, + {help, "Number of writes pending in the elmdb overlay for each store"} + ]), ok. %% @doc Test suite demonstrating basic store operations. 
diff --git a/src/hb.app.src b/src/hb.app.src index 21773478a..495f9cc97 100644 --- a/src/hb.app.src +++ b/src/hb.app.src @@ -11,7 +11,8 @@ cowboy, os_mon, gun, - hackney + hackney, + graphql ]}, {env, []}, {modules, []}, diff --git a/src/hb_copycat_budget.erl b/src/hb_copycat_budget.erl new file mode 100644 index 000000000..3aabf0184 --- /dev/null +++ b/src/hb_copycat_budget.erl @@ -0,0 +1,201 @@ +%%% @doc Atomics-based byte budget pool for copycat memory throttling. +%%% Controls how many bytes of TX data can be held in memory simultaneously +%%% across all copycat workers. Uses persistent_term for constant-time access. +-module(hb_copycat_budget). +-export([ensure_started/1, reset/1, lease/1, release/1, get_budget/0, stats/0]). +-include_lib("eunit/include/eunit.hrl"). +-include("include/hb.hrl"). + +-define(PERSISTENT_KEY, hb_copycat_budget). +-define(IDX_LEASED, 1). +-define(IDX_PEAK, 2). +-define(IDX_BUDGET, 3). +-define(IDX_RETRIES, 4). +-define(RETRY_SLEEP_MS, 50). +-define(LEASE_LOOP_MAX_RETRIES, 100). + +-define(INIT_LOCK, hb_copycat_budget_init). + +ensure_started(Budget) when is_integer(Budget), Budget > 0 -> + case persistent_term:get(?PERSISTENT_KEY, undefined) of + undefined -> + init_with_lock(Budget); + _Ref -> + ok + end. + +init_with_lock(Budget) -> + try register(?INIT_LOCK, self()) of + true -> + try + case persistent_term:get(?PERSISTENT_KEY, undefined) of + undefined -> + Ref = atomics:new(4, [{signed, false}]), + atomics:put(Ref, ?IDX_BUDGET, Budget), + persistent_term:put(?PERSISTENT_KEY, Ref); + _AlreadySet -> + ok + end + after + unregister(?INIT_LOCK) + end, + ok + catch + error:badarg -> + await_init(Budget) + end. + +await_init(Budget) -> + case persistent_term:get(?PERSISTENT_KEY, undefined) of + undefined -> + case whereis(?INIT_LOCK) of + undefined -> + init_with_lock(Budget); + _Pid -> + timer:sleep(1), + await_init(Budget) + end; + _Ref -> + ok + end. 
+ +reset(Budget) when is_integer(Budget), Budget > 0 -> + Ref = atomics:new(4, [{signed, false}]), + atomics:put(Ref, ?IDX_BUDGET, Budget), + persistent_term:put(?PERSISTENT_KEY, Ref), + ok. + +lease(Size) when is_integer(Size), Size > 0 -> + Ref = persistent_term:get(?PERSISTENT_KEY), + lease_loop(Ref, Size, 0). + +lease_loop(Ref, Size, ?LEASE_LOOP_MAX_RETRIES) -> + ?event(error, + {lease_loop_max_retries_exhausted, + {ref, Ref}, + {size, Size}, + {max_retries, ?LEASE_LOOP_MAX_RETRIES}}), + throw(exhausted_lease_loop_max_retires); +lease_loop(Ref, Size, Retries) -> + Current = atomics:get(Ref, ?IDX_LEASED), + Budget = atomics:get(Ref, ?IDX_BUDGET), + case Current + Size > Budget of + true -> + atomics:add(Ref, ?IDX_RETRIES, 1), + timer:sleep(?RETRY_SLEEP_MS), + lease_loop(Ref, Size, Retries + 1); + false -> + case atomics:compare_exchange(Ref, ?IDX_LEASED, Current, Current + Size) of + ok -> + update_peak(Ref, Current + Size), + ok; + _Changed -> + lease_loop(Ref, Size, Retries + 1) + end + end. + +release(Size) when is_integer(Size), Size > 0 -> + Ref = persistent_term:get(?PERSISTENT_KEY), + atomics:sub(Ref, ?IDX_LEASED, Size), + ok. + +get_budget() -> + case persistent_term:get(?PERSISTENT_KEY, undefined) of + undefined -> undefined; + Ref -> atomics:get(Ref, ?IDX_BUDGET) + end. + +stats() -> + case persistent_term:get(?PERSISTENT_KEY, undefined) of + undefined -> + not_started; + Ref -> + #{ + leased => atomics:get(Ref, ?IDX_LEASED), + peak => atomics:get(Ref, ?IDX_PEAK), + budget => atomics:get(Ref, ?IDX_BUDGET), + retries => atomics:get(Ref, ?IDX_RETRIES) + } + end. + +update_peak(Ref, NewLeased) -> + Peak = atomics:get(Ref, ?IDX_PEAK), + case NewLeased =< Peak of + true -> ok; + false -> + case atomics:compare_exchange(Ref, ?IDX_PEAK, Peak, NewLeased) of + ok -> ok; + _Changed -> update_peak(Ref, NewLeased) + end + end. 
+ +%%% Tests + +lease_release_cycle_test() -> + reset(1000), + ?assertEqual(1000, get_budget()), + ok = lease(400), + #{leased := 400, peak := 400, budget := 1000} = stats(), + ok = lease(300), + #{leased := 700, peak := 700} = stats(), + ok = release(400), + #{leased := 300, peak := 700} = stats(), + ok = release(300), + #{leased := 0, peak := 700} = stats(), + reset_to_default(), + ok. + +blocks_when_over_budget_test() -> + reset(100), + ok = lease(100), + Parent = self(), + Ref = make_ref(), + Pid = spawn(fun() -> + Parent ! {Ref, trying}, + ok = lease(50), + Parent ! {Ref, got_lease} + end), + receive {Ref, trying} -> ok end, + timer:sleep(120), + receive + {Ref, got_lease} -> error(should_have_blocked) + after 0 -> ok + end, + release(60), + receive + {Ref, got_lease} -> ok + after 500 -> + exit(Pid, kill), + error(lease_never_granted) + end, + release(50), + #{leased := 40} = stats(), + release(40), + reset_to_default(), + ok. + +concurrent_leases_test() -> + Budget = 1000, + reset(Budget), + Parent = self(), + NumWorkers = 20, + LeaseSize = 200, + Pids = [spawn(fun() -> + ok = lease(LeaseSize), + timer:sleep(10), + release(LeaseSize), + Parent ! {done, self()} + end) || _ <- lists:seq(1, NumWorkers)], + lists:foreach(fun(Pid) -> + receive {done, Pid} -> ok + after 5000 -> error({timeout, Pid}) + end + end, Pids), + #{leased := 0, peak := Peak, budget := Budget} = stats(), + ?assert(Peak =< Budget), + ?assert(Peak > 0), + reset_to_default(), + ok. + +reset_to_default() -> + reset(hb_opts:get(<<"copycat-memory-budget">>, 6 * 1024 * 1024 * 1024, #{})). diff --git a/src/preloaded/arweave/dev_arweave.erl b/src/preloaded/arweave/dev_arweave.erl index 3c967d81c..7ea04dc07 100644 --- a/src/preloaded/arweave/dev_arweave.erl +++ b/src/preloaded/arweave/dev_arweave.erl @@ -7,11 +7,13 @@ -implements(<<"arweave@2.9">>). -device_libraries([lib_arweave_common]). -export([info/0]). --export([tx/3, raw/3, chunk/3, block/3, current/3, status/3, price/3, tx_anchor/3]). 
+-export([tx/3, raw/3, chunk/3, block/3, parent/3, current/3, status/3, price/3, tx_anchor/3]). -export([pending/3]). -export([post_tx_header/2, post_tx/3, post_tx/4, post_chunk/2]). %%% Helper functions -export([get_chunk/2]). +%%% Test only +-export([setup_arweave_index_opts/1]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -170,21 +172,31 @@ head_raw(Base, Request, Opts) -> case find_key(<<"raw">>, Base, Request, Opts) of TXID when ?IS_ID(TXID) -> % Read the data from the local cache. - IndexStore = hb_store_arweave:store_from_opts(Opts), - case hb_store_arweave:read_offset(IndexStore, TXID, Opts) of + ArweaveStore = hb_store_arweave:store_from_opts(Opts), + case hb_store_arweave:read_offset(ArweaveStore, TXID, Opts) of {ok, #{ <<"codec-device">> := CodecDevice, - <<"start-offset">> := StartOffset, + <<"start-offset">> := RawOffset, <<"length">> := Length }} -> + StartOffset = hb_store_arweave:root_offset(RawOffset, ArweaveStore, Opts), CodecFun = case CodecDevice of <<"ans104@1.0">> -> fun head_raw_ans104/4; <<"tx@1.0">> -> fun head_raw_tx/4; _ -> throw({invalid_codec_device, CodecDevice}) end, - CodecFun(TXID, StartOffset, Length, Opts); + try CodecFun(TXID, StartOffset, Length, Opts) + catch _:Reason:Stacktrace -> + %% This can be prone to serialization error. + %% Catch and output as an error. + ?event(store_error, {head_raw, + {txid, TXID}, + {reason, Reason}, + {stacktrace, Stacktrace}}), + {error, Reason} + end; not_found -> ?event( arweave, @@ -200,15 +212,14 @@ head_raw(Base, Request, Opts) -> %% @doc Arweave transaction headers are not part of the Arweave data tree, and %% thus we do not add their header bytes to the offset in order to read their %% data. 
-head_raw_tx(TXID, StartOffset, Length, Opts) -> +head_raw_tx(TXID, Offset, Length, Opts) -> + BaseReq = #{ <<"exclude-data">> => true }, {ok, StructuredTXHeader} = - get_tx( - #{ <<"tx">> => TXID }, - #{ <<"exclude-data">> => true }, - Opts - ), + if is_integer(Offset) -> get_tx(#{}, BaseReq#{ <<"tx">> => TXID }, Opts); + true -> pending(#{}, BaseReq#{ <<"pending">> => TXID }, Opts) + end, ContentType = - hb_ao:get( + hb_maps:get( <<"content-type">>, StructuredTXHeader, <<"application/octet-stream">>, @@ -217,21 +228,32 @@ head_raw_tx(TXID, StartOffset, Length, Opts) -> [<<"no-cache">>, <<"no-store">>] } ), - {ok, - #{ - <<"raw-id">> => TXID, - <<"offset">> => StartOffset, - <<"data-offset">> => StartOffset, - <<"content-type">> => ContentType, - <<"header-length">> => 0, - <<"content-length">> => Length, - <<"accept-ranges">> => <<"bytes">> - } - }. + {ok, #{ + <<"raw-id">> => TXID, + <<"offset">> => Offset, + <<"data-offset">> => Offset, + <<"content-type">> => ContentType, + <<"header-length">> => 0, + <<"content-length">> => Length, + <<"accept-ranges">> => <<"bytes">> + }}. %% @doc ANS-104 headers are stored as part of the global Arweave data tree, so %% so to read the data associated with their IDs, we must first read the header %% chunk, deserialize it, and offset our data read from its starting offset. 
+head_raw_ans104(TXID, Offset, Length, Opts) when not is_integer(Offset) -> + HeaderReq = + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => min(Length, ?DATA_CHUNK_SIZE) + }, + case hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, HeaderReq, Opts) of + {ok, HeaderChunk} -> + do_head_raw_ans104(TXID, Offset, Length, HeaderChunk, Opts); + {error, Error} -> + {error, Error} + end; head_raw_ans104(TXID, ArweaveOffset, Length, Opts) -> ?event(debug_raw, {head_raw_ans104, {txid, TXID}, {arweave_offset, ArweaveOffset}, {length, Length}}), HeaderReq = @@ -258,7 +280,7 @@ do_head_raw_ans104(TXID, ArweaveOffset, Length, Data, _Opts) -> #{ <<"raw-id">> => TXID, <<"offset">> => ArweaveOffset, - <<"data-offset">> => ArweaveOffset + HeaderSize, + <<"data-offset">> => add_ans104_offset(ArweaveOffset, HeaderSize), <<"content-type">> => ContentType, <<"header-length">> => HeaderSize, <<"content-length">> => Length - HeaderSize, @@ -281,6 +303,14 @@ deserialize_ans104_header(Data) -> } end. +add_ans104_offset(Offset, HeaderSize) when is_integer(Offset) -> + Offset + HeaderSize; +add_ans104_offset(#{ <<"relative">> := ParentID, <<"offset">> := Offset }, HeaderSize) -> + #{ + <<"relative">> => ParentID, + <<"offset">> => Offset + HeaderSize + }. + %% @doc Get raw transaction *data* and `content-type` of an Arweave message. %% Does not deserialize the message, nor return signature information. Included %% only for compatibility with the legacy Arweave gateway `/raw` endpoint. 
@@ -289,6 +319,16 @@ get_raw(Base, Request, Opts) -> case head_raw(Base, Request, Opts) of not_found -> {error, not_found}; Err = {error, _} -> Err; + {ok, + Header = #{ + <<"data-offset">> := DataOffset, + <<"content-length">> := ContentLength + } + } when not is_integer(DataOffset) -> + case hb_store_arweave:read_chunks(DataOffset, ContentLength, Opts) of + {ok, Data} -> {ok, Header#{ <<"body">> => Data }}; + Error -> Error + end; {ok, Header = #{ <<"raw-id">> := TXID, @@ -395,22 +435,48 @@ post_chunk(Request, Opts) -> %% global Arweave data tree, or relative to the start of a specific pending %% transaction. get_chunk(_Base, Request, Opts) -> - Offset = hb_util:int(hb_maps:get(<<"offset">>, Request, 0, Opts)), - Length = hb_util:int(hb_maps:get(<<"length">>, Request, 1, Opts)), - MaybeRelativeTXID = hb_maps:get(<<"pending">>, Request, undefined, Opts), + HasExplicitLength = hb_maps:is_key(<<"length">>, Request, Opts), + {ok, Offset, Length, MaybeRelativeTXID} = extract_chunk_params(Request, Opts), case fetch_chunk_range(Offset, Length, MaybeRelativeTXID, Opts) of {ok, Chunks} -> - Data = iolist_to_binary(Chunks), - case hb_maps:is_key(<<"length">>, Request, Opts) of - true -> - {ok, binary:part(Data, 0, min(Length, byte_size(Data)))}; + Data = hb_util:bin(Chunks), + case HasExplicitLength of false -> - {ok, Data} + {ok, Data}; + true -> + { + ok, + binary:part(Data, 0, min(hb_util:int(Length), byte_size(Data))) + } end; {error, Reason} -> {error, Reason} end. +%% @doc Extract the parameters from a chunk request. Supports both global offsets +%% and relative offset+parent ID pairs. 
+extract_chunk_params(Request, Opts) -> + Length = hb_maps:get(<<"length">>, Request, 1, Opts), + case hb_maps:find(<<"offset">>, Request, Opts) of + {ok, RelativeInfo} when is_map(RelativeInfo) -> + {ok, RelativeOffset} = hb_maps:find(<<"offset">>, RelativeInfo, Opts), + {ok, RelativeTXID} = hb_maps:find(<<"relative">>, RelativeInfo, Opts), + {ok, hb_util:int(RelativeOffset), Length, RelativeTXID}; + {ok, Offset} when is_integer(Offset) orelse is_binary(Offset) -> + { + ok, + hb_util:int(Offset), + Length, + hb_maps:get(<<"pending">>, Request, undefined, Opts) + } + end. + +%% @doc Fetch a range of chunks in parallel. Determines the appropriate algorithm +%% to use to get the chunks based on offset, length, and an optional relative +%% transaction ID. Notably, this function returns the binary for all of the +%% chunks that were fetched, not just the requested length. This allows callers +%% to avoid wasted additional requests in some circumstances, but also requires +%% them to handle truncation themselves. fetch_chunk_range(Offset, Length, undefined, Opts) when (Offset >= ?STRICT_DATA_SPLIT_THRESHOLD) andalso ((Offset + Length - 1) >= ?STRICT_DATA_SPLIT_THRESHOLD) -> @@ -455,42 +521,47 @@ get_chunk_range_variable_size(Offset, EndOffset, Opts) -> end. %% @doc Return a chunk or range of bytes relative to a specific, unconfirmed, -%% transaction's data root. +%% transaction's data root. Pending chunk lookups query the only chunk by +%% `data_size` for single-chunk TXs, otherwise start at 256KiB and advance in +%% 256KiB steps with a final cap at `data_size`. 
get_chunk_range_relative(Offset, Length, RelativeTXID, Opts) -> - hb_prometheus:observe( - Length, - arweave_chunk_load_requested_bytes, - [] - ), - Offsets = - generate_offsets( - max(1, Offset + 1), - (Offset + Length), - ?DATA_CHUNK_SIZE - ), - GETFun = - fun(XOffset) -> - pending( - #{}, - #{ <<"offset">> => XOffset, <<"pending">> => RelativeTXID }, - Opts - ) - end, - case fetch_and_collect(Offsets, GETFun, Opts) of - {ok, ChunkInfos} -> - Concatenated = - hb_util:bin( - lists:map( - fun(JSONStruct) -> - hb_util:decode(maps:get(<<"chunk">>, JSONStruct)) - end, - ChunkInfos + case pending_tx_data_size(RelativeTXID, Opts) of + {ok, DataSize} -> + hb_prometheus:observe( + Length, + arweave_chunk_load_requested_bytes, + [] + ), + Offsets = pending_relative_chunk_offsets(Offset, Length, DataSize), + GETFun = + fun(XOffset) -> + QueryRes = pending_chunk_query(RelativeTXID, XOffset, Opts), + decode_relative_chunk( + QueryRes ) - ), - {ok, Concatenated}; - Error -> Error + end, + case fetch_and_collect(Offsets, GETFun, Opts) of + {ok, ChunkInfos} -> + assemble_relative_chunks(ChunkInfos, Offset); + Error -> Error + end; + Error -> + Error end. +assemble_relative_chunks(ChunkInfos, Offset) -> + assemble_chunks(ChunkInfos, Offset + 1). + +decode_relative_chunk({ok, JSON}) -> + Chunk = hb_util:decode(maps:get(<<"chunk">>, JSON)), + ChunkEnd = ar_merkle:extract_note( + hb_util:decode(maps:get(<<"data_path">>, JSON)) + ), + ChunkStart = ChunkEnd - byte_size(Chunk) + 1, + {ok, {ChunkStart, ChunkEnd, Chunk}}; +decode_relative_chunk({error, _} = Err) -> + Err. + %% @doc Iteratively detect gaps in coverage and fetch the chunk at the start %% of each gap until the entire range [Offset, EndOffset] is covered. fill_gaps(ChunkInfos, Offset, EndOffset, Opts) -> @@ -548,6 +619,179 @@ generate_offsets(Current, End, _Step, Acc) when Current > End -> generate_offsets(Current, End, Step, Acc) -> generate_offsets(Current + Step, End, Step, [Current | Acc]). 
+pending_chunk_query(TXID, XOffset, Opts) -> + {RetryCount, RetryDelay} = pending_chunk_poll_config(Opts), + pending_chunk_query( + TXID, + XOffset, + Opts, + RetryCount, + RetryDelay, + RetryCount, + erlang:monotonic_time(millisecond) + ). + +pending_chunk_query( + TXID, + XOffset, + Opts, + RetryCount, + RetryDelay, + TotalRetries, + StartTimeMs +) -> + Attempt = TotalRetries - RetryCount + 1, + case pending_chunk_request(TXID, XOffset, Opts) of + {error, not_found} when RetryCount > 0 -> + maybe_log_pending_chunk_retry( + Attempt, + TXID, + XOffset, + RetryDelay, + Opts + ), + timer:sleep(RetryDelay), + pending_chunk_query( + TXID, + XOffset, + Opts, + RetryCount - 1, + RetryDelay, + TotalRetries, + StartTimeMs + ); + {ok, _} = Result when Attempt > 1 -> + pending_chunk_progress( + Opts, + {pending_chunk_recovered, + {tx_id, {explicit, TXID}}, + {offset, XOffset}, + {attempt, Attempt}, + { + elapsed_ms, + erlang:monotonic_time(millisecond) - StartTimeMs + }} + ), + Result; + {error, not_found} = Result when Attempt > 1 -> + pending_chunk_progress( + Opts, + {pending_chunk_gave_up, + {tx_id, {explicit, TXID}}, + {offset, XOffset}, + {attempts, Attempt}, + { + elapsed_ms, + erlang:monotonic_time(millisecond) - StartTimeMs + }} + ), + Result; + Result -> + Result + end. + +maybe_log_pending_chunk_retry(Attempt, TXID, XOffset, RetryDelay, Opts) -> + case Attempt =:= 1 orelse Attempt rem 10 =:= 0 of + true -> + pending_chunk_progress( + Opts, + {pending_chunk_retrying, + {tx_id, {explicit, TXID}}, + {offset, XOffset}, + {attempt, Attempt}, + {retry_in_ms, RetryDelay}} + ); + false -> + ok + end. + +pending_chunk_progress(Opts, Event) -> + case hb_opts:get(arweave_mempool_progress, false, Opts) of + true -> ?event(copycat_short, Event); + false -> ok + end. + +pending_chunk_request(TXID, XOffset, Opts) -> + request( + <<"GET">>, + << + "/unconfirmed_chunk/", + TXID/binary, + "/", + (hb_util:bin(XOffset))/binary + >>, + Opts + ). 
+ +pending_chunk_poll_config(Opts) -> + RawRetryCount = max(0, hb_opts:get(arweave_pending_chunk_poll_attempts, 0, Opts)), + RetryDelay = max(1, hb_opts:get(arweave_pending_chunk_poll_ms, 500, Opts)), + MinRetryWindowMs = max( + 0, + hb_opts:get(arweave_pending_chunk_poll_min_ms, 20000, Opts) + ), + RetryCount = + case RawRetryCount of + 0 -> 0; + _ -> max(RawRetryCount, ceil_div(MinRetryWindowMs, RetryDelay)) + end, + {RetryCount, RetryDelay}. + +ceil_div(0, _Denominator) -> 0; +ceil_div(Numerator, Denominator) -> + (Numerator + Denominator - 1) div Denominator. + +%% @doc Fetch the advertised data size for an unconfirmed transaction. +pending_tx_data_size(TXID, Opts) -> + case pending(#{}, #{ <<"pending">> => TXID, <<"exclude-data">> => true }, Opts) of + {ok, JSON} -> + {ok, hb_util:int(maps:get(<<"data_size">>, JSON))}; + Error -> + Error + end. + +%% @doc Return the pending chunk end offsets using 256KiB stepping with a final +%% cap at `data_size`. +pending_relative_chunk_offsets(_Offset, Length, _DataSize) when Length =< 0 -> + []; +pending_relative_chunk_offsets(Offset, _Length, DataSize) when Offset >= DataSize -> + []; +pending_relative_chunk_offsets(Offset, Length, DataSize) -> + RangeStart = max(1, Offset + 1), + RangeEnd = min(Offset + Length, DataSize), + ChunkEnds = pending_chunk_end_offsets(DataSize), + pending_relative_chunk_offsets(ChunkEnds, RangeStart, RangeEnd, 0, []). + +pending_relative_chunk_offsets( + [ChunkEnd | Rest], RangeStart, RangeEnd, PrevEnd, Acc +) -> + ChunkStart = PrevEnd + 1, + NewAcc = + case chunk_overlaps_range(ChunkStart, ChunkEnd, RangeStart, RangeEnd) of + true -> [ChunkEnd | Acc]; + false -> Acc + end, + pending_relative_chunk_offsets(Rest, RangeStart, RangeEnd, ChunkEnd, NewAcc); +pending_relative_chunk_offsets([], _RangeStart, _RangeEnd, _PrevEnd, Acc) -> + lists:reverse(Acc). 
+ +pending_chunk_end_offsets(DataSize) when DataSize =< ?DATA_CHUNK_SIZE -> + [DataSize]; +pending_chunk_end_offsets(DataSize) -> + pending_chunk_end_offsets(?DATA_CHUNK_SIZE, DataSize, []). + +pending_chunk_end_offsets(Current, DataSize, Acc) when Current < DataSize -> + pending_chunk_end_offsets( + Current + ?DATA_CHUNK_SIZE, + DataSize, + [Current | Acc] + ); +pending_chunk_end_offsets(_Current, DataSize, Acc) -> + lists:reverse([DataSize | Acc]). + +chunk_overlaps_range(ChunkStart, ChunkEnd, RangeStart, RangeEnd) -> + ChunkEnd >= RangeStart andalso ChunkStart =< RangeEnd. + %% @doc Decode a chunk response into a {Start, End, Binary} tuple. %% Runs inside the pmap worker so raw JSON is GC'd per-worker. decode_chunk({ok, JSON}) -> @@ -743,6 +987,57 @@ only_if_cached(Req, Opts) -> hb_maps:get(<<"cache-control">>, Req, [], Opts) ). +%% @doc Look up the parent (block or bundle) that contains an item. +parent(Base, Request, Opts) -> + case find_key(<<"parent">>, Base, Request, Opts) of + not_found -> + {error, not_found}; + ID -> + StoreOpts = hb_store_arweave:store_from_opts(Opts), + try hb_store_arweave:read_parent(StoreOpts, ID, Opts) of + {ok, [{Height, block} | _]} -> + Entry = #{ + <<"type">> => <<"block">>, + <<"height">> => Height + }, + {ok, #{ + <<"content-type">> => <<"application/json">>, + <<"body">> => + hb_json:encode(#{<<"parents">> => [Entry]}) + }}; + {ok, [{ParentID, bundle} | _]} -> + Entry = #{ + <<"type">> => <<"bundle">>, + <<"id">> => hb_util:encode(ParentID) + }, + {ok, #{ + <<"content-type">> => <<"application/json">>, + <<"body">> => + hb_json:encode(#{<<"parents">> => [Entry]}) + }}; + {error, Reason} -> + ?event(warning, + {parent_read_error, {id, ID}, {reason, Reason}}), + {error, not_found}; + not_found -> + {error, not_found} + catch + error:Reason:Stacktrace -> + ?event(error, + {parent_read_error, + {id, ID}, + {reason, Reason}, + {stacktrace, Stacktrace} + }), + {failure, + #{ + <<"status">> => 500, + <<"type">> => 
<<"parent_read_error">> + } + } + end + end. + %% @doc Retrieve the current block information from Arweave. current(_Base, _Request, Opts) -> request(<<"GET">>, <<"/block/current">>, Opts). @@ -771,35 +1066,35 @@ tx_anchor(_Base, _Request, Opts) -> %% nodes, or a specific unconfirmed transaction header by its TXID. pending(Base, Request, Opts) -> case find_key(<<"pending">>, Base, Request, Opts) of - not_found -> request(<<"GET">>, <<"/tx/pending">>, Opts); + not_found -> + case hb_opts:get(arweave_static_pending_txids, not_found, Opts) of + TXIDs when is_list(TXIDs) -> + {ok, TXIDs}; + _ -> + request(<<"GET">>, <<"/tx/pending">>, Opts) + end; TXID -> + ExcludeData = + case find_key(<<"exclude-data">>, Base, Request, Opts) of + not_found -> hb_opts:get(exclude_data, false, Opts); + Value -> hb_util:bool(Value) + end, case hb_maps:find(<<"offset">>, Request, Opts) of error -> % Retreive a bare TX header by its TXID - request(<<"GET">>, <<"/unconfirmed_tx/", TXID/binary>>, Opts); - {ok, RawOffset} -> - Offset = hb_util:int(RawOffset), - % Download an unconfirmed chunk by its offset request( <<"GET">>, - << - "/unconfirmed_chunk/", - TXID/binary, - "/", - (hb_util:bin(Offset))/binary - >>, + <<"/unconfirmed_tx/", TXID/binary>>, Opts#{ - <<"exclude-data">> => - hb_util:bool( - find_key( - <<"exclude-data">>, - Base, - Request, - Opts - ) - ) + <<"exclude-data">> => ExcludeData } - ) + ); + {ok, _RawOffset} -> + {error, #{ + <<"status">> => 400, + <<"content-type">> => <<"application/json">>, + <<"body">> => <<"{\"error\":\"invalid_offset\"}">> + }} end end. 
@@ -909,7 +1204,7 @@ to_message(Path = <<"/block/", _/binary>>, <<"GET">>, {ok, #{ <<"body">> := Body Opts ), CacheRes = - case hb_opts:get(arweave_index_blocks, true, Opts) of + case hb_opts:get(<<"arweave-index-blocks">>, true, Opts) of true -> dev_arweave_block_cache:write(Block, Opts); false -> skipped end, @@ -968,37 +1263,51 @@ to_tx_message(Type, ID, Path, {ok, #{ <<"body">> := Body }}, LogExtra, Opts) -> {tx, TXHeader} } ), - {ok, Data} = - case hb_opts:get(exclude_data, false, Opts) of - true -> {ok, ?DEFAULT_DATA}; + DataRes = + case (TXHeader#tx.data_size == 0) orelse hb_opts:get(exclude_data, false, Opts) of + true -> {ok, <<>>}; false -> - DataRes = - case Type of - tx -> - request(<<"GET">>, <<"/raw/", ID/binary>>, Opts); - pending -> - get_chunk_range_relative( - 0, - TXHeader#tx.data_size, - ID, - Opts - ) - end, - case DataRes of - {ok, RawData} -> {ok, RawData}; - {error, not_found} -> {ok, ?DEFAULT_DATA}; - Error -> Error + case Type of + tx -> + case hb_ao:resolve(<<"~arweave@2.9/raw=", ID/binary>>, Opts) of + {ok, #{<<"body">> := RawBody}} -> + {ok, RawBody}; + Resp -> + Resp + end; + pending -> + get_chunk_range_relative( + 0, + TXHeader#tx.data_size, + ID, + Opts + ) end end, - { - ok, - hb_message:convert( - TXHeader#tx{ data = Data }, - <<"structured@1.0">>, - <<"tx@1.0">>, - Opts - ) - }. + case DataRes of + {ok, Data} -> + { + ok, + hb_message:convert( + TXHeader#tx{ data = Data }, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ) + }; + {error, not_found} -> + { + ok, + hb_message:convert( + TXHeader#tx{ data = ?DEFAULT_DATA }, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ) + }; + Error -> + Error + end. event_request(Path, Method, Status, Extra) -> BaseList = [{request, {explicit, Path}}, {method, Method}, {status, Status}], @@ -1315,7 +1624,8 @@ index_test_tx(TXID, IndexStore, Opts) -> TXID, <<"tx@1.0">>, StartOffset, - Size + Size, + Opts ), ?assertMatch({ok, _}, hb_store_arweave:read_offset(IndexStore, TXID, Opts)), ok. 
@@ -1326,17 +1636,19 @@ tx_index_block(<<"4FnBmvgWmqXWEEprjVqBsV5aRpAgF6_yJX_GTGsSZjY">>) -> 753012; tx_index_block(<<"YR9m4c3CrlljCRYEWBLeoKekbAyYZRMo2Kpz61IeNp8">>) -> 1233918. get_tx_basic_data_test_parallel() -> + TXID = <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, + Opts = setup_arweave_index_opts([TXID]), {ok, Structured} = hb_ao:resolve( #{ <<"device">> => <<"arweave@2.9">> }, #{ <<"path">> => <<"tx">>, - <<"tx">> => <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, + <<"tx">> => TXID, <<"exclude-data">> => false }, - #{} + Opts ), ?event(debug_test, {structured_tx, Structured}), - ?assert(hb_message:verify(Structured, all, #{})), + ?assert(hb_message:verify(Structured, all, Opts)), % Hash the data to make it easier to match StructuredWithHash = Structured#{ <<"data">> => hb_util:encode( @@ -1354,16 +1666,18 @@ get_tx_basic_data_test_parallel() -> %% @doc The data for this transaction ends with two smaller chunks. get_tx_split_chunk_test_parallel() -> + TXID = <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + Opts = setup_arweave_index_opts([TXID]), {ok, Structured} = hb_ao:resolve( #{ <<"device">> => <<"arweave@2.9">> }, #{ <<"path">> => <<"tx">>, - <<"tx">> => <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + <<"tx">> => TXID, <<"exclude-data">> => false }, - #{} + Opts ), - ?assert(hb_message:verify(Structured, all, #{})), + ?assert(hb_message:verify(Structured, all, Opts)), ?assertEqual( <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, hb_message:id(Structured, signed)), @@ -1373,9 +1687,8 @@ get_tx_split_chunk_test_parallel() -> <<"Contract">> => <<"KTzTXT_ANmF84fWEKHzWURD1LWd9QaFR9yfYUwH2Lxw">> }, ?assert(hb_message:match(ExpectedMsg, Structured, only_present)), - Child = hb_ao:get(<<"1/2">>, Structured), - ?assert(hb_message:verify(Child, all, #{})), + ?assert(hb_message:verify(Child, all, Opts)), ?event(debug_test, {child, {explicit, hb_message:id(Child, signed)}}), ?assertEqual( <<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, @@ 
-1531,6 +1844,40 @@ head_raw_ans104_invalid_tags_test() -> do_head_raw_ans104(<<0:256>>, 0, byte_size(DataItem), DataItem, #{}) ). +%% @doc Interior Arweave offset returns bytes that are not a valid ANS-104 +%% header, so head_raw_ans104/4 throws inside do_head_raw_ans104/5. The +%% try-catch added to head_raw/3 must convert that throw into {error, _}. +head_raw_ans104_deserialize_throws_test_parallel() -> + TestStore = hb_test_utils:test_store(hb_store_volatile, <<"head-raw-throws">>), + IndexStore = #{ + <<"module">> => hb_store_arweave, + <<"index-store">> => [TestStore] + }, + Opts = #{ + <<"store">> => [TestStore], + <<"arweave-index-ids">> => true, + <<"arweave-index-store">> => IndexStore + }, + FakeID = <<"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC">>, + %% Same interior offset as bundle_header_garbage_guard_test_parallel. + ProbeOffset = 376836336327208, + Size = 4096, + ok = hb_store_arweave:write_offset( + IndexStore, FakeID, <<"ans104@1.0">>, ProbeOffset - 1, Size, Opts + ), + ?assertMatch( + {error, _}, + hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"raw">>, + <<"raw">> => FakeID, + <<"method">> => <<"HEAD">> + }, + Opts + ) + ). + get_raw_range_tx_test_parallel() -> DataItemID = <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, Opts = setup_arweave_index_opts([DataItemID]), @@ -1617,20 +1964,22 @@ get_raw_range_ans104_test_parallel() -> ). 
get_tx_rsa_nested_bundle_test_parallel() -> - Node = hb_http_server:start_node(), - Path = <<"/~arweave@2.9/tx=bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, - {ok, Root} = hb_http:get(Node, Path, #{}), + TXID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, + Opts = setup_arweave_index_opts([TXID]), + Node = hb_http_server:start_node(Opts), + Path = <<"/~arweave@2.9/tx=", TXID/binary>>, + {ok, Root} = hb_http:get(Node, Path, Opts), ?event(debug_test, {root, Root}), - ?assert(hb_message:verify(Root, all, #{})), + ?assert(hb_message:verify(Root, all, Opts)), ChildPath = <>, - {ok, Child} = hb_http:get(Node, ChildPath, #{}), + {ok, Child} = hb_http:get(Node, ChildPath, Opts), ?event(debug_test, {child, Child}), - ?assert(hb_message:verify(Child, all, #{})), + ?assert(hb_message:verify(Child, all, Opts)), {ok, ExpectedChild} = hb_ao:resolve( Root, <<"1/2">>, - #{} + Opts ), ?assert(hb_message:match(ExpectedChild, Child, only_present)), ManualChild = #{ @@ -1665,6 +2014,26 @@ get_bad_tx_test_parallel() -> Res = hb_http:get(Node, Path, #{}), ?assertEqual({error, not_found}, Res). +pending_invalid_offset_returns_invalid_offset_test() -> + {error, Error} = + hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"pending">>, + <<"pending">> => <<"cat">>, + <<"offset">> => <<"dog">> + }, + #{} + ), + ?assertMatch( + #{ + <<"status">> := 400, + <<"content-type">> := <<"application/json">>, + <<"body">> := <<"{\"error\":\"invalid_offset\"}">> + }, + Error + ). + %% @doc: helper test to generate and write a dataitem to disk so that we %% can validate it using 3rd-party js libraries and gateways. serialize_data_item_test_disabled() -> @@ -1868,6 +2237,41 @@ get_mid_chunk_pre_split_test_parallel() -> ), ok. +extract_chunk_params_default_length_test_parallel() -> + ?assertEqual( + {ok, 123, 1, undefined}, + extract_chunk_params(#{ <<"offset">> => 123 }, #{}) + ). 
+ +assemble_relative_chunks_zero_offset_test_parallel() -> + {ok, [Chunk]} = + assemble_relative_chunks([{1, 5, <<"abcde">>}], 0), + ?assertEqual(<<"abcde">>, hb_util:bin(Chunk)). + +assemble_relative_chunks_nonzero_offset_test_parallel() -> + {ok, [Chunk]} = + assemble_relative_chunks([{1, 5, <<"abcde">>}], 2), + ?assertEqual(<<"cde">>, hb_util:bin(Chunk)). + +pending_relative_chunk_offsets_single_chunk_test_parallel() -> + ?assertEqual([1234], pending_relative_chunk_offsets(0, 1, 1234)), + ?assertEqual([1234], pending_relative_chunk_offsets(0, 1234, 1234)). + +pending_relative_chunk_offsets_standard_multi_chunk_test_parallel() -> + DataSize = 315127, + ?assertEqual( + [?DATA_CHUNK_SIZE], + pending_relative_chunk_offsets(0, 1, DataSize) + ), + ?assertEqual( + [DataSize], + pending_relative_chunk_offsets(?DATA_CHUNK_SIZE, 1, DataSize) + ), + ?assertEqual( + [?DATA_CHUNK_SIZE, DataSize], + pending_relative_chunk_offsets(0, DataSize, DataSize) + ). + get_pre_split_small_chunks_test_parallel() -> TXID = <<"4FnBmvgWmqXWEEprjVqBsV5aRpAgF6_yJX_GTGsSZjY">>, Opts = setup_arweave_index_opts([TXID]), diff --git a/src/preloaded/arweave/dev_arweave_offset.erl b/src/preloaded/arweave/dev_arweave_offset.erl index 66e6923a6..656697b44 100644 --- a/src/preloaded/arweave/dev_arweave_offset.erl +++ b/src/preloaded/arweave/dev_arweave_offset.erl @@ -382,9 +382,9 @@ offset_item_cases_test() -> ok. 
offset_nested_item_test() -> - Opts = #{}, TXID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, - Node = hb_http_server:start_node(), + Opts = dev_arweave:setup_arweave_index_opts([TXID]), + Node = hb_http_server:start_node(Opts), {ok, Expected} = hb_http:get( Node, diff --git a/src/preloaded/query/dev_copycat_arweave.erl b/src/preloaded/query/dev_copycat_arweave.erl index 329219193..f5749aa6b 100644 --- a/src/preloaded/query/dev_copycat_arweave.erl +++ b/src/preloaded/query/dev_copycat_arweave.erl @@ -1,15 +1,23 @@ %%% @doc A `~copycat@1.0' engine that fetches block data from an Arweave node for %%% replication. This engine works in _reverse_ chronological order by default. %%% If `to' is omitted, it keeps moving downward from `from' until it reaches a -%%% block where at least one TX is already indexed, then stops. If `to' is -%%% provided, every block in the range is processed. +%%% block that is already indexed at the requested depth (checked via block +%%% markers first, then legacy per-TX fallback for pre-marker indexes). If `to' +%%% is provided, every block in the range is processed. -module(dev_copycat_arweave). -device_libraries([lib_arweave_common]). -export([arweave/3]). +-export([set_depth_recursion_cap/2, get_depth_recursion_cap/1]). -include_lib("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). -define(ARWEAVE_DEVICE, <<"~arweave@2.9">>). +-define(DEPTH_SENTINEL, 99999). +%% `full` uses the copycat-depth-recursion-cap option +%% as a safe depth to go to. This can be changed to an +%% integer value. +-define(DEFAULT_BLOCK_DEPTH, <<"full">>). +-define(DEFAULT_COPYCAT_MEMORY_BUDGET, 6 * 1024 * 1024 * 1024). % GET /~cron@1.0/once&cron-path=~copycat@1.0/arweave @@ -17,18 +25,305 @@ %% latest known block towards the Genesis block. If no range is provided, we %% fetch blocks from the latest known block towards the Genesis block. 
arweave(_Base, Request, Opts) -> - case parse_range(Request, Opts) of - {error, unavailable} -> - {error, unavailable}; - {ok, {From, To}} -> - case hb_maps:get(<<"mode">>, Request, <<"write">>, Opts) of - <<"write">> -> fetch_blocks(Request, From, To, Opts); - <<"list">> -> list_index(From, To, Opts); - Mode -> - {error, <<"Unsupported mode `", (hb_util:bin(Mode))/binary, "`. Supported modes are: write, list">>} + case hb_maps:get(<<"mode">>, Request, <<"write">>, Opts) of + <<"mempool">> -> index_mempool(Request, Opts); + <<"write">> -> + case hb_maps:find(<<"id">>, Request, Opts) of + {ok, TXID} -> index_explicit_tx(TXID, Request, Opts); + error -> + Depth = request_depth(Request, ?DEFAULT_BLOCK_DEPTH, Opts), + with_range( + Request, + Opts, + fun(F, T, O) -> fetch_blocks(F, T, Depth, O) end + ) + end; + <<"list">> -> with_range(Request, Opts, fun list_index/3); + <<"inventory">> -> with_range(Request, Opts, fun inventory_index/3); + Mode -> + { + error, + <<"Unsupported mode `", (hb_util:bin(Mode))/binary,"`. Supported", + "modes are: write, list, inventory, mempool">> + } + end. + +%% @doc Set bundles descendant recursion cap, avoids recursion +%% in very nested bundles (very rare). +set_depth_recursion_cap(Cap, Opts) when is_integer(Cap), Cap > 0 -> + Opts#{<<"copycat-depth-recursion-cap">> => Cap}. + +%% @doc Get the set depth recursion cap from hb_opts. +get_depth_recursion_cap(Opts) -> + hb_opts:get(<<"copycat-depth-recursion-cap">>, undefined, Opts). + +%% @doc Return the effective per-TX memory cap, clamped to the global budget. +%% Lazily initializes the budget pool on first call. +effective_memory_cap(Opts) -> + Budget = + hb_opts:get( + <<"copycat-memory-budget">>, + ?DEFAULT_COPYCAT_MEMORY_BUDGET, + Opts + ), + hb_copycat_budget:ensure_started(Budget), + hb_copycat_budget:get_budget(). + +%% @doc Shift all depth keys in an item ID map by Offset. 
+shift_item_ids(Map, Offset) -> + maps:fold( + fun(Depth, IDs, Acc) -> Acc#{Depth + Offset => IDs} end, + #{}, + Map + ). + +%% @doc Merge a list of depth→ID-list maps in one pass per depth key. +merge_all_item_ids(Maps) -> + AllKeys = lists:usort(lists:flatmap(fun maps:keys/1, Maps)), + maps:from_list([ + {K, lists:append([maps:get(K, M, []) || M <- Maps])} + || K <- AllKeys]). + +%% @doc Merge two depth→ID-list maps by concatenating lists at each depth. +merge_item_ids(A, B) -> + maps:fold( + fun(Depth, IDs, Acc) -> + Existing = maps:get(Depth, Acc, []), + Acc#{Depth => Existing ++ IDs} + end, + A, + B + ). + +%% @doc Normalize an owner address into the native ID form used for comparisons. +normalize_owner_id(Addr) -> + hb_util:native_id(hb_util:bin(Addr)). + +%% @doc Adds an address to the owners aliases cache in Opts, mapping +%% Alias -> native address for fast lookup and once per address computation. +add_owner_alias(Addr, Alias, Opts) when is_binary(Alias) -> + ExistingAliases = hb_opts:get(<<"owner_aliases">>, #{}, Opts), + Opts#{ <<"owner_aliases">> => ExistingAliases#{ Alias => normalize_owner_id(Addr) }}; +add_owner_alias(_Addr, Alias, _Opts) -> + throw({invalid_owner_alias, Alias}). + +%% @doc Retrieve the address of a given alias. +resolve_owner_alias(Alias, Opts) when is_binary(Alias) -> + Aliases = hb_opts:get(<<"owner_aliases">>, #{}, Opts), + case hb_maps:find(Alias, Aliases) of + {ok, Addr} -> {ok, Addr}; + error -> {error, {owner_alias_not_found, Alias}} + end; +resolve_owner_alias(Alias, _Opts) -> + {error, {invalid_owner_alias, Alias}}. +%% @doc Parse include/exclude owner filters from the request. +%% Supports direct owner values and owner aliases. 
+parse_owner_filter(Request, Opts) -> + maybe + {ok, IncludeOwner} ?= + resolve_owner_filter_value( + <<"include-owner">>, + <<"include-owner-alias">>, + Request, + Opts + ), + {ok, ExcludeOwner} ?= + resolve_owner_filter_value( + <<"exclude-owner">>, + <<"exclude-owner-alias">>, + Request, + Opts + ), + {ok, #{ + include_owner => IncludeOwner, + exclude_owner => ExcludeOwner + }} + else + {error, _} = Error -> + Error + end. +%% @doc Resolve one owner filter value from either a direct owner param or +%% a comma-separated owner alias param. Alias takes precedence. +resolve_owner_filter_value(OwnerKey, AliasKey, Request, Opts) -> + case hb_maps:find(AliasKey, Request, Opts) of + {ok, Alias} -> + resolve_owner_aliases(Alias, Opts); + error -> + case hb_maps:find(OwnerKey, Request, Opts) of + {ok, Owner} -> + {ok, normalize_owner_id(Owner)}; + error -> + {ok, undefined} + end + end. +%% @doc Resolve one or more comma-separated owner aliases into normalized owner IDs. +resolve_owner_aliases(Alias, Opts) -> + case + lists:filter( + fun(Part) -> byte_size(Part) > 0 end, + binary:split(hb_util:bin(Alias), <<",">>, [global]) + ) + of + [SingleAlias] -> + case resolve_owner_alias(SingleAlias, Opts) of + {ok, Addr} -> {ok, normalize_owner_id(Addr)}; + {error, _} = Error -> Error + end; + Aliases -> + resolve_owner_aliases(Aliases, Opts, []) + end. +%% @doc Resolve a list of owner aliases into normalized owner IDs. +resolve_owner_aliases([], _Opts, Acc) -> + {ok, lists:reverse(Acc)}; +resolve_owner_aliases([Alias | Rest], Opts, Acc) -> + case resolve_owner_alias(Alias, Opts) of + {ok, Addr} -> + resolve_owner_aliases(Rest, Opts, [normalize_owner_id(Addr) | Acc]); + {error, _} = Error -> + Error + end. +%% @doc Parse an L1 tag filter from `Name:Value` form. 
+parse_tag_filter(Key, Request, Opts) -> + case hb_maps:find(Key, Request, Opts) of + {ok, Tag} -> + case binary:split(hb_util:bin(Tag), <<":">>, [global]) of + [Name, Value] + when byte_size(Name) > 0 andalso byte_size(Value) > 0 -> + {ok, #{name => Name, value => Value}}; + _ -> + {error, invalid_tag_filter} + end; + error -> + {ok, undefined} + end. +%% @doc Process the `id=...` copycat path for an already indexed L1 TX. +%% applies L1-level owner/tag filters on the lightweight TX header first, then, +%% if the TX passes and is a bundle, loads the full L1 payload once and indexes +%% descendants in-memory up to the requested safe depth (defaults to full recursion +%% till the set copycat-depth-recursion-cap). +process_l1_request(TXID, Request, Opts) -> + Depth = request_depth(Request, <<"full">>, Opts), + QueryL1Offset = + hb_util:bool( + hb_maps:get(<<"query-l1-offset">>, Request, false, Opts) + ), + observe_copycat_l1_stage( + <<"l1_request_total">>, + fun() -> + try + maybe + {ok, OwnerFilters} ?= parse_owner_filter(Request, Opts), + {ok, IncludeTag} ?= parse_tag_filter(<<"include-tag">>, Request, Opts), + {ok, ExcludeTag} ?= parse_tag_filter(<<"exclude-tag">>, Request, Opts), + {ok, + maybe_process_l1_tx( + TXID, + OwnerFilters#{ + include_tag => IncludeTag, + exclude_tag => ExcludeTag + }, + Depth, + QueryL1Offset, + Opts + )} + else + {error, _} = Error -> + Error + end + catch + _:Reason:Stacktrace -> + ?event(copycat_short, + {error, + {reason, Reason}, + {stacktrace, Stacktrace}}), + {error, Reason} end + end + ). +%% @doc Parse the requested recursion depth and clamp it to the configured +%% safe cap. Depth is relative so depth 1 is always one level below the +%% root specified in the request (either a block or an L1 TX ID). +%% +%% `full` resolves to the current copycat depth recursion cap. 
+request_depth(Request, Default, Opts) -> + MaxRecursionCap = get_depth_recursion_cap(Opts), + RequestedDepth = + case hb_maps:get(<<"depth">>, Request, Default, Opts) of + <<"full">> -> MaxRecursionCap; + Value -> hb_util:int(Value) + end, + erlang:min( + MaxRecursionCap, + erlang:max(1, RequestedDepth) + ). +%% @doc Return the first matching L1 filter reason for a TX header, or `pass`. +l1_filter_reason(TX, Filters) -> + IncludeOwner = maps:get(include_owner, Filters, undefined), + ExcludeOwner = maps:get(exclude_owner, Filters, undefined), + IncludeTag = maps:get(include_tag, Filters, undefined), + ExcludeTag = maps:get(exclude_tag, Filters, undefined), + Owner = ar_tx:get_owner_address(TX), + maybe + pass ?= maybe_include_owner(Owner, IncludeOwner), + pass ?= maybe_exclude_owner(Owner, ExcludeOwner), + pass ?= maybe_include_tag(TX, IncludeTag), + pass ?= maybe_exclude_tag(TX, ExcludeTag), + pass + else + Reason -> Reason end. +%% @doc Match an owner against an undefined, single-owner, or multi-owner filter. +owner_matches_filter(_Owner, undefined) -> + false; +owner_matches_filter(Owner, Owners) when is_list(Owners) -> + lists:member(Owner, Owners); +owner_matches_filter(Owner, FilterOwner) -> + Owner =:= FilterOwner. +maybe_include_owner(_Owner, undefined) -> + pass; +maybe_include_owner(Owner, IncludeOwner) -> + case owner_matches_filter(Owner, IncludeOwner) of + true -> pass; + false -> include_owner_mismatch + end. + +maybe_exclude_owner(_Owner, undefined) -> + pass; +maybe_exclude_owner(Owner, ExcludeOwner) -> + case owner_matches_filter(Owner, ExcludeOwner) of + true -> exclude_owner_match; + false -> pass + end. + +maybe_include_tag(_TX, undefined) -> + pass; +maybe_include_tag(TX, IncludeTag) -> + case has_tag_pair(TX, IncludeTag) of + true -> pass; + false -> include_tag_mismatch + end. 
+ +maybe_exclude_tag(_TX, undefined) -> + pass; +maybe_exclude_tag(TX, ExcludeTag) -> + case has_tag_pair(TX, ExcludeTag) of + true -> exclude_tag_match; + false -> pass + end. + +has_tag_pair(#tx{tags = Tags}, #{name := Name, value := Value}) -> + TagValue = ar_tx:tagfind(Name, Tags, not_found), + case TagValue of + not_found -> + false; + _ -> + LowerTagValue = hb_util:to_lower(TagValue), + LowerValue = hb_util:to_lower(Value), + LowerTagValue =:= LowerValue + end; +has_tag_pair(_, _) -> + false. %% @doc Parse the range from the request. parse_range(Request, Opts) -> maybe @@ -58,6 +353,12 @@ parse_range(Request, Opts) -> {error, unavailable} end. +with_range(Request, Opts, Fun) -> + case parse_range(Request, Opts) of + {error, unavailable} -> {error, unavailable}; + {ok, {From, To}} -> Fun(From, To, Opts) + end. + normalize_height(Height, Opts) -> RequestedHeight = hb_util:int(Height), case RequestedHeight < 0 of @@ -79,33 +380,15 @@ latest_height(Opts) -> {error, Reason} -> {error, Reason} end. -%% @doc Check if a transaction ID is indexed in the arweave index store. -is_tx_indexed(TXID, Opts) -> - case hb_store_arweave:store_from_opts(Opts) of - no_store -> false; - #{ <<"index-store">> := Store } -> - case hb_store:read(Store, hb_store_arweave_offset:path(TXID), Opts) of - {ok, _} -> true; - {error, not_found} -> false - end - end. - %% @doc List indexed blocks and transactions in the given range. %% Returns JSON with block heights as keys, each containing indexed and not-indexed lists. list_index(From, undefined, Opts) -> list_index(From, 0, Opts); list_index(From, To, _Opts) when From < To -> - {ok, #{ - <<"content-type">> => <<"application/json">>, - <<"body">> => hb_json:encode(#{}) - }}; + json_response(#{}); list_index(From, To, Opts) -> Result = list_index_blocks(From, To, Opts, #{}), - JSON = hb_json:encode(Result), - {ok, #{ - <<"content-type">> => <<"application/json">>, - <<"body">> => JSON - }}. + json_response(Result). 
%% @doc Iterate through blocks and check index status for each transaction. list_index_blocks(Current, To, _Opts, Acc) when Current < To -> @@ -118,19 +401,26 @@ list_index_blocks(Current, To, Opts, Acc) -> [] -> list_index_blocks(Current - 1, To, Opts, Acc); _ -> - {IndexedTXs, NotIndexedTXs} = classify_txs(TXIDs, Opts), + {IndexedTXs, _NotIndexedTXs} = classify_txs(TXIDs, Opts), case IndexedTXs of [] -> % Do not include blocks with no locally indexed TXs. list_index_blocks(Current - 1, To, Opts, Acc); _ -> BlockKey = hb_util:bin(Current), - NewAcc = Acc#{ - BlockKey => #{ - <<"indexed">> => IndexedTXs, - <<"not-indexed">> => NotIndexedTXs - } - }, + BlockInfo = assemble_block_info( + Current, Block, Opts), + WithItems = case maps:get( + <<"depth">>, BlockInfo, undefined) + of + undefined -> BlockInfo; + _ -> + BlockInfo#{ + <<"items">> => + hb_store_arweave:read_block_item_counts( + Current, Opts)} + end, + NewAcc = Acc#{BlockKey => WithItems}, list_index_blocks(Current - 1, To, Opts, NewAcc) end end; @@ -138,6 +428,82 @@ list_index_blocks(Current, To, Opts, Acc) -> list_index_blocks(Current - 1, To, Opts, Acc) end. +%% @doc Build base block info with indexed/not-indexed TXs and optional depth. +assemble_block_info(Height, Block, Opts) -> + TXIDs = hb_maps:get(<<"txs">>, Block, [], Opts), + {IndexedTXs, NotIndexedTXs} = classify_txs(TXIDs, Opts), + Base = #{ + <<"indexed">> => IndexedTXs, + <<"not-indexed">> => NotIndexedTXs + }, + case hb_store_arweave:read_block_marker_depth(Height, Opts) of + undefined -> Base; + Depth -> Base#{<<"depth">> => Depth} + end. + +%% @doc mode=inventory: return per-depth item ID lists from the local index store. +%% Supports range queries. The inventory read itself is local-only (no network). +%% Note: range parsing may call latest_height/1 if from/to are omitted or negative. 
+inventory_index(From, undefined, Opts) -> + inventory_index(From, 0, Opts); +inventory_index(From, To, _Opts) when From < To -> + json_response(#{}); +inventory_index(From, To, Opts) -> + Result = inventory_local(From, To, Opts, #{}), + json_response(Result). + +%% @doc Wrap a map as an `application/json' HTTP response with encoded body. +json_response(Map) -> + {ok, #{ + <<"content-type">> => <<"application/json">>, + <<"body">> => hb_json:encode(Map) + }}. + +inventory_local(Current, To, _Opts, Acc) when Current < To -> Acc; +inventory_local(Current, To, Opts, Acc) -> + case hb_store_arweave:read_block_marker_depth(Current, Opts) of + undefined -> + inventory_local(Current - 1, To, Opts, Acc); + Depth -> + ItemIDs = hb_store_arweave:read_block_item_ids(Current, Opts), + BlockKey = hb_util:bin(Current), + BlockInfo = #{<<"depth">> => Depth, <<"items">> => ItemIDs}, + inventory_local(Current - 1, To, Opts, + Acc#{BlockKey => BlockInfo}) + end. + +%% @doc Materialise a parsed `#tx{}' header into local-store so that +%% `hb_cache:match' can answer GraphQL filters. No-op when header indexing is +%% disabled via `<<"index-headers">>'. +write_item_header(TX, Codec, Opts) -> + case hb_opts:get(<<"index-headers">>, true, Opts) of + true -> + LocalOpts = hb_store:scope(Opts, local), + maybe + Msg = hb_message:convert(TX, <<"structured@1.0">>, Codec, LocalOpts), + {ok, _Path} ?= hb_cache:write(Msg, LocalOpts), + ?event(debug_copycat, + {header_inline_written, + {id, {explicit, hb_util:encode(TX#tx.id)}}, + {tx, TX}, + {codec, Codec} + } + ), + ok + else + {error, R} -> + ?event(copycat_short, + {header_write_failed, + {id, {explicit, hb_util:encode(TX#tx.id)}}, + {codec, Codec}, + {reason, R} + } + ), + {error, R} + end; + _ -> ok + end. 
+ fetch_block_header(Height, Opts) -> ?event(debug_copycat, {fetching_block, Height}), observe_event(<<"block_header">>, fun() -> @@ -155,7 +521,7 @@ fetch_block_header(Height, Opts) -> classify_txs(TXIDs, Opts) -> lists:foldl( fun(TXID, {IndexedAcc, NotIndexedAcc}) -> - case is_tx_indexed(TXID, Opts) of + case hb_store_arweave:is_tx_indexed(TXID, Opts) of true -> {[TXID | IndexedAcc], NotIndexedAcc}; false -> {IndexedAcc, [TXID | NotIndexedAcc]} end @@ -164,61 +530,159 @@ classify_txs(TXIDs, Opts) -> TXIDs ). +%% @doc Index a single L1 TX by ID. Returns indexing stats (items, bundles, +%% skipped) on success, or zeroed stats on failure. +index_explicit_tx(TXID, Request, Opts) -> + case process_l1_request(TXID, Request, Opts) of + {ok, Stats} when is_map(Stats) -> + ?event(copycat_short, + {arweave_tx_indexed, + {id, {explicit, TXID}}, + {items_indexed, maps:get(items_count, Stats, 0)}, + {bundle_txs, maps:get(bundle_count, Stats, 0)}, + {skipped_txs, maps:get(skipped_count, Stats, 0)} + } + ), + {ok, Stats#{ <<"body">> => maps:get(items_count, Stats, 0) }}; + _ -> + {ok, #{ + items_count => 0, + bundle_count => 0, + skipped_count => 0, + <<"body">> => 0 + }} + end. + %% @doc Fetch blocks from an Arweave node while moving downward from `Current'. %% If `To' is provided, every block in [`To', `Current'] is processed. If `To' -%% is omitted, stop at the first block where any TX is already indexed. -fetch_blocks(Req, Current, To, _Opts) when is_integer(To), Current < To -> +%% is omitted, stop at the first block already indexed at the requested depth +%% (via block markers above cutover, or legacy per-TX check below cutover). +fetch_blocks(From, To, Depth, Opts) -> + ?event(copycat_short, + {indexing_blocks, {from, From}, {to, To}, {depth, Depth}}), + do_fetch_blocks(From, To, Depth, Opts). 
+ +do_fetch_blocks(Current, To, Depth, _Opts) when is_integer(To), Current < To -> + ?event(copycat_short, + {arweave_block_indexing_completed, + {reached_target, To}, {target_depth, Depth} + } + ), + {ok, To}; +do_fetch_blocks(Current, undefined, _Depth, _Opts) when Current < 0 -> + {ok, 0}; +do_fetch_blocks(Current, undefined, Depth, Opts) -> + fetch_blocks_open_ended(Current, Depth, block_workers(Opts), Opts); +do_fetch_blocks(Current, To, Depth, Opts) -> + fetch_blocks_ranged(Current, To, Depth, block_workers(Opts), Opts). + +block_workers(Opts) -> + max(1, hb_opts:get(<<"arweave-block-workers">>, 3, Opts)). + +%% @doc Process a known range of blocks in parallel batches. +fetch_blocks_ranged(Current, To, TargetDepth, _Workers, _Opts) + when Current < To -> ?event(copycat_short, {arweave_block_indexing_completed, {reached_target, To}, - {initial_request, Req} + {target_depth, TargetDepth} } ), {ok, To}; -fetch_blocks(_Req, Current, undefined, _Opts) when Current < 0 -> +fetch_blocks_ranged(Current, To, TargetDepth, Workers, Opts) -> + BatchEnd = max(To, Current - Workers + 1), + Heights = lists:seq(Current, BatchEnd, -1), + hb_pmap:parallel_map( + Heights, + fun(H) -> + observe_event(<<"block_indexed">>, fun() -> + fetch_and_process_block(H, To, TargetDepth, Opts) + end) + end, + Workers + ), + fetch_blocks_ranged(BatchEnd - 1, To, TargetDepth, Workers, Opts). + +%% @doc Process blocks until an already-indexed block is found. +%% Fetches headers in parallel, stops at the first indexed block, +%% then processes the unindexed prefix in parallel. 
+fetch_blocks_open_ended(Current, _TargetDepth, _Workers, _Opts) + when Current < 0 -> {ok, 0}; -fetch_blocks(Req, Current, undefined, Opts) -> - BlockRes = fetch_block_header(Current, Opts), - case is_already_indexed(BlockRes, Opts) of - true -> +fetch_blocks_open_ended(Current, TargetDepth, Workers, Opts) -> + BatchEnd = max(0, Current - Workers + 1), + Heights = lists:seq(Current, BatchEnd, -1), + HeaderResults = hb_pmap:parallel_map( + Heights, + fun(H) -> {H, fetch_block_header(H, Opts)} end, + Workers + ), + case find_indexed_prefix(HeaderResults, TargetDepth, Opts) of + {stop_at, StopHeight, ToProcess} -> + process_prefetched_blocks( + ToProcess, TargetDepth, Workers, Opts), ?event(copycat_short, {arweave_block_indexing_completed, - {stop_at_indexed_block, Current}, - {initial_request, Req} + {stop_at_indexed_block, StopHeight} } ), - {ok, Current}; + {ok, StopHeight}; + {all_unindexed, ToProcess} -> + process_prefetched_blocks( + ToProcess, TargetDepth, Workers, Opts), + fetch_blocks_open_ended( + BatchEnd - 1, TargetDepth, Workers, Opts) + end. + +%% @doc Walk header results in order, return the unindexed prefix and +%% either the stop height or all_unindexed. +find_indexed_prefix(HeaderResults, TargetDepth, Opts) -> + find_indexed_prefix(HeaderResults, TargetDepth, Opts, []). + +find_indexed_prefix([], _TargetDepth, _Opts, Acc) -> + {all_unindexed, lists:reverse(Acc)}; +find_indexed_prefix([{H, BlockRes} | Rest], TargetDepth, Opts, Acc) -> + case is_already_indexed(BlockRes, TargetDepth, Opts) of + true -> + {stop_at, H, lists:reverse(Acc)}; false -> + find_indexed_prefix( + Rest, TargetDepth, Opts, [{H, BlockRes} | Acc]) + end. + +%% @doc Process a list of {Height, BlockRes} tuples in parallel. 
+process_prefetched_blocks(Blocks, TargetDepth, Workers, Opts) -> + hb_pmap:parallel_map( + Blocks, + fun({H, BlockRes}) -> observe_event(<<"block_indexed">>, fun() -> - process_block(BlockRes, Current, undefined, Opts) - end), - fetch_blocks(Req, Current - 1, undefined, Opts) - end; -fetch_blocks(Req, Current, To, Opts) -> - observe_event(<<"block_indexed">>, fun() -> - fetch_and_process_block(Current, To, Opts) - end), - fetch_blocks(Req, Current - 1, To, Opts). + process_block(BlockRes, H, undefined, TargetDepth, Opts) + end) + end, + Workers + ). -%% @doc Determine whether a fetched block is considered indexed. -%% A block is indexed when any TX from its `txs' list is in the index. -is_already_indexed({ok, Block}, Opts) -> - TXIDs = hb_maps:get(<<"txs">>, Block, [], Opts), - lists:any(fun(TXID) -> is_tx_indexed(TXID, Opts) end, TXIDs); -is_already_indexed({error, _}, _Opts) -> +%% @doc Determine whether a fetched block is considered indexed at the +%% requested depth. Checks block markers first. For blocks at or above +%% the cutover height, the marker is authoritative. For blocks below +%% the cutover, falls back to legacy per-TX check. +is_already_indexed({ok, Block}, TargetDepth, Opts) -> + Height = hb_maps:get(<<"height">>, Block, undefined, Opts), + hb_store_arweave:is_block_indexed(Height, TargetDepth, Opts); +is_already_indexed({error, _}, _TargetDepth, _Opts) -> false. -fetch_and_process_block(Current, To, Opts) -> +fetch_and_process_block(Current, To, TargetDepth, Opts) -> BlockRes = fetch_block_header(Current, Opts), - process_block(BlockRes, Current, To, Opts). + process_block(BlockRes, Current, To, TargetDepth, Opts). %% @doc Process a block. 
-process_block(BlockRes, Current, To, Opts) -> +process_block(BlockRes, Current, To, TargetDepth, Opts) -> case BlockRes of {ok, Block} -> ?event(debug_copycat, {{processing_block, Current}, {indep_hash, hb_maps:get(<<"indep_hash">>, Block, <<>>)}}), - case maybe_index_ids(Block, Opts) of + case maybe_index_block(Block, TargetDepth, Opts) of {block_skipped, Results} -> TotalTXs = maps:get(total_txs, Results, 0), ?event( @@ -234,17 +698,48 @@ process_block(BlockRes, Current, To, Opts) -> TotalTXs = maps:get(total_txs, Results, 0), BundleTXs = maps:get(bundle_count, Results, 0), SkippedTXs = maps:get(skipped_count, Results, 0), - ?event( - copycat_short, - {arweave_block_indexed, - {height, Current}, - {items_indexed, ItemsIndexed}, - {total_txs, TotalTXs}, - {bundle_txs, BundleTXs}, - {skipped_txs, SkippedTXs}, - {target, To} - } - ) + AchievedDepth = maps:get( + achieved_depth, Results, + max(2, TargetDepth)), + ItemIDs = maps:get(item_ids, Results, #{}), + maybe + ok ?= hb_store_arweave:write_block_item_ids( + Current, AchievedDepth, ItemIDs, Opts), + ok ?= hb_store_arweave:mark_block_indexed( + Current, AchievedDepth, Opts), + ?event( + copycat_short, + {arweave_block_indexed, + {height, Current}, + {items_indexed, ItemsIndexed}, + {total_txs, TotalTXs}, + {bundle_txs, BundleTXs}, + {skipped_txs, SkippedTXs}, + {achieved_depth, AchievedDepth}, + {target, To} + } + ) + else + {error, item_ids_write_failed} -> + ?event( + copycat_short, + {arweave_block_metadata_failed, + {height, Current}, + {target, To} + } + ), + throw(item_ids_write_failed); + Error -> + ?event( + copycat_short, + {arweave_block_marker_failed, + {height, Current}, + {target, To}, + {error, Error} + } + ), + throw({writing_to_index_store, Error}) + end end; {error, _} = Error -> ?event( @@ -257,9 +752,9 @@ process_block(BlockRes, Current, To, Opts) -> end. %% @doc Index the IDs of all transactions in the block if configured to do so. 
-maybe_index_ids(Block, Opts) -> +maybe_index_block(Block, TargetDepth, Opts) -> TotalTXs = length(hb_maps:get(<<"txs">>, Block, [], Opts)), - case hb_opts:get(arweave_index_ids, true, Opts) of + case hb_opts:get(<<"arweave-index-ids">>, true, Opts) of false -> {block_skipped, #{ items_count => 0, @@ -282,14 +777,20 @@ maybe_index_ids(Block, Opts) -> }}; {ok, TXs} -> Height = hb_maps:get(<<"height">>, Block, 0, Opts), - TXsWithData = ar_block:generate_size_tagged_list_from_txs(TXs, Height), - % Filter out padding entries before processing + L1IDs = [TX#tx.id || TX <- TXs], + TXsWithData = + ar_block:generate_size_tagged_list_from_txs(TXs, Height), ValidTXs = lists:filter( fun({{padding, _}, _}) -> false; (_) -> true end, TXsWithData ), - TXResults = process_txs(ValidTXs, BlockStartOffset, Opts), - {block_cached, TXResults#{total_txs => TotalTXs}} + TXResults = process_block_txs( + ValidTXs, BlockStartOffset, TargetDepth, Height, Opts), + ExistingIDs = maps:get(item_ids, TXResults, #{}), + {block_cached, TXResults#{ + total_txs => TotalTXs, + item_ids => ExistingIDs#{1 => L1IDs} + }} end end. @@ -301,12 +802,21 @@ parallel_map(Items, Fun, Opts) -> MaxWorkers = max(1, hb_opts:get(arweave_index_workers, 1, Opts)), hb_pmap:parallel_map(Items, Fun, MaxWorkers). +%% @doc Build the standard 4-key indexing counters result map. +counters(Items, Bundles, Skipped, Depth) -> + #{ + items_count => Items, + bundle_count => Bundles, + skipped_count => Skipped, + achieved_depth => Depth + }. + %% @doc Process a single transaction and return its contribution to the counters. 
%% Returns a map with keys: items_count, bundle_count, skipped_count -process_tx({{padding, _PaddingRoot}, _EndOffset}, _BlockStartOffset, _Opts) -> - #{items_count => 0, bundle_count => 0, skipped_count => 0}; -process_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, Opts) -> - IndexStore = hb_store_arweave:store_from_opts(Opts), +process_block_tx({{padding, _PaddingRoot}, _EndOffset}, _BlockStartOffset, TargetDepth, _BlockHeight, _Opts) -> + counters(0, 0, 0, max(2, TargetDepth)); +process_block_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, TargetDepth, BlockHeight, Opts) -> + ArweaveStore = hb_store_arweave:store_from_opts(Opts), TXID = hb_util:encode(TX#tx.id), TXEndOffset = BlockStartOffset + EndOffset, TXStartOffset = TXEndOffset - TX#tx.data_size, @@ -315,17 +825,41 @@ process_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, Opts) -> {offset, TXStartOffset}, {size, TX#tx.data_size} }), - observe_event(<<"item_indexed">>, fun() -> + ok = observe_event(<<"item_indexed">>, fun() -> hb_store_arweave:write_offset( - IndexStore, + ArweaveStore, TXID, <<"tx@1.0">>, TXStartOffset, - TX#tx.data_size + TX#tx.data_size, + Opts ) end), - case is_bundle_tx(TX, Opts) of - false -> #{items_count => 0, bundle_count => 0, skipped_count => 0}; + #{ <<"index-store">> := IndexStore } = ArweaveStore, + ok = hb_store_arweave:write_parent(TX#tx.id, BlockHeight, block, IndexStore, Opts), + ok = write_item_header(TX, <<"tx@1.0">>, Opts), + try is_bundle_tx(TX, Opts) of + false -> + counters(0, 0, 0, max(2, TargetDepth)); + true when TargetDepth > 2 -> + %% Retry to preserve bundle count + try + L1Result = process_l1_tx_direct( + TXStartOffset, TX#tx.data_size, + TargetDepth - 1, ArweaveStore, TXID, TX#tx.id, Opts), + L1Result#{ + achieved_depth => + max(2, maps:get(achieved_depth, L1Result, 0)) + } + catch + _:Reason:Stacktrace -> + ?event(copycat_short, + {arweave_bundle_skipped, + {tx, {explicit, TX#tx.id}}, + {reason, Reason}, + {stacktrace, Stacktrace}}), + 
counters(0, 1, 1, 0) + end; true -> % Lightweight processing of block transactions to depth 2. We % can avoid loading the full L1 TX data into memory, and instead @@ -341,23 +875,25 @@ process_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, Opts) -> ), case BundleRes of {ok, HeaderSize, BundleIndex} -> - % Batch event tracking: measure total time and count for all write_offset calls {TotalTime, {_, ItemsCount}} = timer:tc(fun() -> lists:foldl( fun({ItemID, Size}, {ItemStartOffset, ItemsCountAcc}) -> - hb_store_arweave:write_offset( - IndexStore, + ok = hb_store_arweave:write_offset( + ArweaveStore, hb_util:encode(ItemID), <<"ans104@1.0">>, ItemStartOffset, - Size + Size, + Opts ), + ok = hb_store_arweave:write_parent(ItemID, TX#tx.id, bundle, IndexStore, Opts), {ItemStartOffset + Size, ItemsCountAcc + 1} end, {TXStartOffset + HeaderSize, 0}, BundleIndex ) end), + L2IDs = [ItemID || {ItemID, _Size} <- BundleIndex], ?event(debug_copycat, {bundle_items_indexed, {tx_id, {string, TXID}}, @@ -365,7 +901,9 @@ process_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, Opts) -> }), % Single event record for the batch record_event_metrics(<<"item_indexed">>, ItemsCount, TotalTime), - #{items_count => ItemsCount, bundle_count => 1, skipped_count => 0}; + #{items_count => ItemsCount, bundle_count => 1, + skipped_count => 0, achieved_depth => 2, + item_ids => #{2 => L2IDs}}; {error, Reason} -> ?event( copycat_short, @@ -374,119 +912,1001 @@ process_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, Opts) -> {reason, Reason} } ), - #{items_count => 0, bundle_count => 1, skipped_count => 1} + counters(0, 1, 1, 0) end + catch + _:Reason:Stacktrace -> + ?event(copycat_short, + {arweave_bundle_skipped, + {tx, {explicit, TX#tx.id}}, + {reason, Reason}, + {stacktrace, Stacktrace}}), + counters(0, 0, 1, 0) end. +%% @doc Download and decode a bundle header from chunk data. 
+download_bundle_header(EndOffset, Size, Opts) -> + observe_event(<<"bundle_header">>, fun() -> + lib_arweave_common:bundle_header(EndOffset - Size, Size, Opts) + end). + %% @doc Process transactions: spawn workers and manage the worker pool. %% This function processes transactions in parallel using parallel_map. %% When arweave_index_workers <= 1, processes sequentially (one worker at a time). %% When arweave_index_workers > 1, processes in parallel with the specified concurrency limit. %% Returns a map with keys: items_count, bundle_count, skipped_count. -process_txs(ValidTXs, BlockStartOffset, Opts) -> +process_block_txs(ValidTXs, BlockStartOffset, TargetDepth, BlockHeight, Opts) -> Results = parallel_map( ValidTXs, - fun(TXWithData) -> process_tx(TXWithData, BlockStartOffset, Opts) end, + fun(TXWithData) -> process_block_tx( + TXWithData, BlockStartOffset, TargetDepth, BlockHeight, Opts) end, Opts ), - lists:foldl( + Folded = lists:foldl( fun(Result, Acc) -> #{ - items_count => maps:get(items_count, Result, 0) + maps:get(items_count, Acc, 0), - bundle_count => maps:get(bundle_count, Result, 0) + maps:get(bundle_count, Acc, 0), - skipped_count => maps:get(skipped_count, Result, 0) + maps:get(skipped_count, Acc, 0) + items_count => + maps:get(items_count, Result, 0) + + maps:get(items_count, Acc, 0), + bundle_count => + maps:get(bundle_count, Result, 0) + + maps:get(bundle_count, Acc, 0), + skipped_count => + maps:get(skipped_count, Result, 0) + + maps:get(skipped_count, Acc, 0), + achieved_depth => + min( + maps:get(achieved_depth, Result, ?DEPTH_SENTINEL), + maps:get(achieved_depth, Acc, ?DEPTH_SENTINEL) + ) } end, - #{items_count => 0, bundle_count => 0, skipped_count => 0}, + counters(0, 0, 0, ?DEPTH_SENTINEL), Results - ). - -%% @doc Check whether a TX header indicates bundle content. -is_bundle_tx(TX, _Opts) -> - ar_tx:type(TX) =/= binary. - -%% @doc Download and decode a bundle header from chunk data. 
-download_bundle_header(EndOffset, Size, Opts) -> - observe_event(<<"bundle_header">>, fun() -> - lib_arweave_common:bundle_header(EndOffset - Size, Size, Opts) - end). - -resolve_tx_headers(TXIDs, Opts) -> - Results = parallel_map( - TXIDs, - fun(TXID) -> resolve_tx_header(TXID, Opts) end, - Opts ), - case lists:any(fun(Res) -> Res =:= error end, Results) of - true -> error; - false -> - TXs = lists:foldr( - fun({ok, TX}, Acc) -> [TX | Acc] end, - [], - Results - ), - {ok, TXs} + MergedIDs = merge_all_item_ids( + [maps:get(item_ids, R, #{}) || R <- Results]), + Folded2 = Folded#{item_ids => MergedIDs}, + case maps:get(achieved_depth, Folded2) of + ?DEPTH_SENTINEL -> + Folded2#{achieved_depth => max(2, TargetDepth)}; + _ -> + Folded2 end. -resolve_tx_header(TXID, Opts) -> - try - ?event(debug_copycat, {fetching_tx, {explicit, TXID}}), - ResolveRes = observe_event(<<"tx_header">>, fun() -> - hb_ao:resolve( - << - ?ARWEAVE_DEVICE/binary, - "/tx&tx=", - TXID/binary, - "&exclude-data=true" - >>, - Opts - ) - end), - case ResolveRes of - {ok, StructuredTXHeader} -> - {ok, - hb_message:convert( - StructuredTXHeader, - <<"tx@1.0">>, - <<"structured@1.0">>, - Opts)}; - {error, ResolveError} -> - ?event( - copycat_short, - {arweave_tx_skipped, - {tx_id, {explicit, TXID}}, - {reason, ResolveError} - } - ), - error +%% @doc Process a single indexed L1 TX candidate after lightweight filter checks. 
+maybe_process_l1_tx(TXID, Filters, Depth, QueryL1Offset, Opts) -> + Skipped = counters(0, 0, 1, 0), + NormalizedTXID = hb_util:native_id(TXID), + EncodedTXID = hb_util:encode(NormalizedTXID), + IndexStore = hb_store_arweave:store_from_opts(Opts), + ?event(copycat_short, + {indexing_l1_tx, {tx_id, {explicit, EncodedTXID}}, + {depth, Depth}, + {query_l1_offset, QueryL1Offset} + }), + maybe + {ok, + #{ + <<"codec-device">> := <<"tx@1.0">>, + <<"start-offset">> := StartOffset, + <<"length">> := Length + }} ?= + observe_copycat_l1_stage( + <<"l1_offset_lookup">>, + fun() -> + ensure_l1_tx_offset( + NormalizedTXID, + EncodedTXID, + IndexStore, + QueryL1Offset, + Opts + ) + end + ), + {ok, TX} ?= resolve_tx_header(EncodedTXID, Opts), + pass ?= l1_filter_reason(TX, Filters), + bundle ?= + case is_bundle_tx(TX, Opts) of + true -> bundle; + false -> not_bundle + end, + within_effective_cap ?= + case Length =< effective_memory_cap(Opts) of + true -> within_effective_cap; + false -> effective_cap_exceeded + end, + ok ?= hb_copycat_budget:lease(Length), + try process_l1_tx( + StartOffset, + Length, + Depth, + IndexStore, + EncodedTXID, + hb_util:decode(EncodedTXID), + Opts + ) + after + hb_copycat_budget:release(Length) end - catch - Class:Reason:_ -> + else + {error, Reason} -> ?event( copycat_short, {arweave_tx_skipped, - {tx_id, {explicit, TXID}}, - {class, Class}, + {tx_id, {explicit, EncodedTXID}}, {reason, Reason} } ), - error + Skipped; + error -> + % event already logged in resolve_tx_header + Skipped; + not_bundle -> + ?event( + copycat_short, + {arweave_tx_skipped, + {tx_id, {explicit, EncodedTXID}}, + {reason, not_bundle} + } + ), + Skipped; + effective_cap_exceeded -> + ?event( + copycat_short, + {arweave_bundle_skipped, + {tx_id, {explicit, EncodedTXID}}, + {reason, effective_cap_exceeded} + } + ), + counters(0, 1, 1, 0); + FilterReason -> + ?event( + copycat_short, + {arweave_tx_skipped, + {tx_id, {explicit, EncodedTXID}}, + {reason, FilterReason} + } + ), + 
Skipped end. -%% @doc Record event metrics (count and duration) using hb_event:record. -record_event_metrics(MetricName, Count, Duration) -> - hb_event:record(<<"arweave_block_count">>, MetricName, #{}, Count), - hb_event:record(<<"arweave_block_duration">>, MetricName, #{}, Duration). +%% @doc Fast path for depth>2 block indexing. Skips offset lookup and +%% header re-fetch since the caller already has both. +process_l1_tx_direct(StartOffset, Length, Depth, IndexStore, EncodedTXID, ParentID, Opts) -> + EffectiveCap = effective_memory_cap(Opts), + case Length > EffectiveCap of + true -> + ?event(copycat_short, + {arweave_bundle_skipped, + {tx_id, {explicit, EncodedTXID}}, + {reason, effective_cap_exceeded} + } + ), + counters(0, 1, 1, 0); + false -> + ok = hb_copycat_budget:lease(Length), + try + process_l1_tx( + StartOffset, Length, Depth, + IndexStore, EncodedTXID, ParentID, Opts) + after + hb_copycat_budget:release(Length) + end + end. -%% @doc Track an operation's execution time and count using hb_event:record. -%% Always tracks both count and duration, regardless of success/failure. +%% @doc Load the L1 TX data into memory and index it. 
process_l1_tx(
        StartOffset, Length, Depth, IndexStore, EncodedTXID, ParentID, Opts) ->
    % Stage 1: read the complete bundle payload (timed as its own L1 stage).
    ReadChunks =
        fun() -> hb_store_arweave:read_chunks(StartOffset, Length, Opts) end,
    Outcome =
        case observe_copycat_l1_stage(<<"l1_read_chunks">>, ReadChunks) of
            {ok, BundleData} ->
                % Stage 2: walk the bundle in memory. The outer timer:tc
                % feeds the `item_indexed' batch metric; the inner stage
                % observer feeds the copycat L1 stage metric.
                IndexBundle =
                    fun() ->
                        index_full_bundle_bytes(
                            BundleData,
                            StartOffset,
                            Depth,
                            IndexStore,
                            ParentID,
                            Opts
                        )
                    end,
                {Elapsed, IndexResult} =
                    timer:tc(
                        fun() ->
                            observe_copycat_l1_stage(
                                <<"l1_full_bundle_index">>,
                                IndexBundle
                            )
                        end
                    ),
                case IndexResult of
                    {ok, ItemsCount, AchievedDepth, BundleIDs} ->
                        record_event_metrics(
                            <<"item_indexed">>,
                            ItemsCount,
                            Elapsed
                        ),
                        % Depths and ID levels reported by the bundle walk
                        % are relative to the bundle; shift both by one so
                        % they are relative to the L1 transaction.
                        {indexed, #{
                            items_count => ItemsCount,
                            bundle_count => 1,
                            skipped_count => 0,
                            achieved_depth => 1 + AchievedDepth,
                            item_ids => shift_item_ids(BundleIDs, 1)
                        }};
                    {error, IndexError} ->
                        {skipped, IndexError}
                end;
            {error, ReadError} ->
                {skipped, ReadError};
            not_found ->
                {skipped, not_found}
        end,
    case Outcome of
        {indexed, Counters} ->
            Counters;
        {skipped, Reason} ->
            % Every failure mode is reported the same way: one bundle seen,
            % one bundle skipped, no depth achieved.
            ?event(
                copycat_short,
                {arweave_bundle_skipped,
                    {tx_id, {explicit, EncodedTXID}},
                    {reason, Reason}
                }
            ),
            counters(0, 1, 1, 0)
    end.

%% @doc Ensure the root L1 TX offset exists locally before `id=...` indexing.
%% If the offset is missing and `query_l1_offset` is enabled, fetches the TX
%% offset metadata from Arweave, writes it to the local offset store, and
%% retries the local lookup.
ensure_l1_tx_offset(_TXID, _EncodedTXID, IndexStore, _LoadL1Offset, _Opts)
        when not is_map(IndexStore) ->
    % Without a map-shaped store there is nowhere to look the offset up.
    {error, missing_offset};
ensure_l1_tx_offset(TXID, EncodedTXID, IndexStore, QueryL1Offset, Opts) ->
    ReadLocal =
        fun() -> hb_store_arweave:read_offset(IndexStore, TXID, Opts) end,
    case ReadLocal() of
        {ok, _} = LocalHit ->
            LocalHit;
        not_found when QueryL1Offset ->
            ?event(
                copycat_short,
                {arweave_tx_querying_offset,
                    {tx_id, {explicit, EncodedTXID}},
                    {source, network}
                }
            ),
            case query_l1_tx_offset(EncodedTXID, IndexStore, Opts) of
                {error, Reason} ->
                    {error, Reason};
                ok ->
                    % The query wrote the offset into the store; read it
                    % back so the caller gets the stored representation.
                    case ReadLocal() of
                        {ok, _} = FetchedHit ->
                            FetchedHit;
                        not_found ->
                            {error, missing_offset}
                    end
            end;
        not_found ->
            {error, missing_offset}
    end.

%% @doc Fetch `/arweave/tx/<TXID>/offset` over HTTP, convert the reported end
%% offset and size into a start offset, and persist it as a `tx@1.0` offset
%% record. `TXID` is the encoded (binary) transaction ID. Returns `ok` or
%% `{error, Reason}`.
query_l1_tx_offset(TXID, IndexStore, Opts) ->
    FetchOffset =
        fun() ->
            hb_http:request(
                #{
                    <<"path">> => <<"/arweave/tx/", TXID/binary, "/offset">>,
                    <<"method">> => <<"GET">>
                },
                Opts
            )
        end,
    case observe_copycat_l1_stage(<<"l1_offset_query_http">>, FetchOffset) of
        not_found ->
            {error, not_found};
        {error, Reason} ->
            {error, Reason};
        {ok, #{ <<"body">> := OffsetBody }} ->
            OffsetMsg = hb_json:decode(OffsetBody),
            EndOffset = hb_util:int(maps:get(<<"offset">>, OffsetMsg)),
            Size = hb_util:int(maps:get(<<"size">>, OffsetMsg)),
            StoreOffset =
                fun() ->
                    hb_store_arweave:write_offset(
                        IndexStore,
                        TXID,
                        <<"tx@1.0">>,
                        EndOffset - Size,
                        Size,
                        Opts
                    )
                end,
            ok = observe_copycat_l1_stage(
                <<"l1_offset_query_store_write">>,
                StoreOffset
            ),
            ok
    end.

%% @doc Decode the header of an in-memory bundle and index its items down to
%% `Depth` levels. Returns `{ok, Count, AchievedDepth, ItemIDs}` or
%% `{error, invalid_bundle_header}`. A non-positive `Depth` indexes nothing.
index_full_bundle_bytes(_BundleData, _BundleStartOffset, Depth, _Store, _ParentID, _Opts)
        when Depth =< 0 ->
    {ok, 0, 0, #{}};
index_full_bundle_bytes(BundleData, BundleStartOffset, Depth, Store, ParentID, Opts) ->
    case ar_bundles:decode_bundle_header(BundleData) of
        {ItemsBin, BundleIndex} ->
            % Whatever precedes the item payload is the bundle header.
            HeaderBytes = byte_size(BundleData) - byte_size(ItemsBin),
            index_full_bundle_items(
                BundleIndex,
                ItemsBin,
                BundleStartOffset + HeaderBytes,
                Depth,
                Store,
                ParentID,
                Opts,
                0,
                ?DEPTH_SENTINEL,
                [],
                #{}
            );
        invalid_bundle_header ->
            {error, invalid_bundle_header}
    end.

%% @doc Walk the bundle index, writing offset/parent/header records for each
%% child and recursing into nested bundles while depth remains.
%% Returns {ok, Count, MinAchievedDepth, ItemIDs} or {error, Reason}, where
%% ItemIDs maps relative depth => list of raw IDs in bundle order.
index_full_bundle_items(
        [], _ItemsBin, _ItemStartOffset, Depth, _Store, _ParentID, _Opts,
        Count, MinDepth, ThisLevelIDs, DescIDs) ->
    % With no items, the sentinel is still in place and the requested depth
    % is reported as achieved.
    FinalDepth =
        if
            MinDepth =:= ?DEPTH_SENTINEL -> Depth;
            true -> 1 + MinDepth
        end,
    {ok, Count, FinalDepth, DescIDs#{1 => lists:reverse(ThisLevelIDs)}};
index_full_bundle_items(
        [{ItemID, Size} | Rest],
        ItemsBin,
        ItemStartOffset,
        Depth,
        #{ <<"index-store">> := IndexStore } = Store,
        ParentID,
        Opts,
        Count,
        MinDepth,
        ThisLevelIDs,
        DescIDs)
        when byte_size(ItemsBin) >= Size ->
    <<ItemBinary:Size/binary, RemainingBin/binary>> = ItemsBin,
    EncodedItemID = hb_util:encode(ItemID),
    ParseResult =
        validate_and_flag_item_id(
            ItemBinary, ItemID, EncodedItemID, IndexStore),
    % Offset and parent records are written even when the item header could
    % not be parsed.
    ok = hb_store_arweave:write_offset(
        Store,
        EncodedItemID,
        <<"ans104@1.0">>,
        ItemStartOffset,
        Size,
        Opts
    ),
    ok = hb_store_arweave:write_parent(ItemID, ParentID, bundle, IndexStore, Opts),
    {DescendantCount, ItemAchievedDepth, ChildIDs} =
        case ParseResult of
            {ok, HeaderSize, ParsedItem} ->
                ok = write_item_header(ParsedItem, <<"ans104@1.0">>, Opts),
                if
                    Depth > 1 ->
                        index_full_bundle_descendants_parsed(
                            ParsedItem, HeaderSize,
                            ItemStartOffset, Depth - 1, Store, ItemID, Opts);
                    true ->
                        {0, Depth - 1, #{}}
                end;
            _ ->
                {0, Depth - 1, #{}}
        end,
    % Child levels are relative to the item; shift by one to make them
    % relative to this bundle before merging.
    index_full_bundle_items(
        Rest,
        RemainingBin,
        ItemStartOffset + Size,
        Depth,
        Store,
        ParentID,
        Opts,
        Count + 1 + DescendantCount,
        min(MinDepth, ItemAchievedDepth),
        [ItemID | ThisLevelIDs],
        merge_item_ids(DescIDs, shift_item_ids(ChildIDs, 1))
    );
index_full_bundle_items(
        _BundleIndex, _ItemsBin, _ItemStartOffset, _Depth,
        _Store, _ParentID, _Opts, _Count, _MinDepth, _ThisLevelIDs, _DescIDs) ->
    % The index declares more bytes than the payload holds, or the store
    % lacks an `index-store` entry.
    {error, invalid_bundle_header}.

%% @doc Descend into a data item whose header has already been parsed.
%% Returns {Count, AchievedDepth, ItemIDs}. Non-bundle items report the full
%% remaining depth; a nested bundle that fails to index reports depth 0.
index_full_bundle_descendants_parsed(
        _ParsedItem, _HeaderSize, _ItemStartOffset, Depth, _Store, _ParentID, _Opts)
        when Depth =< 0 ->
    {0, 0, #{}};
index_full_bundle_descendants_parsed(
        ParsedItem, HeaderSize, ItemStartOffset, Depth, Store, ParentID, Opts) ->
    case is_bundle_tx(ParsedItem, Opts) of
        false ->
            {0, Depth, #{}};
        true ->
            NestedResult =
                index_full_bundle_bytes(
                    ParsedItem#tx.data,
                    ItemStartOffset + HeaderSize,
                    Depth,
                    Store,
                    ParentID,
                    Opts
                ),
            case NestedResult of
                {ok, Count, ChildDepth, ChildIDs} ->
                    {Count, ChildDepth, ChildIDs};
                _ ->
                    {0, 0, #{}}
            end
    end.

%% @doc Validate an item ID by hashing the signature from the deserialized
%% header. Returns {ok, HeaderSize, ParsedItem} on successful parse, or
%% error if deserialization fails. Mismatch flags are written but don't
%% prevent the item from being indexed.
validate_and_flag_item_id(ItemBinary, DeclaredID, EncodedDeclaredID, IndexStore) ->
    % Only the deserialization itself is protected by the `catch`; a failed
    % mismatch write in the `of` section still crashes the caller.
    try ar_bundles:deserialize_header(ItemBinary) of
        {ok, HeaderSize, ParsedItem} ->
            case crypto:hash(sha256, ParsedItem#tx.signature) of
                DeclaredID ->
                    ok;
                ComputedID ->
                    ok = hb_store:write(
                        IndexStore,
                        #{hb_store_arweave_offset:mismatch_path(DeclaredID) => ComputedID},
                        #{}
                    ),
                    ?event(copycat_short,
                        {item_id_mismatch,
                            {declared_id, {explicit, EncodedDeclaredID}},
                            {computed_id,
                                {explicit, hb_util:encode(ComputedID)}}
                        }
                    )
            end,
            {ok, HeaderSize, ParsedItem};
        _ ->
            error
    catch
        _:_ ->
            error
    end.

%% @doc Report whether a TX header describes bundle content, i.e. whether
%% `ar_tx:type/1` yields anything other than `binary`.
%% NOTE: This function can throw if transaction tags aren't properly formatted.
is_bundle_tx(TX, _Opts) ->
    case ar_tx:type(TX) of
        binary -> false;
        _ -> true
    end.

%% @doc Resolve the headers of all given TX IDs through the worker pool.
%% Returns `{ok, TXs}` in input order, or `error` if any header failed.
resolve_tx_headers(TXIDs, Opts) ->
    Results = parallel_map(
        TXIDs,
        fun(TXID) -> resolve_tx_header(TXID, Opts) end,
        Opts
    ),
    case lists:member(error, Results) of
        true ->
            error;
        false ->
            {ok, lists:map(fun({ok, TX}) -> TX end, Results)}
    end.

%% @doc Resolve one TX header (without data) via the Arweave device and
%% convert it to a `tx@1.0` record. Every failure, including exceptions, is
%% logged as `arweave_tx_skipped` and reported as `error`.
resolve_tx_header(TXID, Opts) ->
    try
        ?event(debug_copycat, {fetching_tx, {explicit, TXID}}),
        FetchHeader =
            fun() ->
                hb_ao:resolve(
                    <<
                        ?ARWEAVE_DEVICE/binary,
                        "/tx&tx=",
                        TXID/binary,
                        "&exclude-data=true"
                    >>,
                    Opts
                )
            end,
        case observe_event(<<"tx_header">>, FetchHeader) of
            {error, ResolveError} ->
                ?event(
                    copycat_short,
                    {arweave_tx_skipped,
                        {tx_id, {explicit, TXID}},
                        {reason, ResolveError}
                    }
                ),
                error;
            {ok, StructuredTXHeader} ->
                {ok,
                    hb_message:convert(
                        StructuredTXHeader,
                        <<"tx@1.0">>,
                        <<"structured@1.0">>,
                        Opts)}
        end
    catch
        Class:Reason ->
            ?event(
                copycat_short,
                {arweave_tx_skipped,
                    {tx_id, {explicit, TXID}},
                    {class, Class},
                    {reason, Reason}
                }
            ),
            error
    end.

%% @doc Record a count and a duration sample for a block-indexing metric.
record_event_metrics(MetricName, Count, Duration) ->
    hb_event:record(<<"arweave_block_count">>, MetricName, #{}, Count),
    hb_event:record(<<"arweave_block_duration">>, MetricName, #{}, Duration).

%% @doc Record a count and a duration sample for a copycat L1 stage metric.
record_copycat_l1_metrics(MetricName, Count, Duration) ->
    hb_event:record(copycat_l1_count, MetricName, #{}, Count),
    hb_event:record(copycat_l1_duration, MetricName, #{}, Duration).

%% @doc Run `Fun`, record one occurrence and its elapsed time under
%% `MetricName`, and return the result of `Fun`. The metric is recorded for
%% any return value; an exception in `Fun` propagates before recording.
observe_event(MetricName, Fun) ->
    {Elapsed, Value} = timer:tc(Fun),
    record_event_metrics(MetricName, 1, Elapsed),
    Value.

%% @doc Same as `observe_event/2`, but records into the copycat L1 metrics.
observe_copycat_l1_stage(MetricName, Fun) ->
    {Elapsed, Value} = timer:tc(Fun),
    record_copycat_l1_metrics(MetricName, 1, Elapsed),
    Value.

%% @doc Scan the mempool and index any accessible unconfirmed TXs.
%% Returns `{ok, Summary}` with the aggregated per-status counters, or passes
%% through whatever the pending-list lookup returned if it was not a list.
index_mempool(Request, Opts) ->
    SenderFilter = mempool_sender_filter(Request, Opts),
    case mempool_pending(#{}, #{}, Opts) of
        {ok, TXIDs} when is_list(TXIDs) ->
            mempool_progress(
                Opts,
                {mempool_scan_started, {pending_count, length(TXIDs)}}
            ),
            IndexOne =
                fun(TXID) -> index_mempool_tx(TXID, SenderFilter, Opts) end,
            PerTXResults = parallel_map(TXIDs, IndexOne, Opts),
            Summary =
                lists:foldl(
                    fun(TXResult, Totals) ->
                        mempool_accumulate_result(TXResult, Totals)
                    end,
                    mempool_empty_summary(),
                    PerTXResults
                ),
            mempool_progress(Opts, {mempool_scan_completed, Summary}),
            {ok, Summary};
        Error ->
            Error
    end.

%% @doc Emit a mempool progress event, but only when the
%% `arweave_mempool_progress` option is enabled.
mempool_progress(Opts, Event) ->
    ProgressEnabled = hb_opts:get(arweave_mempool_progress, false, Opts),
    case ProgressEnabled of
        true -> ?event(copycat_short, Event);
        false -> ok
    end.

%% @doc Query pending transactions. An arity-3 fun configured under
%% `arweave_pending_fun` takes precedence over `dev_arweave:pending/3`.
mempool_pending(Base, Request, Opts) ->
    Override = hb_opts:get(arweave_pending_fun, undefined, Opts),
    if
        is_function(Override, 3) ->
            Override(Base, Request, Opts);
        true ->
            dev_arweave:pending(Base, Request, Opts)
    end.

%% @doc Extract the optional `sender` filter from the request. Returns the
%% normalized sender, or `not_found` when it is absent or not a binary.
mempool_sender_filter(Request, Opts) ->
    case hb_maps:find(<<"sender">>, Request, Opts) of
        {ok, Sender} when is_binary(Sender) ->
            normalize_sender_filter(Sender);
        _ ->
            not_found
    end.

%% @doc Bring a sender address into its comparable form: a 32-byte binary is
%% passed through `hb_util:human_id/1`; any other binary is used unchanged.
normalize_sender_filter(<<RawAddress:32/binary>>) ->
    hb_util:human_id(RawAddress);
normalize_sender_filter(Sender) when is_binary(Sender) ->
    Sender.

%% @doc The all-zero counter map that every mempool summary starts from.
mempool_empty_summary() ->
    maps:from_list(
        [
            {Counter, 0}
        ||
            Counter <- [
                indexed,
                existing,
                missing_data,
                failed,
                tx_offsets_written,
                bundle_txs,
                items_indexed
            ]
        ]
    ).

%% @doc Add the counters contributed by one TX result onto the running
%% summary. Every contributed key must already exist in `Acc`.
mempool_accumulate_result(Result, Acc) ->
    maps:fold(
        fun(Counter, Increment, Totals) ->
            maps:update_with(
                Counter,
                fun(Current) -> Current + Increment end,
                Totals
            )
        end,
        Acc,
        mempool_result_summary(Result)
    ).

%% @doc Translate a per-TX result into summary counters. A result is either
%% a bare status atom or a map with a `status` key plus optional
%% `tx_offsets_written`, `bundle_txs` and `items_indexed` counts.
%% `filtered` contributes nothing, `ok` counts as `indexed`, and anything
%% unrecognised counts as `failed`.
mempool_result_summary(#{ status := Status } = Result) ->
    Extras =
        maps:with([tx_offsets_written, bundle_txs, items_indexed], Result),
    maps:fold(
        fun(Counter, Increment, Totals) ->
            maps:update_with(
                Counter,
                fun(Current) -> Current + Increment end,
                Totals
            )
        end,
        mempool_result_summary(Status),
        Extras
    );
mempool_result_summary(filtered) ->
    mempool_empty_summary();
mempool_result_summary(ok) ->
    mempool_result_summary(indexed);
mempool_result_summary(Status)
        when Status =:= existing;
             Status =:= indexed;
             Status =:= missing_data;
             Status =:= failed ->
    (mempool_empty_summary())#{ Status := 1 };
mempool_result_summary(_) ->
    mempool_result_summary(failed).

%% @doc Index one pending TX, with or without a sender filter, emitting
%% progress events before and after. Returns the per-TX result (a status
%% atom or a status map).
index_mempool_tx(TXID, SenderFilter, Opts) ->
    mempool_progress(Opts, {mempool_tx_started, {tx_id, {explicit, TXID}}}),
    Result =
        if
            SenderFilter =:= not_found ->
                index_mempool_tx_unfiltered(TXID, Opts);
            true ->
                index_mempool_tx_filtered(TXID, SenderFilter, Opts)
        end,
    mempool_progress(
        Opts,
        {mempool_tx_finished,
            {tx_id, {explicit, TXID}},
            mempool_progress_result(Result)}
    ),
    Result.

%% @doc Index a pending TX when no sender filter applies. The index lookup
%% comes first so already-indexed TXs never trigger a header fetch.
index_mempool_tx_unfiltered(TXID, Opts) ->
    maybe
        false ?= hb_store_arweave:is_tx_indexed(TXID, Opts),
        {ok, TX} ?= load_mempool_tx_header(TXID, Opts),
        write_mempool_offsets(TXID, TX, Opts)
    else
        % Already present in the index.
        true -> existing;
        % Header could not be loaded.
        error -> failed
    end.

%% @doc Index a pending TX only if its owner matches `SenderFilter`. The
%% header is fetched first because the filter needs the owner address.
index_mempool_tx_filtered(TXID, SenderFilter, Opts) ->
    maybe
        {ok, TX} ?= load_mempool_tx_header(TXID, Opts),
        true ?= mempool_tx_sender_matches(TX, SenderFilter),
        false ?= hb_store_arweave:is_tx_indexed(TXID, Opts),
        write_mempool_offsets(TXID, TX, Opts)
    else
        % Header could not be loaded.
        error -> failed;
        % `false` can only come from the sender check.
        false -> filtered;
        % `true` can only come from the index lookup.
        true -> existing
    end.

%% @doc Fetch the header (no data) of a pending TX and return it as a `#tx{}`
%% record. A response that is already a `#tx{}` is used directly; any other
%% `{ok, _}` payload is converted from `structured@1.0` to `tx@1.0`.
%% Returns `{ok, TX}` or `error`.
load_mempool_tx_header(TXID, Opts) ->
    TXRef = {tx_id, {explicit, TXID}},
    mempool_progress(Opts, {mempool_tx_header_fetch_started, TXRef}),
    HeaderRequest = #{ <<"pending">> => TXID, <<"exclude-data">> => true },
    case mempool_pending(#{}, HeaderRequest, Opts) of
        {ok, Fetched} ->
            mempool_progress(
                Opts,
                {mempool_tx_header_fetch_finished, TXRef}
            ),
            TX =
                case is_record(Fetched, tx) of
                    true ->
                        Fetched;
                    false ->
                        hb_message:convert(
                            Fetched,
                            <<"tx@1.0">>,
                            <<"structured@1.0">>,
                            Opts
                        )
                end,
            mempool_progress(
                Opts,
                {mempool_tx_convert_finished,
                    TXRef,
                    {data_size, TX#tx.data_size},
                    {bundle, is_bundle_tx(TX, Opts)}}
            ),
            {ok, TX};
        _ ->
            error
    end.

%% @doc Reduce a per-TX result to the `{status, Status}` tuple used in
%% progress events. `ok` is reported as `indexed`; unknown results as
%% `failed`.
mempool_progress_result(#{ status := Status }) ->
    {status, Status};
mempool_progress_result(ok) ->
    {status, indexed};
mempool_progress_result(Status)
        when Status =:= existing;
             Status =:= filtered;
             Status =:= indexed;
             Status =:= missing_data;
             Status =:= failed ->
    {status, Status};
mempool_progress_result(_) ->
    {status, failed}.

%% @doc True when the TX has an owner address and its normalized form equals
%% the (already normalized) sender filter.
mempool_tx_sender_matches(TX, SenderFilter) ->
    OwnerAddress = ar_tx:get_owner_address(TX),
    OwnerAddress =/= not_set
        andalso normalize_sender_filter(OwnerAddress) =:= SenderFilter.

%% @doc Load the data of a pending TX and write its offset records.
%% The TX itself is stored with the `relative` offset marker; its children
%% (bundle items, or a standalone data item) are stored relative to the TX.
%% Returns a result map with a `status` key: `missing_data` when the data
%% cannot be loaded, otherwise whatever `write_mempool_children/5` reports.
write_mempool_offsets(TXID, TX, Opts) ->
    Store = hb_store_arweave:store_from_opts(Opts),
    mempool_progress(
        Opts,
        {mempool_data_load_started,
            {tx_id, {explicit, TXID}},
            {length, TX#tx.data_size}}
    ),
    case load_mempool_data(TXID, TX, Opts) of
        {ok, Data} ->
            mempool_progress(
                Opts,
                {mempool_data_loaded,
                    {tx_id, {explicit, TXID}},
                    {loaded_bytes, byte_size(Data)}}
            ),
            ok = hb_store_arweave:write_offset(
                Store, TXID, <<"tx@1.0">>, relative, TX#tx.data_size, Opts),
            write_mempool_children(Store, TXID, TX, Data, Opts);
        _Error ->
            #{ status => missing_data }
    end.

%% @doc Write offset records for the contents of a pending TX whose own
%% offset has already been written (hence `tx_offsets_written => 1` in every
%% result). Bundles get one record per indexed item; non-bundle data that
%% parses and verifies as a standalone data item gets a single record.
%% A bundle whose index cannot be loaded, or any child offset write that does
%% not return `ok`, yields `status => failed`, so the summary never counts
%% items that were not stored.
write_mempool_children(Store, TXID, TX, Data, Opts) ->
    TXOnly = #{ status => indexed, tx_offsets_written => 1 },
    Failed = #{ status => failed, tx_offsets_written => 1 },
    case is_bundle_tx(TX, Opts) of
        true ->
            maybe
                {ok, HeaderSize, BundleIndex} ?=
                    load_mempool_bundle_index(TXID, Data, Opts),
                ok ?=
                    write_mempool_items(
                        Store, TXID, BundleIndex, HeaderSize, Opts),
                TXOnly#{
                    bundle_txs => 1,
                    items_indexed => length(BundleIndex)
                }
            else
                _Error -> Failed
            end;
        false ->
            case standalone_item_id(Data) of
                {ok, ItemID} ->
                    Ref = #{ <<"relative">> => TXID, <<"offset">> => 0 },
                    WriteRes =
                        hb_store_arweave:write_offset(
                            Store, ItemID, <<"ans104@1.0">>,
                            Ref, TX#tx.data_size, Opts),
                    case WriteRes of
                        ok -> TXOnly#{ items_indexed => 1 };
                        _ -> Failed
                    end;
                not_found ->
                    TXOnly
            end
    end.

%% @doc Write one TX-relative offset record per bundle item, starting at
%% `Offset` (the bundle header size) and advancing by each item's size.
%% Every item is attempted even if an earlier write failed. Returns `ok` when
%% all writes returned `ok`, otherwise
%% `{error, {item_offset_write_failed, FirstNonOkResult}}`.
write_mempool_items(Store, TXID, BundleIndex, Offset, Opts) ->
    {_EndOffset, Status} =
        lists:foldl(
            fun({ItemID, Size}, {ItemOffset, StatusAcc}) ->
                Ref = #{ <<"relative">> => TXID, <<"offset">> => ItemOffset },
                WriteRes =
                    hb_store_arweave:write_offset(
                        Store, hb_util:encode(ItemID), <<"ans104@1.0">>,
                        Ref, Size, Opts),
                NextStatus =
                    case {StatusAcc, WriteRes} of
                        {ok, ok} -> ok;
                        {ok, Failure} ->
                            {error, {item_offset_write_failed, Failure}};
                        % Keep reporting the first failure.
                        {{error, _} = FirstFailure, _} -> FirstFailure
                    end,
                {ItemOffset + Size, NextStatus}
            end,
            {Offset, ok},
            BundleIndex
        ),
    Status.

%% @doc Load the full data of a pending TX through the `arweave@2.9` device,
%% addressed relative to the TX itself. A zero-size TX yields `{ok, <<>>}`
%% without any lookup.
load_mempool_data(_TXID, #tx{ data_size = 0 }, _Opts) ->
    {ok, <<>>};
load_mempool_data(TXID, #tx{ data_size = Size }, Opts) when Size > 0 ->
    hb_ao:resolve(
        #{ <<"device">> => <<"arweave@2.9">> },
        #{
            <<"path">> => <<"chunk">>,
            <<"offset">> => #{
                <<"relative">> => TXID,
                <<"offset">> => 0
            },
            <<"length">> => Size
        },
        Opts
    ).

%% @doc Obtain the bundle index of a pending TX.
%% With the data already in memory, the header is decoded directly. With
%% empty data, the header is fetched by chunk: the first chunk gives the
%% header size, and if the header extends past that chunk the remainder is
%% fetched and appended before decoding.
%% Returns `{ok, HeaderSize, BundleIndex}` or `{error, invalid_bundle_header}`.
%% Any exception, including a failed chunk lookup, is reported as
%% `invalid_bundle_header`.
load_mempool_bundle_index(_TXID, Data, _Opts) when is_binary(Data), Data =/= <<>> ->
    try ar_bundles:decode_bundle_header(Data) of
        {ItemsBin, BundleIndex} ->
            {ok, byte_size(Data) - byte_size(ItemsBin), BundleIndex};
        invalid_bundle_header ->
            {error, invalid_bundle_header}
    catch _:_ ->
        {error, invalid_bundle_header}
    end;
load_mempool_bundle_index(TXID, <<>>, Opts) ->
    try
        {ok, FirstChunk} =
            hb_ao:resolve(
                #{ <<"device">> => <<"arweave@2.9">> },
                #{
                    <<"path">> => <<"chunk">>,
                    <<"offset">> => #{
                        <<"relative">> => TXID,
                        <<"offset">> => 0
                    }
                },
                Opts
            ),
        case ar_bundles:bundle_header_size(FirstChunk) of
            invalid_bundle_header ->
                {error, invalid_bundle_header};
            HeaderSize when HeaderSize =< byte_size(FirstChunk) ->
                % The whole header sits in the first chunk.
                {_ItemsBin, BundleIndex} =
                    ar_bundles:decode_bundle_header(
                        binary:part(FirstChunk, 0, HeaderSize)
                    ),
                {ok, HeaderSize, BundleIndex};
            HeaderSize ->
                % The header spills past the first chunk: fetch exactly the
                % missing bytes, starting where the first chunk ended.
                RemainingSize = HeaderSize - byte_size(FirstChunk),
                {ok, RemainingChunk} =
                    hb_ao:resolve(
                        #{ <<"device">> => <<"arweave@2.9">> },
                        #{
                            <<"path">> => <<"chunk">>,
                            <<"offset">> => #{
                                <<"relative">> => TXID,
                                <<"offset">> => byte_size(FirstChunk)
                            },
                            <<"length">> => RemainingSize
                        },
                        Opts
                    ),
                % Reassemble the complete header from both fetches.
                HeaderBin = <<FirstChunk/binary, RemainingChunk/binary>>,
                {_ItemsBin, BundleIndex} =
                    ar_bundles:decode_bundle_header(HeaderBin),
                {ok, HeaderSize, BundleIndex}
        end
    catch _:_ ->
        {error, invalid_bundle_header}
    end.

%% @doc Check whether raw TX data is itself a single data item.
%% The first two bytes are read as the signature-type field and must equal
%% one of the accepted 2-byte codes before the data is deserialized.
%% Returns `{ok, EncodedItemID}` only if the item also passes
%% `ar_bundles:verify_item/1`. Returns `not_found` for data shorter than two
%% bytes, an unaccepted code, a failed parse, or a failed verification.
standalone_item_id(<<SigType:2/binary, _/binary>> = Data)
        when is_binary(Data), Data =/= <<>> ->
    case lists:member(SigType, [<<1, 0>>, <<2, 0>>, <<3, 0>>, <<4, 0>>, <<7, 0>>]) of
        false -> not_found;
        true ->
            % Arbitrary TX data can start with a matching prefix by chance,
            % so a failed parse is treated as "not a data item".
            try
                Item = ar_bundles:deserialize(Data),
                case ar_bundles:verify_item(Item) of
                    true -> {ok, hb_util:encode(Item#tx.id)};
                    false -> not_found
                end
            catch _:_ -> not_found
            end
    end;
standalone_item_id(_) -> not_found.

%%% Tests

mempool_result_summary_filtered_test_parallel() ->
    ?assertEqual(mempool_empty_summary(), mempool_result_summary(filtered)).

normalize_sender_filter_binary_address_test_parallel() ->
    Address = crypto:strong_rand_bytes(32),
    ?assertEqual(hb_util:human_id(Address), normalize_sender_filter(Address)).

mempool_tx_sender_matches_owner_address_test_parallel() ->
    Address = crypto:strong_rand_bytes(32),
    TX = #tx{ owner = <<1>>, owner_address = Address },
    ?assert(mempool_tx_sender_matches(TX, hb_util:human_id(Address))),
    ?assertNot(mempool_tx_sender_matches(TX, hb_util:human_id(crypto:strong_rand_bytes(32)))).
+ +mempool_sender_filter_indexes_matching_tx_test_parallel() -> + TestStore = hb_test_utils:test_store(), + IndexStore = #{ <<"index-store">> => [TestStore] }, + BaseOpts = #{ + <<"store">> => [TestStore], + <<"arweave-index-ids">> => true, + <<"arweave-index-store">> => IndexStore + }, + ok = hb_store:reset([TestStore]), + ok = hb_store:start([TestStore]), + MatchTXID = hb_util:human_id(crypto:strong_rand_bytes(32)), + OtherTXID = hb_util:human_id(crypto:strong_rand_bytes(32)), + Sender = <<"FPjbN_btYKzcf8QASjs30v5C0FPv7XpwKXENBW8dqVw">>, + MatchTX = mempool_test_pending_tx(Sender), + OtherTX = mempool_test_pending_tx( + hb_util:human_id(crypto:strong_rand_bytes(32)) + ), + Opts = BaseOpts#{ + <<"arweave-pending-fun">> => + fun(_, #{ <<"pending">> := PendingTXID }, _) + when PendingTXID =:= MatchTXID -> + {ok, MatchTX}; + (_, #{ <<"pending">> := PendingTXID }, _) + when PendingTXID =:= OtherTXID -> + {ok, OtherTX}; + (_, Request, _) when map_size(Request) =:= 0 -> + {ok, [MatchTXID, OtherTXID]} + end + }, + ?assertEqual( + {ok, (mempool_empty_summary())#{ indexed => 1, tx_offsets_written => 1 }}, + arweave( + #{}, + #{ <<"mode">> => <<"mempool">>, <<"sender">> => Sender }, + Opts + ) + ), + ?assert(hb_store_arweave:is_tx_indexed(MatchTXID, Opts)), + ?assertNot(hb_store_arweave:is_tx_indexed(OtherTXID, Opts)). + +mempool_test_pending_tx(Sender) -> + #tx{ + format = 2, + owner = <<1>>, + owner_address = Sender + }. + index_ids_test_parallel() -> %% Test block: https://viewblock.io/arweave/block/1827942 %% Note: this block includes a data item with an Ethereum signature. This @@ -496,7 +1916,7 @@ index_ids_test_parallel() -> {_TestStore, StoreOpts, Opts} = setup_index_opts(), {ok, 1827942} = hb_ao:resolve( - <<"~copycat@1.0/arweave&from=1827942&to=1827942">>, + <<"~copycat@1.0/arweave&from=1827942&to=1827942&depth=2">>, Opts ), ?assertMatch( @@ -557,7 +1977,23 @@ index_ids_test_parallel() -> ], Opts ), - ok. 
+ % L3 item not read when doing L1 depth=1 + assert_item_not_read(<<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, Opts), + ok. + +block_depth_3_test() -> + %% Test block: https://viewblock.io/arweave/block/1827942 + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {ok, 1827942} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942&depth=3">>, + Opts + ), + % L3 item read when doing depth=3 + assert_item_read( + <<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, + Opts), + ok. %% @doc Test a bundle header that fits in a single chunk. small_bundle_header_test_parallel() -> @@ -619,25 +2055,27 @@ invalid_bundle_header_test_parallel() -> download_bundle_header(EndOffset, Size, Opts)), ok. -invalid_bundle_test_parallel() -> - {_TestStore, _StoreOpts, Opts} = setup_index_opts(), - Block = 1307606, - {ok, Block} = - hb_ao:resolve( - <<"~copycat@1.0/arweave&from=", (hb_util:bin(Block))/binary, "&to=", (hb_util:bin(Block))/binary>>, +invalid_bundle_test_parallel_() -> + {timeout, 60, fun() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1307606, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", (hb_util:bin(Block))/binary, "&to=", (hb_util:bin(Block))/binary>>, + Opts + ), + assert_bundle_read( + <<"8S12ZqO6-_icGkeuH8mFq6x9q7OIoXOqFRGH5k-wshg">>, + [ + {<<"gintz-t6q_kdeP_IBQVGnp9fgFzs-pPGGehXW-V7ZRk">>, <<"1">>} + ], Opts ), - assert_bundle_read( - <<"8S12ZqO6-_icGkeuH8mFq6x9q7OIoXOqFRGH5k-wshg">>, - [ - {<<"gintz-t6q_kdeP_IBQVGnp9fgFzs-pPGGehXW-V7ZRk">>, <<"1">>} - ], - Opts - ), - % L1 TX with bundle tags, but data is not a valid bundle. The L1 TX - % should still be indexed. - assert_item_read(<<"cGNURX2IUt98VKVIeXSfYe6eulNwPEqijaQfvatzd_o">>, Opts), - ok. + % L1 TX with bundle tags, but data is not a valid bundle. The L1 TX + % should still be indexed. + assert_item_read(<<"cGNURX2IUt98VKVIeXSfYe6eulNwPEqijaQfvatzd_o">>, Opts), + ok + end}. 
block_with_large_integer_test_parallel() -> {_TestStore, _StoreOpts, Opts} = setup_index_opts(), @@ -662,34 +2100,6 @@ empty_block_test_parallel() -> ), ok. -% ecdsa_no_data_test() -> -% {_TestStore, _StoreOpts, Opts} = setup_index_opts(), -% {ok, 1827904} = -% hb_ao:resolve( -% <<"~copycat@1.0/arweave&from=1827904&to=1827904">>, -% Opts -% ), -% assert_bundle_read( -% Opts, -% <<"VNhX_pSANk_8j0jZBR5bh_5jr-lkfbHDjtHd8FKqx7U">>, -% [ -% {<<"3xDKhrCQcPuBtcm1ipZS5C9gAfFYClgHuHOHAXGfchM">>, <<"1">>}, -% {<<"JantC8f89VE-RidArHnU9589gY5T37NDXnWpI7H_psc">>, <<"7">>} -% ] -% ), -% ok. - -% ecdsa_with_data_test() -> -% {_TestStore, _StoreOpts, Opts} = setup_index_opts(), -% Block = 1720431, -% fetch_and_process_block(Block, Block, Opts), -% {ok, Block} = -% hb_ao:resolve( -% <<"~copycat@1.0/arweave&from=", (hb_util:bin(Block))/binary, "&to=", (hb_util:bin(Block))/binary>>, -% Opts -% ), -% ok. - %% @doc Disabled because the test takes ~30 seconds to run. %% dev_arweave:get_tx_data_tag_exclude_data_test has some test coverage for %% handling an L1 TX with a data tag. 
@@ -737,7 +2147,7 @@ tx_with_no_data_test_parallel() -> "~copycat@1.0/arweave&" "from=", BlockBin/binary, "&" "to=", BlockBin/binary, "&" - "mode=write" + "mode=write&depth=2" >>, Opts ), @@ -806,7 +2216,7 @@ list_index_test_parallel() -> "~copycat@1.0/arweave&" "from=", BlockBin/binary, "&" "to=", BlockBin/binary, "&" - "mode=write" + "mode=write&depth=2" >>, Opts ), @@ -854,7 +2264,7 @@ auto_stop_on_indexed_block_test_parallel() -> "~copycat@1.0/arweave&" "from=", (hb_util:bin(IndexedBlock))/binary, "&" "to=", (hb_util:bin(IndexedBlock))/binary, "&" - "mode=write" + "mode=write&depth=2" >>, Opts ), @@ -863,7 +2273,7 @@ auto_stop_on_indexed_block_test_parallel() -> << "~copycat@1.0/arweave&" "from=", (hb_util:bin(Higher2))/binary, "&" - "mode=write" + "mode=write&depth=2" >>, Opts ), @@ -871,6 +2281,9 @@ auto_stop_on_indexed_block_test_parallel() -> ?assert(has_any_indexed_tx(Higher1, Opts)), ?assert(has_any_indexed_tx(IndexedBlock, Opts)), ?assertNot(has_any_indexed_tx(IndexedBlock-1, Opts)), + ?assert(hb_store_arweave:is_block_indexed(IndexedBlock, 2, Opts)), + ?assert(hb_store_arweave:is_block_indexed(Higher1, 2, Opts)), + ?assert(hb_store_arweave:is_block_indexed(Higher2, 2, Opts)), ok. explicit_to_reindexes_all_test_parallel() -> @@ -883,7 +2296,7 @@ explicit_to_reindexes_all_test_parallel() -> "~copycat@1.0/arweave&" "from=", (hb_util:bin(IndexedBlock))/binary, "&" "to=", (hb_util:bin(IndexedBlock))/binary, "&" - "mode=write" + "mode=write&depth=2" >>, Opts ), @@ -894,7 +2307,7 @@ explicit_to_reindexes_all_test_parallel() -> "~copycat@1.0/arweave&" "from=", (hb_util:bin(IndexedBlock+1))/binary, "&" "to=", (hb_util:bin(LowerBlock))/binary, "&" - "mode=write" + "mode=write&depth=2" >>, Opts ), @@ -904,12 +2317,13 @@ explicit_to_reindexes_all_test_parallel() -> %% @doc Manually write to the index to simulate a partially indexed block. %% This should also trigger a stop when the `to` option is omitted. 
auto_stop_partial_index_test_parallel() -> - {_TestStore, StoreOpts, Opts} = setup_index_opts(), + {IndexStore, StoreOpts, Opts} = setup_index_opts(), Block = 1826700, HigherBlock = Block + 1, NoIndexOpts = Opts#{ <<"arweave-index-ids">> => false, - <<"arweave-index-blocks">> => true + <<"arweave-index-blocks">> => true, + <<"index-headers">> => false }, {ok, Block} = hb_ao:resolve( @@ -917,7 +2331,7 @@ auto_stop_partial_index_test_parallel() -> "~copycat@1.0/arweave&" "from=", (hb_util:bin(Block))/binary, "&" "to=", (hb_util:bin(Block))/binary, "&" - "mode=write" + "mode=write&depth=2" >>, NoIndexOpts ), @@ -934,19 +2348,27 @@ auto_stop_partial_index_test_parallel() -> TXIDs = hb_maps:get(<<"txs">>, BlockData, [], Opts), ?assert(length(TXIDs) > 0), [OneTXID | _] = TXIDs, - hb_store_arweave:write_offset(StoreOpts, OneTXID, <<"tx@1.0">>, 0, 0), + ok = hb_store_arweave:write_offset(StoreOpts, OneTXID, <<"tx@1.0">>, 0, 0, Opts), + %% Write block depth marker, to indicate the block was previously indexed. + hb_store:write( + IndexStore, + #{hb_store_arweave:block_indexed_path(Block) => integer_to_binary(2)}, + Opts + ), {ok, Block} = hb_ao:resolve( << "~copycat@1.0/arweave&" "from=", (hb_util:bin(HigherBlock))/binary, "&" - "mode=write" + "mode=write&depth=2" >>, Opts ), ?assert(has_any_indexed_tx(HigherBlock, Opts)), ?assert(has_any_indexed_tx(Block, Opts)), ?assertNot(has_any_indexed_tx(Block-1, Opts)), + ?assert(hb_store_arweave:is_block_indexed(HigherBlock, 2, Opts)), + ?assert(hb_store_arweave:is_block_indexed(Block, 2, Opts)), ok. negative_parse_range_test_parallel() -> @@ -1037,39 +2459,368 @@ negative_resolved_height_test_parallel() -> hb_mock_server:stop(MockHandle) end.
-negative_from_index_test_parallel() -> +negative_from_index_test_parallel() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {ok, Tip} = latest_height(Opts), + StopBlock = 1827942, + StartBlock = 1827943, + OffsetFromTip = Tip - StartBlock, + ?assert(OffsetFromTip > 0), + NegativeFrom = <<"-", (hb_util:bin(OffsetFromTip))/binary>>, + {ok, StopBlock} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "from=", (hb_util:bin(StopBlock))/binary, "&" + "to=", (hb_util:bin(StopBlock))/binary, "&" + "mode=write&depth=2" + >>, + Opts + ), + {ok, StopBlock} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "from=", NegativeFrom/binary, "&" + "mode=write&depth=2" + >>, + Opts + ), + ?assert(has_any_indexed_tx(StartBlock, Opts)), + NextBlock = highest_contiguous_indexed_block(StopBlock, 50, Opts), + ?assertEqual(StartBlock, NextBlock), + assert_indexed_range(NextBlock, StopBlock, Opts), + ?assertNot(has_any_indexed_tx(StopBlock - 1, Opts)), + ?assertNot(has_any_indexed_tx(NextBlock + 1, Opts)), + ok. + +owner_alias_roundtrip_test_parallel() -> + Opts1 = + add_owner_alias( + <<"FPjbN7EVwP3XwQJx8qnKqJDYa4TLJ0Y8gu4AaiUuW1c">>, + <<"turbo">>, + #{} + ), + Opts2 = + add_owner_alias( + <<"JNC6vBhU4sAK5T49VL4k79vNer0tZjM8fI1gpqUQK5g">>, + <<"redstone">>, + Opts1 + ), + ?assertEqual( + {ok, normalize_owner_id(<<"FPjbN7EVwP3XwQJx8qnKqJDYa4TLJ0Y8gu4AaiUuW1c">>)}, + resolve_owner_alias(<<"turbo">>, Opts2) + ), + ?assertEqual( + {ok, normalize_owner_id(<<"JNC6vBhU4sAK5T49VL4k79vNer0tZjM8fI1gpqUQK5g">>)}, + resolve_owner_alias(<<"redstone">>, Opts2) + ), + ?assertEqual( + {error, {owner_alias_not_found, <<"unknown">>}}, + resolve_owner_alias(<<"unknown">>, Opts2) + ), + ok. 
+ +parse_tag_filter_test() -> + ?assertEqual( + {ok, #{name => <<"App-Name">>, value => <<"ao">>}}, + parse_tag_filter(<<"include-tag">>, #{<<"include-tag">> => <<"App-Name:ao">>}, #{}) + ), + ?assertEqual( + {ok, undefined}, + parse_tag_filter(<<"include-tag">>, #{}, #{}) + ), + ?assertEqual( + {error, invalid_tag_filter}, + parse_tag_filter(<<"include-tag">>, #{<<"include-tag">> => <<"App-Name">>}, #{}) + ), + ?assertEqual( + {error, invalid_tag_filter}, + parse_tag_filter(<<"include-tag">>, #{<<"include-tag">> => <<":ao">>}, #{}) + ), + ?assertEqual( + {error, invalid_tag_filter}, + parse_tag_filter(<<"include-tag">>, #{<<"include-tag">> => <<"App-Name:">>}, #{}) + ), + ok. + +l1_filter_reason_test() -> + Owner = <<"owner-1">>, + OtherOwner = <<"owner-2">>, + TX = #tx{ + owner = <<"non-default-owner">>, + owner_address = Owner, + tags = [ + {<<"App-Name">>, <<"ao">>}, + {<<"Bundler-App-Name">>, <<"Redstone">>} + ] + }, + IncludeTag = #{name => <<"App-Name">>, value => <<"ao">>}, + ExcludeTag = #{name => <<"Bundler-App-Name">>, value => <<"Redstone">>}, + ?assertEqual(pass, l1_filter_reason(TX, #{})), + ?assertEqual(pass, l1_filter_reason(TX, #{include_owner => Owner})), + ?assertEqual( + include_owner_mismatch, + l1_filter_reason(TX, #{include_owner => OtherOwner}) + ), + ?assertEqual( + exclude_owner_match, + l1_filter_reason(TX, #{exclude_owner => Owner}) + ), + ?assertEqual( + pass, + l1_filter_reason(TX, #{exclude_owner => OtherOwner}) + ), + ?assertEqual(pass, l1_filter_reason(TX, #{include_tag => IncludeTag})), + ?assertEqual( + include_tag_mismatch, + l1_filter_reason( + TX, + #{include_tag => #{name => <<"Content-Type">>, value => <<"text/plain">>}} + ) + ), + ?assertEqual( + exclude_tag_match, + l1_filter_reason(TX, #{exclude_tag => ExcludeTag}) + ), + ?assertEqual( + pass, + l1_filter_reason( + TX, + #{exclude_tag => #{name => <<"Content-Type">>, value => <<"text/plain">>}} + ) + ), + ?assertEqual( + exclude_tag_match, + l1_filter_reason( + TX, + 
#{include_tag => IncludeTag, exclude_tag => ExcludeTag} + ) + ), + ?assertEqual( + pass, + l1_filter_reason(TX, #{include_owner => [OtherOwner, Owner]}) + ), + ok. + +request_depth_clamping_test() -> + {_TestStore, _StoreOpts, Opts0} = setup_index_opts(), + ?assertEqual(6, request_depth(#{}, <<"full">>, Opts0)), + ?assertEqual( + 2, + request_depth(#{<<"depth">> => <<"2">>}, <<"full">>, Opts0) + ), + ?assertEqual( + 1, + request_depth(#{<<"depth">> => <<"0">>}, <<"full">>, Opts0) + ), + ?assertEqual( + 6, + request_depth(#{<<"depth">> => <<"999">>}, <<"full">>, Opts0) + ), + Opts1 = set_depth_recursion_cap(2, Opts0), + ?assertEqual(2, request_depth(#{}, <<"full">>, Opts1)), + % no recursion cap set, use default from hb_opts + ?assertEqual(6, request_depth(#{}, <<"full">>, #{})), + ok. + +id_depth_1_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {Block, TXID} = {1827942, <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>}, + ok = index_l1_offsets(Block, Opts), + {ok, Result} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "id=", TXID/binary, "&" + "mode=write&" + "depth=1" + >>, + Opts + ), + ?assertEqual(26, maps:get(items_count, Result)), + ?assertEqual(1, maps:get(bundle_count, Result)), + ?assertEqual(0, maps:get(skipped_count, Result)), + assert_bundle_read( + <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + [ + {<<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, <<"1">>}, + {<<"MgatoEjlO_YtdbxFi9Q7Hxbs0YQVcChddhSS7FsdeIg">>, <<"19">>}, + {<<"z-oKJfhMq5qoVFrljEfiBKgumaJmCWVxNJaavR5aPE8">>, <<"26">>} + ], + Opts + ), + % L3 item not read when doing L1 depth=1 + assert_item_not_read(<<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, Opts), + ok. 
+ +id_depth_2_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {Block, TXID} = {1827942, <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>}, + ok = index_l1_offsets(Block, Opts), + {ok, Result} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "id=", TXID/binary, "&" + "mode=write&" + "depth=2" + >>, + Opts + ), + ?assertEqual(52, maps:get(items_count, Result)), + ?assertEqual(1, maps:get(bundle_count, Result)), + ?assertEqual(0, maps:get(skipped_count, Result)), + assert_bundle_read( + <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + [ + {<<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, <<"1">>}, + {<<"MgatoEjlO_YtdbxFi9Q7Hxbs0YQVcChddhSS7FsdeIg">>, <<"19">>}, + {<<"z-oKJfhMq5qoVFrljEfiBKgumaJmCWVxNJaavR5aPE8">>, <<"26">>} + ], + Opts + ), + % L2 bundle and L3 children should be read when doing L1 with depth=2 + assert_bundle_read( + <<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, + [ + {<<"iS5R3iSKaCdcXG2nlKWsbdT1_uhQe54nMsgYK-ivEcE">>, <<"1">>}, + {<<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, <<"2">>} + ], + Opts + ), + ok. + +id_exclude_tag_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {Block, TXID} = {1827942, <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>}, + ok = index_l1_offsets(Block, Opts), + {ok, Result} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "id=", TXID/binary, "&" + "mode=write&" + "exclude-tag=App-Name:ArDrive%20Turbo&" + "depth=2" + >>, + Opts + ), + ?assertEqual(0, maps:get(items_count, Result)), + ?assertEqual(0, maps:get(bundle_count, Result)), + ?assertEqual(1, maps:get(skipped_count, Result)), + assert_item_not_read(<<"iS5R3iSKaCdcXG2nlKWsbdT1_uhQe54nMsgYK-ivEcE">>, Opts), + ok. 
+ +id_include_owner_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {Block, TXID} = {1827942, <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>}, + ok = index_l1_offsets(Block, Opts), + {ok, Included} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "id=", TXID/binary, "&" + "mode=write&" + "include-owner=JNC6vBhjHY1EPwV3pEeNmrsgFMxH5d38_LHsZ7jful8" + >>, + Opts + ), + ?assertEqual(52, maps:get(items_count, Included)), + ?assertEqual(1, maps:get(bundle_count, Included)), + ?assertEqual(0, maps:get(skipped_count, Included)), + {ok, Skipped} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "id=", TXID/binary, "&" + "mode=write&" + "include-owner=FPjbN7EVwP3XwQJx8qnKqJDYa4TLJ0Y8gu4AaiUuW1c" + >>, + Opts + ), + ?assertEqual(0, maps:get(items_count, Skipped)), + ?assertEqual(0, maps:get(bundle_count, Skipped)), + ?assertEqual(1, maps:get(skipped_count, Skipped)). + +id_missing_offset_without_load_test() -> {_TestStore, _StoreOpts, Opts} = setup_index_opts(), - {ok, Tip} = latest_height(Opts), - StopBlock = 1827942, - StartBlock = 1827943, - OffsetFromTip = Tip - StartBlock, - ?assert(OffsetFromTip > 0), - NegativeFrom = <<"-", (hb_util:bin(OffsetFromTip))/binary>>, - {ok, StopBlock} = + {_Block, TXID} = {1827942, <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>}, + {ok, Result} = hb_ao:resolve( << "~copycat@1.0/arweave&" - "from=", (hb_util:bin(StopBlock))/binary, "&" - "to=", (hb_util:bin(StopBlock))/binary, "&" + "id=", TXID/binary, "&" "mode=write" >>, Opts ), - {ok, StopBlock} = + ?assertEqual(0, maps:get(items_count, Result)), + ?assertEqual(0, maps:get(bundle_count, Result)), + ?assertEqual(1, maps:get(skipped_count, Result)), + assert_item_not_read(<<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, Opts), + ok. 
+ +id_missing_offset_with_load_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {_Block, TXID} = {1827942, <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>}, + {ok, Result} = hb_ao:resolve( << "~copycat@1.0/arweave&" - "from=", NegativeFrom/binary, "&" - "mode=write" + "id=", TXID/binary, "&" + "mode=write&" + "query-l1-offset=true&" + "depth=2" + >>, + Opts + ), + ?assertEqual(52, maps:get(items_count, Result)), + ?assertEqual(1, maps:get(bundle_count, Result)), + ?assertEqual(0, maps:get(skipped_count, Result)), + assert_bundle_read( + <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + [ + {<<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, <<"1">>}, + {<<"MgatoEjlO_YtdbxFi9Q7Hxbs0YQVcChddhSS7FsdeIg">>, <<"19">>}, + {<<"z-oKJfhMq5qoVFrljEfiBKgumaJmCWVxNJaavR5aPE8">>, <<"26">>} + ], + Opts + ), + % L2 bundle and L3 children should be read when doing L1 with depth=2 + assert_bundle_read( + <<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, + [ + {<<"iS5R3iSKaCdcXG2nlKWsbdT1_uhQe54nMsgYK-ivEcE">>, <<"1">>}, + {<<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, <<"2">>} + ], + Opts + ), + ok. + +parse_owner_filter_unknown_alias_test() -> + ?assertEqual( + {error, {owner_alias_not_found, <<"nonexistent">>}}, + parse_owner_filter( + #{<<"include-owner-alias">> => <<"nonexistent">>}, + #{} + ) + ), + ok. + +index_l1_offsets(Block, Opts) -> + BlockBin = hb_util:bin(Block), + {ok, Block} = + hb_ao:resolve( + << + "~copycat@1.0/arweave&" + "from=", BlockBin/binary, "&" + "to=", BlockBin/binary, "&" + "mode=write&" + "depth=1" >>, Opts ), - ?assert(has_any_indexed_tx(StartBlock, Opts)), - NextBlock = highest_contiguous_indexed_block(StopBlock, 50, Opts), - ?assertEqual(StartBlock, NextBlock), - assert_indexed_range(NextBlock, StopBlock, Opts), - ?assertNot(has_any_indexed_tx(StopBlock - 1, Opts)), - ?assertNot(has_any_indexed_tx(NextBlock + 1, Opts)), ok. 
setup_index_opts() -> @@ -1105,7 +2856,8 @@ setup_index_opts() -> Opts = #{ <<"store">> => Store, <<"arweave-index-ids">> => true, - <<"arweave-index-store">> => StoreOpts + <<"arweave-index-store">> => StoreOpts, + <<"index-headers">> => false }, {TestStore, StoreOpts, Opts}. @@ -1121,7 +2873,9 @@ assert_bundle_read(BundleID, ExpectedItems, Opts) -> lists:foreach( fun({{_ItemID, Index}, Item}) -> QueriedItem = hb_ao:get(Index, Bundle, Opts), - ?assertEqual(hb_maps:without(?AO_CORE_KEYS, Item), hb_maps:without(?AO_CORE_KEYS, QueriedItem)) + ?assertEqual( + hb_maps:without(?AO_CORE_KEYS, Item), + hb_maps:without(?AO_CORE_KEYS, QueriedItem)) end, lists:zip(ExpectedItems, ReadItems) ), @@ -1129,19 +2883,32 @@ assert_bundle_read(BundleID, ExpectedItems, Opts) -> assert_item_read(ItemID, Opts) -> ?event(debug_test, {resolving, {explicit, ItemID}}), - Resolved = hb_ao:resolve(ItemID, Opts), - ?assertMatch({ok, _}, Resolved, ItemID), - {ok, Item} = Resolved, + ReadResult = hb_store_arweave:read( + hb_store_arweave:store_from_opts(Opts), + #{<<"read">> => ItemID}, + Opts + ), + ?assertMatch({ok, _}, ReadResult, ItemID), + {ok, Item} = ReadResult, ?event(debug_test, {item, Item}), ?assert(hb_message:verify(Item, all, Opts)), ?assertEqual(ItemID, hb_message:id(Item, signed)), Item. +assert_item_not_read(ItemID, Opts) -> + ReadResult = hb_store_arweave:read( + hb_store_arweave:store_from_opts(Opts), + #{<<"read">> => ItemID}, + Opts + ), + ?assertEqual({error, not_found}, ReadResult), + ok. + has_any_indexed_tx(Height, Opts) -> case fetch_block_header(Height, Opts) of {ok, Block} -> TXIDs = hb_maps:get(<<"txs">>, Block, [], Opts), - lists:any(fun(TXID) -> is_tx_indexed(TXID, Opts) end, TXIDs); + lists:any(fun(TXID) -> hb_store_arweave:is_tx_indexed(TXID, Opts) end, TXIDs); {error, _} -> false end. 
@@ -1170,3 +2937,497 @@ assert_indexed_range(From, To, _Opts) when From < To -> assert_indexed_range(From, To, Opts) -> ?assert(has_any_indexed_tx(From, Opts)), assert_indexed_range(From - 1, To, Opts). + +block_marker_default_depth_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=2">>, + Opts + ), + ?assert(hb_store_arweave:is_block_indexed(Block, 2, Opts)), + ?assertNot(hb_store_arweave:is_block_indexed(Block, 3, Opts)), + ok. + +depth_1_normalizes_to_2_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + TX1 = #tx{ + format = 2, + id = crypto:strong_rand_bytes(32), + data_size = 100, + tags = [] + }, + TX2 = #tx{ + format = 2, + id = crypto:strong_rand_bytes(32), + data_size = 200, + tags = [] + }, + Tuples = [ + {{TX1, <<>>}, 100}, + {{TX2, <<>>}, 300} + ], + Result = process_block_txs(Tuples, 0, 1, 88888888, Opts), + ?assertEqual(2, maps:get(achieved_depth, Result)), + Height = 88888888, + hb_store_arweave:mark_block_indexed(Height, maps:get(achieved_depth, Result), Opts), + ?assert(hb_store_arweave:is_block_indexed(Height, 1, Opts)), + ?assert(hb_store_arweave:is_block_indexed(Height, 2, Opts)), + ?assertNot(hb_store_arweave:is_block_indexed(Height, 3, Opts)), + ok. + +achieved_depth_block_depth_2_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=2">>, + Opts + ), + ?assert(hb_store_arweave:is_block_indexed(Block, 2, Opts)), + ?assertNot(hb_store_arweave:is_block_indexed(Block, 3, Opts)), + ok. 
+ + +achieved_depth_block_depth_3_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=3">>, + Opts + ), + ?assert(hb_store_arweave:is_block_indexed(Block, 3, Opts)), + ok. + +invalid_bundle_bytes_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + StoreOpts = hb_store_arweave:store_from_opts(Opts), + ?assertEqual( + {error, invalid_bundle_header}, + index_full_bundle_bytes(<<"not a bundle">>, 0, 2, StoreOpts, <<0:256>>, Opts) + ), + ok. + +small_block_depth_3_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1889322, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=3">>, + Opts + ), + ?assert(hb_store_arweave:is_block_indexed(Block, 3, Opts)), + #{ <<"index-store">> := Store } = hb_store_arweave:store_from_opts(Opts), + {ok, L1Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(Block, 1), Opts), + ?assert(length(hb_store_arweave:decode_item_ids(L1Bin)) > 0), + {ok, L2Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(Block, 2), Opts), + ?assert(length(hb_store_arweave:decode_item_ids(L2Bin)) > 0), + {ok, L3Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(Block, 3), Opts), + L3IDs = hb_store_arweave:decode_item_ids(L3Bin), + ?assertEqual(3, length(L3IDs)), + assert_item_read( + <<"npAzk_BomjWBQQr_xnmlhdxjyl97EJnNv_MAaXffs1s">>, + Opts), + ok. 
+ +no_mismatch_flags_on_valid_bundles_test() -> + {_TestStore, StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=3">>, + Opts + ), + #{ <<"index-store">> := IndexStore } = StoreOpts, + ItemID = hb_util:native_id( + <<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>), + ?assertEqual( + {error, not_found}, + hb_store:read( + IndexStore, + hb_store_arweave_offset:mismatch_path(ItemID), + Opts + ) + ), + ok. + +mismatch_path_encoding_test() -> + ID = crypto:strong_rand_bytes(32), + Path = hb_store_arweave_offset:mismatch_path(ID), + ?assert(binary:match(Path, <<"mismatch/">>) =/= nomatch), + ok. + +exact_marker_depth_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=3">>, + Opts + ), + #{ <<"index-store">> := Store } = + hb_store_arweave:store_from_opts(Opts), + {ok, StoredBin} = + hb_store:read(Store, hb_store_arweave:block_indexed_path(Block), Opts), + StoredDepth = binary_to_integer(StoredBin), + ?assertEqual(3, StoredDepth), + ok. 
+ +fabricated_mismatch_test() -> + {_TestStore, StoreOpts, Opts} = setup_index_opts(), + {Priv, Pub} = ar_wallet:new(), + Target = crypto:strong_rand_bytes(32), + Anchor = crypto:strong_rand_bytes(32), + Item = ar_bundles:sign_item( + ar_bundles:new_item(Target, Anchor, [], <<"test data">>), + {Priv, Pub} + ), + ItemBinary = ar_bundles:serialize(Item), + RealID = crypto:hash(sha256, Item#tx.signature), + FakeID = crypto:strong_rand_bytes(32), + EncodedFakeID = hb_util:encode(FakeID), + #{ <<"index-store">> := IndexStore } = StoreOpts, + validate_and_flag_item_id(ItemBinary, FakeID, EncodedFakeID, IndexStore), + {ok, StoredActualID} = + hb_store:read( + IndexStore, + hb_store_arweave_offset:mismatch_path(FakeID), + Opts + ), + ?assertEqual(RealID, StoredActualID), + ?assertEqual( + {error, not_found}, + hb_store:read( + IndexStore, + hb_store_arweave_offset:mismatch_path(RealID), + Opts + ) + ), + ok. + +block_item_ids_depth_2_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {ok, 1827942} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942&depth=2">>, + Opts + ), + #{ <<"index-store">> := Store } = hb_store_arweave:store_from_opts(Opts), + {ok, L1Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(1827942, 1), Opts), + L1IDs = hb_store_arweave:decode_item_ids(L1Bin), + ?assert(length(L1IDs) > 0), + {ok, L2Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(1827942, 2), Opts), + L2IDs = hb_store_arweave:decode_item_ids(L2Bin), + ?assert(length(L2IDs) > 0), + L2Encoded = [hb_util:encode(ID) || ID <- L2IDs], + Pos54K = index_of(<<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, L2Encoded), + PosOBK = index_of(<<"OBKr-7UrmjxFD-h-qP-XLuvCgtyuO_IDpBMgIytvusA">>, L2Encoded), + ?assert(is_integer(Pos54K)), + ?assert(is_integer(PosOBK)), + ?assert(Pos54K < PosOBK), + ?assertEqual({error, not_found}, hb_store:read(Store, hb_store_arweave:block_items_path(1827942, 3), Opts)), + ok. 
+ +block_item_ids_depth_3_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {ok, 1827942} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942&depth=3">>, + Opts + ), + #{ <<"index-store">> := Store } = hb_store_arweave:store_from_opts(Opts), + {ok, L1Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(1827942, 1), Opts), + L1Count = length(hb_store_arweave:decode_item_ids(L1Bin)), + ?assertEqual(5, L1Count), + {ok, L2Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(1827942, 2), Opts), + L2Count = length(hb_store_arweave:decode_item_ids(L2Bin)), + ?assert(L2Count > 0), + {ok, L3Bin} = hb_store:read(Store, hb_store_arweave:block_items_path(1827942, 3), Opts), + L3Count = length(hb_store_arweave:decode_item_ids(L3Bin)), + ?assert(L3Count >= 1), + L3IDs = hb_store_arweave:decode_item_ids(L3Bin), + L3Encoded = [hb_util:encode(ID) || ID <- L3IDs], + ?assert(lists:member( + <<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, L3Encoded)), + ok. + +list_index_with_items_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {ok, 1827942} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942">>, + Opts + ), + {ok, ListResult} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942&mode=list">>, + Opts + ), + Body = hb_json:decode(hb_maps:get(<<"body">>, ListResult)), + BlockInfo = maps:get(<<"1827942">>, Body), + ?assert(is_integer(maps:get(<<"depth">>, BlockInfo))), + Items = maps:get(<<"items">>, BlockInfo), + ?assert(maps:get(<<"1">>, Items) > 0), + ?assert(maps:get(<<"2">>, Items) > 0), + ok. 
+ +inventory_single_block_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + {ok, 1827942} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942">>, + Opts + ), + {ok, InvResult} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942&mode=inventory">>, + Opts + ), + Body = hb_json:decode(hb_maps:get(<<"body">>, InvResult)), + BlockInfo = maps:get(<<"1827942">>, Body), + ?assert(is_integer(maps:get(<<"depth">>, BlockInfo))), + Items = maps:get(<<"items">>, BlockInfo), + L1Items = maps:get(<<"1">>, Items), + ?assert(is_list(L1Items)), + ?assert(length(L1Items) > 0), + L2Items = maps:get(<<"2">>, Items), + ?assert(is_list(L2Items)), + ?assert(length(L2Items) > 0), + ?assertEqual(5, length(L1Items)), + ?assert(lists:member( + <<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, L2Items)), + ok. + +inventory_range_test() -> + {_TestStore, StoreOpts, Opts} = setup_index_opts(), + #{ <<"index-store">> := Store } = StoreOpts, + ok = hb_store:write(Store, #{hb_store_arweave:block_indexed_path(77777777) => <<"2">>}, Opts), + ok = hb_store:write(Store, #{hb_store_arweave:block_items_path(77777777, 1) => <<0:256>>}, Opts), + ok = hb_store:write(Store, #{hb_store_arweave:block_items_path(77777777, 2) => <<>>}, Opts), + ok = hb_store:write(Store, #{hb_store_arweave:block_indexed_path(77777778) => <<"2">>}, Opts), + ok = hb_store:write(Store, #{hb_store_arweave:block_items_path(77777778, 1) => <<1:256>>}, Opts), + ok = hb_store:write(Store, #{hb_store_arweave:block_items_path(77777778, 2) => <<>>}, Opts), + {ok, InvResult} = inventory_index(77777778, 77777777, Opts), + Body = hb_json:decode(hb_maps:get(<<"body">>, InvResult)), + ?assert(maps:is_key(<<"77777777">>, Body)), + ?assert(maps:is_key(<<"77777778">>, Body)), + ?assertEqual(2, maps:get(<<"depth">>, maps:get(<<"77777777">>, Body))), + ?assertEqual(2, maps:get(<<"depth">>, maps:get(<<"77777778">>, Body))), + ok. 
+ +parent_depth_2_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=2">>, + Opts + ), + StoreOpts2 = hb_store_arweave:store_from_opts(Opts), + {ok, InvResult} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&mode=inventory">>, + Opts + ), + Body = hb_json:decode(hb_maps:get(<<"body">>, InvResult)), + BlockInfo = maps:get(hb_util:bin(Block), Body), + L1Items = maps:get(<<"1">>, maps:get(<<"items">>, BlockInfo)), + L1ID = hb_util:decode(hd(L1Items)), + {ok, [{Block, block}]} = hb_store_arweave:read_parent(StoreOpts2, L1ID, Opts), + L2Items = maps:get(<<"2">>, maps:get(<<"items">>, BlockInfo)), + case L2Items of + [] -> ok; + [FirstL2 | _] -> + L2ID = hb_util:decode(FirstL2), + {ok, [{L2Parent, bundle}]} = + hb_store_arweave:read_parent(StoreOpts2, L2ID, Opts), + ?assert(lists:member( + hb_util:encode(L2Parent), L1Items)) + end, + ok. 
+ +parent_depth_3_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1889322, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=3">>, + Opts + ), + StoreOpts2 = hb_store_arweave:store_from_opts(Opts), + {ok, InvResult} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&mode=inventory">>, + Opts + ), + Body = hb_json:decode(hb_maps:get(<<"body">>, InvResult)), + BlockInfo = maps:get(hb_util:bin(Block), Body), + L3Items = maps:get(<<"3">>, maps:get(<<"items">>, BlockInfo)), + ?assert(length(L3Items) > 0), + L2Items = maps:get(<<"2">>, maps:get(<<"items">>, BlockInfo)), + L3ID = hb_util:decode(hd(L3Items)), + {ok, [{L3Parent, bundle}]} = + hb_store_arweave:read_parent(StoreOpts2, L3ID, Opts), + ?assert(lists:member(hb_util:encode(L3Parent), L2Items)), + ok. + +parent_corrupt_data_test() -> + ?assertEqual([], hb_store_arweave:decode_parent_entries(<<>>)), + ?assertEqual( + {error, corrupt_parent_data}, + hb_store_arweave:decode_parent_entries(<<5, 1, 2, 3>>)), + Truncated = <<0, 1, 2, 3>>, + ?assertEqual( + {error, corrupt_parent_data}, + hb_store_arweave:decode_parent_entries(Truncated)), + ValidThenCorrupt = <<0, 100:64/big-unsigned, 99>>, + ?assertEqual( + {error, corrupt_parent_data}, + hb_store_arweave:decode_parent_entries(ValidThenCorrupt)), + ok. 
+ +parent_endpoint_block_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=2">>, + Opts + ), + {ok, InvResult} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&mode=inventory">>, + Opts + ), + InvBody = hb_json:decode(hb_maps:get(<<"body">>, InvResult)), + BlockInfo = maps:get(hb_util:bin(Block), InvBody), + L1Items = maps:get(<<"1">>, maps:get(<<"items">>, BlockInfo)), + L1EncodedID = hd(L1Items), + {ok, ParentResult} = + hb_ao:resolve( + <<"~arweave@2.9/parent=", L1EncodedID/binary>>, + Opts + ), + ?assertEqual( + <<"application/json">>, + hb_maps:get(<<"content-type">>, ParentResult)), + Body = hb_json:decode(hb_maps:get(<<"body">>, ParentResult)), + Parents = maps:get(<<"parents">>, Body), + ?assertEqual(1, length(Parents)), + [Entry] = Parents, + ?assertEqual(<<"block">>, maps:get(<<"type">>, Entry)), + ?assertEqual(Block, maps:get(<<"height">>, Entry)), + ok. 
+ +parent_endpoint_bundle_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + Block = 1827942, + {ok, Block} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&depth=2">>, + Opts + ), + {ok, InvResult} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=", + (hb_util:bin(Block))/binary, "&to=", + (hb_util:bin(Block))/binary, "&mode=inventory">>, + Opts + ), + InvBody = hb_json:decode(hb_maps:get(<<"body">>, InvResult)), + BlockInfo = maps:get(hb_util:bin(Block), InvBody), + L1Items = maps:get(<<"1">>, maps:get(<<"items">>, BlockInfo)), + L2Items = maps:get(<<"2">>, maps:get(<<"items">>, BlockInfo)), + ?assert(length(L2Items) > 0), + L2EncodedID = hd(L2Items), + {ok, ParentResult} = + hb_ao:resolve( + <<"~arweave@2.9/parent=", L2EncodedID/binary>>, + Opts + ), + ?assertEqual( + <<"application/json">>, + hb_maps:get(<<"content-type">>, ParentResult)), + Body = hb_json:decode(hb_maps:get(<<"body">>, ParentResult)), + [Entry] = maps:get(<<"parents">>, Body), + ?assertEqual(<<"bundle">>, maps:get(<<"type">>, Entry)), + ParentID = maps:get(<<"id">>, Entry), + ?assert(lists:member(ParentID, L1Items)), + ok. + +parent_endpoint_not_found_test() -> + {_TestStore, _StoreOpts, Opts} = setup_index_opts(), + FakeID = <<"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA">>, + ?assertEqual( + {error, not_found}, + hb_ao:resolve( + <<"~arweave@2.9/parent=", FakeID/binary>>, + Opts + ) + ), + ok. 
+ +strip_preserves_verify_test_parallel() -> + {_TestStore, _StoreOpts, DefaultOpts} = setup_index_opts(), + Opts = DefaultOpts#{<<"index-headers">> => true}, + + {ok, 1827942} = + hb_ao:resolve( + <<"~copycat@1.0/arweave&from=1827942&to=1827942&mode=write&depth=3">>, + Opts + ), + L1ID = <<"bXEgFm4K2b5VD64skBNAlS3I__4qxlM3Sm4Z5IXj3h8">>, + L2ID = <<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, + L3ID = <<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, + lists:foreach( + fun(ID) -> + %% mode=write now materialises the header inline, so no + %% separate index_headers call is needed. + {ok, HeaderMsg} = hb_cache:read(ID, hb_store:scope(Opts, local)), + ?event( + {verify_msg, + {id, ID}, + {unsigned, hb_message:id(HeaderMsg, unsigned, Opts)}, + {signed, hb_message:id(HeaderMsg, signed, Opts)}, + {all, hb_message:id(HeaderMsg, all, Opts)} + } + ), + ?assert(hb_message:verify(HeaderMsg, all, Opts), {verify_failed, ID}), + ?assertEqual(ID, hb_message:id(HeaderMsg, signed, Opts)) + end, + [L1ID, L3ID, L2ID] + ). + +index_of(Elem, List) -> index_of(Elem, List, 1). + +index_of(_Elem, [], _N) -> not_found; +index_of(Elem, [Elem | _], N) -> N; +index_of(Elem, [_ | Rest], N) -> index_of(Elem, Rest, N + 1). 
diff --git a/src/preloaded/query/dev_query_arweave.erl b/src/preloaded/query/dev_query_arweave.erl index 03accc358..4c0544fd0 100644 --- a/src/preloaded/query/dev_query_arweave.erl +++ b/src/preloaded/query/dev_query_arweave.erl @@ -54,7 +54,15 @@ query(Obj, <<"transactions">>, Args, Opts) -> {field, <<"transactions">>}, {args, Args} }), - Matches = match_args(Args, Opts), + Matches = + case has_set_filter(Args, Opts) of + true -> match_args(Args, Opts); + false -> + enumerate_block_range( + hb_maps:get(<<"block">>, Args, undefined, Opts), + Opts + ) + end, WithExplicit = case explicit_ids(Args, Opts) of [] -> Matches; @@ -78,9 +86,16 @@ query(Obj, <<"transactions">>, Args, Opts) -> ?event({transactions_matches, Matches}), {ok, connection(Ordered, Args, Opts)}; query(Obj, <<"block">>, Args, Opts) -> - case query(Obj, <<"blocks">>, Args, Opts) of - {ok, []} -> {ok, null}; - {ok, [Msg|_]} -> {ok, Msg} + case hb_maps:get(<<"id">>, Args, undefined, Opts) of + undefined -> + %% `block' field on a transaction node: resolve its containing + %% block by walking the parent index up to the L1 block. + tx_containing_block(Obj, Opts); + _ -> + case query(Obj, <<"blocks">>, Args, Opts) of + {ok, []} -> {ok, null}; + {ok, [Msg|_]} -> {ok, Msg} + end end; query(Obj, <<"blocks">>, Args, Opts) -> ?event({blocks, @@ -93,7 +108,7 @@ query(Obj, <<"blocks">>, Args, Opts) -> Blocks = lists:filtermap( fun(Match) -> - case hb_cache:read(Match, Opts) of + case hb_cache:read(Match, local_opts(Opts)) of {ok, Msg} -> {true, Msg}; _ -> false end @@ -110,46 +125,36 @@ query(Block, <<"height">>, _Args, Opts) -> query(Block, <<"timestamp">>, _Args, Opts) -> {ok, hb_maps:get(<<"timestamp">>, Block, null, Opts)}; query(Msg, <<"signature">>, _Args, Opts) -> - % Return the signature of the transaction. - % Other TX access methods are defined below. 
- case hb_message:commitments(#{ <<"committer">> => '_' }, Msg, Opts) of + case first_commitment(<<"committer">>, Msg, Opts) of not_found -> {ok, null}; - Commitments -> - case hb_maps:keys(Commitments) of - [] -> {ok, null}; - [CommID | _] -> - {ok, Commitment} = hb_maps:find(CommID, Commitments, Opts), - hb_maps:find(<<"signature">>, Commitment, Opts) - end + {ok, Commitment} -> hb_maps:find(<<"signature">>, Commitment, Opts) end; query(Msg, <<"owner">>, _Args, Opts) -> - ?event({query_owner, Msg}), - case hb_message:commitments(#{ <<"committer">> => '_' }, Msg, Opts) of + case first_commitment(<<"committer">>, Msg, Opts) of not_found -> {ok, null}; - Commitments -> - case hb_maps:keys(Commitments) of - [] -> {ok, null}; - [CommID | _] -> - {ok, Commitment} = hb_maps:find(CommID, Commitments, Opts), - {ok, Address} = hb_maps:find(<<"committer">>, Commitment, Opts), - {ok, KeyID} = hb_maps:find(<<"keyid">>, Commitment, Opts), - Key = hb_util:remove_scheme_prefix(KeyID), - {ok, #{ - <<"address">> => Address, - <<"key">> => Key - }} - end + {ok, Commitment} -> + {ok, Address} = hb_maps:find(<<"committer">>, Commitment, Opts), + {ok, KeyID} = hb_maps:find(<<"keyid">>, Commitment, Opts), + {ok, #{ + <<"address">> => Address, + <<"key">> => hb_util:remove_scheme_prefix(KeyID) + }} end; query(#{ <<"key">> := Key }, <<"key">>, _Args, _Opts) -> {ok, Key}; query(#{ <<"address">> := Address }, <<"address">>, _Args, _Opts) -> {ok, Address}; query(Msg, <<"fee">>, _Args, Opts) -> - {ok, hb_maps:get(<<"fee">>, Msg, 0, Opts)}; + case find_field_key(<<"field-reward">>, Msg, Opts) of + {ok, null} -> {ok, 0}; + {ok, Reward} -> hb_util:safe_int(Reward) + end; query(Msg, <<"quantity">>, _Args, Opts) -> {ok, hb_maps:get(<<"quantity">>, Msg, 0, Opts)}; query(Number, <<"winston">>, _Args, _Opts) when is_number(Number) -> {ok, Number}; +query(Number, <<"ar">>, _Args, _Opts) when is_number(Number) -> + {ok, winston_to_ar(Number)}; query(Msg, <<"recipient">>, _Args, Opts) -> case 
find_field_key(<<"field-target">>, Msg, Opts) of {ok, null} -> {ok, <<"">>}; @@ -158,7 +163,7 @@ query(Msg, <<"recipient">>, _Args, Opts) -> query(Msg, <<"anchor">>, _Args, Opts) -> case find_field_key(<<"field-anchor">>, Msg, Opts) of {ok, null} -> {ok, <<"">>}; - {ok, Anchor} -> {ok, hb_util:human_id(Anchor)} + {ok, Anchor} -> encode_anchor(Anchor) end; query(Msg, <<"data">>, _Args, Opts) -> Data = @@ -184,20 +189,40 @@ query(Obj, Field, Args, _Opts) -> }), {ok, <<"Not implemented.">>}. -%% @doc Find and return a value from the fields of a message (from its -%% commitments). -find_field_key(Field, Msg, Opts) -> - case hb_message:commitments(#{ Field => '_' }, Msg, Opts) of - not_found -> {ok, null}; +%% @doc Encode a transaction anchor (`last_tx`) for the GraphQL response. +%% Per the Arweave spec, an anchor is one of: +%% - empty (first TX from a wallet), +%% - a 32-byte raw TX ID (the wallet's last outgoing TX), or +%% - a 48-byte raw block hash (any of the last 50 blocks). +%% The cached value may already be base64url-encoded (43 / 64 chars). Other +%% sizes are not valid per the spec. +encode_anchor(<<>>) -> {ok, <<>>}; +encode_anchor(Bin) when is_binary(Bin), byte_size(Bin) == 32 -> {ok, hb_util:encode(Bin)}; +encode_anchor(Bin) when is_binary(Bin), byte_size(Bin) == 48 -> {ok, hb_util:encode(Bin)}; +encode_anchor(Bin) when is_binary(Bin), byte_size(Bin) == 43 -> {ok, Bin}; +encode_anchor(Bin) when is_binary(Bin), byte_size(Bin) == 64 -> {ok, Bin}; +encode_anchor(Other) -> {error, <<"invalid_anchor: ", Other/binary>>}. + +%% @doc Return the first commitment of a message matching `MatchField', or +%% `not_found'. Centralizes the commitments lookup used by the field accessors. 
+first_commitment(MatchField, Msg, Opts) -> + case hb_message:commitments(#{ MatchField => '_' }, Msg, Opts) of + not_found -> not_found; Commitments -> case hb_maps:keys(Commitments) of - [] -> {ok, null}; - [CommID | _] -> - {ok, Commitment} = hb_maps:find(CommID, Commitments, Opts), - case hb_maps:find(Field, Commitment, Opts) of - {ok, Value} -> {ok, Value}; - error -> {ok, null} - end + [] -> not_found; + [CommID | _] -> hb_maps:find(CommID, Commitments, Opts) + end + end. + +%% @doc Find and return a committed field value from a message, or null. +find_field_key(Field, Msg, Opts) -> + case first_commitment(Field, Msg, Opts) of + not_found -> {ok, null}; + {ok, Commitment} -> + case hb_maps:find(Field, Commitment, Opts) of + {ok, Value} -> {ok, Value}; + error -> {ok, null} end end. @@ -207,7 +232,7 @@ connection(Ordered, Args, Opts) -> ResultsCount = length(Ordered), {DroppedCount, Remaining} = drop_to_cursor(Args, Ordered, Opts), CountToReturn = page_size(Args, Opts), - ResultsPage = read_ids(Remaining, CountToReturn, Opts), + ResultsPage = read_ids(Remaining, CountToReturn, local_opts(Opts)), #{ <<"count">> => hb_util:bin(ResultsCount), <<"edges">> => ResultsPage, @@ -274,8 +299,9 @@ sort_offset_annotated(AnnotatedIDs, SortOrder, _Opts) -> ), Ascending = lists:sort( - fun(#{ <<"offset">> := OffsetA }, #{ <<"offset">> := OffsetB }) -> - OffsetA < OffsetB + fun(#{ <<"offset">> := OffsetA, <<"id">> := IdA }, + #{ <<"offset">> := OffsetB, <<"id">> := IdB }) -> + {OffsetA, IdA} =< {OffsetB, IdB} end, WithOffset ), @@ -291,7 +317,12 @@ sort_offset_annotated(AnnotatedIDs, SortOrder, _Opts) -> {without_offset, length(WithoutOffset)} } ), - UserOrderSorted ++ WithoutOffset. + StableWithout = + lists:sort( + fun(#{ <<"id">> := IdA }, #{ <<"id">> := IdB }) -> IdA =< IdB end, + WithoutOffset + ), + UserOrderSorted ++ StableWithout. 
%% @doc Convert a block height range (`#{<<"min">> => Min, <<"max">> => Max}') %% into weave byte offset boundaries `{StartOffset, EndOffset}'. Notably, the @@ -342,18 +373,7 @@ block_range_to_offset_range(Heights, Opts) -> read_block(Height, Opts) -> case read_cached_block(Height, Opts) of {ok, Block} -> {ok, Block}; - {error, not_found} -> - case hb_opts:get(query_arweave_remote_block_ranges, true, Opts) of - true -> - ?event({read_block_remote, {height, Height}}), - hb_ao:resolve( - #{ <<"device">> => <<"arweave@2.9">> }, - #{ <<"path">> => <<"block">>, <<"block">> => Height }, - Opts - ); - _ -> not_found - end; - not_found -> + _NotCached -> case hb_opts:get(query_arweave_remote_block_ranges, true, Opts) of true -> ?event({read_block_remote, {height, Height}}), @@ -436,6 +456,7 @@ match_args([{Field, X} | Rest], Acc, Opts) -> ?event({match, {field, Field}, {arg, X}}), case match(Field, X, Opts) of {ok, Result} -> match_args(Rest, [Result | Acc], Opts); + not_found -> match_args(Rest, [[] | Acc], Opts); _Error -> match_args(Rest, Acc, Opts) end. @@ -467,7 +488,10 @@ match(<<"id">>, ID, _Opts) -> match(<<"ids">>, IDs, _Opts) -> {ok, IDs}; match(<<"tags">>, Tags, Opts) -> - hb_cache:match(dev_query_graphql:keys_to_template(Tags), Opts); + case lists:any(fun(T) -> is_multi_value_tag(T, Opts) end, Tags) of + false -> hb_cache:match(dev_query_graphql:keys_to_template(Tags), Opts); + true -> {ok, intersect_id_sets([tag_filter_ids(T, Opts) || T <- Tags])} + end; match(<<"owners">>, Owners, Opts) -> {ok, matching_commitments(<<"committer">>, Owners, Opts)}; match(<<"owner">>, Owner, Opts) -> @@ -479,6 +503,44 @@ match(<<"recipients">>, Recipients, Opts) -> match(UnsupportedFilter, _, _) -> throw({unsupported_query_filter, UnsupportedFilter}). +%% @doc True if a tag filter supplies more than one value (Arweave OR match). 
+is_multi_value_tag(Tag, Opts) ->
+    case hb_maps:get(<<"values">>, Tag, undefined, Opts) of
+        [_, _ | _] -> true;
+        _ -> false
+    end.
+
+%% @doc The IDs matching a single tag filter: the union over its values.
+tag_filter_ids(Tag, Opts) ->
+    Name = hb_maps:get(<<"name">>, Tag, undefined, Opts),
+    NormName = hb_util:to_lower(hb_ao:normalize_key(Name)),
+    lists:foldl(
+        fun(Value, Acc) ->
+            case hb_cache:match(#{ NormName => Value }, Opts) of
+                {ok, IDs} -> hb_util:unique(IDs ++ Acc);
+                _ -> Acc
+            end
+        end,
+        [],
+        tag_filter_values(Tag, Opts)
+    ).
+
+%% @doc The values of a tag filter, accepting either `values' or singular `value'.
+tag_filter_values(Tag, Opts) ->
+    case hb_maps:get(<<"values">>, Tag, undefined, Opts) of
+        Values when is_list(Values) -> Values;
+        _ ->
+            case hb_maps:get(<<"value">>, Tag, undefined, Opts) of
+                undefined -> [];
+                Value -> [Value]
+            end
+    end.
+
+%% @doc Intersect a list of ID sets (AND across tag filters).
+intersect_id_sets([]) -> [];
+intersect_id_sets([First | Rest]) ->
+    lists:foldl(fun(Set, Acc) -> hb_util:list_with(Set, Acc) end, First, Rest).
+
 %%% Block range post-filter
 
 %% @doc Offset-annotate a list of IDs, returning {StartOffset, ID} pairs.
@@ -568,13 +630,24 @@ matching_commitments(Field, Value, Opts) when is_binary(Value) ->
                     {ids, IDs}
                 }
             ),
-            lists:map(fun(ID) -> commitment_id_to_base_id(ID, Opts) end, IDs);
+            lists:filtermap(
+                fun(ID) ->
+                    case commitment_id_to_base_id(ID, Opts) of
+                        not_found -> false;
+                        BaseID -> {true, BaseID}
+                    end
+                end,
+                IDs
+            );
         _ -> not_found
     end.
 
 %% @doc Convert a commitment message's ID to a base ID.
 commitment_id_to_base_id(ID, Opts) ->
-    Store = hb_opts:get(store, no_store, Opts),
+    %% Read the matched commitment's signature from the local-scoped store only.
+    %% Using the full store here cascades to the gateway/arweave stores
+    %% on a local miss, adding seconds per matched owner/recipient.
+    Store = scoped_store(Opts),
     ?event({commitment_id_to_base_id, ID}),
     case hb_store:read(Store, << ID/binary, "/signature">>, Opts) of
         {ok, EncSig} ->
@@ -617,11 +690,16 @@ all_signed_ids(ID, Store, Opts) ->
             [ID]
     end.
 
+%% @doc Opts with the store scoped to the local stores (per `query_arweave_scope',
+%% default `[local]'), so query result reads never cascade to the gateway/arweave
+%% network stores. Keeps the query path self-contained.
+local_opts(Opts) ->
+    hb_store:scope(Opts, hb_opts:get(query_arweave_scope, [local], Opts)).
+
 %% @doc Scope the stores used for block matching. The searched stores can be
 %% scoped by setting the `query_arweave_scope' option.
 scoped_store(Opts) ->
-    Scope = hb_opts:get(query_arweave_scope, [local], Opts),
-    hb_opts:get(store, no_store, hb_store:scope(Opts, Scope)).
+    hb_opts:get(store, no_store, local_opts(Opts)).
 
 %% @doc Return the explicit IDs from the arguments, if given. Searches for
 %% both `ids' and `id' keys.
@@ -636,3 +714,99 @@ explicit_ids(Args, Opts) ->
             _ -> []
         end
     ).
+
+%% @doc True if the args contain any filter that produces a candidate ID set
+%% (including singular `owner', which `match/3' also accepts). When false, a
+%% `transactions' query enumerates from the block range instead of matching.
+has_set_filter(Args, Opts) ->
+    lists:any(
+        fun(Key) ->
+            case hb_maps:get(Key, Args, undefined, Opts) of
+                undefined -> false;
+                null -> false;
+                _ -> true
+            end
+        end,
+        [<<"ids">>, <<"id">>, <<"owners">>, <<"owner">>, <<"recipients">>, <<"tags">>]
+    ).
+
+%% @doc Enumerate all indexed transaction IDs within a block height range, using
+%% the per-block item index written by the copycat at index time. Returns `[]'
+%% when no range is given (an unbounded, unfiltered transactions query).
+enumerate_block_range(NoRange, _Opts) when NoRange =:= undefined; NoRange =:= null ->
+    [];
+enumerate_block_range(Heights, Opts) ->
+    First = hb_util:int(hb_maps:get(<<"min">>, Heights, 0, Opts)),
+    Last =
+        case hb_maps:get(<<"max">>, Heights, undefined, Opts) of
+            undefined -> hb_util:ok_or(latest_cached_block(Opts), First);
+            Given -> hb_util:int(Given)
+        end,
+    %% `lists:seq/2' fails when the upper bound is more than one below the lower
+    %% bound, so an inverted range is mapped to no heights explicitly.
+    Selected =
+        if
+            Last >= First -> lists:seq(First, Last);
+            true -> []
+        end,
+    PerHeight =
+        fun(Height) ->
+            ItemIndex = hb_store_arweave:read_block_item_ids(Height, Opts),
+            maps:fold(fun(_Depth, IDs, Acc) -> IDs ++ Acc end, [], ItemIndex)
+        end,
+    lists:append(lists:map(PerHeight, Selected)).
+
+%% @doc Resolve the block message for the `block' field of a transaction node.
+%% The node's ID is mapped to a block height via `tx_block_height/2' and the
+%% block is loaded with `read_block/2'; `{ok, null}' is returned when unknown.
+tx_containing_block(Msg, Opts) ->
+    %% Only the ID computation is guarded; the later lookups are not.
+    MaybeID = try hb_message:id(Msg, all, Opts) catch _:_ -> undefined end,
+    MaybeHeight =
+        case is_binary(MaybeID) of
+            true -> tx_block_height(MaybeID, Opts);
+            false -> not_found
+        end,
+    case MaybeHeight of
+        {ok, Height} ->
+            case read_block(Height, Opts) of
+                {ok, Block} -> {ok, Block};
+                _ -> {ok, null}
+            end;
+        not_found -> {ok, null}
+    end.
+
+%% @doc Find the height of the L1 block containing an item. The walk starts at
+%% hop 0 in the store given by `hb_store_arweave:store_from_opts/1'; when that
+%% call returns `no_store' the result is `not_found'.
+tx_block_height(ID, Opts) ->
+    case hb_store_arweave:store_from_opts(Opts) of
+        no_store -> not_found;
+        ArweaveStore -> walk_parent_to_block(ID, ArweaveStore, Opts, 0)
+    end.
+
+%% Follow parent entries up to a `block' entry; gives up once hops exceed 8.
+walk_parent_to_block(_ID, _Store, _Opts, Hops) when Hops > 8 -> not_found;
+walk_parent_to_block(ID, Store, Opts, Hops) ->
+    case hb_store_arweave:read_parent(Store, ID, Opts) of
+        {ok, Entries} ->
+            Bundles = [Parent || {Parent, bundle} <- Entries],
+            case {lists:keyfind(block, 2, Entries), Bundles} of
+                {{Height, block}, _} -> {ok, Height};
+                {false, [Next | _]} ->
+                    walk_parent_to_block(Next, Store, Opts, Hops + 1);
+                {false, []} -> not_found
+            end;
+        _ -> not_found
+    end.
+
+%% Format integer winston as decimal AR (10^12 per AR), trimming trailing zeros.
+winston_to_ar(W) when is_integer(W), W >= 0 ->
+    PerAR = 1000000000000,
+    Whole = W div PerAR,
+    case W rem PerAR of
+        0 -> hb_util:bin(io_lib:format("~B", [Whole]));
+        Frac ->
+            Digits = string:trim(io_lib:format("~12..0B", [Frac]), trailing, "0"),
+            hb_util:bin(io_lib:format("~B.~s", [Whole, Digits]))
+    end.
\ No newline at end of file
diff --git a/src/preloaded/query/dev_query_graphql.erl b/src/preloaded/query/dev_query_graphql.erl
index 25571a82b..59f290684 100644
--- a/src/preloaded/query/dev_query_graphql.erl
+++ b/src/preloaded/query/dev_query_graphql.erl
@@ -108,12 +108,16 @@ handle(_Base, RawReq, Opts) ->
         end,
     ?event({request, {processed, Req}}),
     Query = hb_maps:get(<<"query">>, Req, <<>>, Opts),
-    OpName = hb_maps:get(<<"operationName">>, Req, undefined, Opts),
-    Vars =
-        hb_message:uncommitted_deep(
-            hb_maps:get(<<"variables">>, Req, #{}, Opts),
-            Opts
-        ),
+    OpName =
+        case hb_maps:get(<<"operationName">>, Req, undefined, Opts) of
+            Name when is_binary(Name) -> Name;
+            _ -> undefined
+        end,
+    Vars =
+        case hb_maps:get(<<"variables">>, Req, #{}, Opts) of
+            V when is_map(V) -> hb_message:uncommitted_deep(V, Opts);
+            _ -> #{}
+        end,
     ?event(
         {graphql_run_called,
             {query, Query},
@@ -240,23 +244,26 @@ message_query(Msg, <<"cursor">>, _Args, Opts) ->
 message_query(_Obj, _Field, _, _) ->
     {ok, <<"Not found.">>}.
 
-keys_to_template(Keys) ->
-    maps:from_list(lists:foldl(
-        fun(#{<<"name">> := Name, <<"value">> := Value}, Acc) ->
-            [{Name, Value} | Acc];
-        (#{<<"name">> := Name, <<"values">> := [Value]}, Acc) ->
-            [{Name, Value} | Acc];
-        (#{<<"name">> := Name, <<"values">> := Values}, _Acc) ->
-            throw(
-                {multivalue_tag_search_not_supported, #{
-                    <<"name">> => Name,
-                    <<"values">> => Values
-                }}
-            )
-        end,
-        [],
-        Keys
-    )).
+%% @doc Turn a list of GraphQL tag filters into a `#{TagName => Value}' match
+%% template. A filter must carry `value' or a one-element `values' list; any
+%% other `values' throws `multivalue_tag_search_not_supported'.
+keys_to_template(Keys) ->
+    maps:from_list(lists:map(fun key_to_pair/1, Keys)).
+
+key_to_pair(#{ <<"name">> := Name, <<"value">> := Single }) ->
+    {normalize_tag_name(Name), Single};
+key_to_pair(#{ <<"name">> := Name, <<"values">> := [Single] }) ->
+    {normalize_tag_name(Name), Single};
+key_to_pair(#{ <<"name">> := Name, <<"values">> := Many }) ->
+    Details = #{ <<"name">> => Name, <<"values">> => Many },
+    throw({multivalue_tag_search_not_supported, Details}).
+
+%% @doc Normalize a GraphQL tag name with `hb_ao:normalize_key/1' and lowercase
+%% it, following the storage convention of `dev_codec_ans104_from'. Without
+%% this, a query for `Action' never matches a stored `action'.
+normalize_tag_name(Name) ->
+    Key = hb_ao:normalize_key(Name),
+    hb_util:to_lower(Key).
 
 %%% Test helpers.
 
diff --git a/test/arbundles.js/upload-items.js b/test/arbundles.js/upload-items.js
index 67579202f..43dc216c2 100644
--- a/test/arbundles.js/upload-items.js
+++ b/test/arbundles.js/upload-items.js
@@ -8,7 +8,15 @@ const ENDPOINT_PATH = process.env.ENDPOINT_PATH || "/~bundler@1.0/item?codec-dev
 const DEFAULT_WALLET = "../../hyperbeam-key.json";
 const CONCURRENT_UPLOADS = 100; // Number of parallel uploads
 
-async function performanceTest(walletPath, itemCount, bytesPerItem = 0) {
+/**
+ * Create and upload `itemCount` data items signed with the given wallet.
+ *
+ * @param {string} walletPath - Path to the Arweave wallet JSON file.
+ * @param {number} itemCount - Number of data items to create and upload.
+ * @param {number} [bytesPerItem=0] - Minimum size of each item in bytes.
+ * @param {string} [bundlerUrl=BUNDLER_URL] - Base URL the upload endpoint is built from.
+ */
+async function performanceTest(walletPath, itemCount, bytesPerItem = 0, bundlerUrl = BUNDLER_URL) {
   const wallet = require(path.resolve(walletPath));
   const signer = new ArweaveSigner(wallet);
-  const endpoint = `${BUNDLER_URL}${ENDPOINT_PATH}`;
+  const endpoint = `${bundlerUrl}${ENDPOINT_PATH}`;
@@ -139,14 +147,16 @@ if (require.main === module) {
   const walletPath = firstIsNumber ? DEFAULT_WALLET : (process.argv[2] || DEFAULT_WALLET);
   const itemCount = parseInt(firstIsNumber ? process.argv[2] : process.argv[3], 10);
   const bytesPerItem = parseInt(firstIsNumber ? process.argv[3] : process.argv[4], 10) || 0;
+  const bundlerUrl = (firstIsNumber ? process.argv[4] : process.argv[5]) || BUNDLER_URL;
 
   if (!itemCount || itemCount < 1 || isNaN(itemCount)) {
-    console.error("Usage: node upload-items.js [wallet_path] [bytes_per_item]");
+    console.error("Usage: node upload-items.js [wallet_path] <number_of_items> [bytes_per_item] [bundler_url]");
     console.error("");
     console.error("Arguments:");
     console.error(" wallet_path - Path to Arweave wallet JSON (default: ../../hyperbeam-key.json)");
     console.error(" number_of_items - Number of data items to create and upload");
     console.error(" bytes_per_item - Minimum size of each item in bytes (optional)");
+    console.error(" bundler_url - Bundler base URL (default: " + BUNDLER_URL + ")");
     console.error("");
     console.error("Environment variables:");
     console.error(" BUNDLER_URL - Gateway base URL (default: http://localhost:8734)");
@@ -165,7 +175,7 @@ if (require.main === module) {
     process.exit(1);
   }
 
-  performanceTest(walletPath, itemCount, bytesPerItem)
+  performanceTest(walletPath, itemCount, bytesPerItem, bundlerUrl)
     .then(() => {
       process.exit(0);
     })
@@ -175,4 +185,4 @@ if (require.main === module) {
     });
 }
 
-module.exports = { performanceTest };
+module.exports = { performanceTest };
\ No newline at end of file