Skip to content
48 changes: 48 additions & 0 deletions crypto/crypto/src/merkle_tree/backends/field_element_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,54 @@ where
}
}

/// A backend for Merkle trees that uses fixed-size quads (4 elements) of field elements.
/// Used for arity-4 FRI layers, where each leaf commits to 4 consecutive evaluations.
///
/// The struct holds no runtime data: both fields are `PhantomData` markers that
/// only bind the type parameters.
///
/// * `F` - the field whose elements make up each leaf.
/// * `D` - the `Digest` implementation used for hashing.
/// * `NUM_BYTES` - the byte length of each node hash (`[u8; NUM_BYTES]`).
#[derive(Clone)]
pub struct FieldElementQuadBackend<F, D: Digest, const NUM_BYTES: usize> {
// Marker binding the field type `F` without storing a value of it.
phantom1: PhantomData<F>,
// Marker binding the digest type `D` without storing a hasher.
phantom2: PhantomData<D>,
}

impl<F, D: Digest, const NUM_BYTES: usize> Default for FieldElementQuadBackend<F, D, NUM_BYTES> {
    /// Creates the backend value. Both fields are `PhantomData` markers, so
    /// no bounds beyond `D: Digest` are needed on the type parameters.
    fn default() -> Self {
        let phantom1 = PhantomData;
        let phantom2 = PhantomData;
        Self { phantom1, phantom2 }
    }
}

impl<F, D: Digest, const NUM_BYTES: usize> IsMerkleTreeBackend
    for FieldElementQuadBackend<F, D, NUM_BYTES>
where
    F: IsField,
    FieldElement<F>: AsBytes,
    [u8; NUM_BYTES]: From<Output<D>>,
{
    /// A node is the raw digest output as a fixed-size byte array.
    type Node = [u8; NUM_BYTES];
    /// A leaf is a fixed-size array of 4 field elements.
    type Data = [FieldElement<F>; 4];

    /// Hashes one leaf: the byte encodings of the 4 elements are fed to a
    /// fresh `D` hasher in index order (0, 1, 2, 3) and the digest is returned.
    fn hash_data(input: &[FieldElement<F>; 4]) -> [u8; NUM_BYTES] {
        let mut hasher = D::new();
        for element in input {
            hasher.update(element.as_bytes());
        }
        // The `From<Output<D>>` bound in the where-clause provides this conversion.
        hasher.finalize().into()
    }

    /// Hashes two child nodes into their parent: `left` is absorbed first,
    /// then `right`, using a fresh `D` hasher.
    fn hash_new_parent(left: &[u8; NUM_BYTES], right: &[u8; NUM_BYTES]) -> [u8; NUM_BYTES] {
        let mut hasher = D::new();
        hasher.update(left);
        hasher.update(right);
        // The `From<Output<D>>` bound in the where-clause provides this conversion.
        hasher.finalize().into()
    }
}

#[derive(Clone)]
pub struct FieldElementVectorBackend<F, D: Digest, const NUM_BYTES: usize> {
phantom1: PhantomData<F>,
Expand Down
7 changes: 6 additions & 1 deletion crypto/crypto/src/merkle_tree/backends/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use sha3::Keccak256;

use super::{
field_element::FieldElementBackend,
field_element_vector::{FieldElementPairBackend, FieldElementVectorBackend},
field_element_vector::{
FieldElementPairBackend, FieldElementQuadBackend, FieldElementVectorBackend,
},
};

// Field element backend definitions
Expand All @@ -13,3 +15,6 @@ pub type BatchKeccak256Backend<F> = FieldElementVectorBackend<F, Keccak256, 32>;

// Fixed-size pair backends (more efficient for FRI layers)
pub type PairKeccak256Backend<F> = FieldElementPairBackend<F, Keccak256, 32>;

// Fixed-size quad backends (for arity-4 FRI layers)
// Keccak-256 digest with 32-byte nodes; each leaf is a 4-element array of field elements.
pub type QuadKeccak256Backend<F> = FieldElementQuadBackend<F, Keccak256, 32>;
1 change: 1 addition & 0 deletions crypto/crypto/src/merkle_tree/proof.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ where
/// This ordering is critical for verification, which consumes nodes in the same order
/// as they were generated by `get_batch_proof`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BatchProof<T: PartialEq + Eq> {
pub path: Vec<T>,
}
Expand Down
10 changes: 4 additions & 6 deletions crypto/math-cuda/tests/keccak_leaves.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
//! Parity: GPU Keccak-256 leaf hashes must match the CPU prover's leaf
//! hashing helpers. `stark::prover::keccak_leaves_bit_reversed` for
//! per-row commits, `keccak_leaves_row_pair_bit_reversed` for the R2
//! composition commit, and `FriLayerMerkleTreeBackend::hash_data` for the
//! FRI commit. These are the same helpers the prover itself calls so any
//! composition commit, and `PairKeccak256Backend::hash_data` for the
//! pair-leaf commit. These are the same helpers the prover itself calls so any
//! change to the CPU leaf-hash contract surfaces here.

use crypto::merkle_tree::backends::types::PairKeccak256Backend;
use crypto::merkle_tree::traits::IsMerkleTreeBackend;
use math::field::element::FieldElement;
use math::field::extensions_goldilocks::Degree3GoldilocksExtensionField;
use math::field::goldilocks::GoldilocksField;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use stark::config::FriLayerMerkleTreeBackend;
use stark::prover::{keccak_leaves_bit_reversed, keccak_leaves_row_pair_bit_reversed};

type Fp = FieldElement<GoldilocksField>;
Expand Down Expand Up @@ -171,9 +171,7 @@ fn keccak_fri_leaves_matches_cpu() {
let cpu: Vec<[u8; 32]> = evals
.chunks_exact(2)
.map(|c| {
FriLayerMerkleTreeBackend::<Degree3GoldilocksExtensionField>::hash_data(&[
c[0], c[1],
])
PairKeccak256Backend::<Degree3GoldilocksExtensionField>::hash_data(&[c[0], c[1]])
})
.collect();

Expand Down
8 changes: 4 additions & 4 deletions crypto/stark/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crypto::merkle_tree::{
backends::types::{BatchKeccak256Backend, Keccak256Backend, PairKeccak256Backend},
backends::types::{BatchKeccak256Backend, Keccak256Backend, QuadKeccak256Backend},
merkle::MerkleTree,
};

Expand All @@ -19,6 +19,6 @@ pub type Commitment = [u8; COMMITMENT_SIZE];
pub type BatchedMerkleTreeBackend<F> = BatchKeccak256Backend<F>;
pub type BatchedMerkleTree<F> = MerkleTree<BatchedMerkleTreeBackend<F>>;

// FRI layer uses fixed-size pairs for efficiency (avoids Vec allocation per pair)
pub type FriLayerMerkleTreeBackend<F> = PairKeccak256Backend<F>;
pub type FriLayerMerkleTree<F> = MerkleTree<FriLayerMerkleTreeBackend<F>>;
// Arity-4 FRI layer: each leaf commits to 4 consecutive evaluations.
// The backend alias selects the Keccak-256 quad backend; the tree alias is a
// `MerkleTree` built over that backend.
pub type FriLayerQuadMerkleTreeBackend<F> = QuadKeccak256Backend<F>;
pub type FriLayerQuadMerkleTree<F> = MerkleTree<FriLayerQuadMerkleTreeBackend<F>>;
4 changes: 3 additions & 1 deletion crypto/stark/src/fri/fri_decommit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,7 @@ use crate::config::Commitment;
#[serde(bound = "")]
pub struct FriDecommitment<F: IsField> {
pub layers_auth_paths: Vec<Proof<Commitment>>,
pub layers_evaluations_sym: Vec<FieldElement<F>>,
/// For arity-4 FRI: the 3 sibling evaluations per layer at positions
/// {index^1, index^2, index^3} within the 4-element orbit.
pub layers_evaluations_siblings: Vec<[FieldElement<F>; 3]>,
}
155 changes: 110 additions & 45 deletions crypto/stark/src/fri/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,22 @@ use math::field::traits::IsSubFieldOf;
use math::field::traits::{IsFFTField, IsField};
use math::traits::AsBytes;

use crate::config::{FriLayerMerkleTree, FriLayerMerkleTreeBackend};
use crate::config::{FriLayerQuadMerkleTree, FriLayerQuadMerkleTreeBackend};

use self::fri_commitment::FriLayer;
use self::fri_decommit::FriDecommitment;
use self::fri_functions::{
compute_coset_twiddles_inv, fold_evaluations_in_place, update_twiddles_in_place,
};

/// FRI commit phase from pre-computed bit-reversed evaluations,
/// skipping the initial FFT. Use this when the caller already has the evaluation
/// vector (e.g. from a fused LDE pipeline).
/// FRI commit phase using arity-4 folding (2 binary folds per committed layer).
///
/// For `number_layers` binary fold levels, this produces `(number_layers - 1) / 2`
/// committed layers (each covering 2 binary folds) plus a final single-fold to
/// produce the last value. For a 2^19 trace: 19 levels → 9 committed layers.
///
/// Each committed layer is a quad Merkle tree (4-element leaves), halving the
/// number of Merkle commits vs binary FRI.
pub fn commit_phase_from_evaluations<F: IsFFTField + IsSubFieldOf<E>, E: IsField>(
number_layers: usize,
mut evals: Vec<FieldElement<E>>,
Expand All @@ -27,35 +32,50 @@ pub fn commit_phase_from_evaluations<F: IsFFTField + IsSubFieldOf<E>, E: IsField
domain_size: usize,
) -> (
FieldElement<E>,
Vec<FriLayer<E, FriLayerMerkleTreeBackend<E>>>,
Vec<FriLayer<E, FriLayerQuadMerkleTreeBackend<E>>>,
)
where
FieldElement<F>: AsBytes + Sync + Send,
FieldElement<E>: AsBytes + Sync + Send,
{
// Inverse twiddle factors for evaluation-form folding
let mut inv_twiddles = compute_coset_twiddles_inv(coset_offset, domain_size);

let mut fri_layer_list = Vec::with_capacity(number_layers);
let mut fri_layer_list = Vec::new();
let mut current_coset_offset = coset_offset.clone();
let mut current_domain_size = domain_size;

for _ in 1..number_layers {
// <<<< Receive challenge 𝜁ₖ₋₁
let zeta = transcript.sample_field_element();
// Number of double-fold (arity-4) committed rounds from the (number_layers - 1) middle layers.
// The final fold is handled separately below.
let num_double_rounds = number_layers.saturating_sub(1) / 2;

for _ in 0..num_double_rounds {
// Sample both fold challenges before committing.
// This is sound for arity-4 FRI: the prover commits *one* combined layer
// that covers both binary folds, so it fixes its evaluations before either
// challenge is revealed. It cannot choose zeta2 adaptively after seeing
// zeta1 because both are sampled from the same transcript state, before
// the commitment is appended.
let zeta1 = transcript.sample_field_element();
let zeta2 = transcript.sample_field_element();

Comment on lines +53 to +60
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium – both challenges sampled before committing: soundness note

The comment argues this is sound because the prover already fixed f_k in the previous commitment and f_{k+2} is fully determined by f_k, zeta1, zeta2. That argument is correct for an honest prover, but it deviates from the standard Fiat-Shamir assumption used in security proofs.

In standard arity-2 FRI the challenge zeta_k is derived from the transcript state including C_k, so the prover cannot choose C_k after seeing zeta_k. Here, zeta1 and zeta2 are both derived from the state before C_{k+1} is appended. A prover that has some slack in choosing f_{k+1} (e.g., from a weak binding) would learn both challenges before fixing the committed polynomial.

If the security proof for this construction has been written up, it would help to reference it in the comment.

// First binary fold: current_size → current_size / 2
current_coset_offset = current_coset_offset.square();
current_domain_size /= 2;
fold_evaluations_in_place(&mut evals, &zeta1, &inv_twiddles);
update_twiddles_in_place(&mut inv_twiddles);

// Fold evaluations in-place (no FFT needed)
fold_evaluations_in_place(&mut evals, &zeta, &inv_twiddles);
// Second binary fold: current_size / 2 → current_size / 4
current_coset_offset = current_coset_offset.square();
current_domain_size /= 2;
fold_evaluations_in_place(&mut evals, &zeta2, &inv_twiddles);

// Build Merkle tree from consecutive pairs
let leaves: Vec<[FieldElement<E>; 2]> = evals
.chunks_exact(2)
.map(|chunk| [chunk[0].clone(), chunk[1].clone()])
// Commit the doubly-folded evaluations as quad (4-element) Merkle leaves.
let leaves: Vec<[FieldElement<E>; 4]> = evals
.chunks_exact(4)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

High – silent data loss / panic for small domains

chunks_exact(4) silently drops trailing elements when evals.len() is not a multiple of 4, and produces zero chunks (leading to a MerkleTree::build panic on an empty slice) when evals.len() < 4.

For number_layers = 3 (domain_size = 8), the double-fold round reduces evals to 2 elements before this line, so zero leaves are produced and the expect below panics. The query_domain_size == 0 guard in sample_query_indexes only protects against domain_size ≤ 3, not this case.

Consider asserting the invariant explicitly:

debug_assert!(
    evals.len() >= 4 && evals.len() % 4 == 0,
    "evals length {} is not a multiple of 4 after double fold",
    evals.len()
);

And add a guard at the top of the function (or at the call site) to skip FRI commit when the folded domain is too small for quad leaves.

.map(|c| [c[0].clone(), c[1].clone(), c[2].clone(), c[3].clone()])
.collect();
let merkle_tree = FriLayerMerkleTree::build(&leaves)
.expect("FRI commit: Merkle tree construction must succeed");
let merkle_tree = FriLayerQuadMerkleTree::build(&leaves)
.expect("FRI commit: quad Merkle tree construction must succeed");
let root = merkle_tree.root;
fri_layer_list.push(FriLayer::new(
&evals,
Expand All @@ -64,30 +84,56 @@ where
current_domain_size,
));

// >>>> Send commitment: [pₖ]
// Append commitment to transcript so subsequent samples depend on it.
transcript.append_bytes(&root);

// Update twiddles for next level
update_twiddles_in_place(&mut inv_twiddles);
}

// <<<< Receive challenge: 𝜁ₙ₋₁
let zeta = transcript.sample_field_element();
// Handle the leftover single binary round when (number_layers - 1) is odd.
// For number_layers=19: (19-1)/2 = 9 double rounds, remainder 0 → skipped.
// For number_layers=20: (20-1)/2 = 9 double rounds, remainder 1 → one extra.
if number_layers.saturating_sub(1) % 2 == 1 {
let zeta = transcript.sample_field_element();
current_coset_offset = current_coset_offset.square();
current_domain_size /= 2;
fold_evaluations_in_place(&mut evals, &zeta, &inv_twiddles);

// Final fold
fold_evaluations_in_place(&mut evals, &zeta, &inv_twiddles);
// Commit remaining as quad leaves (evals.len() must be >= 4 here).
let leaves: Vec<[FieldElement<E>; 4]> = evals
.chunks_exact(4)
.map(|c| [c[0].clone(), c[1].clone(), c[2].clone(), c[3].clone()])
.collect();
let merkle_tree = FriLayerQuadMerkleTree::build(&leaves)
.expect("FRI commit: quad Merkle tree construction must succeed");
let root = merkle_tree.root;
fri_layer_list.push(FriLayer::new(
&evals,
merkle_tree,
current_coset_offset.clone().to_extension(),
current_domain_size,
));
transcript.append_bytes(&root);
update_twiddles_in_place(&mut inv_twiddles);
}

let last_value = evals.first().unwrap_or(&FieldElement::zero()).clone();
// Final fold: one more binary fold to produce the last value (not committed).
let zeta = transcript.sample_field_element();
fold_evaluations_in_place(&mut evals, &zeta, &inv_twiddles);

// >>>> Send value: pₙ
let last_value = evals
.first()
.expect("FRI evals empty after folding")
.clone();
transcript.append_field_element(&last_value);

(last_value, fri_layer_list)
}

pub fn query_phase<F: IsField>(
fri_layers: &Vec<FriLayer<F, FriLayerMerkleTreeBackend<F>>>,
fri_layers: &[FriLayer<F, FriLayerQuadMerkleTreeBackend<F>>],
iotas: &[usize],
num_double_rounds: usize,
) -> Vec<FriDecommitment<F>>
where
FieldElement<F>: AsBytes + Sync + Send,
Expand All @@ -97,35 +143,54 @@ where
iotas
.iter()
.map(|iota_s| {
let mut layers_evaluations_sym = Vec::with_capacity(num_layers);
let mut layers_auth_paths_sym = Vec::with_capacity(num_layers);

let mut index = *iota_s;
for layer in fri_layers {
// symmetric element
let evaluation_sym = layer.evaluation[index ^ 1].clone();
let auth_path_sym = layer.merkle_tree.get_proof_by_pos(index >> 1).unwrap();
layers_evaluations_sym.push(evaluation_sym);
layers_auth_paths_sym.push(auth_path_sym);

index >>= 1;
let mut layers_evaluations_siblings = Vec::with_capacity(num_layers);
let mut layers_auth_paths = Vec::with_capacity(num_layers);

// For double bootstrap (num_double_rounds >= 1): iota is already the
// index in layer[0] (which has LDE/4 elements after 2 binary folds).
// For single bootstrap (num_double_rounds == 0): layer[0] has LDE/2
// elements, so the index is 2*iota.
let mut index = if num_double_rounds >= 1 {
*iota_s
} else {
iota_s * 2
};
for (i, layer) in fri_layers.iter().enumerate() {
// The 4-element orbit of `index` is {index&~3, ..., (index&~3)+3}.
// index^1, index^2, index^3 are the 3 siblings (XOR flips last 2 bits).
let s1 = layer.evaluation[index ^ 1].clone();
let s2 = layer.evaluation[index ^ 2].clone();
let s3 = layer.evaluation[index ^ 3].clone();

// Quad leaf position: each leaf holds 4 evaluations, leaf j covers
// indices {4j, 4j+1, 4j+2, 4j+3}, so the leaf index is index >> 2.
let auth_path = layer.merkle_tree.get_proof_by_pos(index >> 2).unwrap();

layers_evaluations_siblings.push([s1, s2, s3]);
layers_auth_paths.push(auth_path);

// Round (i+1) is a double fold iff (i+1) < num_double_rounds,
// meaning layer[i] → layer[i+1] involves 2 binary folds (index >>= 2).
// Otherwise it is a single fold (index >>= 1).
if (i + 1) < num_double_rounds {
index >>= 2;
} else {
index >>= 1;
}
}

FriDecommitment {
layers_auth_paths: layers_auth_paths_sym,
layers_evaluations_sym,
layers_auth_paths,
layers_evaluations_siblings,
}
})
.collect()
} else {
// For 0 FRI layers (small traces), return empty decommitments for each query.
// The verifier still needs one decommitment entry per query, even if the
// FRI layer data is empty.
iotas
.iter()
.map(|_| FriDecommitment {
layers_auth_paths: vec![],
layers_evaluations_sym: vec![],
layers_evaluations_siblings: vec![],
})
.collect()
}
Expand Down
Loading