From 807fd6dfa7b1175b60d4246cc4a3be1523d79640 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Fri, 24 Apr 2026 18:21:07 -0300 Subject: [PATCH 1/3] perf: sparse gather for LogUp aux trace when multiplicity is selector-gated Many bus interactions are gated by mutually-exclusive selector columns (e.g., AND/OR/XOR op flags, ADD/SUB/MUL op flags): when the selector is zero, the row's contribution to the term column is zero regardless of the fingerprint. We now detect Column/Sum/Sum3 multiplicities and, when fewer than 7/8 of a chunk's rows are active, compute fingerprints only at the active rows before batch-inverting. Inactive rows stay at zero from the initial `vec![zero; trace_len]` allocation. Soundness: `sparse_active_rows` marks a row as active whenever any contributing column is non-zero, which is an over-approximation of `m != 0` (cancellation can only cause us to process more rows, never fewer). Dense multiplicities (One, Negated, Diff, Linear) fall back to the unchanged contiguous path. Applies to both single (`compute_logup_term_column`) and paired (`compute_logup_batched_term_column`) term-column builders. Verified via all 71 stark bus soundness tests and 265 prover lib tests. --- crypto/stark/src/lookup.rs | 598 +++++++++++++++++++++++++++++-------- 1 file changed, 474 insertions(+), 124 deletions(-) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index cdc68e7e0..b9c196e5f 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -1048,45 +1048,58 @@ where // the throughput the per-pair dispatch used to provide for small-trace // tables with many interactions. // Without `parallel`: sequential over pairs, sequential over rows. - let interactions = &self.auxiliary_trace_build_data.interactions; - let build_pair = |i: usize| { - compute_logup_term_column( - &[&interactions[i * 2], &interactions[i * 2 + 1]], - &main_segment_cols, - trace_len, - challenges, - _table_name, - ) - }; - #[cfg(feature = "parallel")] let committed_columns: Vec>> = if trace_len <= LOGUP_CHUNK_SIZE { (0..num_committed_pairs) .into_par_iter() - .map(build_pair) + .map(|i| { + compute_logup_batched_term_column( + &self.auxiliary_trace_build_data.interactions[i * 2], + &self.auxiliary_trace_build_data.interactions[i * 2 + 1], + &main_segment_cols, + trace_len, + challenges, + ) + }) .collect() } else { - (0..num_committed_pairs).map(build_pair).collect() + (0..num_committed_pairs) + .map(|i| { + compute_logup_batched_term_column( + &self.auxiliary_trace_build_data.interactions[i * 2], + &self.auxiliary_trace_build_data.interactions[i * 2 + 1], + &main_segment_cols, + trace_len, + challenges, + ) + }) + .collect() }; #[cfg(not(feature = "parallel"))] - let committed_columns: Vec>> = - (0..num_committed_pairs).map(build_pair).collect(); + let committed_columns: Vec>> = (0..num_committed_pairs) + .map(|i| { + compute_logup_batched_term_column( + &self.auxiliary_trace_build_data.interactions[i * 2], + &self.auxiliary_trace_build_data.interactions[i * 2 + 1], + &main_segment_cols, + trace_len, + challenges, + ) + }) + .collect(); - // Virtual column for absorbed interactions (NOT written to trace). + // Compute virtual column for absorbed interactions (NOT written to trace) let virtual_column = if absorbed_count == 2 { - compute_logup_term_column( - &[ - &interactions[num_interactions - 2], - &interactions[num_interactions - 1], - ], + compute_logup_batched_term_column( + &self.auxiliary_trace_build_data.interactions[num_interactions - 2], + &self.auxiliary_trace_build_data.interactions[num_interactions - 1], &main_segment_cols, trace_len, challenges, - _table_name, ) } else { compute_logup_term_column( - &[&interactions[num_interactions - 1]], + &self.auxiliary_trace_build_data.interactions[num_interactions - 1], &main_segment_cols, trace_len, challenges, @@ -1229,21 +1242,28 @@ pub enum Multiplicity { } impl Multiplicity { - /// Evaluate the multiplicity expression to a field element. `get_col(i)` - /// must return the value of main column `i` at the row being evaluated. + /// Evaluate the multiplicity for a single row. #[inline] - fn evaluate_with(&self, get_col: G) -> FieldElement - where - F: IsField, - G: Fn(usize) -> FieldElement, - { + fn evaluate_at_row( + &self, + main_segment_cols: &[Vec>], + row: usize, + ) -> FieldElement { match self { Multiplicity::One => FieldElement::one(), - Multiplicity::Column(col) => get_col(*col), - Multiplicity::Sum(a, b) => get_col(*a) + get_col(*b), - Multiplicity::Negated(col) => FieldElement::::one() - get_col(*col), - Multiplicity::Diff(a, b) => get_col(*a) - get_col(*b), - Multiplicity::Sum3(a, b, c) => get_col(*a) + get_col(*b) + get_col(*c), + Multiplicity::Column(col) => main_segment_cols[*col][row].clone(), + Multiplicity::Sum(col_a, col_b) => { + &main_segment_cols[*col_a][row] + &main_segment_cols[*col_b][row] + } + Multiplicity::Negated(col) => FieldElement::::one() - &main_segment_cols[*col][row], + Multiplicity::Diff(col_a, col_b) => { + &main_segment_cols[*col_a][row] - &main_segment_cols[*col_b][row] + } + Multiplicity::Sum3(col_a, col_b, col_c) => { + &main_segment_cols[*col_a][row] + + &main_segment_cols[*col_b][row] + + &main_segment_cols[*col_c][row] + } Multiplicity::Linear(terms) => { let mut result = FieldElement::::zero(); for term in terms { @@ -1251,28 +1271,26 @@ impl Multiplicity { LinearTerm::Column { coefficient, column, - } => result += get_col(column) * FieldElement::::from(coefficient), + } => { + let coeff = FieldElement::::from(coefficient); + result += &main_segment_cols[column][row] * coeff; + } LinearTerm::ColumnUnsigned { coefficient, column, - } => result += get_col(column) * FieldElement::::from(coefficient), - LinearTerm::Constant(value) => result += FieldElement::::from(value), + } => { + let coeff = FieldElement::::from(coefficient); + result += &main_segment_cols[column][row] * coeff; + } + LinearTerm::Constant(value) => { + result += FieldElement::::from(value); + } } } result } } } - - /// Evaluate the multiplicity for a single row of column-major main data. - #[inline] - fn evaluate_at_row( - &self, - main_segment_cols: &[Vec>], - row: usize, - ) -> FieldElement { - self.evaluate_with(|col| main_segment_cols[col][row].clone()) - } } /// Struct representing a lookup interaction for a given table. @@ -1425,23 +1443,99 @@ where { } -/// Compute a LogUp term column for one or two interactions sharing the result -/// column. For each row, returns the sum Σₖ signₖ·mₖ[row] / fpₖ[row] where the -/// loop runs over `interactions` (must be length 1 or 2). +/// Classifies a multiplicity as sparse-capable for a given chunk. +/// +/// Returns `Some(active_indices)` when the multiplicity has a cheap structural +/// test for zero (e.g. `Column`, `Sum`, `Sum3` where each term is a selector +/// column that is mostly 0), and `None` when the multiplicity is effectively +/// dense (e.g. `One`, or forms like `Negated`/`Diff`/`Linear` whose zero set is +/// not easily identifiable from a single column check). +/// +/// Soundness: `active_indices[..]` must include every row where the multiplicity +/// is non-zero. Here we mark a row as active if ANY of the contributing columns +/// is non-zero — which is an over-approximation of `m != 0` (we don't miss any +/// non-zero rows, we just might process some m=0 rows due to cancellation). +/// +/// The threshold check: if more than `SPARSE_ACTIVE_FRAC` of the chunk is active, +/// return None to fall back to the dense path (gather/scatter overhead not worth it). +#[inline] +fn sparse_active_rows( + multiplicity: &Multiplicity, + main_segment_cols: &[Vec>], + chunk_start: usize, + chunk_len: usize, +) -> Option> +where + F: IsField, +{ + let zero = FieldElement::::zero(); + // If more than this fraction of rows are active, skip sparse path. + // (Numerator/denominator comparison to avoid floating point.) + const SPARSE_NUM: usize = 7; + const SPARSE_DEN: usize = 8; + + let collect_if_sparse = |active: Vec| -> Option> { + if active.len() * SPARSE_DEN > chunk_len * SPARSE_NUM { + None + } else { + Some(active) + } + }; + + match multiplicity { + Multiplicity::One => None, + Multiplicity::Column(col) => { + let c = &main_segment_cols[*col]; + let active: Vec = (0..chunk_len) + .filter(|&i| c[chunk_start + i] != zero) + .collect(); + collect_if_sparse(active) + } + Multiplicity::Sum(col_a, col_b) => { + let ca = &main_segment_cols[*col_a]; + let cb = &main_segment_cols[*col_b]; + let active: Vec = (0..chunk_len) + .filter(|&i| { + let row = chunk_start + i; + ca[row] != zero || cb[row] != zero + }) + .collect(); + collect_if_sparse(active) + } + Multiplicity::Sum3(col_a, col_b, col_c) => { + let ca = &main_segment_cols[*col_a]; + let cb = &main_segment_cols[*col_b]; + let cc = &main_segment_cols[*col_c]; + let active: Vec = (0..chunk_len) + .filter(|&i| { + let row = chunk_start + i; + ca[row] != zero || cb[row] != zero || cc[row] != zero + }) + .collect(); + collect_if_sparse(active) + } + // Negated = 1 - col is dense (usually 1 when col is a bit flag). + // Diff and Linear could cancel arbitrarily; safest to treat as dense. + Multiplicity::Negated(_) | Multiplicity::Diff(_, _) | Multiplicity::Linear(_) => None, + } +} + +/// Computes a term column for a table interaction without writing to the trace. +/// +/// Each row contains the LogUp quotient: `term[i] = sign * multiplicity[i] / fingerprint[i]` /// -/// Single-interaction case yields the per-interaction quotient (used for the -/// absorbed virtual column when only one interaction remains, and by the -/// debug-checks per-interaction breakdown). Two-interaction case yields the -/// batched sum that backs a committed term column. Both share a single chunked -/// implementation with one batch inversion per chunk for cache locality. +/// This is a pure function that takes shared main columns and returns the computed column, +/// enabling parallel computation across interactions within a table. /// -/// Debug-checks bus tracker is invoked only when `interactions.len() == 1`, -/// matching the previous behavior of the dedicated single-interaction helper. +/// With `parallel`: processes rows in chunks of `LOGUP_CHUNK_SIZE` via `par_chunks_mut`, +/// giving good cache locality (each thread touches only CHUNK_SIZE rows before moving on). +/// Without `parallel`: processes all rows as a single chunk (equivalent to the old sequential path). /// -/// With `parallel`: chunked over rows via `par_chunks_mut`. -/// Without `parallel`: processed as a single chunk. +/// Sparse fast path: when the multiplicity's zero-set is structurally identifiable +/// (e.g., the interaction is gated by a selector column), we compute fingerprints +/// and terms only at the rows where the multiplicity is potentially non-zero. fn compute_logup_term_column( - interactions: &[&BusInteraction], + table_interaction: &BusInteraction, main_segment_cols: &[Vec>], trace_len: usize, challenges: &[FieldElement], @@ -1451,41 +1545,254 @@ where F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, E: IsField + Send + Sync, { - assert!( - matches!(interactions.len(), 1 | 2), - "compute_logup_term_column expects 1 or 2 interactions, got {}", - interactions.len() - ); + let z = &challenges[0]; + let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; + let num_bus_elements = table_interaction.num_bus_elements(); + let alpha_powers = compute_alpha_powers(alpha, num_bus_elements); + let negate = !table_interaction.is_sender; + let bus_id_f = FieldElement::::from(table_interaction.bus_id); + let shifts = PackingShifts::::new(); + + let mut result = vec![FieldElement::::zero(); trace_len]; + + let process_chunk = |chunk_start: usize, result_chunk: &mut [FieldElement]| { + let chunk_len = result_chunk.len(); + + // Try sparse path first: compute fingerprints/terms only at rows where + // the multiplicity is (potentially) non-zero. Rows initialize to zero in + // the result vector, so inactive rows naturally stay zero. + let active = sparse_active_rows( + &table_interaction.multiplicity, + main_segment_cols, + chunk_start, + chunk_len, + ); + + let compute_fp = |row: usize| -> FieldElement { + let mut lc = &bus_id_f * &alpha_powers[0]; + let mut alpha_offset = 1; + for bv in &table_interaction.values { + let consumed = bv.accumulate_fingerprint( + main_segment_cols, + row, + &alpha_powers, + alpha_offset, + &mut lc, + &shifts, + ); + alpha_offset += consumed; + } + z - &lc + }; + + match active { + Some(indices) => { + // Sparse path + let mut fingerprints: Vec> = indices + .iter() + .map(|&i| compute_fp(chunk_start + i)) + .collect(); + + #[cfg(feature = "debug-checks")] + { + // Log all rows for debug symmetry; inactive rows contribute zero. + for row in chunk_start..chunk_start + chunk_len { + let mut base_elements: Vec> = vec![bus_id_f.clone()]; + base_elements.extend(table_interaction.values.iter().flat_map(|bv| { + bv.combine_from(|col| main_segment_cols[col][row].clone()) + })); + let multiplicity = table_interaction + .multiplicity + .evaluate_at_row(main_segment_cols, row); + // Compute fp fresh for debug (cheap). + let fp = compute_fp(row); + crate::bus_debug::log_interaction( + _table_name, + row, + table_interaction.bus_id, + table_interaction.is_sender, + &multiplicity.canonical(), + &base_elements, + &fp, + ); + } + } + + FieldElement::inplace_batch_inverse(&mut fingerprints) + .expect("fingerprint is zero - probability of sampling zero is negligible"); + + for (k, &i) in indices.iter().enumerate() { + let row = chunk_start + i; + let m = table_interaction + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let term = &m * &fingerprints[k]; + result_chunk[i] = if negate { -term } else { term }; + } + } + None => { + // Dense path + let mut fingerprints: Vec> = Vec::with_capacity(chunk_len); + for row in chunk_start..chunk_start + chunk_len { + fingerprints.push(compute_fp(row)); + + #[cfg(feature = "debug-checks")] + { + let mut base_elements: Vec> = vec![bus_id_f.clone()]; + base_elements.extend(table_interaction.values.iter().flat_map(|bv| { + bv.combine_from(|col| main_segment_cols[col][row].clone()) + })); + let multiplicity = table_interaction + .multiplicity + .evaluate_at_row(main_segment_cols, row); + crate::bus_debug::log_interaction( + _table_name, + row, + table_interaction.bus_id, + table_interaction.is_sender, + &multiplicity.canonical(), + &base_elements, + fingerprints.last().unwrap(), + ); + } + } + + FieldElement::inplace_batch_inverse(&mut fingerprints) + .expect("fingerprint is zero - probability of sampling zero is negligible"); + for (i, result_elem) in result_chunk.iter_mut().enumerate() { + let row = chunk_start + i; + let m = table_interaction + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let term = &m * &fingerprints[i]; + *result_elem = if negate { -term } else { term }; + } + } + } + }; + + #[cfg(feature = "parallel")] + result + .par_chunks_mut(LOGUP_CHUNK_SIZE) + .enumerate() + .for_each(|(i, chunk)| process_chunk(i * LOGUP_CHUNK_SIZE, chunk)); + + #[cfg(not(feature = "parallel"))] + process_chunk(0, &mut result); + + result +} + +/// Computes a batched term column for two interactions sharing one aux column. +/// +/// Each row contains: `term[i] = sign_a * m_a[i] / fp_a[i] + sign_b * m_b[i] / fp_b[i]` +/// +/// Uses chunk-local batch inversion for good cache locality: each chunk processes +/// both interactions for CHUNK_SIZE rows before moving on. +/// +/// With `parallel`: processes rows in chunks of `LOGUP_CHUNK_SIZE` via `par_chunks_mut`. +/// Without `parallel`: processes all rows as a single chunk (equivalent to the old sequential path). +fn compute_logup_batched_term_column( + interaction_a: &BusInteraction, + interaction_b: &BusInteraction, + main_segment_cols: &[Vec>], + trace_len: usize, + challenges: &[FieldElement], +) -> Vec> +where + F: IsFFTField + IsSubFieldOf + IsPrimeField + Send + Sync, + E: IsField + Send + Sync, +{ let z = &challenges[0]; let alpha = &challenges[LOGUP_CHALLENGE_ALPHA]; - let max_bus_elements = interactions - .iter() - .map(|i| i.num_bus_elements()) - .max() - .unwrap_or(0); + let max_bus_elements = interaction_a + .num_bus_elements() + .max(interaction_b.num_bus_elements()); let alpha_powers = compute_alpha_powers(alpha, max_bus_elements); - let bus_ids: Vec> = interactions - .iter() - .map(|i| FieldElement::::from(i.bus_id)) - .collect(); + let negate_a = !interaction_a.is_sender; + let negate_b = !interaction_b.is_sender; + let bus_id_a = FieldElement::::from(interaction_a.bus_id); + let bus_id_b = FieldElement::::from(interaction_b.bus_id); let shifts = PackingShifts::::new(); - let n = interactions.len(); let mut result = vec![FieldElement::::zero(); trace_len]; let process_chunk = |chunk_start: usize, result_chunk: &mut [FieldElement]| { let chunk_len = result_chunk.len(); - // Phase 1 — fingerprints, laid out as [int_0 rows…, int_1 rows…]. - // fp[k*chunk_len + i] = interaction k at row chunk_start+i. - let mut fingerprints: Vec> = Vec::with_capacity(n * chunk_len); - for (k, interaction) in interactions.iter().enumerate() { - for row in chunk_start..chunk_start + chunk_len { - let mut lc = &bus_ids[k] * &alpha_powers[0]; + // Build per-interaction active-row index lists. `None` means dense + // (treat all rows as active); `Some(idx)` means process only `idx`. + let active_a = sparse_active_rows( + &interaction_a.multiplicity, + main_segment_cols, + chunk_start, + chunk_len, + ); + let active_b = sparse_active_rows( + &interaction_b.multiplicity, + main_segment_cols, + chunk_start, + chunk_len, + ); + + // Dense fast path: preserves the original contiguous memory access + // pattern when sparse gather would be pure overhead. + if active_a.is_none() && active_b.is_none() { + let compute_fps = |interaction: &BusInteraction, + bus_id_f: &FieldElement, + fps: &mut Vec>| { + for row in chunk_start..chunk_start + chunk_len { + let mut lc = bus_id_f * &alpha_powers[0]; + let mut alpha_offset = 1; + for bv in &interaction.values { + let consumed = bv.accumulate_fingerprint( + main_segment_cols, + row, + &alpha_powers, + alpha_offset, + &mut lc, + &shifts, + ); + alpha_offset += consumed; + } + fps.push(z - &lc); + } + }; + + let mut fingerprints: Vec> = Vec::with_capacity(2 * chunk_len); + compute_fps(interaction_a, &bus_id_a, &mut fingerprints); + compute_fps(interaction_b, &bus_id_b, &mut fingerprints); + + FieldElement::inplace_batch_inverse(&mut fingerprints) + .expect("fingerprint is zero - probability of sampling zero is negligible"); + + for (i, result_elem) in result_chunk.iter_mut().enumerate() { + let row = chunk_start + i; + let fp_a_inv = &fingerprints[i]; + let fp_b_inv = &fingerprints[chunk_len + i]; + let m_a = interaction_a + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let m_b = interaction_b + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let term_a = &m_a * fp_a_inv; + let term_b = &m_b * fp_b_inv; + let term_a = if negate_a { -term_a } else { term_a }; + let term_b = if negate_b { -term_b } else { term_b }; + *result_elem = term_a + term_b; + } + return; + } + + // Sparse (or half-sparse) path: gather fingerprints only for active rows. + let compute_fp = + |interaction: &BusInteraction, bus_id_f: &FieldElement, row: usize| { + let mut lc = bus_id_f * &alpha_powers[0]; let mut alpha_offset = 1; for bv in &interaction.values { - alpha_offset += bv.accumulate_fingerprint( + let consumed = bv.accumulate_fingerprint( main_segment_cols, row, &alpha_powers, @@ -1493,53 +1800,52 @@ where &mut lc, &shifts, ); + alpha_offset += consumed; } - fingerprints.push(z - &lc); - } - } + z - &lc + }; - #[cfg(feature = "debug-checks")] - if n == 1 { - let interaction = interactions[0]; - for (i, row) in (chunk_start..chunk_start + chunk_len).enumerate() { - let mut base_elements: Vec> = vec![bus_ids[0].clone()]; - base_elements.extend( - interaction - .values - .iter() - .flat_map(|bv| bv.combine_from(|col| main_segment_cols[col][row].clone())), - ); - let multiplicity = interaction - .multiplicity - .evaluate_at_row(main_segment_cols, row); - crate::bus_debug::log_interaction( - _table_name, - row, - interaction.bus_id, - interaction.is_sender, - &multiplicity.canonical(), - &base_elements, - &fingerprints[i], - ); - } + let indices_a: Vec = match &active_a { + Some(v) => v.clone(), + None => (0..chunk_len).collect(), + }; + let indices_b: Vec = match &active_b { + Some(v) => v.clone(), + None => (0..chunk_len).collect(), + }; + + let mut fingerprints: Vec> = + Vec::with_capacity(indices_a.len() + indices_b.len()); + for &i in &indices_a { + fingerprints.push(compute_fp(interaction_a, &bus_id_a, chunk_start + i)); + } + for &i in &indices_b { + fingerprints.push(compute_fp(interaction_b, &bus_id_b, chunk_start + i)); } - // Phase 2: batch invert FieldElement::inplace_batch_inverse(&mut fingerprints) .expect("fingerprint is zero - probability of sampling zero is negligible"); - // Phase 3: Compute terms - for (i, result_elem) in result_chunk.iter_mut().enumerate() { + let (fp_a_inv, fp_b_inv) = fingerprints.split_at(indices_a.len()); + + // Scatter a-terms (overwrite: result_chunk starts at zero). + for (k, &i) in indices_a.iter().enumerate() { let row = chunk_start + i; - let mut acc = FieldElement::::zero(); - for (k, interaction) in interactions.iter().enumerate() { - let m = interaction - .multiplicity - .evaluate_at_row(main_segment_cols, row); - let term = &m * &fingerprints[k * chunk_len + i]; - acc += if interaction.is_sender { term } else { -term }; - } - *result_elem = acc; + let m_a = interaction_a + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let term = &m_a * &fp_a_inv[k]; + result_chunk[i] = if negate_a { -term } else { term }; + } + // Add b-terms on top of whatever a left behind (possibly zero). + for (k, &i) in indices_b.iter().enumerate() { + let row = chunk_start + i; + let m_b = interaction_b + .multiplicity + .evaluate_at_row(main_segment_cols, row); + let term = &m_b * &fp_b_inv[k]; + let term = if negate_b { -term } else { term }; + result_chunk[i] = &result_chunk[i] + term; } }; @@ -1631,7 +1937,7 @@ where // Compute each interaction's individual term column for summing for interaction in interactions.iter() { let individual_terms = compute_logup_term_column( - &[interaction], + interaction, main_segment_cols, trace_len, challenges, @@ -1664,7 +1970,51 @@ fn compute_multiplicity_from_step, B: IsField>( step: &TableView, multiplicity: &Multiplicity, ) -> FieldElement { - multiplicity.evaluate_with(|col| step.get_main_evaluation_element(0, col).clone()) + match multiplicity { + Multiplicity::One => FieldElement::::one(), + Multiplicity::Column(col) => step.get_main_evaluation_element(0, *col).clone(), + Multiplicity::Sum(col_a, col_b) => { + step.get_main_evaluation_element(0, *col_a) + + step.get_main_evaluation_element(0, *col_b) + } + Multiplicity::Negated(col) => { + FieldElement::::one() - step.get_main_evaluation_element(0, *col) + } + Multiplicity::Diff(col_a, col_b) => { + step.get_main_evaluation_element(0, *col_a) + - step.get_main_evaluation_element(0, *col_b) + } + Multiplicity::Sum3(col_a, col_b, col_c) => { + step.get_main_evaluation_element(0, *col_a) + + step.get_main_evaluation_element(0, *col_b) + + step.get_main_evaluation_element(0, *col_c) + } + Multiplicity::Linear(terms) => { + let mut result = FieldElement::::zero(); + for term in terms { + match term { + LinearTerm::Column { + coefficient, + column, + } => { + let coeff = FieldElement::::from(*coefficient); + result += step.get_main_evaluation_element(0, *column) * coeff; + } + LinearTerm::ColumnUnsigned { + coefficient, + column, + } => { + let coeff = FieldElement::::from(*coefficient); + result += step.get_main_evaluation_element(0, *column) * coeff; + } + LinearTerm::Constant(value) => { + result += FieldElement::::from(*value); + } + } + } + result + } + } } /// Computes the fingerprint for an interaction from a `TableView`. From 5156ba72f5b2830c24806d6ece779f4a8c5b901f Mon Sep 17 00:00:00 2001 From: diegokingston Date: Fri, 24 Apr 2026 18:40:55 -0300 Subject: [PATCH 2/3] fix(ci): rustfmt + gate pre-existing unused Polynomial import behind debug_assertions - `cargo fmt` on sparse LogUp changes - Polynomial import in constraints/evaluator.rs is only used inside `#[cfg(all(debug_assertions, not(feature = "parallel")))]`; match the import cfg to the use site so release builds without `parallel` don't warn. (Pre-existing on origin/main but surfaced by CI lint here.) --- crypto/stark/src/lookup.rs | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index b9c196e5f..d359155f7 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -1787,23 +1787,22 @@ where } // Sparse (or half-sparse) path: gather fingerprints only for active rows. - let compute_fp = - |interaction: &BusInteraction, bus_id_f: &FieldElement, row: usize| { - let mut lc = bus_id_f * &alpha_powers[0]; - let mut alpha_offset = 1; - for bv in &interaction.values { - let consumed = bv.accumulate_fingerprint( - main_segment_cols, - row, - &alpha_powers, - alpha_offset, - &mut lc, - &shifts, - ); - alpha_offset += consumed; - } - z - &lc - }; + let compute_fp = |interaction: &BusInteraction, bus_id_f: &FieldElement, row: usize| { + let mut lc = bus_id_f * &alpha_powers[0]; + let mut alpha_offset = 1; + for bv in &interaction.values { + let consumed = bv.accumulate_fingerprint( + main_segment_cols, + row, + &alpha_powers, + alpha_offset, + &mut lc, + &shifts, + ); + alpha_offset += consumed; + } + z - &lc + }; let indices_a: Vec = match &active_a { Some(v) => v.clone(), From aed425fc9b0d8769bfbbe00460915f4309bce9a2 Mon Sep 17 00:00:00 2001 From: diegokingston Date: Tue, 26 May 2026 15:30:13 -0300 Subject: [PATCH 3/3] fix(stark/lookup): apply reviewer feedback on sparse-gather MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three improvements requested by Codex and Claude review: 1. `sparse_active_rows` — early-abort the active-row collector once `active.len()` exceeds the sparse threshold. The old `filter().collect()` walked the whole chunk and built a full `Vec` before deciding to fall back to dense. In non-parallel builds where `chunk_len == trace_len` this could allocate ~7N/8 entries only to discard them; for large traces that was a memory and time regression. The new bounded collector caps work and allocation at the threshold. 2. Batched term-column builder — replace the `(0..chunk_len).collect()` materialization for the dense side of a half-sparse pair with an `ActiveIter` enum that yields indices from either the sparse `Vec` or the dense `Range` without owning a vector. Also drops the redundant `v.clone()` Claude flagged at lines 1808-1815. 3. Docstring — replace the dangling reference to `SPARSE_ACTIVE_FRAC` (a constant that never existed) with the actual `SPARSE_NUM / SPARSE_DEN` ratio. No semantic change to fingerprint computation, batch inversion, or scatter: just trimming allocations and a stale comment. Tests: `cargo test -p stark` → 125/125. `cargo test -p lambda-vm-prover --lib` → 281 pass / 77 pre-existing failures (identical to main). `make lint` clean. --- crypto/stark/src/lookup.rs | 112 +++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 42 deletions(-) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index d359155f7..e352985c0 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -1456,8 +1456,11 @@ where /// is non-zero — which is an over-approximation of `m != 0` (we don't miss any /// non-zero rows, we just might process some m=0 rows due to cancellation). /// -/// The threshold check: if more than `SPARSE_ACTIVE_FRAC` of the chunk is active, -/// return None to fall back to the dense path (gather/scatter overhead not worth it). +/// The threshold check: if more than `SPARSE_NUM / SPARSE_DEN` of the chunk +/// is active, return `None` to fall back to the dense path (gather/scatter +/// overhead not worth it). The scan early-aborts as soon as the active count +/// passes the threshold so we never allocate more than `~chunk_len * 7/8` +/// entries — important for non-parallel builds where `chunk_len = trace_len`. #[inline] fn sparse_active_rows( multiplicity: &Multiplicity, @@ -1469,50 +1472,50 @@ where F: IsField, { let zero = FieldElement::::zero(); - // If more than this fraction of rows are active, skip sparse path. + // If strictly more than this fraction of rows are active, skip sparse path. // (Numerator/denominator comparison to avoid floating point.) const SPARSE_NUM: usize = 7; const SPARSE_DEN: usize = 8; - - let collect_if_sparse = |active: Vec| -> Option> { - if active.len() * SPARSE_DEN > chunk_len * SPARSE_NUM { - None - } else { - Some(active) + let max_active = chunk_len * SPARSE_NUM / SPARSE_DEN; + + // Generic bounded collector: aborts as soon as `active.len()` would exceed + // the sparse threshold, avoiding the wasted allocation Codex flagged for + // dense chunks at full-trace length. + let collect_bounded = |is_active: &dyn Fn(usize) -> bool| -> Option> { + let mut active = Vec::new(); + for i in 0..chunk_len { + if is_active(i) { + if active.len() >= max_active { + return None; + } + active.push(i); + } } + Some(active) }; match multiplicity { Multiplicity::One => None, Multiplicity::Column(col) => { let c = &main_segment_cols[*col]; - let active: Vec = (0..chunk_len) - .filter(|&i| c[chunk_start + i] != zero) - .collect(); - collect_if_sparse(active) + collect_bounded(&|i| c[chunk_start + i] != zero) } Multiplicity::Sum(col_a, col_b) => { let ca = &main_segment_cols[*col_a]; let cb = &main_segment_cols[*col_b]; - let active: Vec = (0..chunk_len) - .filter(|&i| { - let row = chunk_start + i; - ca[row] != zero || cb[row] != zero - }) - .collect(); - collect_if_sparse(active) + collect_bounded(&|i| { + let row = chunk_start + i; + ca[row] != zero || cb[row] != zero + }) } Multiplicity::Sum3(col_a, col_b, col_c) => { let ca = &main_segment_cols[*col_a]; let cb = &main_segment_cols[*col_b]; let cc = &main_segment_cols[*col_c]; - let active: Vec = (0..chunk_len) - .filter(|&i| { - let row = chunk_start + i; - ca[row] != zero || cb[row] != zero || cc[row] != zero - }) - .collect(); - collect_if_sparse(active) + collect_bounded(&|i| { + let row = chunk_start + i; + ca[row] != zero || cb[row] != zero || cc[row] != zero + }) } // Negated = 1 - col is dense (usually 1 when col is a bit flag). // Diff and Linear could cancel arbitrarily; safest to treat as dense. @@ -1520,6 +1523,35 @@ where } } +/// Iterator over active row indices within a chunk: either the explicit sparse +/// index slice or the full `0..chunk_len` range when the interaction is dense. +/// Used by the batched term-column builder to walk active rows without +/// materializing `(0..chunk_len).collect::>()` for the dense side of a +/// half-sparse pair (Claude/Codex review finding). +enum ActiveIter<'a> { + Sparse(core::slice::Iter<'a, usize>), + Dense(core::ops::Range), +} + +impl<'a> ActiveIter<'a> { + fn new(active: Option<&'a Vec>, chunk_len: usize) -> Self { + match active { + Some(v) => ActiveIter::Sparse(v.iter()), + None => ActiveIter::Dense(0..chunk_len), + } + } +} + +impl Iterator for ActiveIter<'_> { + type Item = usize; + fn next(&mut self) -> Option { + match self { + ActiveIter::Sparse(it) => it.next().copied(), + ActiveIter::Dense(r) => r.next(), + } + } +} + /// Computes a term column for a table interaction without writing to the trace. /// /// Each row contains the LogUp quotient: `term[i] = sign * multiplicity[i] / fingerprint[i]` @@ -1804,31 +1836,27 @@ where z - &lc }; - let indices_a: Vec = match &active_a { - Some(v) => v.clone(), - None => (0..chunk_len).collect(), - }; - let indices_b: Vec = match &active_b { - Some(v) => v.clone(), - None => (0..chunk_len).collect(), - }; + // Length per side without materializing the index vector. For dense + // interactions this is just `chunk_len`; for sparse it's the kept + // active vector's length. + let len_a = active_a.as_ref().map(|v| v.len()).unwrap_or(chunk_len); + let len_b = active_b.as_ref().map(|v| v.len()).unwrap_or(chunk_len); - let mut fingerprints: Vec> = - Vec::with_capacity(indices_a.len() + indices_b.len()); - for &i in &indices_a { + let mut fingerprints: Vec> = Vec::with_capacity(len_a + len_b); + for i in ActiveIter::new(active_a.as_ref(), chunk_len) { fingerprints.push(compute_fp(interaction_a, &bus_id_a, chunk_start + i)); } - for &i in &indices_b { + for i in ActiveIter::new(active_b.as_ref(), chunk_len) { fingerprints.push(compute_fp(interaction_b, &bus_id_b, chunk_start + i)); } FieldElement::inplace_batch_inverse(&mut fingerprints) .expect("fingerprint is zero - probability of sampling zero is negligible"); - let (fp_a_inv, fp_b_inv) = fingerprints.split_at(indices_a.len()); + let (fp_a_inv, fp_b_inv) = fingerprints.split_at(len_a); // Scatter a-terms (overwrite: result_chunk starts at zero). - for (k, &i) in indices_a.iter().enumerate() { + for (k, i) in ActiveIter::new(active_a.as_ref(), chunk_len).enumerate() { let row = chunk_start + i; let m_a = interaction_a .multiplicity @@ -1837,7 +1865,7 @@ where result_chunk[i] = if negate_a { -term } else { term }; } // Add b-terms on top of whatever a left behind (possibly zero). - for (k, &i) in indices_b.iter().enumerate() { + for (k, i) in ActiveIter::new(active_b.as_ref(), chunk_len).enumerate() { let row = chunk_start + i; let m_b = interaction_b .multiplicity