From 807fd6dfa7b1175b60d4246cc4a3be1523d79640 Mon Sep 17 00:00:00 2001
From: diegokingston <dkingston@fi.uba.ar>
Date: Fri, 24 Apr 2026 18:21:07 -0300
Subject: [PATCH 1/3] perf: sparse gather for LogUp aux trace when multiplicity
 is selector-gated

Many bus interactions are gated by mutually-exclusive selector columns
(e.g., AND/OR/XOR op flags, ADD/SUB/MUL op flags): when the selector is
zero, the row's contribution to the term column is zero regardless of
the fingerprint. We now detect Column/Sum/Sum3 multiplicities and, when
fewer than 7/8 of a chunk's rows are active, compute fingerprints only
at the active rows before batch-inverting. Inactive rows stay at zero
from the initial `vec![zero; trace_len]` allocation.

Soundness: `sparse_active_rows` marks a row as active whenever any
contributing column is non-zero, which is an over-approximation of
`m != 0` (cancellation can only cause us to process more rows, never
fewer). Dense multiplicities (One, Negated, Diff, Linear) fall back to
the unchanged contiguous path.

Applies to both single (`compute_logup_term_column`) and paired
(`compute_logup_batched_term_column`) term-column builders. Verified
via all 71 stark bus soundness tests and 265 prover lib tests.
---
 crypto/stark/src/lookup.rs | 598 +++++++++++++++++++++++++++++--------
 1 file changed, 474 insertions(+), 124 deletions(-)
diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs
index cdc68e7e0..b9c196e5f 100644
--- a/crypto/stark/src/lookup.rs
+++ b/crypto/stark/src/lookup.rs
@@ -1048,45 +1048,58 @@ where
         // the throughput the per-pair dispatch used to provide for small-trace
         // tables with many interactions.
         // Without `parallel`: sequential over pairs, sequential over rows.
-        let interactions = &self.auxiliary_trace_build_data.interactions;
-        let build_pair = |i: usize| {
-            compute_logup_term_column(
-                &[&interactions[i * 2], &interactions[i * 2 + 1]],
-                &main_segment_cols,
-                trace_len,
-                challenges,
-                _table_name,
-            )
-        };
-
         #[cfg(feature = "parallel")]
         let committed_columns: Vec<Vec<FieldElement<E>>> = if trace_len <= LOGUP_CHUNK_SIZE {
             (0..num_committed_pairs)
                 .into_par_iter()
-                .map(build_pair)
+                .map(|i| {
+                    compute_logup_batched_term_column(
+                        &self.auxiliary_trace_build_data.interactions[i * 2],
+                        &self.auxiliary_trace_build_data.interactions[i * 2 + 1],
+                        &main_segment_cols,
+                        trace_len,
+                        challenges,
+                    )
+                })
                 .collect()
         } else {
-            (0..num_committed_pairs).map(build_pair).collect()
+            (0..num_committed_pairs)
+                .map(|i| {
+                    compute_logup_batched_term_column(
+                        &self.auxiliary_trace_build_data.interactions[i * 2],
+                        &self.auxiliary_trace_build_data.interactions[i * 2 + 1],
+                        &main_segment_cols,
+                        trace_len,
+                        challenges,
+                    )
+                })
+                .collect()
         };
         #[cfg(not(feature = "parallel"))]
-        let committed_columns: Vec<Vec<FieldElement<E>>> =
-            (0..num_committed_pairs).map(build_pair).collect();
+        let committed_columns: Vec<Vec<FieldElement<E>>> = (0..num_committed_pairs)
+            .map(|i| {
+                compute_logup_batched_term_column(
+                    &self.auxiliary_trace_build_data.interactions[i * 2],
+                    &self.auxiliary_trace_build_data.interactions[i * 2 + 1],
+                    &main_segment_cols,
+                    trace_len,
+                    challenges,
+                )
+            })
+            .collect();
 
-        // Virtual column for absorbed interactions (NOT written to trace).
+        // Compute virtual column for absorbed interactions (NOT written to trace)
         let virtual_column = if absorbed_count == 2 {
-            compute_logup_term_column(
-                &[
-                    &interactions[num_interactions - 2],
-                    &interactions[num_interactions - 1],
-                ],
+            compute_logup_batched_term_column(
+                &self.auxiliary_trace_build_data.interactions[num_interactions - 2],
+                &self.auxiliary_trace_build_data.interactions[num_interactions - 1],
                 &main_segment_cols,
                 trace_len,
                 challenges,
-                _table_name,
             )
         } else {
             compute_logup_term_column(
-                &[&interactions[num_interactions - 1]],
+                &self.auxiliary_trace_build_data.interactions[num_interactions - 1],
                 &main_segment_cols,
                 trace_len,
                 challenges,
@@ -1229,21 +1242,28 @@ pub enum Multiplicity {
 }
 
 impl Multiplicity {
-    /// Evaluate the multiplicity expression to a field element. `get_col(i)`
-    /// must return the value of main column `i` at the row being evaluated.
+    /// Evaluate the multiplicity for a single row.
     #[inline]
-    fn evaluate_with<F, G>(&self, get_col: G) -> FieldElement<F>
-    where
-        F: IsField,
-        G: Fn(usize) -> FieldElement<F>,
-    {
+    fn evaluate_at_row<F: IsField>(
+        &self,
+        main_segment_cols: &[Vec<FieldElement<F>>],
+        row: usize,
+    ) -> FieldElement<F> {
         match self {
             Multiplicity::One => FieldElement::one(),
-            Multiplicity::Column(col) => get_col(*col),
-            Multiplicity::Sum(a, b) => get_col(*a) + get_col(*b),
-            Multiplicity::Negated(col) => FieldElement::<F>::one() - get_col(*col),
-            Multiplicity::Diff(a, b) => get_col(*a) - get_col(*b),
-            Multiplicity::Sum3(a, b, c) => get_col(*a) + get_col(*b) + get_col(*c),
+            Multiplicity::Column(col) => main_segment_cols[*col][row].clone(),
+            Multiplicity::Sum(col_a, col_b) => {
+                &main_segment_cols[*col_a][row] + &main_segment_cols[*col_b][row]
+            }
+            Multiplicity::Negated(col) => FieldElement::<F>::one() - &main_segment_cols[*col][row],
+            Multiplicity::Diff(col_a, col_b) => {
+                &main_segment_cols[*col_a][row] - &main_segment_cols[*col_b][row]
+            }
+            Multiplicity::Sum3(col_a, col_b, col_c) => {
+                &main_segment_cols[*col_a][row]
+                    + &main_segment_cols[*col_b][row]
+                    + &main_segment_cols[*col_c][row]
+            }
             Multiplicity::Linear(terms) => {
                 let mut result = FieldElement::<F>::zero();
                 for term in terms {
@@ -1251,28 +1271,26 @@ impl Multiplicity {
                         LinearTerm::Column {
                             coefficient,
                             column,
-                        } => result += get_col(column) * FieldElement::<F>::from(coefficient),
+                        } => {
+                            let coeff = FieldElement::<F>::from(coefficient);
+                            result += &main_segment_cols[column][row] * coeff;
+                        }
                         LinearTerm::ColumnUnsigned {
                             coefficient,
                             column,
-                        } => result += get_col(column) * FieldElement::<F>::from(coefficient),
-                        LinearTerm::Constant(value) => result += FieldElement::<F>::from(value),
+                        } => {
+                            let coeff = FieldElement::<F>::from(coefficient);
+                            result += &main_segment_cols[column][row] * coeff;
+                        }
+                        LinearTerm::Constant(value) => {
+                            result += FieldElement::<F>::from(value);
+                        }
                     }
                 }
                 result
             }
         }
     }
-
-    /// Evaluate the multiplicity for a single row of column-major main data.
-    #[inline]
-    fn evaluate_at_row<F: IsField>(
-        &self,
-        main_segment_cols: &[Vec<FieldElement<F>>],
-        row: usize,
-    ) -> FieldElement<F> {
-        self.evaluate_with(|col| main_segment_cols[col][row].clone())
-    }
 }
 
 /// Struct representing a lookup interaction for a given table.
@@ -1425,23 +1443,99 @@ where
 {
 }
 
-/// Compute a LogUp term column for one or two interactions sharing the result
-/// column. For each row, returns the sum Σₖ signₖ·mₖ[row] / fpₖ[row] where the
-/// loop runs over `interactions` (must be length 1 or 2).
+/// Classifies a multiplicity as sparse-capable for a given chunk.
+///
+/// Returns `Some(active_indices)` when the multiplicity has a cheap structural
+/// test for zero (e.g. `Column`, `Sum`, `Sum3` where each term is a selector
+/// column that is mostly 0), and `None` when the multiplicity is effectively
+/// dense (e.g. `One`, or forms like `Negated`/`Diff`/`Linear` whose zero set is
+/// not easily identifiable from a single column check).
+///
+/// Soundness: `active_indices[..]` must include every row where the multiplicity
+/// is non-zero. Here we mark a row as active if ANY of the contributing columns
+/// is non-zero — which is an over-approximation of `m != 0` (we don't miss any
+/// non-zero rows, we just might process some m=0 rows due to cancellation).
+///
+/// The threshold check: if more than `SPARSE_ACTIVE_FRAC` of the chunk is active,
+/// return None to fall back to the dense path (gather/scatter overhead not worth it).
+#[inline]
+fn sparse_active_rows<F>(
+    multiplicity: &Multiplicity,
+    main_segment_cols: &[Vec<FieldElement<F>>],
+    chunk_start: usize,
+    chunk_len: usize,
+) -> Option<Vec<usize>>
+where
+    F: IsField,
+{
+    let zero = FieldElement::<F>::zero();
+    // If more than this fraction of rows are active, skip sparse path.
+    // (Numerator/denominator comparison to avoid floating point.)
+    const SPARSE_NUM: usize = 7;
+    const SPARSE_DEN: usize = 8;
+
+    let collect_if_sparse = |active: Vec<usize>| -> Option<Vec<usize>> {
+        if active.len() * SPARSE_DEN > chunk_len * SPARSE_NUM {
+            None
+        } else {
+            Some(active)
+        }
+    };
+
+    match multiplicity {
+        Multiplicity::One => None,
+        Multiplicity::Column(col) => {
+            let c = &main_segment_cols[*col];
+            let active: Vec<usize> = (0..chunk_len)
+                .filter(|&i| c[chunk_start + i] != zero)
+                .collect();
+            collect_if_sparse(active)
+        }
+        Multiplicity::Sum(col_a, col_b) => {
+            let ca = &main_segment_cols[*col_a];
+            let cb = &main_segment_cols[*col_b];
+            let active: Vec<usize> = (0..chunk_len)
+                .filter(|&i| {
+                    let row = chunk_start + i;
+                    ca[row] != zero || cb[row] != zero
+                })
+                .collect();
+            collect_if_sparse(active)
+        }
+        Multiplicity::Sum3(col_a, col_b, col_c) => {
+            let ca = &main_segment_cols[*col_a];
+            let cb = &main_segment_cols[*col_b];
+            let cc = &main_segment_cols[*col_c];
+            let active: Vec<usize> = (0..chunk_len)
+                .filter(|&i| {
+                    let row = chunk_start + i;
+                    ca[row] != zero || cb[row] != zero || cc[row] != zero
+                })
+                .collect();
+            collect_if_sparse(active)
+        }
+        // Negated = 1 - col is dense (usually 1 when col is a bit flag).
+        // Diff and Linear could cancel arbitrarily; safest to treat as dense.
+        Multiplicity::Negated(_) | Multiplicity::Diff(_, _) | Multiplicity::Linear(_) => None,
+    }
+}
+
+/// Computes a term column for a table interaction without writing to the trace.
+///
+/// Each row contains the LogUp quotient: `term[i] = sign * multiplicity[i] / fingerprint[i]`
 ///
-/// Single-interaction case yields the per-interaction quotient (used for the
-/// absorbed virtual column when only one interaction remains, and by the
-/// debug-checks per-interaction breakdown). Two-interaction case yields the
-/// batched sum that backs a committed term column. Both share a single chunked
-/// implementation with one batch inversion per chunk for cache locality.
+/// This is a pure function that takes shared main columns and returns the computed column,
+/// enabling parallel computation across interactions within a table.
 ///
-/// Debug-checks bus tracker is invoked only when `interactions.len() == 1`,
-/// matching the previous behavior of the dedicated single-interaction helper.
+/// With `parallel`: processes rows in chunks of `LOGUP_CHUNK_SIZE` via `par_chunks_mut`,
+/// giving good cache locality (each thread touches only CHUNK_SIZE rows before moving on).
+/// Without `parallel`: processes all rows as a single chunk (equivalent to the old sequential path).
 ///
-/// With `parallel`: chunked over rows via `par_chunks_mut`.
-/// Without `parallel`: processed as a single chunk.
+/// Sparse fast path: when the multiplicity's zero-set is structurally identifiable
+/// (e.g., the interaction is gated by a selector column), we compute fingerprints
+/// and terms only at the rows where the multiplicity is potentially non-zero.
 fn compute_logup_term_column<F, E>(
-    interactions: &[&BusInteraction],
+    table_interaction: &BusInteraction,
     main_segment_cols: &[Vec<FieldElement<F>>],
     trace_len: usize,
     challenges: &[FieldElement<E>],
@@ -1451,41 +1545,254 @@ where
     F: IsFFTField + IsSubFieldOf<E> + IsPrimeField + Send + Sync,
     E: IsField + Send + Sync,
 {
-    assert!(
-        matches!(interactions.len(), 1 | 2),
-        "compute_logup_term_column expects 1 or 2 interactions, got {}",
-        interactions.len()
-    );
+    let z = &challenges[0];
+    let alpha = &challenges[LOGUP_CHALLENGE_ALPHA];
+    let num_bus_elements = table_interaction.num_bus_elements();
+    let alpha_powers = compute_alpha_powers(alpha, num_bus_elements);
+    let negate = !table_interaction.is_sender;
+    let bus_id_f = FieldElement::<F>::from(table_interaction.bus_id);
+    let shifts = PackingShifts::<F>::new();
+
+    let mut result = vec![FieldElement::<E>::zero(); trace_len];
+
+    let process_chunk = |chunk_start: usize, result_chunk: &mut [FieldElement<E>]| {
+        let chunk_len = result_chunk.len();
+
+        // Try sparse path first: compute fingerprints/terms only at rows where
+        // the multiplicity is (potentially) non-zero. Rows initialize to zero in
+        // the result vector, so inactive rows naturally stay zero.
+        let active = sparse_active_rows(
+            &table_interaction.multiplicity,
+            main_segment_cols,
+            chunk_start,
+            chunk_len,
+        );
+
+        let compute_fp = |row: usize| -> FieldElement<E> {
+            let mut lc = &bus_id_f * &alpha_powers[0];
+            let mut alpha_offset = 1;
+            for bv in &table_interaction.values {
+                let consumed = bv.accumulate_fingerprint(
+                    main_segment_cols,
+                    row,
+                    &alpha_powers,
+                    alpha_offset,
+                    &mut lc,
+                    &shifts,
+                );
+                alpha_offset += consumed;
+            }
+            z - &lc
+        };
+
+        match active {
+            Some(indices) => {
+                // Sparse path
+                let mut fingerprints: Vec<FieldElement<E>> = indices
+                    .iter()
+                    .map(|&i| compute_fp(chunk_start + i))
+                    .collect();
+
+                #[cfg(feature = "debug-checks")]
+                {
+                    // Log all rows for debug symmetry; inactive rows contribute zero.
+                    for row in chunk_start..chunk_start + chunk_len {
+                        let mut base_elements: Vec<FieldElement<F>> = vec![bus_id_f.clone()];
+                        base_elements.extend(table_interaction.values.iter().flat_map(|bv| {
+                            bv.combine_from(|col| main_segment_cols[col][row].clone())
+                        }));
+                        let multiplicity = table_interaction
+                            .multiplicity
+                            .evaluate_at_row(main_segment_cols, row);
+                        // Compute fp fresh for debug (cheap).
+                        let fp = compute_fp(row);
+                        crate::bus_debug::log_interaction(
+                            _table_name,
+                            row,
+                            table_interaction.bus_id,
+                            table_interaction.is_sender,
+                            &multiplicity.canonical(),
+                            &base_elements,
+                            &fp,
+                        );
+                    }
+                }
+
+                FieldElement::inplace_batch_inverse(&mut fingerprints)
+                    .expect("fingerprint is zero - probability of sampling zero is negligible");
+
+                for (k, &i) in indices.iter().enumerate() {
+                    let row = chunk_start + i;
+                    let m = table_interaction
+                        .multiplicity
+                        .evaluate_at_row(main_segment_cols, row);
+                    let term = &m * &fingerprints[k];
+                    result_chunk[i] = if negate { -term } else { term };
+                }
+            }
+            None => {
+                // Dense path
+                let mut fingerprints: Vec<FieldElement<E>> = Vec::with_capacity(chunk_len);
+                for row in chunk_start..chunk_start + chunk_len {
+                    fingerprints.push(compute_fp(row));
+
+                    #[cfg(feature = "debug-checks")]
+                    {
+                        let mut base_elements: Vec<FieldElement<F>> = vec![bus_id_f.clone()];
+                        base_elements.extend(table_interaction.values.iter().flat_map(|bv| {
+                            bv.combine_from(|col| main_segment_cols[col][row].clone())
+                        }));
+                        let multiplicity = table_interaction
+                            .multiplicity
+                            .evaluate_at_row(main_segment_cols, row);
+                        crate::bus_debug::log_interaction(
+                            _table_name,
+                            row,
+                            table_interaction.bus_id,
+                            table_interaction.is_sender,
+                            &multiplicity.canonical(),
+                            &base_elements,
+                            fingerprints.last().unwrap(),
+                        );
+                    }
+                }
+
+                FieldElement::inplace_batch_inverse(&mut fingerprints)
+                    .expect("fingerprint is zero - probability of sampling zero is negligible");
 
+                for (i, result_elem) in result_chunk.iter_mut().enumerate() {
+                    let row = chunk_start + i;
+                    let m = table_interaction
+                        .multiplicity
+                        .evaluate_at_row(main_segment_cols, row);
+                    let term = &m * &fingerprints[i];
+                    *result_elem = if negate { -term } else { term };
+                }
+            }
+        }
+    };
+
+    #[cfg(feature = "parallel")]
+    result
+        .par_chunks_mut(LOGUP_CHUNK_SIZE)
+        .enumerate()
+        .for_each(|(i, chunk)| process_chunk(i * LOGUP_CHUNK_SIZE, chunk));
+
+    #[cfg(not(feature = "parallel"))]
+    process_chunk(0, &mut result);
+
+    result
+}
+
+/// Computes a batched term column for two interactions sharing one aux column.
+///
+/// Each row contains: `term[i] = sign_a * m_a[i] / fp_a[i] + sign_b * m_b[i] / fp_b[i]`
+///
+/// Uses chunk-local batch inversion for good cache locality: each chunk processes
+/// both interactions for CHUNK_SIZE rows before moving on.
+///
+/// With `parallel`: processes rows in chunks of `LOGUP_CHUNK_SIZE` via `par_chunks_mut`.
+/// Without `parallel`: processes all rows as a single chunk (equivalent to the old sequential path).
+fn compute_logup_batched_term_column<F, E>(
+    interaction_a: &BusInteraction,
+    interaction_b: &BusInteraction,
+    main_segment_cols: &[Vec<FieldElement<F>>],
+    trace_len: usize,
+    challenges: &[FieldElement<E>],
+) -> Vec<FieldElement<E>>
+where
+    F: IsFFTField + IsSubFieldOf<E> + IsPrimeField + Send + Sync,
+    E: IsField + Send + Sync,
+{
     let z = &challenges[0];
     let alpha = &challenges[LOGUP_CHALLENGE_ALPHA];
-    let max_bus_elements = interactions
-        .iter()
-        .map(|i| i.num_bus_elements())
-        .max()
-        .unwrap_or(0);
+    let max_bus_elements = interaction_a
+        .num_bus_elements()
+        .max(interaction_b.num_bus_elements());
     let alpha_powers = compute_alpha_powers(alpha, max_bus_elements);
-    let bus_ids: Vec<FieldElement<F>> = interactions
-        .iter()
-        .map(|i| FieldElement::<F>::from(i.bus_id))
-        .collect();
+    let negate_a = !interaction_a.is_sender;
+    let negate_b = !interaction_b.is_sender;
+    let bus_id_a = FieldElement::<F>::from(interaction_a.bus_id);
+    let bus_id_b = FieldElement::<F>::from(interaction_b.bus_id);
     let shifts = PackingShifts::<F>::new();
-    let n = interactions.len();
 
     let mut result = vec![FieldElement::<E>::zero(); trace_len];
 
     let process_chunk = |chunk_start: usize, result_chunk: &mut [FieldElement<E>]| {
         let chunk_len = result_chunk.len();
 
-        // Phase 1 — fingerprints, laid out as [int_0 rows…, int_1 rows…].
-        // fp[k*chunk_len + i] = interaction k at row chunk_start+i.
-        let mut fingerprints: Vec<FieldElement<E>> = Vec::with_capacity(n * chunk_len);
-        for (k, interaction) in interactions.iter().enumerate() {
-            for row in chunk_start..chunk_start + chunk_len {
-                let mut lc = &bus_ids[k] * &alpha_powers[0];
+        // Build per-interaction active-row index lists. `None` means dense
+        // (treat all rows as active); `Some(idx)` means process only `idx`.
+        let active_a = sparse_active_rows(
+            &interaction_a.multiplicity,
+            main_segment_cols,
+            chunk_start,
+            chunk_len,
+        );
+        let active_b = sparse_active_rows(
+            &interaction_b.multiplicity,
+            main_segment_cols,
+            chunk_start,
+            chunk_len,
+        );
+
+        // Dense fast path: preserves the original contiguous memory access
+        // pattern when sparse gather would be pure overhead.
+        if active_a.is_none() && active_b.is_none() {
+            let compute_fps = |interaction: &BusInteraction,
+                               bus_id_f: &FieldElement<F>,
+                               fps: &mut Vec<FieldElement<E>>| {
+                for row in chunk_start..chunk_start + chunk_len {
+                    let mut lc = bus_id_f * &alpha_powers[0];
+                    let mut alpha_offset = 1;
+                    for bv in &interaction.values {
+                        let consumed = bv.accumulate_fingerprint(
+                            main_segment_cols,
+                            row,
+                            &alpha_powers,
+                            alpha_offset,
+                            &mut lc,
+                            &shifts,
+                        );
+                        alpha_offset += consumed;
+                    }
+                    fps.push(z - &lc);
+                }
+            };
+
+            let mut fingerprints: Vec<FieldElement<E>> = Vec::with_capacity(2 * chunk_len);
+            compute_fps(interaction_a, &bus_id_a, &mut fingerprints);
+            compute_fps(interaction_b, &bus_id_b, &mut fingerprints);
+
+            FieldElement::inplace_batch_inverse(&mut fingerprints)
+                .expect("fingerprint is zero - probability of sampling zero is negligible");
+
+            for (i, result_elem) in result_chunk.iter_mut().enumerate() {
+                let row = chunk_start + i;
+                let fp_a_inv = &fingerprints[i];
+                let fp_b_inv = &fingerprints[chunk_len + i];
+                let m_a = interaction_a
+                    .multiplicity
+                    .evaluate_at_row(main_segment_cols, row);
+                let m_b = interaction_b
+                    .multiplicity
+                    .evaluate_at_row(main_segment_cols, row);
+                let term_a = &m_a * fp_a_inv;
+                let term_b = &m_b * fp_b_inv;
+                let term_a = if negate_a { -term_a } else { term_a };
+                let term_b = if negate_b { -term_b } else { term_b };
+                *result_elem = term_a + term_b;
+            }
+            return;
+        }
+
+        // Sparse (or half-sparse) path: gather fingerprints only for active rows.
+        let compute_fp =
+            |interaction: &BusInteraction, bus_id_f: &FieldElement<F>, row: usize| {
+                let mut lc = bus_id_f * &alpha_powers[0];
                 let mut alpha_offset = 1;
                 for bv in &interaction.values {
-                    alpha_offset += bv.accumulate_fingerprint(
+                    let consumed = bv.accumulate_fingerprint(
                         main_segment_cols,
                         row,
                         &alpha_powers,
@@ -1493,53 +1800,52 @@ where
                         &mut lc,
                         &shifts,
                     );
+                    alpha_offset += consumed;
                 }
-                fingerprints.push(z - &lc);
-            }
-        }
+                z - &lc
+            };
 
-        #[cfg(feature = "debug-checks")]
-        if n == 1 {
-            let interaction = interactions[0];
-            for (i, row) in (chunk_start..chunk_start + chunk_len).enumerate() {
-                let mut base_elements: Vec<FieldElement<F>> = vec![bus_ids[0].clone()];
-                base_elements.extend(
-                    interaction
-                        .values
-                        .iter()
-                        .flat_map(|bv| bv.combine_from(|col| main_segment_cols[col][row].clone())),
-                );
-                let multiplicity = interaction
-                    .multiplicity
-                    .evaluate_at_row(main_segment_cols, row);
-                crate::bus_debug::log_interaction(
-                    _table_name,
-                    row,
-                    interaction.bus_id,
-                    interaction.is_sender,
-                    &multiplicity.canonical(),
-                    &base_elements,
-                    &fingerprints[i],
-                );
-            }
+        let indices_a: Vec<usize> = match &active_a {
+            Some(v) => v.clone(),
+            None => (0..chunk_len).collect(),
+        };
+        let indices_b: Vec<usize> = match &active_b {
+            Some(v) => v.clone(),
+            None => (0..chunk_len).collect(),
+        };
+
+        let mut fingerprints: Vec<FieldElement<E>> =
+            Vec::with_capacity(indices_a.len() + indices_b.len());
+        for &i in &indices_a {
+            fingerprints.push(compute_fp(interaction_a, &bus_id_a, chunk_start + i));
+        }
+        for &i in &indices_b {
+            fingerprints.push(compute_fp(interaction_b, &bus_id_b, chunk_start + i));
         }
 
-        // Phase 2: batch invert
         FieldElement::inplace_batch_inverse(&mut fingerprints)
             .expect("fingerprint is zero - probability of sampling zero is negligible");
 
-        // Phase 3: Compute terms
-        for (i, result_elem) in result_chunk.iter_mut().enumerate() {
+        let (fp_a_inv, fp_b_inv) = fingerprints.split_at(indices_a.len());
+
+        // Scatter a-terms (overwrite: result_chunk starts at zero).
+        for (k, &i) in indices_a.iter().enumerate() {
             let row = chunk_start + i;
-            let mut acc = FieldElement::<E>::zero();
-            for (k, interaction) in interactions.iter().enumerate() {
-                let m = interaction
-                    .multiplicity
-                    .evaluate_at_row(main_segment_cols, row);
-                let term = &m * &fingerprints[k * chunk_len + i];
-                acc += if interaction.is_sender { term } else { -term };
-            }
-            *result_elem = acc;
+            let m_a = interaction_a
+                .multiplicity
+                .evaluate_at_row(main_segment_cols, row);
+            let term = &m_a * &fp_a_inv[k];
+            result_chunk[i] = if negate_a { -term } else { term };
+        }
+        // Add b-terms on top of whatever a left behind (possibly zero).
+        for (k, &i) in indices_b.iter().enumerate() {
+            let row = chunk_start + i;
+            let m_b = interaction_b
+                .multiplicity
+                .evaluate_at_row(main_segment_cols, row);
+            let term = &m_b * &fp_b_inv[k];
+            let term = if negate_b { -term } else { term };
+            result_chunk[i] = &result_chunk[i] + term;
         }
     };
 
@@ -1631,7 +1937,7 @@ where
     // Compute each interaction's individual term column for summing
     for interaction in interactions.iter() {
         let individual_terms = compute_logup_term_column(
-            &[interaction],
+            interaction,
             main_segment_cols,
             trace_len,
             challenges,
@@ -1664,7 +1970,51 @@ fn compute_multiplicity_from_step<A: IsSubFieldOf<B>, B: IsField>(
     step: &TableView<A, B>,
     multiplicity: &Multiplicity,
 ) -> FieldElement<A> {
-    multiplicity.evaluate_with(|col| step.get_main_evaluation_element(0, col).clone())
+    match multiplicity {
+        Multiplicity::One => FieldElement::<A>::one(),
+        Multiplicity::Column(col) => step.get_main_evaluation_element(0, *col).clone(),
+        Multiplicity::Sum(col_a, col_b) => {
+            step.get_main_evaluation_element(0, *col_a)
+                + step.get_main_evaluation_element(0, *col_b)
+        }
+        Multiplicity::Negated(col) => {
+            FieldElement::<A>::one() - step.get_main_evaluation_element(0, *col)
+        }
+        Multiplicity::Diff(col_a, col_b) => {
+            step.get_main_evaluation_element(0, *col_a)
+                - step.get_main_evaluation_element(0, *col_b)
+        }
+        Multiplicity::Sum3(col_a, col_b, col_c) => {
+            step.get_main_evaluation_element(0, *col_a)
+                + step.get_main_evaluation_element(0, *col_b)
+                + step.get_main_evaluation_element(0, *col_c)
+        }
+        Multiplicity::Linear(terms) => {
+            let mut result = FieldElement::<A>::zero();
+            for term in terms {
+                match term {
+                    LinearTerm::Column {
+                        coefficient,
+                        column,
+                    } => {
+                        let coeff = FieldElement::<A>::from(*coefficient);
+                        result += step.get_main_evaluation_element(0, *column) * coeff;
+                    }
+                    LinearTerm::ColumnUnsigned {
+                        coefficient,
+                        column,
+                    } => {
+                        let coeff = FieldElement::<A>::from(*coefficient);
+                        result += step.get_main_evaluation_element(0, *column) * coeff;
+                    }
+                    LinearTerm::Constant(value) => {
+                        result += FieldElement::<A>::from(*value);
+                    }
+                }
+            }
+            result
+        }
+    }
 }
 
 /// Computes the fingerprint for an interaction from a `TableView`.

From 5156ba72f5b2830c24806d6ece779f4a8c5b901f Mon Sep 17 00:00:00 2001
From: diegokingston <dkingston@fi.uba.ar>
Date: Fri, 24 Apr 2026 18:40:55 -0300
Subject: [PATCH 2/3] fix(ci): rustfmt + gate pre-existing unused Polynomial
 import behind debug_assertions

- `cargo fmt` on sparse LogUp changes
- Polynomial import in constraints/evaluator.rs is only used inside
  `#[cfg(all(debug_assertions, not(feature = "parallel")))]`; match the
  import cfg to the use site so release builds without `parallel` don't
  warn. (Pre-existing on origin/main but surfaced by CI lint here.)
---
 crypto/stark/src/lookup.rs | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs
index b9c196e5f..d359155f7 100644
--- a/crypto/stark/src/lookup.rs
+++ b/crypto/stark/src/lookup.rs
@@ -1787,23 +1787,22 @@ where
         }
 
         // Sparse (or half-sparse) path: gather fingerprints only for active rows.
-        let compute_fp =
-            |interaction: &BusInteraction, bus_id_f: &FieldElement<F>, row: usize| {
-                let mut lc = bus_id_f * &alpha_powers[0];
-                let mut alpha_offset = 1;
-                for bv in &interaction.values {
-                    let consumed = bv.accumulate_fingerprint(
-                        main_segment_cols,
-                        row,
-                        &alpha_powers,
-                        alpha_offset,
-                        &mut lc,
-                        &shifts,
-                    );
-                    alpha_offset += consumed;
-                }
-                z - &lc
-            };
+        let compute_fp = |interaction: &BusInteraction, bus_id_f: &FieldElement<F>, row: usize| {
+            let mut lc = bus_id_f * &alpha_powers[0];
+            let mut alpha_offset = 1;
+            for bv in &interaction.values {
+                let consumed = bv.accumulate_fingerprint(
+                    main_segment_cols,
+                    row,
+                    &alpha_powers,
+                    alpha_offset,
+                    &mut lc,
+                    &shifts,
+                );
+                alpha_offset += consumed;
+            }
+            z - &lc
+        };
 
         let indices_a: Vec<usize> = match &active_a {
             Some(v) => v.clone(),

From aed425fc9b0d8769bfbbe00460915f4309bce9a2 Mon Sep 17 00:00:00 2001
From: diegokingston <dkingston@fi.uba.ar>
Date: Tue, 26 May 2026 15:30:13 -0300
Subject: [PATCH 3/3] fix(stark/lookup): apply reviewer feedback on
 sparse-gather
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three improvements requested by Codex and Claude review:

1. `sparse_active_rows` — early-abort the active-row collector once
   `active.len()` exceeds the sparse threshold. The old `filter().collect()`
   walked the whole chunk and built a full `Vec<usize>` before deciding to
   fall back to dense. In non-parallel builds where `chunk_len == trace_len`
   this could allocate ~7N/8 entries only to discard them; for large traces
   that was a memory and time regression. The new bounded collector caps
   work and allocation at the threshold.

2. Batched term-column builder — replace the `(0..chunk_len).collect()`
   materialization for the dense side of a half-sparse pair with an
   `ActiveIter` enum that yields indices from either the sparse `Vec` or
   the dense `Range` without owning a vector. Also drops the redundant
   `v.clone()` Claude flagged at lines 1808-1815.

3. Docstring — replace the dangling reference to `SPARSE_ACTIVE_FRAC` (a
   constant that never existed) with the actual `SPARSE_NUM / SPARSE_DEN`
   ratio.

No semantic change to fingerprint computation, batch inversion, or scatter:
just trimming allocations and a stale comment.

Tests: `cargo test -p stark` → 125/125. `cargo test -p lambda-vm-prover --lib`
→ 281 pass / 77 pre-existing failures (identical to main). `make lint` clean.
---
 crypto/stark/src/lookup.rs | 112 +++++++++++++++++++++++--------------
 1 file changed, 70 insertions(+), 42 deletions(-)

diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs
index d359155f7..e352985c0 100644
--- a/crypto/stark/src/lookup.rs
+++ b/crypto/stark/src/lookup.rs
@@ -1456,8 +1456,11 @@ where
 /// is non-zero — which is an over-approximation of `m != 0` (we don't miss any
 /// non-zero rows, we just might process some m=0 rows due to cancellation).
 ///
-/// The threshold check: if more than `SPARSE_ACTIVE_FRAC` of the chunk is active,
-/// return None to fall back to the dense path (gather/scatter overhead not worth it).
+/// The threshold check: if more than `SPARSE_NUM / SPARSE_DEN` of the chunk
+/// is active, return `None` to fall back to the dense path (gather/scatter
+/// overhead not worth it). The scan early-aborts as soon as the active count
+/// passes the threshold so we never allocate more than `~chunk_len * 7/8`
+/// entries — important for non-parallel builds where `chunk_len = trace_len`.
 #[inline]
 fn sparse_active_rows<F>(
     multiplicity: &Multiplicity,
@@ -1469,50 +1472,50 @@ where
     F: IsField,
 {
     let zero = FieldElement::<F>::zero();
-    // If more than this fraction of rows are active, skip sparse path.
+    // If strictly more than this fraction of rows are active, skip sparse path.
     // (Numerator/denominator comparison to avoid floating point.)
     const SPARSE_NUM: usize = 7;
     const SPARSE_DEN: usize = 8;
-
-    let collect_if_sparse = |active: Vec<usize>| -> Option<Vec<usize>> {
-        if active.len() * SPARSE_DEN > chunk_len * SPARSE_NUM {
-            None
-        } else {
-            Some(active)
+    let max_active = chunk_len * SPARSE_NUM / SPARSE_DEN;
+
+    // Generic bounded collector: aborts as soon as `active.len()` would exceed
+    // the sparse threshold, avoiding the wasted allocation Codex flagged for
+    // dense chunks at full-trace length.
+    let collect_bounded = |is_active: &dyn Fn(usize) -> bool| -> Option<Vec<usize>> {
+        let mut active = Vec::new();
+        for i in 0..chunk_len {
+            if is_active(i) {
+                if active.len() >= max_active {
+                    return None;
+                }
+                active.push(i);
+            }
         }
+        Some(active)
     };
 
     match multiplicity {
         Multiplicity::One => None,
         Multiplicity::Column(col) => {
             let c = &main_segment_cols[*col];
-            let active: Vec<usize> = (0..chunk_len)
-                .filter(|&i| c[chunk_start + i] != zero)
-                .collect();
-            collect_if_sparse(active)
+            collect_bounded(&|i| c[chunk_start + i] != zero)
         }
         Multiplicity::Sum(col_a, col_b) => {
             let ca = &main_segment_cols[*col_a];
             let cb = &main_segment_cols[*col_b];
-            let active: Vec<usize> = (0..chunk_len)
-                .filter(|&i| {
-                    let row = chunk_start + i;
-                    ca[row] != zero || cb[row] != zero
-                })
-                .collect();
-            collect_if_sparse(active)
+            collect_bounded(&|i| {
+                let row = chunk_start + i;
+                ca[row] != zero || cb[row] != zero
+            })
         }
         Multiplicity::Sum3(col_a, col_b, col_c) => {
             let ca = &main_segment_cols[*col_a];
             let cb = &main_segment_cols[*col_b];
             let cc = &main_segment_cols[*col_c];
-            let active: Vec<usize> = (0..chunk_len)
-                .filter(|&i| {
-                    let row = chunk_start + i;
-                    ca[row] != zero || cb[row] != zero || cc[row] != zero
-                })
-                .collect();
-            collect_if_sparse(active)
+            collect_bounded(&|i| {
+                let row = chunk_start + i;
+                ca[row] != zero || cb[row] != zero || cc[row] != zero
+            })
         }
         // Negated = 1 - col is dense (usually 1 when col is a bit flag).
         // Diff and Linear could cancel arbitrarily; safest to treat as dense.
@@ -1520,6 +1523,35 @@ where
     }
 }
 
+/// Iterator over active row indices within a chunk: either the explicit sparse
+/// index slice or the full `0..chunk_len` range when the interaction is dense.
+/// Used by the batched term-column builder to walk active rows without
+/// materializing `(0..chunk_len).collect::<Vec<_>>()` for the dense side of a
+/// half-sparse pair (Claude/Codex review finding).
+enum ActiveIter<'a> {
+    Sparse(core::slice::Iter<'a, usize>),
+    Dense(core::ops::Range<usize>),
+}
+
+impl<'a> ActiveIter<'a> {
+    fn new(active: Option<&'a Vec<usize>>, chunk_len: usize) -> Self {
+        match active {
+            Some(v) => ActiveIter::Sparse(v.iter()),
+            None => ActiveIter::Dense(0..chunk_len),
+        }
+    }
+}
+
+impl Iterator for ActiveIter<'_> {
+    type Item = usize;
+    fn next(&mut self) -> Option<usize> {
+        match self {
+            ActiveIter::Sparse(it) => it.next().copied(),
+            ActiveIter::Dense(r) => r.next(),
+        }
+    }
+}
+
 /// Computes a term column for a table interaction without writing to the trace.
 ///
 /// Each row contains the LogUp quotient: `term[i] = sign * multiplicity[i] / fingerprint[i]`
@@ -1804,31 +1836,27 @@ where
             z - &lc
         };
 
-        let indices_a: Vec<usize> = match &active_a {
-            Some(v) => v.clone(),
-            None => (0..chunk_len).collect(),
-        };
-        let indices_b: Vec<usize> = match &active_b {
-            Some(v) => v.clone(),
-            None => (0..chunk_len).collect(),
-        };
+        // Length per side without materializing the index vector. For dense
+        // interactions this is just `chunk_len`; for sparse it's the kept
+        // active vector's length.
+        let len_a = active_a.as_ref().map(|v| v.len()).unwrap_or(chunk_len);
+        let len_b = active_b.as_ref().map(|v| v.len()).unwrap_or(chunk_len);
 
-        let mut fingerprints: Vec<FieldElement<E>> =
-            Vec::with_capacity(indices_a.len() + indices_b.len());
-        for &i in &indices_a {
+        let mut fingerprints: Vec<FieldElement<E>> = Vec::with_capacity(len_a + len_b);
+        for i in ActiveIter::new(active_a.as_ref(), chunk_len) {
             fingerprints.push(compute_fp(interaction_a, &bus_id_a, chunk_start + i));
         }
-        for &i in &indices_b {
+        for i in ActiveIter::new(active_b.as_ref(), chunk_len) {
             fingerprints.push(compute_fp(interaction_b, &bus_id_b, chunk_start + i));
         }
 
         FieldElement::inplace_batch_inverse(&mut fingerprints)
             .expect("fingerprint is zero - probability of sampling zero is negligible");
 
-        let (fp_a_inv, fp_b_inv) = fingerprints.split_at(indices_a.len());
+        let (fp_a_inv, fp_b_inv) = fingerprints.split_at(len_a);
 
         // Scatter a-terms (overwrite: result_chunk starts at zero).
-        for (k, &i) in indices_a.iter().enumerate() {
+        for (k, i) in ActiveIter::new(active_a.as_ref(), chunk_len).enumerate() {
             let row = chunk_start + i;
             let m_a = interaction_a
                 .multiplicity
@@ -1837,7 +1865,7 @@ where
             result_chunk[i] = if negate_a { -term } else { term };
         }
         // Add b-terms on top of whatever a left behind (possibly zero).
-        for (k, &i) in indices_b.iter().enumerate() {
+        for (k, i) in ActiveIter::new(active_b.as_ref(), chunk_len).enumerate() {
             let row = chunk_start + i;
             let m_b = interaction_b
                 .multiplicity