From 6c398b1ae339d76f504bb78ace416e451681b6f3 Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Tue, 12 May 2026 14:02:19 -0600 Subject: [PATCH 1/4] dpd: add ecmp del tests for full/fragmented table Signed-off-by: Trey Aspelund --- dpd/src/main.rs | 2 +- dpd/src/route.rs | 277 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 278 insertions(+), 1 deletion(-) diff --git a/dpd/src/main.rs b/dpd/src/main.rs index a2b37dd..a13638e 100644 --- a/dpd/src/main.rs +++ b/dpd/src/main.rs @@ -258,7 +258,7 @@ fn get_sidecar_revision( } impl Switch { - fn new( + pub(crate) fn new( log: slog::Logger, _p4_name: &str, config: config::Config, diff --git a/dpd/src/route.rs b/dpd/src/route.rs index df50344..08b1a06 100644 --- a/dpd/src/route.rs +++ b/dpd/src/route.rs @@ -912,3 +912,280 @@ pub fn init(log: &slog::Logger) -> RouteData { v6_freemap: freemap::FreeMap::new(log, "route_ipv6"), } } + +#[cfg(test)] +mod tests { + use super::*; + use std::net::{Ipv4Addr, Ipv6Addr}; + + const FAKE_ASIC_PORT: u16 = 1; + + fn fake_port_id() -> PortId { + common::ports::RearPort::new(0).unwrap().into() + } + + fn make_route(tgt_ip: IpAddr) -> Route { + Route { + tag: "test".into(), + port_id: fake_port_id(), + link_id: LinkId(0), + tgt_ip, + vlan_id: None, + } + } + + fn make_switch() -> Switch { + // The asic stub locates its bfrt artifacts by walking up from the + // running binary path, which fails for `cargo test` because the test + // executable doesn't end in "dpd". Pin the P4 directory explicitly so + // the test runs from a workspace checkout regardless of host. + let p4_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("workspace root") + .join("target/proto/opt/oxide/dendrite/sidecar"); + // SAFETY: set_var is unsafe wrt concurrent access; tests in this + // module all compute the same value and don't race meaningfully. + unsafe { std::env::set_var("P4_DIR", &p4_dir) }; + + let log = common::logging::init( + "route-test", + &None, + common::logging::LogFormat::Human, + ) + .expect("test logger"); + let mut switch = + Switch::new(log, "sidecar", crate::config::Config::default()) + .expect("construct stub switch"); + table::init(&mut switch).expect("init tables"); + switch + } + + /// Install one entry per target onto `victim`. Callers use this against + /// an empty table, so any failure is a bug in the test setup. + fn install_victim( + switch: &Switch, + rd: &mut RouteData, + victim: IpNet, + targets: impl IntoIterator, + ) { + for tgt in targets { + add_route_locked( + switch, + rd, + victim, + make_route(tgt), + FAKE_ASIC_PORT, + ) + .expect("victim add must succeed on an empty table"); + } + } + + /// Install single-target fillers (CIDRs generated by `cidr_at(i)` for + /// `i = 0, 1, ...`) until any add returns [`DpdError::TableFull`], + /// returning the CIDRs that were accepted. + fn fill_table( + switch: &Switch, + rd: &mut RouteData, + filler: IpAddr, + mut cidr_at: impl FnMut(u32) -> IpNet, + ) -> Vec { + let mut fillers = Vec::new(); + for i in 0u32.. { + let cidr = cidr_at(i); + match add_route_locked( + switch, + rd, + cidr, + make_route(filler), + FAKE_ASIC_PORT, + ) { + Ok(_) => fillers.push(cidr), + Err(DpdError::TableFull(_)) => break, + Err(e) => panic!("unexpected filler add error: {e:?}"), + } + } + fillers + } + + /// Delete every other entry in `fillers` so the freemap is left with + /// size-1 free spans separated by surviving entries — spans that + /// `reclaim()` cannot coalesce into anything wider. + fn fragment(switch: &Switch, rd: &mut RouteData, fillers: &[IpNet]) { + assert!( + fillers.len() >= 2, + "table did not accept enough fillers: got {}", + fillers.len(), + ); + for f in fillers.iter().step_by(2) { + delete_route_locked(switch, rd, *f).expect("delete filler"); + } + } + + fn v6_filler_cidr(i: u32) -> IpNet { + format!("3fff:beef:{i:x}::/64").parse::().unwrap().into() + } + + fn v4_filler_cidr(i: u32) -> IpNet { + format!("172.17.{}.{}/32", (i >> 8) & 0xff, i & 0xff) + .parse::() + .unwrap() + .into() + } + + /// Deleting a target must succeed when the v6 forwarding table is full. + /// + /// Setup mirrors the simplest "no fragmentation, just no room" scenario: + /// 1. Install one 2-target victim route. + /// 2. Install 1-target filler routes until any add returns TableFull. + /// The fillers consume bin[1] from the victim's growth phase and + /// then the freelist tail, leaving the freemap with no spans of + /// any size when the loop exits. + /// 3. Delete one of the victim's two targets. + #[tokio::test] + async fn delete_target_succeeds_on_full_table_v6() { + let switch = make_switch(); + let victim: Ipv6Net = "3fff:dead::/64".parse().unwrap(); + let t1: Ipv6Addr = "2001:db8::55:1".parse().unwrap(); + let t2: Ipv6Addr = "2001:db8::55:2".parse().unwrap(); + let filler: Ipv6Addr = "2001:db8::55:ff".parse().unwrap(); + + { + let mut rd = switch.routes.lock().await; + install_victim( + &switch, + &mut rd, + victim.into(), + [t1.into(), t2.into()], + ); + fill_table(&switch, &mut rd, filler.into(), v6_filler_cidr); + } + + delete_route_target_ipv6( + &switch, + victim, + fake_port_id(), + LinkId(0), + t2, + ) + .await + .expect("delete-target must succeed on a full table"); + } + + /// Deleting a target must succeed when the v6 forwarding table is full + /// AND fragmented into size-1 free spans separated by live routes. + /// + /// Setup: + /// 1. Install one 4-target victim. K=4 (not 2) so the deletion's + /// alloc(3) cannot be satisfied by the size-1 spans we create in + /// step 3. + /// 2. Install 1-target filler routes until the table is full. + /// 3. Delete every other filler in installation order. Each freed + /// slot lands in recycle_bins[1]; because surviving fillers (or + /// the victim) occupy the positions between them, those size-1 + /// spans are pairwise non-adjacent and reclaim() cannot coalesce + /// them into anything wider. + /// 4. Delete one of the victim's four targets. + #[tokio::test] + async fn delete_target_succeeds_on_fragmented_table_v6() { + let switch = make_switch(); + let victim: Ipv6Net = "3fff:dead::/64".parse().unwrap(); + let targets: [Ipv6Addr; 4] = [ + "2001:db8::55:1".parse().unwrap(), + "2001:db8::55:2".parse().unwrap(), + "2001:db8::55:3".parse().unwrap(), + "2001:db8::55:4".parse().unwrap(), + ]; + let filler: Ipv6Addr = "2001:db8::55:ff".parse().unwrap(); + + { + let mut rd = switch.routes.lock().await; + install_victim( + &switch, + &mut rd, + victim.into(), + targets.iter().map(|a| IpAddr::from(*a)), + ); + let fillers = + fill_table(&switch, &mut rd, filler.into(), v6_filler_cidr); + fragment(&switch, &mut rd, &fillers); + } + + delete_route_target_ipv6( + &switch, + victim, + fake_port_id(), + LinkId(0), + targets[3], + ) + .await + .expect("delete-target must succeed on a fragmented table"); + } + + /// IPv4 analog of [`delete_target_succeeds_on_full_table_v6`]. + #[tokio::test] + async fn delete_target_succeeds_on_full_table_v4() { + let switch = make_switch(); + let victim: Ipv4Net = "172.16.0.0/32".parse().unwrap(); + let t1 = Ipv4Addr::new(10, 0, 0, 1); + let t2 = Ipv4Addr::new(10, 0, 0, 2); + let filler = Ipv4Addr::new(10, 0, 0, 255); + + { + let mut rd = switch.routes.lock().await; + install_victim( + &switch, + &mut rd, + victim.into(), + [t1.into(), t2.into()], + ); + fill_table(&switch, &mut rd, filler.into(), v4_filler_cidr); + } + + delete_route_target_ipv4( + &switch, + victim, + fake_port_id(), + LinkId(0), + t2.into(), + ) + .await + .expect("delete-target must succeed on a full table"); + } + + /// IPv4 analog of [`delete_target_succeeds_on_fragmented_table_v6`]. + #[tokio::test] + async fn delete_target_succeeds_on_fragmented_table_v4() { + let switch = make_switch(); + let victim: Ipv4Net = "172.16.0.0/32".parse().unwrap(); + let targets: [Ipv4Addr; 4] = [ + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 2), + Ipv4Addr::new(10, 0, 0, 3), + Ipv4Addr::new(10, 0, 0, 4), + ]; + let filler = Ipv4Addr::new(10, 0, 0, 255); + + { + let mut rd = switch.routes.lock().await; + install_victim( + &switch, + &mut rd, + victim.into(), + targets.iter().map(|a| IpAddr::from(*a)), + ); + let fillers = + fill_table(&switch, &mut rd, filler.into(), v4_filler_cidr); + fragment(&switch, &mut rd, &fillers); + } + + delete_route_target_ipv4( + &switch, + victim, + fake_port_id(), + LinkId(0), + targets[3].into(), + ) + .await + .expect("delete-target must succeed on a fragmented table"); + } +} From 4ee52afe4a2448082bf1b52e5829da372a973f03 Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Fri, 15 May 2026 11:43:35 -0600 Subject: [PATCH 2/4] dpd: shrink route_target reservations in place on subset removal Make subset target removals succeed under arbitrary route_target fragmentation. The alloc-then-swap path fails whenever the FreeMap can't satisfy alloc(new_n), including the common case where a fragmented table has plenty of total free slots but no contiguous span of size new_n. The new shrink-in-place path compacts the existing reservation rather than allocating a new one, so it never needs to find contiguous free space. `replace_route_targets` classifies the requested update via `classify_update` -> `RouteTargetUpdate`: - ShrinkInPlace { removed }: every old element appears in new at least once and the count delta matches the removed-positions list. Dispatched to shrink_in_place. - Alloc: anything else. Dispatched to alloc_then_swap, which follows the alloc-write-swap-free pattern. This case can still TableFull on a fragmented table. `replace_route_targets` starts by calling `unhook_route`, which removes the in-core entry for the subnet and deletes the on-chip `route_index` in one step, returning ownership of the previous `RouteEntry`. Every downstream branch receives the owned old entry and is responsible for either installing a new in-core entry on success or restoring the old one on failure. `shrink_in_place` operates on a reservation that's already been unhooked. It walks the removed indices in decreasing order, pulling the current tail slot's contents down into each doomed position via delete + add on the route_target table. The slots are unreachable from the dataplane (route_index is gone), so per-slot atomicity isn't required. After the compaction loop it adds a new route_index pointing at `(base, new_n)` to bring the route back online with the smaller group, then tears down the unreachable tail entries and returns their slots to the FreeMap in a single bulk `free(base + new_n, removed.len())` call (the released slots are by construction the contiguous suffix of the old reservation). The in-core `RouteEntry.targets` is rebuilt from the post-compaction `live` mirror, not from the caller-supplied vec. Compaction reorders survivors by physical pull-tail-into-hole mechanics, so caller- supplied order need not match what was just programmed on chip; honoring the compacted layout keeps in-core and ASIC slot order aligned, which the shrink path itself relies on when it builds `live` from `old.targets` on the next call. This trades the (undocumented, unrelied-on) caller-order property for an internal invariant. On failure in any compaction or index-add step, `rollback_shrink` restores every position that was successfully deleted -- tracked in a `touched` vec populated as compaction progresses -- to its `old.targets[i]` contents, then reinstalls the original route_index. If rollback succeeds the owned `old` is re-inserted into the in-core mirror; if rollback itself fails the in-core entry stays cleared so the next control-plane update on this subnet rebuilds from scratch. Per-slot failures are logged individually so a divergence postmortem can name the slots involved. Additional helpers added: - `RouteData::freemap_mut(is_ipv4)` accessor. - `write_one_target` / `delete_one_target_at` for per-family dispatch on route_target. - `add_route_index_for` / `delete_route_index_for` for per-family dispatch on route_index. Tests cover delta=1 single-target removal on full and fragmented v4/v6 tables, multi-target (delta=3) subset removal on full and fragmented v4/v6 tables, and assertions that the in-core entry's tgt_ip set matches the requested survivors and that the FreeMap can satisfy a fresh alloc of the just-freed slot count. Two positional tests pin the compacted-layout invariant: a permutation-shrink and a shrink to a single non-zero-position survivor. An assertion that genuine grow-into-full-table operations still surface TableFull guards the alloc path. Signed-off-by: Trey Aspelund --- dpd/src/route.rs | 975 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 810 insertions(+), 165 deletions(-) diff --git a/dpd/src/route.rs b/dpd/src/route.rs index 08b1a06..f940d24 100644 --- a/dpd/src/route.rs +++ b/dpd/src/route.rs @@ -114,7 +114,9 @@ use dpd_types::link::LinkId; use dpd_types::route::Ipv4Route; use dpd_types::route::Ipv6Route; use slog::debug; +use slog::error; use slog::info; +use slog::warn; use crate::freemap; use crate::types::{DpdError, DpdResult}; @@ -293,6 +295,10 @@ impl RouteData { self.v6.remove(&subnet) } } + + fn freemap_mut(&mut self, is_ipv4: bool) -> &mut freemap::FreeMap { + if is_ipv4 { &mut self.v4_freemap } else { &mut self.v6_freemap } + } } // Remove all the data for a given route from both the route_data and @@ -337,11 +343,9 @@ fn cleanup_route( // to the FreeMap. If something went wrong, and there's really no reason it // should, then this table space will be leaked. if all_clear { - if entry.is_ipv4 { - route_data.v4_freemap.free(entry.index, entry.slots as u16); - } else { - route_data.v6_freemap.free(entry.index, entry.slots as u16); - } + route_data + .freemap_mut(entry.is_ipv4) + .free(entry.index, entry.slots as u16); } Ok(()) } @@ -379,6 +383,166 @@ fn finalize_route( } } +/// Categorizes a target-set replacement so `replace_route_targets` can dispatch +/// to the right execution path. +enum RouteTargetUpdate { + /// `new` is a strict subset of `old` (any positive delta). Safe to + /// compact in place without ever calling `FreeMap::alloc`. `removed` + /// lists the indices into `old.targets` that should be evicted, in + /// decreasing order — that ordering keeps the invariant that, at each + /// iteration, the current tail slot holds either a survivor (to be + /// pulled into a lower position) or the slot being evicted. + ShrinkInPlace { removed: Vec }, + /// Anything else (new target set is not a subset of the old set, or + /// no old set exists). Allocate fresh space from the user pool, + /// write, and flip `route_index`. `TableFull` here is a legitimate + /// "no room" condition for grow/new and a fragmentation condition + /// for non-subset same-or-smaller replaces. + Alloc, + /// `new` and `old` denote the same set (same length, no removals). + /// The dispatcher short-circuits before unhooking so the dataplane + /// sees no LPM miss for an idempotent replace. + NoOp, +} + +/// Decide how to apply `new` on top of `old`. See `RouteTargetUpdate` for the +/// meaning of each variant. +// +// Note: subset detection compares `NextHop`s by structural equality, so +// `old`/`new` sets that contain duplicate `NextHop`s (same port + same +// `Route`) will be conservatively classified as `Alloc` even when they +// represent a valid shrink. `add_route_locked` rejects exact duplicates +// upstream, so this is a fallback rather than a reachable case in practice. +fn classify_update( + old: Option<&RouteEntry>, + new: &[NextHop], +) -> RouteTargetUpdate { + let Some(old) = old else { + return RouteTargetUpdate::Alloc; + }; + if new.len() > old.targets.len() { + return RouteTargetUpdate::Alloc; + } + let mut removed: Vec = old + .targets + .iter() + .enumerate() + .filter(|(_, hop)| !new.contains(hop)) + .map(|(i, _)| i as u16) + .collect(); + if old.targets.len() - removed.len() == new.len() { + if removed.is_empty() { + RouteTargetUpdate::NoOp + } else { + removed.sort_unstable_by(|a, b| b.cmp(a)); + RouteTargetUpdate::ShrinkInPlace { removed } + } + } else { + RouteTargetUpdate::Alloc + } +} + +fn write_one_target( + switch: &Switch, + subnet_is_ipv4: bool, + idx: u16, + target: &NextHop, +) -> DpdResult<()> { + match target.route.tgt_ip { + IpAddr::V4(tgt_ip) => table::route_ipv4::add_route_target( + switch, + idx, + target.asic_port_id, + tgt_ip, + target.route.vlan_id, + ), + IpAddr::V6(tgt_ip) => { + if subnet_is_ipv4 { + table::route_ipv4::add_route_target_v6( + switch, + idx, + target.asic_port_id, + tgt_ip, + target.route.vlan_id, + ) + } else { + table::route_ipv6::add_route_target( + switch, + idx, + target.asic_port_id, + tgt_ip, + target.route.vlan_id, + ) + } + } + } +} + +/// Delete one route_target entry at `idx`, dispatching on subnet family. +fn delete_one_target_at( + switch: &Switch, + subnet_is_ipv4: bool, + idx: u16, +) -> DpdResult<()> { + if subnet_is_ipv4 { + table::route_ipv4::delete_route_target(switch, idx) + } else { + table::route_ipv6::delete_route_target(switch, idx) + } +} + +/// Add a route_index entry for `subnet` pointing at `[index, index + slots)`. +fn add_route_index_for( + switch: &Switch, + subnet: IpNet, + index: u16, + slots: u8, +) -> DpdResult<()> { + match subnet { + IpNet::V4(v4) => { + table::route_ipv4::add_route_index(switch, &v4, index, slots) + } + IpNet::V6(v6) => { + table::route_ipv6::add_route_index(switch, &v6, index, slots) + } + } +} + +/// Delete the route_index entry for `subnet`. +fn delete_route_index_for(switch: &Switch, subnet: IpNet) -> DpdResult<()> { + match subnet { + IpNet::V4(v4) => table::route_ipv4::delete_route_index(switch, &v4), + IpNet::V6(v6) => table::route_ipv6::delete_route_index(switch, &v6), + } +} + +/// Take the route off the dataplane: remove the in-core entry for `subnet` +/// and delete the on-chip `route_index`. On success the caller owns the +/// previous `RouteEntry` (or `None` if the subnet had none). On failure +/// of the on-chip delete the in-core mirror is restored. +/// +/// Every path that mutates a subnet's slot reservation should start here. +/// It gives the caller sole ownership of the old entry and guarantees +/// the dataplane can't reach the slots while they're in flight. +fn unhook_route( + switch: &Switch, + route_data: &mut RouteData, + subnet: IpNet, +) -> DpdResult> { + let Some(old) = route_data.remove(subnet) else { + return Ok(None); + }; + if let Err(e) = delete_route_index_for(switch, subnet) { + debug!( + switch.log, + "unhook_route: route_index delete failed for {subnet}: {e:?}" + ); + route_data.insert(subnet, old); + return Err(e); + } + Ok(Some(old)) +} + // Update the set of targets associated with a route. // // This routine can be used to either add or remove a route - all it knows is @@ -388,46 +552,64 @@ fn finalize_route( // freed. On failure, the new data will be freed and the tables will still // contain the old data. In either case, there should be nothing for the // calling routine to do but pass the DpdResult back up the stack. +// +// The function dispatches on the shape of the requested change (see +// `RouteTargetUpdate`). Subset removals take a shrink-in-place path that never +// calls `FreeMap::alloc`; everything else falls through to the +// alloc-then-swap path. fn replace_route_targets( switch: &Switch, route_data: &mut RouteData, subnet: IpNet, targets: Vec, ) -> DpdResult<()> { - // Remove the old entry from our in-core and on-chip indexes, but don't free - // the data yet. debug!(switch.log, "replacing targets for {subnet} with: {targets:?}"); - let old_entry = route_data.remove(subnet); - if let Some(ref old) = old_entry - && let Err(e) = match subnet { - IpNet::V4(v4) => table::route_ipv4::delete_route_index(switch, &v4), - IpNet::V6(v6) => table::route_ipv6::delete_route_index(switch, &v6), - } - { - debug!( - switch.log, - "failed to delete route index, restoring internal data" - ); - route_data.insert(subnet, old.clone()); - return Err(e); - } - // If the new set of targets is empty, the route has been deleted and there - // is no new data to insert in either table. + // Delete-route path: no classification needed; unhook and free. if targets.is_empty() { - if let Some(entry) = old_entry { - return cleanup_route(switch, route_data, None, entry); + let old_entry = unhook_route(switch, route_data, subnet)?; + return match old_entry { + Some(entry) => cleanup_route(switch, route_data, None, entry), + None => Ok(()), + }; + } + + // Classify against the current in-core entry *before* unhooking so that + // a NoOp replace leaves the dataplane untouched (no LPM miss window). + match classify_update(route_data.get(subnet), &targets) { + RouteTargetUpdate::NoOp => Ok(()), + RouteTargetUpdate::ShrinkInPlace { removed } => { + let old = unhook_route(switch, route_data, subnet)? + .expect("subset removal requires existing route"); + // shrink_in_place reconstructs the in-core target vec from the + // compacted ASIC layout rather than caller-supplied order (see + // its body for why), so the caller's vec carries no useful + // information past this point. + drop(targets); + shrink_in_place(switch, route_data, subnet, old, removed) + } + RouteTargetUpdate::Alloc => { + let old_entry = unhook_route(switch, route_data, subnet)?; + alloc_then_swap(switch, route_data, subnet, old_entry, targets) } - return Ok(()); } +} - // Allocate space in the p4 table for the new set of targets. +// Allocate a fresh slot reservation, write the new targets there, then add +// `route_index` pointing at it. Caller has already unhooked any pre-existing +// route (the in-core entry and on-chip route_index are gone for `subnet`); +// `old_entry` carries the previous slot reservation that we still need to +// free on success or restore on failure. +fn alloc_then_swap( + switch: &Switch, + route_data: &mut RouteData, + subnet: IpNet, + old_entry: Option, + targets: Vec, +) -> DpdResult<()> { let slots = targets.len() as u8; let is_ipv4 = subnet.is_ipv4(); - let mut new_entry = match match is_ipv4 { - true => route_data.v4_freemap.alloc(slots), - false => route_data.v6_freemap.alloc(slots), - } { + let mut new_entry = match route_data.freemap_mut(is_ipv4).alloc(slots) { Ok(index) => RouteEntry { is_ipv4, index, @@ -439,6 +621,8 @@ fn replace_route_targets( switch.log, "failed to allocate space for the new target list" ); + // Restore the old route_index + in-core entry (or no-op if + // there was no old entry). let _ = finalize_route(switch, route_data, subnet, old_entry); return Err(e); } @@ -448,34 +632,7 @@ fn replace_route_targets( let mut idx = new_entry.index; for target in targets { - if let Err(e) = match target.route.tgt_ip { - IpAddr::V4(tgt_ip) => table::route_ipv4::add_route_target( - switch, - idx, - target.asic_port_id, - tgt_ip, - target.route.vlan_id, - ), - IpAddr::V6(tgt_ip) => { - if subnet.is_ipv4() { - table::route_ipv4::add_route_target_v6( - switch, - idx, - target.asic_port_id, - tgt_ip, - target.route.vlan_id, - ) - } else { - table::route_ipv6::add_route_target( - switch, - idx, - target.asic_port_id, - tgt_ip, - target.route.vlan_id, - ) - } - } - } { + if let Err(e) = write_one_target(switch, is_ipv4, idx, &target) { debug!(switch.log, "failed to insert {target:?} into route table"); let _ = cleanup_route(switch, route_data, None, new_entry); let _ = finalize_route(switch, route_data, subnet, old_entry); @@ -507,6 +664,233 @@ fn replace_route_targets( } } +// Apply a subset-removal update by compacting the existing reservation in +// place. Four steps: +// +// 1. Delete `route_index` for `subnet`. The dataplane now misses on this +// subnet (LPM lookup returns the default action) for the duration of +// the compaction. This is the same brief miss window the existing +// alloc-then-swap path produces. +// +// 2. For each removed slot (in decreasing index order), pull the current +// tail contents down into the doomed position via delete + add on the +// route_target slot, and plan the tail slot for release. The slots +// are unreachable from the dataplane at this point because step 1 +// removed the index, so per-slot atomicity isn't required. +// +// 3. Re-add `route_index` pointing at `(base, new.len())`. The dataplane +// now resumes with the compacted policy. +// +// 4. Delete the now-unreachable tail entries and return their slots to +// the FreeMap via `commit_release`. These deletes are post-commit; +// failures here leak slots but cannot corrupt forwarding. +// +// The caller has already unhooked the route (route_index gone, in-core +// entry removed); we own `old` outright. On failure in any step, +// `rollback_shrink` restores the ASIC slot contents and reinstalls the +// original `route_index`; if rollback succeeds we re-insert `old` into the +// in-core mirror; if rollback itself fails we leave the in-core empty +// (matching the now-unknown ASIC state) so the next control-plane update +// rebuilds from scratch. +fn shrink_in_place( + switch: &Switch, + route_data: &mut RouteData, + subnet: IpNet, + old: RouteEntry, + removed: Vec, +) -> DpdResult<()> { + let base = old.index; + let is_ipv4 = old.is_ipv4; + let new_n = old.slots - removed.len() as u8; + // Mirror of the slot contents under [base, base+old.slots) that we update + // in lockstep with each successful ASIC operation. `None` means the + // slot's ASIC entry is currently deleted. + let mut live: Vec> = + old.targets.iter().map(|t| Some(t.clone())).collect(); + // Positions whose ASIC contents have diverged from `old.targets[i]`. A + // position lands here as soon as we successfully delete its slot; from + // that point forward it differs from the original, regardless of whether + // the subsequent write succeeded. Rollback walks this set instead of + // all `old.slots` positions, which keeps recovery O(removed) and lets us + // log per-position failures without flooding for untouched survivors. + let mut touched: Vec = Vec::with_capacity(removed.len()); + + // Step 1: compact in decreasing-removed-index order via delete + add. + // Slots are unreachable from the dataplane (route_index was deleted by + // the dispatcher's `unhook_route`). By construction, the released slots + // form the contiguous suffix `[base + new_n, base + old.slots)`; there is + // no per-slot release bookkeeping to do during the loop. + let mut current_top = old.slots as u16; + for &removed_idx in &removed { + let tail_idx = current_top - 1; + if removed_idx != tail_idx { + let tail_contents = live[tail_idx as usize] + .as_ref() + .expect("tail slot is live at this point") + .clone(); + if let Err(e) = + delete_one_target_at(switch, is_ipv4, base + removed_idx) + { + warn!( + switch.log, + "shrink-in-place compact delete failed at slot {}: {e:?}", + base + removed_idx + ); + restore_after_shrink_failure( + switch, route_data, subnet, &live, &touched, old, + ); + return Err(e); + } + live[removed_idx as usize] = None; + touched.push(removed_idx); + if let Err(e) = write_one_target( + switch, + is_ipv4, + base + removed_idx, + &tail_contents, + ) { + warn!( + switch.log, + "shrink-in-place compact add failed at slot {}: {e:?}", + base + removed_idx + ); + restore_after_shrink_failure( + switch, route_data, subnet, &live, &touched, old, + ); + return Err(e); + } + live[removed_idx as usize] = Some(tail_contents); + } + current_top -= 1; + } + + // Step 2: install the route_index pointing at the compacted range. + if let Err(e) = add_route_index_for(switch, subnet, base, new_n) { + warn!( + switch.log, + "shrink-in-place index re-add failed for {subnet}: {e:?}" + ); + restore_after_shrink_failure( + switch, route_data, subnet, &live, &touched, old, + ); + return Err(e); + } + + // Step 3: drop the now-unreachable tail entries and release the slots in + // one bulk free. Best effort; failures past the commit point are leaks, + // not correctness bugs. Mirrors `cleanup_route`'s `all_clear` posture. + let release_base = base + new_n as u16; + let release_count = old.slots as u16 - new_n as u16; + let mut all_clear = true; + for offset in 0..release_count { + if delete_one_target_at(switch, is_ipv4, release_base + offset).is_err() + { + all_clear = false; + } + } + if all_clear { + route_data.freemap_mut(is_ipv4).free(release_base, release_count); + } else { + warn!( + switch.log, + "shrink-in-place tail cleanup partially failed for {subnet}; \ + leaking slots in the FreeMap's accounting" + ); + // Skipping the free() call leaves the slots claimed in the FreeMap's + // accounting, which matches what's still allocated on the ASIC. + } + + // Build the in-core targets vec from the compacted ASIC layout. + // Compaction reorders survivors by physical pull-tail-into-hole mechanics, + // so caller-supplied order need not match what was just programmed on + // chip. The in-core mirror's positional contract is "targets[i] is what + // occupies slot index+i"; honoring that here is what keeps the in-core + // <-> on-chip alignment that shrink_in_place itself relies on (it + // rebuilds `live` from `old.targets` on the next call). + // `live[0..new_n)` is fully populated: each i < new_n is either a + // survivor in its original position or was overwritten with tail + // contents during step 1. + let compacted: Vec = live + .into_iter() + .take(new_n as usize) + .map(|opt| opt.expect("live[0..new_n) is populated post-compaction")) + .collect(); + let prev = route_data.insert( + subnet, + RouteEntry { is_ipv4, index: base, slots: new_n, targets: compacted }, + ); + debug_assert!( + prev.is_none(), + "shrink_in_place insert for {subnet} replaced an unexpected \ + existing entry" + ); + Ok(()) +} + +// Try to restore the original ASIC slot contents at every `touched` +// position and reinstall the original `route_index`; if that succeeds +// re-insert `old` into the in-core mirror; if rollback itself fails leave +// the in-core empty so the next update rebuilds. Consumes `old`. +fn restore_after_shrink_failure( + switch: &Switch, + route_data: &mut RouteData, + subnet: IpNet, + live: &[Option], + touched: &[u16], + old: RouteEntry, +) { + if rollback_shrink(switch, subnet, &old, live, touched) { + route_data.insert(subnet, old); + } else { + error!( + switch.log, + "shrink-in-place rollback failed for {subnet}; in-core entry \ + stays cleared and ASIC state may diverge until the next \ + control-plane update on this subnet rebuilds it" + ); + } +} + +/// Restore every position in `touched` to its pre-shrink contents (a +/// delete-if-present followed by a write of `old.targets[i]`), then +/// reinstall the original `route_index` so the dataplane resumes with +/// the pre-shrink policy. Returns `true` if every restore + index-readd +/// succeeded, `false` otherwise. Per-slot failures are logged +/// individually so a divergence postmortem can name the slots involved. +fn rollback_shrink( + switch: &Switch, + subnet: IpNet, + old: &RouteEntry, + live: &[Option], + touched: &[u16], +) -> bool { + let is_ipv4 = old.is_ipv4; + let base = old.index; + let mut ok = true; + for &i in touched { + let slot = base + i; + let orig = &old.targets[i as usize]; + if live[i as usize].is_some() + && let Err(e) = delete_one_target_at(switch, is_ipv4, slot) + { + error!(switch.log, "rollback delete failed at slot {slot}: {e:?}"); + ok = false; + } + if let Err(e) = write_one_target(switch, is_ipv4, slot, orig) { + error!(switch.log, "rollback write failed at slot {slot}: {e:?}"); + ok = false; + } + } + if let Err(e) = add_route_index_for(switch, subnet, base, old.slots) { + error!( + switch.log, + "rollback route_index re-add failed for {subnet}: {e:?}" + ); + ok = false; + } + ok +} + fn add_route_locked( switch: &Switch, route_data: &mut RouteData, @@ -960,6 +1344,21 @@ mod tests { switch } + /// Total size to use for the per-family FreeMap in tests that exercise + /// "table full" / "table fragmented" behavior. Small enough to keep + /// the fill_table iteration cheap, large enough to install a small + /// victim plus a handful of fillers. + const TEST_FREEMAP_SIZE: u16 = 64; + + /// Pre-initialize the per-family FreeMap to `TEST_FREEMAP_SIZE` so + /// subsequent `add_route_locked` calls don't enlarge it to the stub + /// table's full size. `maybe_init` is idempotent — once we set the + /// geometry, the production code path is a no-op. Must be called + /// before any `add_route_locked` invocation in the test. + fn shrink_test_freemap(rd: &mut RouteData, is_ipv4: bool) { + rd.freemap_mut(is_ipv4).maybe_init(TEST_FREEMAP_SIZE); + } + /// Install one entry per target onto `victim`. Callers use this against /// an empty table, so any failure is a bug in the test setup. fn install_victim( @@ -1021,6 +1420,48 @@ mod tests { } } + /// Per-family fixtures used by every test below. `targets` carries + /// four distinct addresses so the same fixture works for the 2-target + /// "full" scenario, the 4-target "fragmented" scenario, and the + /// "shared tgt_ip with one survivor" multi-target scenario. + struct FamilyFixtures { + is_ipv4: bool, + victim: IpNet, + targets: [IpAddr; 4], + filler: IpAddr, + filler_cidr: fn(u32) -> IpNet, + } + + fn fixtures_v6() -> FamilyFixtures { + FamilyFixtures { + is_ipv4: false, + victim: "3fff:dead::/64".parse::().unwrap().into(), + targets: [ + "2001:db8::55:1".parse::().unwrap().into(), + "2001:db8::55:2".parse::().unwrap().into(), + "2001:db8::55:3".parse::().unwrap().into(), + "2001:db8::55:4".parse::().unwrap().into(), + ], + filler: "2001:db8::55:ff".parse::().unwrap().into(), + filler_cidr: v6_filler_cidr, + } + } + + fn fixtures_v4() -> FamilyFixtures { + FamilyFixtures { + is_ipv4: true, + victim: "172.16.0.0/32".parse::().unwrap().into(), + targets: [ + Ipv4Addr::new(10, 0, 0, 1).into(), + Ipv4Addr::new(10, 0, 0, 2).into(), + Ipv4Addr::new(10, 0, 0, 3).into(), + Ipv4Addr::new(10, 0, 0, 4).into(), + ], + filler: Ipv4Addr::new(10, 0, 0, 255).into(), + filler_cidr: v4_filler_cidr, + } + } + fn v6_filler_cidr(i: u32) -> IpNet { format!("3fff:beef:{i:x}::/64").parse::().unwrap().into() } @@ -1032,7 +1473,47 @@ mod tests { .into() } - /// Deleting a target must succeed when the v6 forwarding table is full. + /// Build a Vec from the supplied target IPs using `FAKE_ASIC_PORT` + /// and the default `make_route` template (empty tag, RearPort(0), no vlan). + fn next_hops(targets: impl IntoIterator) -> Vec { + targets + .into_iter() + .map(|ip| NextHop { + asic_port_id: FAKE_ASIC_PORT, + route: make_route(ip), + }) + .collect() + } + + /// Common post-shrink checks: the in-core entry's tgt_ip set matches + /// `expected`, and the FreeMap can satisfy a fresh allocation of size + /// `freed_slots` -- proving the suffix the shrink released was actually + /// returned to the pool. The probe consumes the freed span, so callers + /// should treat it as a terminal assertion. + fn assert_post_shrink( + rd: &mut RouteData, + subnet: IpNet, + is_ipv4: bool, + expected: &[IpAddr], + freed_slots: u16, + ) { + use std::collections::BTreeSet; + let entry = rd.get(subnet).expect("victim must still exist"); + let observed: BTreeSet = + entry.targets.iter().map(|t| t.route.tgt_ip).collect(); + let expected_set: BTreeSet = expected.iter().copied().collect(); + assert_eq!( + observed, expected_set, + "in-core target set must match expected" + ); + if freed_slots > 0 { + rd.freemap_mut(is_ipv4) + .alloc(freed_slots) + .expect("freed slots must be allocatable post-shrink"); + } + } + + /// Deleting a target must succeed when the forwarding table is full. /// /// Setup mirrors the simplest "no fragmentation, just no room" scenario: /// 1. Install one 2-target victim route. @@ -1040,152 +1521,316 @@ mod tests { /// The fillers consume bin[1] from the victim's growth phase and /// then the freelist tail, leaving the freemap with no spans of /// any size when the loop exits. - /// 3. Delete one of the victim's two targets. - #[tokio::test] - async fn delete_target_succeeds_on_full_table_v6() { + /// 3. Replace the victim's target set with just the first one, + /// i.e. drop the second target. + async fn delete_target_full_scenario(f: FamilyFixtures) { let switch = make_switch(); - let victim: Ipv6Net = "3fff:dead::/64".parse().unwrap(); - let t1: Ipv6Addr = "2001:db8::55:1".parse().unwrap(); - let t2: Ipv6Addr = "2001:db8::55:2".parse().unwrap(); - let filler: Ipv6Addr = "2001:db8::55:ff".parse().unwrap(); - - { - let mut rd = switch.routes.lock().await; - install_victim( - &switch, - &mut rd, - victim.into(), - [t1.into(), t2.into()], - ); - fill_table(&switch, &mut rd, filler.into(), v6_filler_cidr); - } + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim( + &switch, + &mut rd, + f.victim, + [f.targets[0], f.targets[1]], + ); + fill_table(&switch, &mut rd, f.filler, f.filler_cidr); - delete_route_target_ipv6( + replace_route_targets( &switch, - victim, - fake_port_id(), - LinkId(0), - t2, + &mut rd, + f.victim, + next_hops([f.targets[0]]), ) - .await .expect("delete-target must succeed on a full table"); + assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &[f.targets[0]], 1); + } + + #[tokio::test] + async fn delete_target_full() { + delete_target_full_scenario(fixtures_v6()).await; + delete_target_full_scenario(fixtures_v4()).await; } - /// Deleting a target must succeed when the v6 forwarding table is full + /// Deleting a target must succeed when the forwarding table is full /// AND fragmented into size-1 free spans separated by live routes. /// /// Setup: - /// 1. Install one 4-target victim. K=4 (not 2) so the deletion's - /// alloc(3) cannot be satisfied by the size-1 spans we create in - /// step 3. + /// 1. Install one 4-target victim. K=4 (not 2) so the alloc-then-swap + /// path's alloc(3) would not be satisfied by the size-1 spans + /// created in step 3 — shrink-in-place is the only path that can + /// win here. /// 2. Install 1-target filler routes until the table is full. - /// 3. Delete every other filler in installation order. Each freed + /// 3. Delete every other filler in installation order. Each freed /// slot lands in recycle_bins[1]; because surviving fillers (or /// the victim) occupy the positions between them, those size-1 /// spans are pairwise non-adjacent and reclaim() cannot coalesce /// them into anything wider. - /// 4. Delete one of the victim's four targets. - #[tokio::test] - async fn delete_target_succeeds_on_fragmented_table_v6() { + /// 4. Drop the victim's last target. + async fn delete_target_fragmented_scenario(f: FamilyFixtures) { let switch = make_switch(); - let victim: Ipv6Net = "3fff:dead::/64".parse().unwrap(); - let targets: [Ipv6Addr; 4] = [ - "2001:db8::55:1".parse().unwrap(), - "2001:db8::55:2".parse().unwrap(), - "2001:db8::55:3".parse().unwrap(), - "2001:db8::55:4".parse().unwrap(), - ]; - let filler: Ipv6Addr = "2001:db8::55:ff".parse().unwrap(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim(&switch, &mut rd, f.victim, f.targets); + let fillers = fill_table(&switch, &mut rd, f.filler, f.filler_cidr); + fragment(&switch, &mut rd, &fillers); - { - let mut rd = switch.routes.lock().await; - install_victim( + replace_route_targets( + &switch, + &mut rd, + f.victim, + next_hops(f.targets[..3].iter().copied()), + ) + .expect("delete-target must succeed on a fragmented table"); + assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &f.targets[..3], 1); + } + + #[tokio::test] + async fn delete_target_fragmented() { + delete_target_fragmented_scenario(fixtures_v6()).await; + delete_target_fragmented_scenario(fixtures_v4()).await; + } + + /// Install three NextHops that all share one `tgt_ip` (distinguished + /// by `tag`) plus one survivor with a different `tgt_ip`, then issue a + /// single replace that drops all three sharing the `tgt_ip` (delta=3). + /// Exercises the multi-target subset-removal path through + /// `shrink_in_place`. + async fn delete_targets_full_scenario(f: FamilyFixtures) { + let switch = make_switch(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + let shared_tgt = f.targets[0]; + let survivor = f.targets[1]; + for tag in ["a", "b", "c"] { + add_route_locked( &switch, &mut rd, - victim.into(), - targets.iter().map(|a| IpAddr::from(*a)), - ); - let fillers = - fill_table(&switch, &mut rd, filler.into(), v6_filler_cidr); - fragment(&switch, &mut rd, &fillers); + f.victim, + Route { + tag: tag.into(), + port_id: fake_port_id(), + link_id: LinkId(0), + tgt_ip: shared_tgt, + vlan_id: None, + }, + FAKE_ASIC_PORT, + ) + .expect("victim add must succeed on an empty table"); } + add_route_locked( + &switch, + &mut rd, + f.victim, + make_route(survivor), + FAKE_ASIC_PORT, + ) + .expect("survivor add must succeed on an empty table"); + fill_table(&switch, &mut rd, f.filler, f.filler_cidr); - delete_route_target_ipv6( + replace_route_targets( &switch, - victim, - fake_port_id(), - LinkId(0), - targets[3], + &mut rd, + f.victim, + next_hops([survivor]), ) - .await - .expect("delete-target must succeed on a fragmented table"); + .expect("multi-target delete must succeed on a full table"); + assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &[survivor], 3); } - /// IPv4 analog of [`delete_target_succeeds_on_full_table_v6`]. #[tokio::test] - async fn delete_target_succeeds_on_full_table_v4() { - let switch = make_switch(); - let victim: Ipv4Net = "172.16.0.0/32".parse().unwrap(); - let t1 = Ipv4Addr::new(10, 0, 0, 1); - let t2 = Ipv4Addr::new(10, 0, 0, 2); - let filler = Ipv4Addr::new(10, 0, 0, 255); + async fn delete_targets_full() { + delete_targets_full_scenario(fixtures_v6()).await; + delete_targets_full_scenario(fixtures_v4()).await; + } - { - let mut rd = switch.routes.lock().await; - install_victim( + /// Same shape as the multi-target full-table scenario but against a + /// fragmented freemap. shrink-in-place must succeed without calling + /// `FreeMap::alloc`, regardless of fragmentation. + async fn delete_targets_fragmented_scenario(f: FamilyFixtures) { + let switch = make_switch(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + let shared_tgt = f.targets[0]; + let survivor = f.targets[1]; + for tag in ["a", "b", "c"] { + add_route_locked( &switch, &mut rd, - victim.into(), - [t1.into(), t2.into()], - ); - fill_table(&switch, &mut rd, filler.into(), v4_filler_cidr); + f.victim, + Route { + tag: tag.into(), + port_id: fake_port_id(), + link_id: LinkId(0), + tgt_ip: shared_tgt, + vlan_id: None, + }, + FAKE_ASIC_PORT, + ) + .expect("victim add must succeed on an empty table"); } + add_route_locked( + &switch, + &mut rd, + f.victim, + make_route(survivor), + FAKE_ASIC_PORT, + ) + .expect("survivor add must succeed on an empty table"); + let fillers = fill_table(&switch, &mut rd, f.filler, f.filler_cidr); + fragment(&switch, &mut rd, &fillers); - delete_route_target_ipv4( + replace_route_targets( &switch, - victim, - fake_port_id(), - LinkId(0), - t2.into(), + &mut rd, + f.victim, + next_hops([survivor]), ) - .await - .expect("delete-target must succeed on a full table"); + .expect("multi-target delete must succeed on a fragmented table"); + assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &[survivor], 3); } - /// IPv4 analog of [`delete_target_succeeds_on_fragmented_table_v6`]. #[tokio::test] - async fn delete_target_succeeds_on_fragmented_table_v4() { + async fn delete_targets_fragmented() { + delete_targets_fragmented_scenario(fixtures_v6()).await; + delete_targets_fragmented_scenario(fixtures_v4()).await; + } + + /// Growing an existing route on a full table is a legitimate TableFull + /// condition; the alloc-then-swap path should surface that, not paper + /// over it. + async fn add_target_fails_full_scenario(f: FamilyFixtures) { let switch = make_switch(); - let victim: Ipv4Net = "172.16.0.0/32".parse().unwrap(); - let targets: [Ipv4Addr; 4] = [ - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 2), - Ipv4Addr::new(10, 0, 0, 3), - Ipv4Addr::new(10, 0, 0, 4), - ]; - let filler = Ipv4Addr::new(10, 0, 0, 255); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim(&switch, &mut rd, f.victim, [f.targets[0]]); + fill_table(&switch, &mut rd, f.filler, f.filler_cidr); - { - let mut rd = switch.routes.lock().await; - install_victim( - &switch, - &mut rd, - victim.into(), - targets.iter().map(|a| IpAddr::from(*a)), - ); - let fillers = - fill_table(&switch, &mut rd, filler.into(), v4_filler_cidr); - fragment(&switch, &mut rd, &fillers); + // Grow: replace the 1-target set with a 2-target set (old + new). + match replace_route_targets( + &switch, + &mut rd, + f.victim, + next_hops([f.targets[0], f.targets[1]]), + ) { + Err(DpdError::TableFull(_)) => {} + other => panic!("expected TableFull, got {other:?}"), } + } + + #[tokio::test] + async fn add_target_fails_full() { + add_target_fails_full_scenario(fixtures_v6()).await; + add_target_fails_full_scenario(fixtures_v4()).await; + } + + /// After a shrink, the in-core `RouteEntry.targets` must reflect the + /// physical compacted ASIC layout — not the caller-supplied order. + /// Install [t0,t1,t2,t3], replace with [t3,t0] (caller order intentionally + /// different from the natural compaction order). Compaction processes + /// removed=[2,1] in decreasing order: slot 2 <- live[3]=t3, slot 1 <- + /// live[2]=t3 (the just-moved tail). After step 3 drops the released + /// tail, the live window is [t0, t3] — that's what in-core must reflect. + async fn shrink_preserves_compacted_order_scenario(f: FamilyFixtures) { + let switch = make_switch(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim(&switch, &mut rd, f.victim, f.targets); - delete_route_target_ipv4( + replace_route_targets( &switch, - victim, - fake_port_id(), - LinkId(0), - targets[3].into(), + &mut rd, + f.victim, + next_hops([f.targets[3], f.targets[0]]), ) - .await - .expect("delete-target must succeed on a fragmented table"); + .expect("shrink must succeed on an empty table"); + + let entry = rd.get(f.victim).expect("victim must still exist"); + let observed: Vec = + entry.targets.iter().map(|t| t.route.tgt_ip).collect(); + assert_eq!( + observed, + vec![f.targets[0], f.targets[3]], + "in-core targets must match the compacted ASIC layout, \ + not the caller-supplied order" + ); + } + + #[tokio::test] + async fn shrink_preserves_compacted_order() { + shrink_preserves_compacted_order_scenario(fixtures_v6()).await; + shrink_preserves_compacted_order_scenario(fixtures_v4()).await; + } + + /// Exercise the `removed_idx == tail_idx` branch (skip the copy) for + /// every iteration: shrink [t0,t1,t2,t3] to [t2] by removing + /// {3, 1, 0}. Compaction order: j=0 tail=3=removed (skip), j=1 tail=2 + /// removed=1 (copy t2 -> slot 1), j=2 tail=1 removed=0 (copy live[1]=t2 + /// -> slot 0). Expected in-core: [t2]. + async fn shrink_to_single_non_zero_survivor_scenario(f: FamilyFixtures) { + let switch = make_switch(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim(&switch, &mut rd, f.victim, f.targets); + + replace_route_targets( + &switch, + &mut rd, + f.victim, + next_hops([f.targets[2]]), + ) + .expect("shrink to single survivor must succeed"); + + let entry = rd.get(f.victim).expect("victim must still exist"); + let observed: Vec = + entry.targets.iter().map(|t| t.route.tgt_ip).collect(); + assert_eq!(observed, vec![f.targets[2]]); + } + + #[tokio::test] + async fn shrink_to_single_non_zero_survivor() { + shrink_to_single_non_zero_survivor_scenario(fixtures_v6()).await; + shrink_to_single_non_zero_survivor_scenario(fixtures_v4()).await; + } + + /// An identity replace (new target set equals old target set) must + /// short-circuit: classification returns `NoOp` and the dispatcher + /// returns without unhooking. We assert that the in-core `RouteEntry` + /// is byte-for-byte identical to its pre-call state and that the + /// FreeMap's bookkeeping is undisturbed. The index check is the + /// strongest signal: any alloc-then-swap path would have moved the + /// reservation to a new base. + async fn identity_replace_is_noop_scenario(f: FamilyFixtures) { + let switch = make_switch(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim(&switch, &mut rd, f.victim, f.targets); + let before = rd.get(f.victim).expect("victim installed").clone(); + + replace_route_targets(&switch, &mut rd, f.victim, next_hops(f.targets)) + .expect("identity replace must succeed"); + + let after = rd.get(f.victim).expect("victim must still exist"); + assert_eq!( + &before, after, + "identity replace must leave the in-core entry untouched" + ); + // The original reservation must still be claimed: an alloc of + // `old.slots` starting at `before.index` would only be possible if + // the FreeMap had reclaimed it, which would mean we ran the + // alloc-then-swap path. + let probe = rd + .freemap_mut(f.is_ipv4) + .alloc(before.slots) + .expect("freemap must still have room for a fresh allocation"); + assert_ne!( + probe, before.index, + "identity replace must not have freed the original reservation" + ); + } + + #[tokio::test] + async fn identity_replace_is_noop() { + identity_replace_is_noop_scenario(fixtures_v6()).await; + identity_replace_is_noop_scenario(fixtures_v4()).await; } } From b3bebcb0d8bd5f41290e215f3ca9c060eb49d001 Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Fri, 15 May 2026 15:24:24 -0600 Subject: [PATCH 3/4] dpd: short-circuit no-op route_target replaces; review polish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the shrink-in-place commit, addressing review feedback: * Add a NoOp variant to RouteTargetUpdate so an identity replace (new target set equals old) is detected by the classifier and the dispatcher returns without unhooking. Eliminates the brief LPM-miss window the pre-refactor code (and the first iteration of this branch) produced on idempotent reconciles. * Bump compact-loop and commit-boundary failure logs in shrink_in_place from debug! to warn!: those failures are user-visible Err returns from the very path that exists to avoid TableFull, so they deserve more than trace-level visibility. Rollback failures stay at error!. * Reword the "drop new_targets" cue into a comment at the dispatcher call site and drop the parameter from shrink_in_place entirely — new_n is now derived from old.slots - removed.len(). * Note in classify_update that subset detection compares NextHops by structural equality, so duplicate hops would be conservatively classified as Alloc. add_route_locked rejects exact duplicates upstream, so this is a fallback rather than a reachable case. * Test additions: same_size_non_subset_replace and grow_succeeds cover the Alloc-path dispatch under same-length swap and successful growth respectively. identity_replace_is_noop pins the new short-circuit via a recycle-bin probe (alloc(slots) returning != before.index proves the original reservation was never freed, since FreeMap::alloc checks recycle_bins before the freelist). * Test factoring: collapse delete_targets_full_scenario and delete_targets_fragmented_scenario into a single fragmented:bool helper. -30 lines of duplication. Signed-off-by: Trey Aspelund --- dpd/src/route.rs | 152 +++++++++++++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 57 deletions(-) diff --git a/dpd/src/route.rs b/dpd/src/route.rs index f940d24..9894b49 100644 --- a/dpd/src/route.rs +++ b/dpd/src/route.rs @@ -1595,7 +1595,12 @@ mod tests { /// single replace that drops all three sharing the `tgt_ip` (delta=3). /// Exercises the multi-target subset-removal path through /// `shrink_in_place`. - async fn delete_targets_full_scenario(f: FamilyFixtures) { + /// Install three NextHops that all share one `tgt_ip` (distinguished by + /// `tag`) plus one survivor with a different `tgt_ip`, then issue a + /// single replace that drops all three sharing the `tgt_ip` (delta=3). + /// Exercises the multi-target subset-removal path through + /// `shrink_in_place` against either a full or a full+fragmented freemap. + async fn delete_targets_scenario(f: FamilyFixtures, fragmented: bool) { let switch = make_switch(); let mut rd = switch.routes.lock().await; shrink_test_freemap(&mut rd, f.is_ipv4); @@ -1625,7 +1630,10 @@ mod tests { FAKE_ASIC_PORT, ) .expect("survivor add must succeed on an empty table"); - fill_table(&switch, &mut rd, f.filler, f.filler_cidr); + let fillers = fill_table(&switch, &mut rd, f.filler, f.filler_cidr); + if fragmented { + fragment(&switch, &mut rd, &fillers); + } replace_route_targets( &switch, @@ -1633,66 +1641,20 @@ mod tests { f.victim, next_hops([survivor]), ) - .expect("multi-target delete must succeed on a full table"); + .expect("multi-target delete must succeed"); assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &[survivor], 3); } #[tokio::test] async fn delete_targets_full() { - delete_targets_full_scenario(fixtures_v6()).await; - delete_targets_full_scenario(fixtures_v4()).await; - } - - /// Same shape as the multi-target full-table scenario but against a - /// fragmented freemap. shrink-in-place must succeed without calling - /// `FreeMap::alloc`, regardless of fragmentation. - async fn delete_targets_fragmented_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - let shared_tgt = f.targets[0]; - let survivor = f.targets[1]; - for tag in ["a", "b", "c"] { - add_route_locked( - &switch, - &mut rd, - f.victim, - Route { - tag: tag.into(), - port_id: fake_port_id(), - link_id: LinkId(0), - tgt_ip: shared_tgt, - vlan_id: None, - }, - FAKE_ASIC_PORT, - ) - .expect("victim add must succeed on an empty table"); - } - add_route_locked( - &switch, - &mut rd, - f.victim, - make_route(survivor), - FAKE_ASIC_PORT, - ) - .expect("survivor add must succeed on an empty table"); - let fillers = fill_table(&switch, &mut rd, f.filler, f.filler_cidr); - fragment(&switch, &mut rd, &fillers); - - replace_route_targets( - &switch, - &mut rd, - f.victim, - next_hops([survivor]), - ) - .expect("multi-target delete must succeed on a fragmented table"); - assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &[survivor], 3); + delete_targets_scenario(fixtures_v6(), false).await; + delete_targets_scenario(fixtures_v4(), false).await; } #[tokio::test] async fn delete_targets_fragmented() { - delete_targets_fragmented_scenario(fixtures_v6()).await; - delete_targets_fragmented_scenario(fixtures_v4()).await; + delete_targets_scenario(fixtures_v6(), true).await; + delete_targets_scenario(fixtures_v4(), true).await; } /// Growing an existing route on a full table is a legitimate TableFull @@ -1814,10 +1776,12 @@ mod tests { &before, after, "identity replace must leave the in-core entry untouched" ); - // The original reservation must still be claimed: an alloc of - // `old.slots` starting at `before.index` would only be possible if - // the FreeMap had reclaimed it, which would mean we ran the - // alloc-then-swap path. + // The original reservation must still be claimed. `FreeMap::alloc` + // checks the per-size recycle bin before the freelist, so if we *had* + // taken alloc-then-swap, the old span would now sit in + // `recycle_bins[before.slots]` and this probe would return + // `before.index`. A return value other than `before.index` is + // therefore proof that the old reservation was never freed. let probe = rd .freemap_mut(f.is_ipv4) .alloc(before.slots) @@ -1833,4 +1797,78 @@ mod tests { identity_replace_is_noop_scenario(fixtures_v6()).await; identity_replace_is_noop_scenario(fixtures_v4()).await; } + + /// A same-length, non-subset replace (one target swapped for another) + /// must take the alloc-then-swap path: the new set isn't ⊆ old, so + /// shrink-in-place doesn't apply. Verifies the dispatcher routes + /// correctly and that the resulting in-core entry reflects the new + /// target set. Run against an empty freemap so the alloc itself is + /// uncontested — we're testing dispatch, not allocation pressure. + async fn same_size_non_subset_replace_scenario(f: FamilyFixtures) { + let switch = make_switch(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim(&switch, &mut rd, f.victim, [f.targets[0], f.targets[1]]); + let before_index = rd.get(f.victim).expect("victim installed").index; + + replace_route_targets( + &switch, + &mut rd, + f.victim, + next_hops([f.targets[0], f.targets[2]]), + ) + .expect("same-size non-subset replace must succeed"); + + let entry = rd.get(f.victim).expect("victim must still exist"); + use std::collections::BTreeSet; + let observed: BTreeSet = + entry.targets.iter().map(|t| t.route.tgt_ip).collect(); + let expected: BTreeSet = + [f.targets[0], f.targets[2]].into_iter().collect(); + assert_eq!(observed, expected, "in-core target set must match new set"); + assert_ne!( + entry.index, before_index, + "alloc-then-swap must have moved the reservation to a new base" + ); + } + + #[tokio::test] + async fn same_size_non_subset_replace() { + same_size_non_subset_replace_scenario(fixtures_v6()).await; + same_size_non_subset_replace_scenario(fixtures_v4()).await; + } + + /// Growing an existing route on an uncontested table must succeed via + /// the alloc-then-swap path. Companion to `add_target_fails_full`, + /// which covers the same code path under freemap exhaustion. + async fn grow_succeeds_scenario(f: FamilyFixtures) { + let switch = make_switch(); + let mut rd = switch.routes.lock().await; + shrink_test_freemap(&mut rd, f.is_ipv4); + install_victim(&switch, &mut rd, f.victim, [f.targets[0]]); + + replace_route_targets( + &switch, + &mut rd, + f.victim, + next_hops([f.targets[0], f.targets[1]]), + ) + .expect("grow on an uncontested table must succeed"); + + let entry = rd.get(f.victim).expect("victim must still exist"); + assert_eq!(entry.slots, 2, "reservation must reflect the new size"); + use std::collections::BTreeSet; + let observed: BTreeSet = + entry.targets.iter().map(|t| t.route.tgt_ip).collect(); + let expected: BTreeSet = + [f.targets[0], f.targets[1]].into_iter().collect(); + assert_eq!(observed, expected, "in-core target set must match new set"); + } + + #[tokio::test] + async fn grow_succeeds() { + grow_succeeds_scenario(fixtures_v6()).await; + grow_succeeds_scenario(fixtures_v4()).await; + } + } From d5cbf7b63d4de00c8ce6b373964c9f3f055417e2 Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Fri, 15 May 2026 17:03:04 -0600 Subject: [PATCH 4/4] dpd: introduce RouteTableOps trait; decouple route tests from the ASIC backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The route tests added in 6c398b1 ("dpd: add ecmp del tests for full/fragmented table") build a `Switch` via the stub asic backend, which loads `bfrt.json` on construction. That artifact is produced by `cargo xtask codegen` (P4 compilation) and is not built by CI s `test.sh`, so every route test fails at runtime with `P4Missing(no bf-rt file: target/proto/.../bfrt.json)`. The deeper issue is that the route subsystem is being tested through a Switch even though it does not exercise ASIC semantics — the tests care about in-core `RouteData` state and `FreeMap` accounting, neither of which depends on what the stub records on the asic side. Coupling the tests to bfrt.json is a build-time accident. This commit removes the coupling by introducing a `RouteTableOps` trait that abstracts the four on-chip table operations the route subsystem performs: - add_target / delete_target on the `route_target` table - add_index / delete_index on the `route_index` table plus `log()` and `route_fwd_table_size()` so functions taking the trait do not need a `&Switch` at all. Production code uses `impl RouteTableOps for Switch`, which dispatches to the live `table::route_ipv{4,6}::*` functions — identical behavior to the prior direct calls. All internal route helpers (`replace_route_targets`, `unhook_route`, `alloc_then_swap`, `shrink_in_place`, `restore_after_shrink_failure`, `rollback_shrink`, `cleanup_route`, `finalize_route`, `add_route_locked`, `delete_route_target_locked`, `delete_route_locked`) now take `&dyn RouteTableOps` rather than `&Switch`. The four free helpers introduced in the parent commit (`write_one_target`, `delete_one_target_at`, `add_route_index_for`, `delete_route_index_for`) become trait methods and are removed. Public API signatures are unchanged: the async entry points still take `&Switch`, which coerces to `&dyn RouteTableOps` at the internal call sites. The route test module gains a `TestOps` that implements the trait, returning `Ok` for every on-chip operation and `TEST_FREEMAP_SIZE` for `route_fwd_table_size`. `TestOps` also carries per-op failpoint state (a `Cell>` per op): tests call `ops.arm(op, after)` to make the `(after+1)`-th subsequent call return a synthetic `Switch(SdeError)`. This replaces the thread-local failpoint module the previous iteration of this PR used. With `TestOps` in place, `make_switch` and `shrink_test_freemap` are gone. Every test now constructs a `(TestOps, RouteData)` via `make_test_ctx()`, drops the `tokio::test`/`async` decoration (no async lock is needed), and the bfrt.json runtime dependency is gone with it. The four rollback tests from the prior iteration land here using `TestOps::arm` directly: * rollback_on_compact_delete_failure — empty `touched` set, simplest recovery: rollback only re-adds the original route_index. * rollback_on_compact_write_failure — load-bearing case for the `live[]` / `touched[]` distinction. Delete succeeded then write failed, so `live[i] = None` and rollback must skip the delete and rewrite `original[i]`. * rollback_on_index_readd_failure — compact loop completes, the commit-boundary `add_index` fails. Rollback restores every touched position and re-adds the original index (the second AddIndex call of the run). * rollback_failure_leaves_in_core_cleared — arms two failpoints to break both the primary delete and rollback s own `add_index`. Asserts the documented degraded posture: in-core entry absent, FreeMap reservation leaked. Caveat: the harness injects SDE-shaped errors at the trait boundary. This exercises rollback sequencing and the in-core mirror state machine, but it does not cover the class of failures where the SDE returns Err yet leaves the ASIC in a partial state the `live[]` mirror does not anticipate. Catching that needs hardware-in-the-loop testing; out of scope here. Signed-off-by: Trey Aspelund --- dpd/src/route.rs | 897 ++++++++++++++++++++++++++++++----------------- 1 file changed, 569 insertions(+), 328 deletions(-) diff --git a/dpd/src/route.rs b/dpd/src/route.rs index 9894b49..03e8fbf 100644 --- a/dpd/src/route.rs +++ b/dpd/src/route.rs @@ -301,6 +301,122 @@ impl RouteData { } } +/// Per-subnet-family dispatch over the on-chip `route_target` and +/// `route_index` tables. Internal route helpers (`replace_route_targets`, +/// `shrink_in_place`, etc.) take `&dyn RouteTableOps` rather than `&Switch` +/// so tests can supply an implementation that records calls without +/// invoking the ASIC. Production code passes `&Switch`, whose impl below +/// dispatches to the live `table::route_ipv{4,6}` functions. +trait RouteTableOps { + /// Logger to use for the routing subsystem's messages. + fn log(&self) -> &slog::Logger; + + /// Size (in slots) of the `route_target` forwarding table for the + /// requested family. Used by `add_route_locked` to lazily initialize + /// the per-family `FreeMap` on first use. + fn route_fwd_table_size(&self, is_ipv4: bool) -> DpdResult; + + /// Write `target` into slot `idx` of the `route_target` table for a + /// subnet of the indicated family. Production dispatches on + /// `(subnet_is_ipv4, target.route.tgt_ip)`: an IPv4 target always goes + /// to the v4 table; a v6 target goes to `route_ipv4::add_route_target_v6` + /// when the subnet is v4, otherwise to the v6 table. + fn add_target( + &self, + subnet_is_ipv4: bool, + idx: u16, + target: &NextHop, + ) -> DpdResult<()>; + + /// Delete slot `idx` of the `route_target` table for a subnet of the + /// indicated family. + fn delete_target(&self, subnet_is_ipv4: bool, idx: u16) -> DpdResult<()>; + + /// Install a `route_index` entry pointing at `[index, index + slots)` + /// of the family inferred from `subnet`. + fn add_index(&self, subnet: IpNet, index: u16, slots: u8) -> DpdResult<()>; + + /// Delete the `route_index` entry for `subnet`. + fn delete_index(&self, subnet: IpNet) -> DpdResult<()>; +} + +impl RouteTableOps for Switch { + fn log(&self) -> &slog::Logger { + &self.log + } + + fn route_fwd_table_size(&self, is_ipv4: bool) -> DpdResult { + let table = if is_ipv4 { + TableType::RouteFwdIpv4 + } else { + TableType::RouteFwdIpv6 + }; + self.table_size(table) + } + + fn add_target( + &self, + subnet_is_ipv4: bool, + idx: u16, + target: &NextHop, + ) -> DpdResult<()> { + match target.route.tgt_ip { + IpAddr::V4(tgt_ip) => table::route_ipv4::add_route_target( + self, + idx, + target.asic_port_id, + tgt_ip, + target.route.vlan_id, + ), + IpAddr::V6(tgt_ip) => { + if subnet_is_ipv4 { + table::route_ipv4::add_route_target_v6( + self, + idx, + target.asic_port_id, + tgt_ip, + target.route.vlan_id, + ) + } else { + table::route_ipv6::add_route_target( + self, + idx, + target.asic_port_id, + tgt_ip, + target.route.vlan_id, + ) + } + } + } + } + + fn delete_target(&self, subnet_is_ipv4: bool, idx: u16) -> DpdResult<()> { + if subnet_is_ipv4 { + table::route_ipv4::delete_route_target(self, idx) + } else { + table::route_ipv6::delete_route_target(self, idx) + } + } + + fn add_index(&self, subnet: IpNet, index: u16, slots: u8) -> DpdResult<()> { + match subnet { + IpNet::V4(v4) => { + table::route_ipv4::add_route_index(self, &v4, index, slots) + } + IpNet::V6(v6) => { + table::route_ipv6::add_route_index(self, &v6, index, slots) + } + } + } + + fn delete_index(&self, subnet: IpNet) -> DpdResult<()> { + match subnet { + IpNet::V4(v4) => table::route_ipv4::delete_route_index(self, &v4), + IpNet::V6(v6) => table::route_ipv6::delete_route_index(self, &v6), + } + } +} + // Remove all the data for a given route from both the route_data and // route_index tables. // @@ -308,34 +424,23 @@ impl RouteData { // operation, not all of the target slots may yet be populated. Thus we require // the caller to explicitly indicate which slots need to be cleared. fn cleanup_route( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: Option, entry: RouteEntry, ) -> DpdResult<()> { // Remove the subnet -> index mapping first, so nobody can reach the // entries we delete below. - match subnet { - Some(IpNet::V4(subnet)) => { - table::route_ipv4::delete_route_index(switch, &subnet) - } - Some(IpNet::V6(subnet)) => { - table::route_ipv6::delete_route_index(switch, &subnet) - } - None => Ok(()), - }?; + if let Some(subnet) = subnet { + ops.delete_index(subnet)?; + } let all_clear = entry .targets .iter() .enumerate() .map(|(idx, _hop)| { - let x = entry.index + idx as u16; - if entry.is_ipv4 { - table::route_ipv4::delete_route_target(switch, x) - } else { - table::route_ipv6::delete_route_target(switch, x) - } + ops.delete_target(entry.is_ipv4, entry.index + idx as u16) }) .all(|rval| rval.is_ok()); @@ -354,32 +459,19 @@ fn cleanup_route( // // If that fails, free all resources associated with the new set of targets and fn finalize_route( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, entry: Option, ) -> DpdResult<()> { let Some(entry) = entry else { return Ok(()) }; - match match subnet { - IpNet::V4(subnet) => table::route_ipv4::add_route_index( - switch, - &subnet, - entry.index, - entry.slots, - ), - IpNet::V6(subnet) => table::route_ipv6::add_route_index( - switch, - &subnet, - entry.index, - entry.slots, - ), - } { + match ops.add_index(subnet, entry.index, entry.slots) { Ok(_) => { route_data.insert(subnet, entry); Ok(()) } - Err(_) => cleanup_route(switch, route_data, None, entry), + Err(_) => cleanup_route(ops, route_data, None, entry), } } @@ -442,80 +534,6 @@ fn classify_update( } } -fn write_one_target( - switch: &Switch, - subnet_is_ipv4: bool, - idx: u16, - target: &NextHop, -) -> DpdResult<()> { - match target.route.tgt_ip { - IpAddr::V4(tgt_ip) => table::route_ipv4::add_route_target( - switch, - idx, - target.asic_port_id, - tgt_ip, - target.route.vlan_id, - ), - IpAddr::V6(tgt_ip) => { - if subnet_is_ipv4 { - table::route_ipv4::add_route_target_v6( - switch, - idx, - target.asic_port_id, - tgt_ip, - target.route.vlan_id, - ) - } else { - table::route_ipv6::add_route_target( - switch, - idx, - target.asic_port_id, - tgt_ip, - target.route.vlan_id, - ) - } - } - } -} - -/// Delete one route_target entry at `idx`, dispatching on subnet family. -fn delete_one_target_at( - switch: &Switch, - subnet_is_ipv4: bool, - idx: u16, -) -> DpdResult<()> { - if subnet_is_ipv4 { - table::route_ipv4::delete_route_target(switch, idx) - } else { - table::route_ipv6::delete_route_target(switch, idx) - } -} - -/// Add a route_index entry for `subnet` pointing at `[index, index + slots)`. -fn add_route_index_for( - switch: &Switch, - subnet: IpNet, - index: u16, - slots: u8, -) -> DpdResult<()> { - match subnet { - IpNet::V4(v4) => { - table::route_ipv4::add_route_index(switch, &v4, index, slots) - } - IpNet::V6(v6) => { - table::route_ipv6::add_route_index(switch, &v6, index, slots) - } - } -} - -/// Delete the route_index entry for `subnet`. -fn delete_route_index_for(switch: &Switch, subnet: IpNet) -> DpdResult<()> { - match subnet { - IpNet::V4(v4) => table::route_ipv4::delete_route_index(switch, &v4), - IpNet::V6(v6) => table::route_ipv6::delete_route_index(switch, &v6), - } -} - /// Take the route off the dataplane: remove the in-core entry for `subnet` /// and delete the on-chip `route_index`. On success the caller owns the /// previous `RouteEntry` (or `None` if the subnet had none). On failure @@ -525,16 +543,16 @@ fn delete_route_index_for(switch: &Switch, subnet: IpNet) -> DpdResult<()> { /// It gives the caller sole ownership of the old entry and guarantees /// the dataplane can't reach the slots while they're in flight. fn unhook_route( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, ) -> DpdResult> { let Some(old) = route_data.remove(subnet) else { return Ok(None); }; - if let Err(e) = delete_route_index_for(switch, subnet) { + if let Err(e) = ops.delete_index(subnet) { debug!( - switch.log, + ops.log(), "unhook_route: route_index delete failed for {subnet}: {e:?}" ); route_data.insert(subnet, old); @@ -558,18 +576,18 @@ fn unhook_route( // calls `FreeMap::alloc`; everything else falls through to the // alloc-then-swap path. fn replace_route_targets( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, targets: Vec, ) -> DpdResult<()> { - debug!(switch.log, "replacing targets for {subnet} with: {targets:?}"); + debug!(ops.log(), "replacing targets for {subnet} with: {targets:?}"); // Delete-route path: no classification needed; unhook and free. if targets.is_empty() { - let old_entry = unhook_route(switch, route_data, subnet)?; + let old_entry = unhook_route(ops, route_data, subnet)?; return match old_entry { - Some(entry) => cleanup_route(switch, route_data, None, entry), + Some(entry) => cleanup_route(ops, route_data, None, entry), None => Ok(()), }; } @@ -579,18 +597,18 @@ fn replace_route_targets( match classify_update(route_data.get(subnet), &targets) { RouteTargetUpdate::NoOp => Ok(()), RouteTargetUpdate::ShrinkInPlace { removed } => { - let old = unhook_route(switch, route_data, subnet)? + let old = unhook_route(ops, route_data, subnet)? .expect("subset removal requires existing route"); // shrink_in_place reconstructs the in-core target vec from the // compacted ASIC layout rather than caller-supplied order (see // its body for why), so the caller's vec carries no useful // information past this point. drop(targets); - shrink_in_place(switch, route_data, subnet, old, removed) + shrink_in_place(ops, route_data, subnet, old, removed) } RouteTargetUpdate::Alloc => { - let old_entry = unhook_route(switch, route_data, subnet)?; - alloc_then_swap(switch, route_data, subnet, old_entry, targets) + let old_entry = unhook_route(ops, route_data, subnet)?; + alloc_then_swap(ops, route_data, subnet, old_entry, targets) } } } @@ -601,7 +619,7 @@ fn replace_route_targets( // `old_entry` carries the previous slot reservation that we still need to // free on success or restore on failure. fn alloc_then_swap( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, old_entry: Option, @@ -618,12 +636,12 @@ fn alloc_then_swap( }, Err(e) => { debug!( - switch.log, + ops.log(), "failed to allocate space for the new target list" ); // Restore the old route_index + in-core entry (or no-op if // there was no old entry). - let _ = finalize_route(switch, route_data, subnet, old_entry); + let _ = finalize_route(ops, route_data, subnet, old_entry); return Err(e); } }; @@ -632,10 +650,10 @@ fn alloc_then_swap( let mut idx = new_entry.index; for target in targets { - if let Err(e) = write_one_target(switch, is_ipv4, idx, &target) { - debug!(switch.log, "failed to insert {target:?} into route table"); - let _ = cleanup_route(switch, route_data, None, new_entry); - let _ = finalize_route(switch, route_data, subnet, old_entry); + if let Err(e) = ops.add_target(is_ipv4, idx, &target) { + debug!(ops.log(), "failed to insert {target:?} into route table"); + let _ = cleanup_route(ops, route_data, None, new_entry); + let _ = finalize_route(ops, route_data, subnet, old_entry); return Err(e); } idx += 1; @@ -643,22 +661,22 @@ fn alloc_then_swap( } // Insert the new subnet->index mapping - match finalize_route(switch, route_data, subnet, Some(new_entry.clone())) { + match finalize_route(ops, route_data, subnet, Some(new_entry.clone())) { Ok(()) => { // Finally free all of the table space for the original set of // targets if let Some(entry) = old_entry { - let _ = cleanup_route(switch, route_data, None, entry); + let _ = cleanup_route(ops, route_data, None, entry); } Ok(()) } Err(e) => { - debug!(switch.log, "failed to update index to new target list"); + debug!(ops.log(), "failed to update index to new target list"); // We failed to point at the new set of targets. Free all of the // new data and update the route_index table to point at the // original set of targets. - let _ = cleanup_route(switch, route_data, None, new_entry); - let _ = finalize_route(switch, route_data, subnet, old_entry); + let _ = cleanup_route(ops, route_data, None, new_entry); + let _ = finalize_route(ops, route_data, subnet, old_entry); Err(e) } } @@ -693,7 +711,7 @@ fn alloc_then_swap( // (matching the now-unknown ASIC state) so the next control-plane update // rebuilds from scratch. fn shrink_in_place( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, old: RouteEntry, @@ -728,34 +746,29 @@ fn shrink_in_place( .as_ref() .expect("tail slot is live at this point") .clone(); - if let Err(e) = - delete_one_target_at(switch, is_ipv4, base + removed_idx) - { + if let Err(e) = ops.delete_target(is_ipv4, base + removed_idx) { warn!( - switch.log, + ops.log(), "shrink-in-place compact delete failed at slot {}: {e:?}", base + removed_idx ); restore_after_shrink_failure( - switch, route_data, subnet, &live, &touched, old, + ops, route_data, subnet, &live, &touched, old, ); return Err(e); } live[removed_idx as usize] = None; touched.push(removed_idx); - if let Err(e) = write_one_target( - switch, - is_ipv4, - base + removed_idx, - &tail_contents, - ) { + if let Err(e) = + ops.add_target(is_ipv4, base + removed_idx, &tail_contents) + { warn!( - switch.log, + ops.log(), "shrink-in-place compact add failed at slot {}: {e:?}", base + removed_idx ); restore_after_shrink_failure( - switch, route_data, subnet, &live, &touched, old, + ops, route_data, subnet, &live, &touched, old, ); return Err(e); } @@ -765,13 +778,13 @@ fn shrink_in_place( } // Step 2: install the route_index pointing at the compacted range. - if let Err(e) = add_route_index_for(switch, subnet, base, new_n) { + if let Err(e) = ops.add_index(subnet, base, new_n) { warn!( - switch.log, + ops.log(), "shrink-in-place index re-add failed for {subnet}: {e:?}" ); restore_after_shrink_failure( - switch, route_data, subnet, &live, &touched, old, + ops, route_data, subnet, &live, &touched, old, ); return Err(e); } @@ -783,8 +796,7 @@ fn shrink_in_place( let release_count = old.slots as u16 - new_n as u16; let mut all_clear = true; for offset in 0..release_count { - if delete_one_target_at(switch, is_ipv4, release_base + offset).is_err() - { + if ops.delete_target(is_ipv4, release_base + offset).is_err() { all_clear = false; } } @@ -792,7 +804,7 @@ fn shrink_in_place( route_data.freemap_mut(is_ipv4).free(release_base, release_count); } else { warn!( - switch.log, + ops.log(), "shrink-in-place tail cleanup partially failed for {subnet}; \ leaking slots in the FreeMap's accounting" ); @@ -832,18 +844,18 @@ fn shrink_in_place( // re-insert `old` into the in-core mirror; if rollback itself fails leave // the in-core empty so the next update rebuilds. Consumes `old`. fn restore_after_shrink_failure( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, live: &[Option], touched: &[u16], old: RouteEntry, ) { - if rollback_shrink(switch, subnet, &old, live, touched) { + if rollback_shrink(ops, subnet, &old, live, touched) { route_data.insert(subnet, old); } else { error!( - switch.log, + ops.log(), "shrink-in-place rollback failed for {subnet}; in-core entry \ stays cleared and ASIC state may diverge until the next \ control-plane update on this subnet rebuilds it" @@ -858,7 +870,7 @@ fn restore_after_shrink_failure( /// succeeded, `false` otherwise. Per-slot failures are logged /// individually so a divergence postmortem can name the slots involved. fn rollback_shrink( - switch: &Switch, + ops: &dyn RouteTableOps, subnet: IpNet, old: &RouteEntry, live: &[Option], @@ -871,19 +883,19 @@ fn rollback_shrink( let slot = base + i; let orig = &old.targets[i as usize]; if live[i as usize].is_some() - && let Err(e) = delete_one_target_at(switch, is_ipv4, slot) + && let Err(e) = ops.delete_target(is_ipv4, slot) { - error!(switch.log, "rollback delete failed at slot {slot}: {e:?}"); + error!(ops.log(), "rollback delete failed at slot {slot}: {e:?}"); ok = false; } - if let Err(e) = write_one_target(switch, is_ipv4, slot, orig) { - error!(switch.log, "rollback write failed at slot {slot}: {e:?}"); + if let Err(e) = ops.add_target(is_ipv4, slot, orig) { + error!(ops.log(), "rollback write failed at slot {slot}: {e:?}"); ok = false; } } - if let Err(e) = add_route_index_for(switch, subnet, base, old.slots) { + if let Err(e) = ops.add_index(subnet, base, old.slots) { error!( - switch.log, + ops.log(), "rollback route_index re-add failed for {subnet}: {e:?}" ); ok = false; @@ -892,13 +904,13 @@ fn rollback_shrink( } fn add_route_locked( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, route: Route, asic_port_id: u16, ) -> DpdResult<()> { - info!(switch.log, "adding route {subnet} -> {:?}", route.tgt_ip); + info!(ops.log(), "adding route {subnet} -> {:?}", route.tgt_ip); // Verify that the slot freelist has been initialized let max_targets; @@ -906,12 +918,12 @@ fn add_route_locked( max_targets = MAX_TARGETS_IPV4; route_data .v4_freemap - .maybe_init(switch.table_size(TableType::RouteFwdIpv4)? as u16); + .maybe_init(ops.route_fwd_table_size(true)? as u16); } else { max_targets = MAX_TARGETS_IPV6; route_data .v6_freemap - .maybe_init(switch.table_size(TableType::RouteFwdIpv6)? as u16); + .maybe_init(ops.route_fwd_table_size(false)? as u16); } // Get the old set of targets that we'll be adding to @@ -925,7 +937,7 @@ fn add_route_locked( "exceeded limit of {max_targets} targets for one route" ))) } else { - replace_route_targets(switch, route_data, subnet, targets) + replace_route_targets(ops, route_data, subnet, targets) } } @@ -988,16 +1000,16 @@ async fn set_route( // them. If the route only has a single target, this call will remove the // entire route. fn delete_route_target_locked( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, route: Route, ) -> DpdResult<()> { - info!(switch.log, "deleting route {subnet} -> {}", route.tgt_ip); + info!(ops.log(), "deleting route {subnet} -> {}", route.tgt_ip); // Get set of targets remaining after we remove this entry let entry = route_data.get(subnet).ok_or({ - debug!(switch.log, "No such route"); + debug!(ops.log(), "No such route"); DpdError::Missing("no such route".into()) })?; let targets: Vec = entry @@ -1007,10 +1019,10 @@ fn delete_route_target_locked( .cloned() .collect(); if targets.len() == entry.targets.len() { - debug!(switch.log, "target not found"); + debug!(ops.log(), "target not found"); Err(DpdError::Missing("no such route".into())) } else { - replace_route_targets(switch, route_data, subnet, targets) + replace_route_targets(ops, route_data, subnet, targets) } } @@ -1092,7 +1104,7 @@ pub async fn get_route_ipv6( } fn delete_route_locked( - switch: &Switch, + ops: &dyn RouteTableOps, route_data: &mut RouteData, subnet: IpNet, ) -> DpdResult<()> { @@ -1101,7 +1113,7 @@ fn delete_route_locked( .remove(subnet) .ok_or(DpdError::Missing("no such route".into()))?; - cleanup_route(switch, route_data, Some(subnet), entry) + cleanup_route(ops, route_data, Some(subnet), entry) } // Delete a route and all of its targets @@ -1318,31 +1330,8 @@ mod tests { } } - fn make_switch() -> Switch { - // The asic stub locates its bfrt artifacts by walking up from the - // running binary path, which fails for `cargo test` because the test - // executable doesn't end in "dpd". Pin the P4 directory explicitly so - // the test runs from a workspace checkout regardless of host. - let p4_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .expect("workspace root") - .join("target/proto/opt/oxide/dendrite/sidecar"); - // SAFETY: set_var is unsafe wrt concurrent access; tests in this - // module all compute the same value and don't race meaningfully. - unsafe { std::env::set_var("P4_DIR", &p4_dir) }; - - let log = common::logging::init( - "route-test", - &None, - common::logging::LogFormat::Human, - ) - .expect("test logger"); - let mut switch = - Switch::new(log, "sidecar", crate::config::Config::default()) - .expect("construct stub switch"); - table::init(&mut switch).expect("init tables"); - switch - } + use aal::AsicError; + use std::cell::Cell; /// Total size to use for the per-family FreeMap in tests that exercise /// "table full" / "table fragmented" behavior. Small enough to keep @@ -1350,32 +1339,142 @@ mod tests { /// victim plus a handful of fillers. const TEST_FREEMAP_SIZE: u16 = 64; - /// Pre-initialize the per-family FreeMap to `TEST_FREEMAP_SIZE` so - /// subsequent `add_route_locked` calls don't enlarge it to the stub - /// table's full size. `maybe_init` is idempotent — once we set the - /// geometry, the production code path is a no-op. Must be called - /// before any `add_route_locked` invocation in the test. - fn shrink_test_freemap(rd: &mut RouteData, is_ipv4: bool) { - rd.freemap_mut(is_ipv4).maybe_init(TEST_FREEMAP_SIZE); + /// Which on-chip table operation a failpoint applies to. + #[derive(Clone, Copy)] + enum Op { + AddTarget, + DelTarget, + AddIndex, + DelIndex, + } + + /// In-process implementation of `RouteTableOps` used by the route tests. + /// Every on-chip operation is a no-op (returning `Ok`) unless the + /// corresponding failpoint has been armed via [`TestOps::arm`], in which + /// case the configured call returns a synthetic `Switch(SdeError)` and + /// the failpoint disarms. This lets the route state machine run + /// without any ASIC or P4-runtime presence. + struct TestOps { + log: slog::Logger, + add_target_fail: Cell>, + del_target_fail: Cell>, + add_index_fail: Cell>, + del_index_fail: Cell>, + } + + impl TestOps { + fn new() -> Self { + let log = common::logging::init( + "route-test", + &None, + common::logging::LogFormat::Human, + ) + .expect("test logger"); + TestOps { + log, + add_target_fail: Cell::new(None), + del_target_fail: Cell::new(None), + add_index_fail: Cell::new(None), + del_index_fail: Cell::new(None), + } + } + + fn slot(&self, op: Op) -> &Cell> { + match op { + Op::AddTarget => &self.add_target_fail, + Op::DelTarget => &self.del_target_fail, + Op::AddIndex => &self.add_index_fail, + Op::DelIndex => &self.del_index_fail, + } + } + + /// Fail the `(after+1)`-th subsequent call to `op` with a synthetic + /// SDE error, then disarm. Tests that need multiple failures on + /// the same op should re-arm after each fire. + fn arm(&self, op: Op, after: u32) { + self.slot(op).set(Some(after)); + } + + fn check(&self, op: Op) -> DpdResult<()> { + let cell = self.slot(op); + match cell.get() { + None => Ok(()), + Some(0) => { + cell.set(None); + Err(DpdError::Switch(AsicError::SdeError { + ctx: "test failpoint".into(), + err: "injected failure".into(), + })) + } + Some(n) => { + cell.set(Some(n - 1)); + Ok(()) + } + } + } + } + + impl RouteTableOps for TestOps { + fn log(&self) -> &slog::Logger { + &self.log + } + + fn route_fwd_table_size(&self, _is_ipv4: bool) -> DpdResult { + Ok(TEST_FREEMAP_SIZE as u32) + } + + fn add_target( + &self, + _subnet_is_ipv4: bool, + _idx: u16, + _target: &NextHop, + ) -> DpdResult<()> { + self.check(Op::AddTarget) + } + + fn delete_target( + &self, + _subnet_is_ipv4: bool, + _idx: u16, + ) -> DpdResult<()> { + self.check(Op::DelTarget) + } + + fn add_index( + &self, + _subnet: IpNet, + _index: u16, + _slots: u8, + ) -> DpdResult<()> { + self.check(Op::AddIndex) + } + + fn delete_index(&self, _subnet: IpNet) -> DpdResult<()> { + self.check(Op::DelIndex) + } + } + + /// Build a fresh `(TestOps, RouteData)` pair for a test. `RouteData` + /// starts empty and unsized; the first `add_route_locked` call will + /// initialize it to `TEST_FREEMAP_SIZE` via the trait, so no separate + /// pre-init step is required. + fn make_test_ctx() -> (TestOps, RouteData) { + let ops = TestOps::new(); + let rd = init(ops.log()); + (ops, rd) } /// Install one entry per target onto `victim`. Callers use this against /// an empty table, so any failure is a bug in the test setup. fn install_victim( - switch: &Switch, + ops: &dyn RouteTableOps, rd: &mut RouteData, victim: IpNet, targets: impl IntoIterator, ) { for tgt in targets { - add_route_locked( - switch, - rd, - victim, - make_route(tgt), - FAKE_ASIC_PORT, - ) - .expect("victim add must succeed on an empty table"); + add_route_locked(ops, rd, victim, make_route(tgt), FAKE_ASIC_PORT) + .expect("victim add must succeed on an empty table"); } } @@ -1383,7 +1482,7 @@ mod tests { /// `i = 0, 1, ...`) until any add returns [`DpdError::TableFull`], /// returning the CIDRs that were accepted. fn fill_table( - switch: &Switch, + ops: &dyn RouteTableOps, rd: &mut RouteData, filler: IpAddr, mut cidr_at: impl FnMut(u32) -> IpNet, @@ -1392,7 +1491,7 @@ mod tests { for i in 0u32.. { let cidr = cidr_at(i); match add_route_locked( - switch, + ops, rd, cidr, make_route(filler), @@ -1409,14 +1508,18 @@ mod tests { /// Delete every other entry in `fillers` so the freemap is left with /// size-1 free spans separated by surviving entries — spans that /// `reclaim()` cannot coalesce into anything wider. - fn fragment(switch: &Switch, rd: &mut RouteData, fillers: &[IpNet]) { + fn fragment( + ops: &dyn RouteTableOps, + rd: &mut RouteData, + fillers: &[IpNet], + ) { assert!( fillers.len() >= 2, "table did not accept enough fillers: got {}", fillers.len(), ); for f in fillers.iter().step_by(2) { - delete_route_locked(switch, rd, *f).expect("delete filler"); + delete_route_locked(ops, rd, *f).expect("delete filler"); } } @@ -1523,20 +1626,13 @@ mod tests { /// any size when the loop exits. /// 3. Replace the victim's target set with just the first one, /// i.e. drop the second target. - async fn delete_target_full_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim( - &switch, - &mut rd, - f.victim, - [f.targets[0], f.targets[1]], - ); - fill_table(&switch, &mut rd, f.filler, f.filler_cidr); + fn delete_target_full_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, [f.targets[0], f.targets[1]]); + fill_table(&ops, &mut rd, f.filler, f.filler_cidr); replace_route_targets( - &switch, + &ops, &mut rd, f.victim, next_hops([f.targets[0]]), @@ -1545,10 +1641,10 @@ mod tests { assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &[f.targets[0]], 1); } - #[tokio::test] - async fn delete_target_full() { - delete_target_full_scenario(fixtures_v6()).await; - delete_target_full_scenario(fixtures_v4()).await; + #[test] + fn delete_target_full() { + delete_target_full_scenario(fixtures_v6()); + delete_target_full_scenario(fixtures_v4()); } /// Deleting a target must succeed when the forwarding table is full @@ -1566,16 +1662,14 @@ mod tests { /// spans are pairwise non-adjacent and reclaim() cannot coalesce /// them into anything wider. /// 4. Drop the victim's last target. - async fn delete_target_fragmented_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim(&switch, &mut rd, f.victim, f.targets); - let fillers = fill_table(&switch, &mut rd, f.filler, f.filler_cidr); - fragment(&switch, &mut rd, &fillers); + fn delete_target_fragmented_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, f.targets); + let fillers = fill_table(&ops, &mut rd, f.filler, f.filler_cidr); + fragment(&ops, &mut rd, &fillers); replace_route_targets( - &switch, + &ops, &mut rd, f.victim, next_hops(f.targets[..3].iter().copied()), @@ -1584,10 +1678,10 @@ mod tests { assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &f.targets[..3], 1); } - #[tokio::test] - async fn delete_target_fragmented() { - delete_target_fragmented_scenario(fixtures_v6()).await; - delete_target_fragmented_scenario(fixtures_v4()).await; + #[test] + fn delete_target_fragmented() { + delete_target_fragmented_scenario(fixtures_v6()); + delete_target_fragmented_scenario(fixtures_v4()); } /// Install three NextHops that all share one `tgt_ip` (distinguished @@ -1600,15 +1694,13 @@ mod tests { /// single replace that drops all three sharing the `tgt_ip` (delta=3). /// Exercises the multi-target subset-removal path through /// `shrink_in_place` against either a full or a full+fragmented freemap. - async fn delete_targets_scenario(f: FamilyFixtures, fragmented: bool) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); + fn delete_targets_scenario(f: FamilyFixtures, fragmented: bool) { + let (ops, mut rd) = make_test_ctx(); let shared_tgt = f.targets[0]; let survivor = f.targets[1]; for tag in ["a", "b", "c"] { add_route_locked( - &switch, + &ops, &mut rd, f.victim, Route { @@ -1623,53 +1715,46 @@ mod tests { .expect("victim add must succeed on an empty table"); } add_route_locked( - &switch, + &ops, &mut rd, f.victim, make_route(survivor), FAKE_ASIC_PORT, ) .expect("survivor add must succeed on an empty table"); - let fillers = fill_table(&switch, &mut rd, f.filler, f.filler_cidr); + let fillers = fill_table(&ops, &mut rd, f.filler, f.filler_cidr); if fragmented { - fragment(&switch, &mut rd, &fillers); + fragment(&ops, &mut rd, &fillers); } - replace_route_targets( - &switch, - &mut rd, - f.victim, - next_hops([survivor]), - ) - .expect("multi-target delete must succeed"); + replace_route_targets(&ops, &mut rd, f.victim, next_hops([survivor])) + .expect("multi-target delete must succeed"); assert_post_shrink(&mut rd, f.victim, f.is_ipv4, &[survivor], 3); } - #[tokio::test] - async fn delete_targets_full() { - delete_targets_scenario(fixtures_v6(), false).await; - delete_targets_scenario(fixtures_v4(), false).await; + #[test] + fn delete_targets_full() { + delete_targets_scenario(fixtures_v6(), false); + delete_targets_scenario(fixtures_v4(), false); } - #[tokio::test] - async fn delete_targets_fragmented() { - delete_targets_scenario(fixtures_v6(), true).await; - delete_targets_scenario(fixtures_v4(), true).await; + #[test] + fn delete_targets_fragmented() { + delete_targets_scenario(fixtures_v6(), true); + delete_targets_scenario(fixtures_v4(), true); } /// Growing an existing route on a full table is a legitimate TableFull /// condition; the alloc-then-swap path should surface that, not paper /// over it. - async fn add_target_fails_full_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim(&switch, &mut rd, f.victim, [f.targets[0]]); - fill_table(&switch, &mut rd, f.filler, f.filler_cidr); + fn add_target_fails_full_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, [f.targets[0]]); + fill_table(&ops, &mut rd, f.filler, f.filler_cidr); // Grow: replace the 1-target set with a 2-target set (old + new). match replace_route_targets( - &switch, + &ops, &mut rd, f.victim, next_hops([f.targets[0], f.targets[1]]), @@ -1679,10 +1764,10 @@ mod tests { } } - #[tokio::test] - async fn add_target_fails_full() { - add_target_fails_full_scenario(fixtures_v6()).await; - add_target_fails_full_scenario(fixtures_v4()).await; + #[test] + fn add_target_fails_full() { + add_target_fails_full_scenario(fixtures_v6()); + add_target_fails_full_scenario(fixtures_v4()); } /// After a shrink, the in-core `RouteEntry.targets` must reflect the @@ -1692,14 +1777,12 @@ mod tests { /// removed=[2,1] in decreasing order: slot 2 <- live[3]=t3, slot 1 <- /// live[2]=t3 (the just-moved tail). After step 3 drops the released /// tail, the live window is [t0, t3] — that's what in-core must reflect. - async fn shrink_preserves_compacted_order_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim(&switch, &mut rd, f.victim, f.targets); + fn shrink_preserves_compacted_order_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, f.targets); replace_route_targets( - &switch, + &ops, &mut rd, f.victim, next_hops([f.targets[3], f.targets[0]]), @@ -1717,10 +1800,10 @@ mod tests { ); } - #[tokio::test] - async fn shrink_preserves_compacted_order() { - shrink_preserves_compacted_order_scenario(fixtures_v6()).await; - shrink_preserves_compacted_order_scenario(fixtures_v4()).await; + #[test] + fn shrink_preserves_compacted_order() { + shrink_preserves_compacted_order_scenario(fixtures_v6()); + shrink_preserves_compacted_order_scenario(fixtures_v4()); } /// Exercise the `removed_idx == tail_idx` branch (skip the copy) for @@ -1728,14 +1811,12 @@ mod tests { /// {3, 1, 0}. Compaction order: j=0 tail=3=removed (skip), j=1 tail=2 /// removed=1 (copy t2 -> slot 1), j=2 tail=1 removed=0 (copy live[1]=t2 /// -> slot 0). Expected in-core: [t2]. - async fn shrink_to_single_non_zero_survivor_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim(&switch, &mut rd, f.victim, f.targets); + fn shrink_to_single_non_zero_survivor_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, f.targets); replace_route_targets( - &switch, + &ops, &mut rd, f.victim, next_hops([f.targets[2]]), @@ -1748,10 +1829,10 @@ mod tests { assert_eq!(observed, vec![f.targets[2]]); } - #[tokio::test] - async fn shrink_to_single_non_zero_survivor() { - shrink_to_single_non_zero_survivor_scenario(fixtures_v6()).await; - shrink_to_single_non_zero_survivor_scenario(fixtures_v4()).await; + #[test] + fn shrink_to_single_non_zero_survivor() { + shrink_to_single_non_zero_survivor_scenario(fixtures_v6()); + shrink_to_single_non_zero_survivor_scenario(fixtures_v4()); } /// An identity replace (new target set equals old target set) must @@ -1761,14 +1842,12 @@ mod tests { /// FreeMap's bookkeeping is undisturbed. The index check is the /// strongest signal: any alloc-then-swap path would have moved the /// reservation to a new base. - async fn identity_replace_is_noop_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim(&switch, &mut rd, f.victim, f.targets); + fn identity_replace_is_noop_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, f.targets); let before = rd.get(f.victim).expect("victim installed").clone(); - replace_route_targets(&switch, &mut rd, f.victim, next_hops(f.targets)) + replace_route_targets(&ops, &mut rd, f.victim, next_hops(f.targets)) .expect("identity replace must succeed"); let after = rd.get(f.victim).expect("victim must still exist"); @@ -1792,10 +1871,10 @@ mod tests { ); } - #[tokio::test] - async fn identity_replace_is_noop() { - identity_replace_is_noop_scenario(fixtures_v6()).await; - identity_replace_is_noop_scenario(fixtures_v4()).await; + #[test] + fn identity_replace_is_noop() { + identity_replace_is_noop_scenario(fixtures_v6()); + identity_replace_is_noop_scenario(fixtures_v4()); } /// A same-length, non-subset replace (one target swapped for another) @@ -1804,15 +1883,13 @@ mod tests { /// correctly and that the resulting in-core entry reflects the new /// target set. Run against an empty freemap so the alloc itself is /// uncontested — we're testing dispatch, not allocation pressure. - async fn same_size_non_subset_replace_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim(&switch, &mut rd, f.victim, [f.targets[0], f.targets[1]]); + fn same_size_non_subset_replace_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, [f.targets[0], f.targets[1]]); let before_index = rd.get(f.victim).expect("victim installed").index; replace_route_targets( - &switch, + &ops, &mut rd, f.victim, next_hops([f.targets[0], f.targets[2]]), @@ -1832,23 +1909,21 @@ mod tests { ); } - #[tokio::test] - async fn same_size_non_subset_replace() { - same_size_non_subset_replace_scenario(fixtures_v6()).await; - same_size_non_subset_replace_scenario(fixtures_v4()).await; + #[test] + fn same_size_non_subset_replace() { + same_size_non_subset_replace_scenario(fixtures_v6()); + same_size_non_subset_replace_scenario(fixtures_v4()); } /// Growing an existing route on an uncontested table must succeed via /// the alloc-then-swap path. Companion to `add_target_fails_full`, /// which covers the same code path under freemap exhaustion. - async fn grow_succeeds_scenario(f: FamilyFixtures) { - let switch = make_switch(); - let mut rd = switch.routes.lock().await; - shrink_test_freemap(&mut rd, f.is_ipv4); - install_victim(&switch, &mut rd, f.victim, [f.targets[0]]); + fn grow_succeeds_scenario(f: FamilyFixtures) { + let (ops, mut rd) = make_test_ctx(); + install_victim(&ops, &mut rd, f.victim, [f.targets[0]]); replace_route_targets( - &switch, + &ops, &mut rd, f.victim, next_hops([f.targets[0], f.targets[1]]), @@ -1865,10 +1940,176 @@ mod tests { assert_eq!(observed, expected, "in-core target set must match new set"); } - #[tokio::test] - async fn grow_succeeds() { - grow_succeeds_scenario(fixtures_v6()).await; - grow_succeeds_scenario(fixtures_v4()).await; + #[test] + fn grow_succeeds() { + grow_succeeds_scenario(fixtures_v6()); + grow_succeeds_scenario(fixtures_v4()); + } + + // ----- failure-injection (rollback) tests ----------------------------- + // + // Drive `shrink_in_place`'s recovery paths by arming `TestOps` failpoints + // at the call indices that land at the step being tested. Each test: + // 1. Builds a fresh `(TestOps, RouteData)` and installs a 4-target + // victim; snapshots the resulting `RouteEntry`. + // 2. Arms one (or two) failpoints. + // 3. Issues a shrink-style replace and expects `Err(Switch(_))`. + // 4. Asserts the documented post-failure invariant — for compact-loop + // and commit-boundary failures the in-core entry is restored + // byte-for-byte and the FreeMap probe shows the original + // reservation still claimed; for the rollback-failure case the + // in-core entry is absent and the reservation leaks. + + /// Install a 4-target victim and return a snapshot of its `RouteEntry` + /// for post-failure comparison. + fn install_for_rollback( + ops: &dyn RouteTableOps, + rd: &mut RouteData, + f: &FamilyFixtures, + ) -> RouteEntry { + install_victim(ops, rd, f.victim, f.targets); + rd.get(f.victim).expect("victim installed").clone() } + /// Assert that a failed shrink restored the in-core entry byte-for-byte + /// and that the FreeMap still considers the original reservation + /// claimed. Uses the same `recycle_bins`-checked-first probe as + /// `identity_replace_is_noop`. + fn assert_rolled_back( + rd: &mut RouteData, + f: &FamilyFixtures, + before: &RouteEntry, + ) { + let after = rd.get(f.victim).expect("in-core entry must be restored"); + assert_eq!( + before, after, + "rollback must restore in-core byte-for-byte" + ); + let probe = rd + .freemap_mut(f.is_ipv4) + .alloc(before.slots) + .expect("freemap must still have room"); + assert_ne!( + probe, before.index, + "rollback must not have freed the original reservation" + ); + } + + /// The compact loop's first `delete_target` fails. Nothing was + /// touched, `touched` is empty, rollback only re-adds the original + /// `route_index`. + /// + /// Drops `t0` rather than `t3` so the compact loop has real work: when + /// `removed_idx == tail_idx` the loop body short-circuits and no + /// `delete_target` is called, leaving the failpoint to fire during the + /// best-effort post-commit tail cleanup (which never surfaces as Err). + #[test] + fn rollback_on_compact_delete_failure() { + let (ops, mut rd) = make_test_ctx(); + let f = fixtures_v6(); + let before = install_for_rollback(&ops, &mut rd, &f); + + ops.arm(Op::DelTarget, 0); + let err = replace_route_targets( + &ops, + &mut rd, + f.victim, + next_hops(f.targets[1..].iter().copied()), + ) + .expect_err("compact-delete failpoint must surface as Err"); + assert!(matches!(err, DpdError::Switch(_))); + + assert_rolled_back(&mut rd, &f, &before); + } + + /// The compact loop's first `add_target` fails after the matching + /// delete succeeded. `live[idx] = None` and `idx ∈ touched`, so + /// rollback must skip the delete (no live entry to remove) and + /// rewrite `original[idx]`. This is the case that motivates the + /// `live[]` / `touched[]` distinction. + #[test] + fn rollback_on_compact_write_failure() { + let (ops, mut rd) = make_test_ctx(); + let f = fixtures_v6(); + let before = install_for_rollback(&ops, &mut rd, &f); + + // Drop two targets (indices 0 and 2 of 4) so the compact loop must + // copy at least once; the first add_target call lands on the copy. + ops.arm(Op::AddTarget, 0); + let err = replace_route_targets( + &ops, + &mut rd, + f.victim, + next_hops([f.targets[1], f.targets[3]]), + ) + .expect_err("compact-write failpoint must surface as Err"); + assert!(matches!(err, DpdError::Switch(_))); + + assert_rolled_back(&mut rd, &f, &before); + } + + /// The compact loop completes successfully but the commit-boundary + /// `add_index` fails. Every removed-slot position has been overwritten + /// with tail contents and is in `touched`; rollback must restore each + /// and re-add the original `route_index` (the second AddIndex call of + /// the run). + #[test] + fn rollback_on_index_readd_failure() { + let (ops, mut rd) = make_test_ctx(); + let f = fixtures_v6(); + let before = install_for_rollback(&ops, &mut rd, &f); + + // First AddIndex call = step 2 (the commit). Second = rollback's + // index re-add, which must succeed for this test's assertions. + ops.arm(Op::AddIndex, 0); + let err = replace_route_targets( + &ops, + &mut rd, + f.victim, + next_hops([f.targets[1], f.targets[3]]), + ) + .expect_err("commit-boundary AddIndex failpoint must surface as Err"); + assert!(matches!(err, DpdError::Switch(_))); + + assert_rolled_back(&mut rd, &f, &before); + } + + /// Rollback-of-rollback failure: trigger a compact-delete failure, then + /// also break the rollback's own `add_index` call. Documented posture + /// is "in-core entry stays cleared; FreeMap reservation leaks until + /// the next control-plane update on this subnet." + /// + /// Drops `t0` for the same reason as `rollback_on_compact_delete_failure`. + #[test] + fn rollback_failure_leaves_in_core_cleared() { + let (ops, mut rd) = make_test_ctx(); + let f = fixtures_v6(); + let before = install_for_rollback(&ops, &mut rd, &f); + + // Compact delete fails → rollback fires. AddIndex's only call this + // run is the rollback's, so arming "fail next" breaks it. + ops.arm(Op::DelTarget, 0); + ops.arm(Op::AddIndex, 0); + let err = replace_route_targets( + &ops, + &mut rd, + f.victim, + next_hops(f.targets[1..].iter().copied()), + ) + .expect_err("compact-delete failpoint must surface as Err"); + assert!(matches!(err, DpdError::Switch(_))); + + assert!( + rd.get(f.victim).is_none(), + "documented posture: in-core entry stays cleared when rollback fails" + ); + let probe = rd + .freemap_mut(f.is_ipv4) + .alloc(before.slots) + .expect("freemap must still have room"); + assert_ne!( + probe, before.index, + "rollback failure must leak the original reservation" + ); + } }