diff --git a/.github/workflows/rescue-demos.yml b/.github/workflows/rescue-demos.yml index 4ead393..e15c2f1 100644 --- a/.github/workflows/rescue-demos.yml +++ b/.github/workflows/rescue-demos.yml @@ -55,10 +55,13 @@ jobs: include: - repo: "freenet:3GEERif5ihbf/freenet-core" only_current_tips: true # snapshot mode + github_repo: "" # --from not supported in snapshot mode - repo: "freenet:96rknpy1GYhZ/freenet-stdlib" only_current_tips: false # history mode + github_repo: "freenet/freenet-stdlib" - repo: "freenet:99TmCayXn6Tm/freenet-git" only_current_tips: false # history mode (self-mirror) + github_repo: "freenet/freenet-git" steps: - name: Install Rust uses: dtolnay/rust-toolchain@stable @@ -66,11 +69,32 @@ jobs: - name: Install freenet-git from crates.io run: cargo install freenet-git --locked + # Clone the upstream GitHub repo so rescue can reconstruct missing + # packs from local objects via `--from`. Only history-mode cells + # get a clone; snapshot-mode cells (freenet-core) skip this step + # because `--from` doesn't help with force-pushed orphan commits. + # + # The clone has to be deep enough to contain every commit the + # contract's bundle-tip extensions reference -- which can be + # arbitrarily far back in history depending on how many mirror + # pushes have accumulated. Use a full clone (no --depth) so any + # historical bundle's tip is reachable. For freenet-stdlib and + # freenet-git this is small (a few MiB); for larger history-mode + # repos in the future this should be revisited. 
+ - name: Clone upstream for --from + if: matrix.github_repo != '' + uses: actions/checkout@v4 + with: + repository: ${{ matrix.github_repo }} + path: upstream + fetch-depth: 0 + - name: Rescue env: REPO: ${{ matrix.repo }} WS_URL: ${{ secrets.FREENET_GIT_WS_URL }} ONLY_CURRENT_TIPS: ${{ matrix.only_current_tips }} + GITHUB_REPO: ${{ matrix.github_repo }} run: | set -euo pipefail # Probe the installed binary for --only-current-tips support @@ -112,8 +136,21 @@ jobs: # Pre-0.1.19 binary: outer loop is serial, nothing to set. parallel_args="" fi + # --from (0.1.23+) reconstructs missing pack bytes + # from a local clone when the gateway has evicted them, fixing + # the "1 bundle(s) failed to rescue: GET pack ..." class of + # failures (freenet-git#54 + associated rescue-demos rotation). + # Only history-mode cells get a clone (see the matrix + # `github_repo` field) -- snapshot-mode cells fall through to + # the GET-only path because `--from` can't help with force- + # pushed orphan commits whose ranges aren't recorded in + # contract metadata.
+ from_args="" + if [ -n "$GITHUB_REPO" ] && freenet-git rescue --help 2>&1 | grep -q -- '--from'; then + from_args="--from upstream/.git" + fi # shellcheck disable=SC2086 - freenet-git rescue "$REPO" --ws-url "$WS_URL" $extra $parallel_args + freenet-git rescue "$REPO" --ws-url "$WS_URL" $extra $parallel_args $from_args # Per the GH-Actions failure-notification model: matrix cell # failures show up in the run summary but do not (by default) email diff --git a/Cargo.lock b/Cargo.lock index 057bd00..f43c26e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -619,7 +619,7 @@ checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "freenet-git" -version = "0.1.22" +version = "0.1.23" dependencies = [ "anyhow", "assert_cmd", @@ -650,7 +650,7 @@ dependencies = [ [[package]] name = "freenet-git-encoding" -version = "0.1.22" +version = "0.1.23" dependencies = [ "blake3", "ed25519-dalek", @@ -661,7 +661,7 @@ dependencies = [ [[package]] name = "freenet-git-identity" -version = "0.1.22" +version = "0.1.23" dependencies = [ "bincode", "blake3", @@ -680,7 +680,7 @@ dependencies = [ [[package]] name = "freenet-git-pack-contract" -version = "0.1.22" +version = "0.1.23" dependencies = [ "blake3", "freenet-stdlib", @@ -688,7 +688,7 @@ dependencies = [ [[package]] name = "freenet-git-repo-contract" -version = "0.1.22" +version = "0.1.23" dependencies = [ "bincode", "ed25519-dalek", @@ -698,7 +698,7 @@ dependencies = [ [[package]] name = "freenet-git-types" -version = "0.1.22" +version = "0.1.23" dependencies = [ "bincode", "blake3", diff --git a/Cargo.toml b/Cargo.toml index 1e32b58..239db44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ members = [ ] [workspace.package] -version = "0.1.22" +version = "0.1.23" edition = "2021" rust-version = "1.86" license = "LGPL-3.0-only" diff --git a/crates/freenet-git/src/bin/git-remote-freenet.rs b/crates/freenet-git/src/bin/git-remote-freenet.rs index 0dc504e..844555c 100644 --- 
a/crates/freenet-git/src/bin/git-remote-freenet.rs +++ b/crates/freenet-git/src/bin/git-remote-freenet.rs @@ -1290,10 +1290,17 @@ fn build_pack( bail!("git rev-list failed: {}", rev_list_out.status); } + // `-c pack.threads=1`: the pack that lands on the contract here + // is what `freenet-git rescue --from ` will later try to + // reconstruct byte-for-byte. Default `pack.threads=auto` races + // delta search across cores → non-deterministic pack bytes → + // future rescues' reconstructions silently miss the lookup map. + // Pinning single-threaded is a small per-push wall-clock cost + // for permanent rescue reproducibility. See PR #55 Codex P2 #2. let mut child = Command::new("git") .arg("--git-dir") .arg(git_dir) - .args(["pack-objects", "--stdout"]) + .args(["-c", "pack.threads=1", "pack-objects", "--stdout"]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()) diff --git a/crates/freenet-git/src/lib.rs b/crates/freenet-git/src/lib.rs index 2259248..3a55803 100644 --- a/crates/freenet-git/src/lib.rs +++ b/crates/freenet-git/src/lib.rs @@ -6,6 +6,7 @@ pub mod chunked; pub mod ids; +pub mod local_pack; pub mod pack_cache; pub mod state_init; pub mod url; diff --git a/crates/freenet-git/src/local_pack.rs b/crates/freenet-git/src/local_pack.rs new file mode 100644 index 0000000..45d39ed --- /dev/null +++ b/crates/freenet-git/src/local_pack.rs @@ -0,0 +1,359 @@ +//! Pack reconstruction from a local git clone. +//! +//! Powers `freenet-git rescue --from `: when a bundle's pack +//! has been evicted from the gateway AND from any peer reachable via +//! the ring, the rescue would otherwise be stuck (the previous behavior: +//! fail loudly, "1 bundle(s) failed to rescue"). With a local clone of +//! the same repo, we can rebuild the exact same pack bytes locally — +//! `git pack-objects` is byte-for-byte deterministic given the same +//! object set and same git version (verified empirically against the +//! 
freenet-git self-mirror, 2026-05-17, where pack +//! `7c30464721e743061a50a66fa21c57c9e25e2e57732046223554c28ffaad2c2a` +//! was reproduced from `000c4fde..c682079a` with matching BLAKE3). +//! +//! # Scope (initial implementation) +//! +//! - **History mode only**: snapshot-mode mirrors force-push fresh +//! orphan commits per run, so the relationship between bundle tips +//! and (prev, new) ranges is degenerate. Snapshot-mode contracts +//! with missing data fall back to the existing GET-only path. +//! - **SinglePack only**: ChunkedPack reconstruction requires +//! re-splitting the local pack into chunks of the same `chunk_size` +//! that the original publish used; the chunk_size isn't stored in +//! the contract metadata today. Tracked as follow-up. +//! +//! # Algorithm +//! +//! 1. Read every `bundle-tip:` extension from the contract state +//! (each value is a 20-byte commit SHA — the tip the bundle covers). +//! 2. Order tips by reachable-commit count (`git rev-list --count`), +//! tie-break on tip bytes. For linear history-mode pushes this is +//! the actual push chronology; out-of-order timestamps from merges +//! don't affect it. +//! 3. For each tip, generate MULTIPLE candidate `(prev, new)` packs: +//! chained-from-previous (the common same-ref push case) AND +//! no-prev (the new-ref case, e.g. a tag bundle whose original +//! pack covered everything reachable from the tag). The push code +//! in git-remote-freenet.rs uses `state.refs.get(&dst)` to pick +//! each ref's prev independently, so a single push of both +//! `main:main` and `refs/tags/*:refs/tags/*` creates one +//! main-bundle (prev = last-known main tip) and one tag-bundle +//! per new tag (prev = None). Trying both candidates per tip +//! covers both shapes without needing to record which ref the +//! bundle came from. +//! 4. BLAKE3 every reconstructed pack, store `(pack_hash -> +//! pack_bytes)`. Wrong-content packs (from mis-paired candidates) +//! 
land under their own hash and are silently never looked up. +//! 5. At rescue time, when `wsclient::get_pack` fails for a bundle, +//! look up the bundle's expected pack hash in the map; if present, +//! PUT those bytes directly. + +use anyhow::{anyhow, bail, Context, Result}; +use freenet_git_types::signing::parse_bundle_tip_extension_key; +use freenet_git_types::{ObjectBundleId, RepoState}; +use std::collections::HashMap; +use std::io::{BufRead, BufReader, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::sync::Arc; + +/// Map from expected pack BLAKE3 → reconstructed pack bytes. The +/// caller (rescue's per-bundle path) looks up the bundle's expected +/// pack_hash and PUTs the bytes when present. Bytes are `Arc`-wrapped +/// so a handle is cheap to clone; `rescue_pack` still copies them per PUT. +pub type LocalPackMap = HashMap<[u8; 32], Arc>>; + +/// Normalize a user-supplied `--from <path>` to the actual git +/// directory git's `--git-dir` flag expects. If `<path>/.git` is a +/// directory the user passed a worktree root → return `<path>/.git`. +/// Otherwise return `<path>` as-is (handles bare repos and explicit +/// `.git` directory paths). +/// +/// Codex PR #55 P2 #2: without this, a user invoking `--from +/// /path/to/clone` would silently fail every reconstruction because +/// `git --git-dir /path/to/clone` treats the worktree as if it WERE +/// the git directory (no `.git` auto-append) and every command fails. +pub fn normalize_git_dir(path: &Path) -> PathBuf { + let dot_git = path.join(".git"); + if dot_git.is_dir() { + dot_git + } else { + path.to_path_buf() + } +} + +/// Build the local-pack map from a working git directory and the +/// contract's current `RepoState`. +/// +/// Skips bundles that lack a `bundle-tip:` extension (pre-0.1.16 +/// mirrors don't record them) and bundles whose tip commit isn't +/// present in the local clone (the operator's clone is shallower than +/// the contract's history).
Prints an `info:` note to stderr when no tip extension exists +/// at all, and a count of tips missing from the local clone. +/// +/// Returns a map keyed by reconstructed pack BLAKE3. Bundles whose +/// reconstructed pack hash matches the value stored in the contract's +/// object_index entry can be rescued from this map; bundles whose +/// reconstruction produces a different hash (shouldn't happen if pack +/// reproducibility holds) stay in the map under that hash but are never +/// looked up, leaving those bundles on the GET-only path. +pub fn build_local_pack_map(git_dir: &Path, state: &RepoState) -> Result { + let git_dir = normalize_git_dir(git_dir); + let git_dir = git_dir.as_path(); + + // 1. Collect bundle-tip extensions: (bundle_id, tip_commit_sha) + let mut tips: Vec<(ObjectBundleId, [u8; 20])> = Vec::new(); + for (ext_key, entry) in &state.extensions { + let Some(bundle_id) = parse_bundle_tip_extension_key(ext_key) else { + continue; + }; + if entry.value.len() != 20 { + // Malformed extension — skip with a debug log. + tracing::debug!( + "bundle-tip extension for {} has unexpected length {}; skipping", + hex::encode(bundle_id), + entry.value.len() + ); + continue; + } + let mut tip = [0u8; 20]; + tip.copy_from_slice(&entry.value); + tips.push((bundle_id, tip)); + } + + if tips.is_empty() { + eprintln!( + "info: no bundle-tip extensions in contract state; --from cannot \ + reconstruct any bundles (this is normal for pre-0.1.16 mirrors)" + ); + return Ok(LocalPackMap::new()); + } + + // 2. Order tips by ANCESTRY, not committer date. Each push's new + // tip is a descendant of the previous push's tip in a linear + // history-mode chain, so we can order by how many ancestors + // each tip has reachable in the local clone (more ancestors = + // later push). Codex/skeptical PR #55: committer-date sort can + // mis-pair (prev, new) when merge commits or imported history + // produce out-of-order timestamps. + // + // Tips whose commit isn't in the local clone get filtered out.
+ let mut tips_with_order: Vec<(ObjectBundleId, [u8; 20], u64)> = Vec::with_capacity(tips.len()); + let mut skipped_missing = 0usize; + for (bundle_id, tip) in &tips { + match ancestor_count(git_dir, tip) { + Ok(n) => tips_with_order.push((*bundle_id, *tip, n)), + Err(_) => skipped_missing += 1, + } + } + if skipped_missing > 0 { + eprintln!( + "info: {skipped_missing} bundle tip commit(s) not present in local clone; \ + --from will not be able to reconstruct those bundles" + ); + } + // Two tips with the same reachable-commit count would be siblings + // (neither an ancestor of the other) — shouldn't happen for linear + // history-mode pushes. The tiebreak on tip bytes makes the order + // deterministic across runs even if it occurs. + tips_with_order.sort_by(|a, b| a.2.cmp(&b.2).then(a.1.cmp(&b.1))); + + // 3. For each tip, try multiple candidate `(prev, new)` pairs + // and store every reconstructed pack's hash in the map. Codex + // PR #55 P2 #1: a history-mode push that includes multiple + // refspecs (e.g. `main:main` + `refs/tags/*:refs/tags/*` — + // exactly what the rescue-demos workflow does) creates a + // bundle per ref, each with its own `prev` taken from + // `state.refs.get(&dst)`. A newly-created tag has `prev = + // None` (the bundle's pack is everything reachable from the + // tag), while a branch push uses the branch's previous tip. + // Chaining every tip through one global ordering would miss + // the tag-bundle case. Build BOTH the chained-prev pack AND + // the no-prev pack for each tip; the actual original pack + // matches one of them and lands in the map under its true + // hash. Wrong-content packs land under their (mismatched) + // hash and are silently never looked up. 
+ let mut map = LocalPackMap::new(); + let mut prev_tip: Option<[u8; 20]> = None; + for (_bundle_id, new_tip, _) in &tips_with_order { + let new_hex = hex::encode(new_tip); + + // Candidate A: chained from previous tip in ancestry order + // (the common case for sequential same-ref pushes). + if let Some(prev) = prev_tip.as_ref() { + let prev_hex = hex::encode(prev); + try_reconstruct_into(git_dir, Some(&prev_hex), &new_hex, &mut map); + } + // Candidate B: no prev (the new-ref case, e.g. a newly-pushed + // tag, where the original pack covered everything reachable + // from the tip). Also serves as the very first bundle's + // canonical reconstruction. + try_reconstruct_into(git_dir, None, &new_hex, &mut map); + + prev_tip = Some(*new_tip); + } + + Ok(map) +} + +/// Build the pack for `(have..want]` and insert it into `map` keyed +/// by its reconstructed BLAKE3. Failures are logged at `debug!` and +/// don't propagate — the multi-candidate algorithm intentionally +/// tries both chained-prev AND no-prev for each tip, so one +/// candidate failing per tip is normal (not all tips have a +/// meaningful chained-prev) and surfacing those as info-level would +/// be noise in operator logs. Real "rescue couldn't reconstruct this +/// bundle" surfaces later at the rescue_pack call site when the +/// expected pack_hash isn't in the map. 
+fn try_reconstruct_into(git_dir: &Path, have: Option<&str>, want: &str, map: &mut LocalPackMap) { + match build_pack_for_range(git_dir, have, want) { + Ok(pack_bytes) => { + let pack_hash: [u8; 32] = blake3::hash(&pack_bytes).as_bytes().to_owned(); + map.entry(pack_hash).or_insert_with(|| Arc::new(pack_bytes)); + } + Err(e) => { + let label = match have { + Some(h) => format!("({h}..{want}]"), + None => format!("(.., {want}]"), + }; + tracing::debug!("local-pack reconstruction failed for {label}: {e}"); + } + } +} + +/// Build a pack for the range `(have..want]` (objects reachable from +/// `want` but not from `have`; everything reachable from `want` when +/// `have` is `None`), using `pack.threads=1` for determinism. +/// +/// Skeptical-reviewer PR #55 H1: `git pack-objects` with the default +/// `pack.threads=auto` produces non-deterministic output because the +/// per-thread delta search races. Same object set → different deltas +/// → different pack bytes → different BLAKE3. Without `pack.threads=1` +/// the reconstructed map could miss entries on CI runners +/// with a different CPU count than the original publisher's machine, +/// silently degrading rescue success rate. Single-threaded delta +/// search trades wall-clock for bit-for-bit reproducibility (rescue +/// runs are not a hot path; correctness > speed here). +/// +/// The push-side `build_pack` in git-remote-freenet.rs pins the same +/// `pack.threads=1` setting, so the pack bytes published to the +/// contract and the bytes rebuilt here come from the same delta- +/// search configuration.
+fn build_pack_for_range(git_dir: &Path, have: Option<&str>, want: &str) -> Result> { + let mut rev_list = Command::new("git"); + rev_list.arg("--git-dir").arg(git_dir); + rev_list.args(["rev-list", "--objects", want]); + if let Some(h) = have { + rev_list.arg(format!("^{h}")); + } + let rev_list_out = rev_list + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output() + .context("spawn git rev-list")?; + if !rev_list_out.status.success() { + let stderr = String::from_utf8_lossy(&rev_list_out.stderr); + bail!( + "git rev-list failed for range {}..{want}: {} (stderr: {})", + have.unwrap_or(""), + rev_list_out.status, + stderr.trim() + ); + } + + let mut child = Command::new("git") + .arg("--git-dir") + .arg(git_dir) + // -c pack.threads=1: see fn-level docs (H1 fix). + .args(["-c", "pack.threads=1", "pack-objects", "--stdout"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("spawn git pack-objects")?; + { + let mut stdin = child.stdin.take().ok_or_else(|| anyhow!("piped stdin"))?; + for line in BufReader::new(rev_list_out.stdout.as_slice()).lines() { + let line = line?; + let sha = line.split(' ').next().unwrap_or(""); + if !sha.is_empty() { + writeln!(stdin, "{sha}")?; + } + } + } + let out = child.wait_with_output()?; + if !out.status.success() { + let stderr = String::from_utf8_lossy(&out.stderr); + bail!( + "git pack-objects failed: {} (stderr: {})", + out.status, + stderr.trim() + ); + } + Ok(out.stdout) +} + +/// Count of commits reachable from `commit_sha` in the local clone. +/// Used as a topological ordinal for sorting bundle tips by push +/// chronology — for a linear history-mode push chain, the Nth push's +/// tip has more reachable commits than the (N-1)th push's tip. +/// Returns `Err` if the commit isn't in the local clone. 
+fn ancestor_count(git_dir: &Path, commit_sha: &[u8; 20]) -> Result { + let hex = hex::encode(commit_sha); + let out = Command::new("git") + .arg("--git-dir") + .arg(git_dir) + .args(["rev-list", "--count", &hex]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output() + .context("spawn git rev-list --count")?; + if !out.status.success() { + let stderr = String::from_utf8_lossy(&out.stderr); + bail!("commit {hex} not in local clone: {}", stderr.trim()); + } + let s = String::from_utf8(out.stdout).context("git rev-list output not utf-8")?; + s.trim() + .parse::() + .with_context(|| format!("parse ancestor count from {s:?}")) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + /// Pin: `normalize_git_dir` returns `/.git` when the user + /// passes a worktree root (the documented invocation shape from + /// `--from`'s help text). Codex PR #55 P2 #2 caught that without + /// this, the user's literal `--from /path/to/clone` would silently + /// produce a 0-pack map because `git --git-dir /path/to/clone` + /// treats the worktree itself as the git directory. + #[test] + fn normalize_git_dir_appends_dot_git_for_worktree_root() { + let tmp = TempDir::new().unwrap(); + let dot_git = tmp.path().join(".git"); + fs::create_dir(&dot_git).unwrap(); + assert_eq!(normalize_git_dir(tmp.path()), dot_git); + } + + /// Pin: `normalize_git_dir` passes through paths that don't have a + /// `.git` subdir (bare repos, or an explicit `.git` path). + #[test] + fn normalize_git_dir_passes_through_bare_or_dot_git() { + let tmp = TempDir::new().unwrap(); + // No .git subdir → passed through as-is. + assert_eq!(normalize_git_dir(tmp.path()), tmp.path()); + } + + /// Pin: `build_local_pack_map` returns an empty map for a state + /// with no bundle-tip extensions. Logs the situation but doesn't + /// error — pre-0.1.16 mirrors legitimately don't have these. 
+ #[test] + fn build_local_pack_map_empty_when_no_tip_extensions() { + let state = RepoState::default(); + let result = build_local_pack_map(Path::new("/nonexistent/dir"), &state).unwrap(); + assert!(result.is_empty()); + } +} diff --git a/crates/freenet-git/src/main.rs b/crates/freenet-git/src/main.rs index 7ead5e3..8fdfefc 100644 --- a/crates/freenet-git/src/main.rs +++ b/crates/freenet-git/src/main.rs @@ -208,6 +208,25 @@ enum Cmd { /// clamped up to `1`. #[arg(long, env = "FREENET_GIT_RESCUE_PARALLEL", default_value = "2")] parallel_bundles: usize, + /// Reconstruct missing pack bytes from a local git clone when + /// the gateway no longer has them cached. Pass the path to a + /// working git directory of the same repo (the `.git` of a + /// clone is fine; so is a bare repo). Without this flag, a + /// bundle whose pack has been evicted everywhere fails the + /// rescue with "GET pack ..." — the documented pre-0.1.23 + /// behavior. With `--from `, rescue rebuilds the + /// pack locally via `git pack-objects` and re-PUTs the same + /// byte-for-byte bytes (pack output is deterministic for a + /// given object set + git version). + /// + /// **Scope**: history-mode mirrors + SinglePack bundles only. + /// Snapshot-mode contracts and ChunkedPack bundles fall + /// through to the GET-only path because their reconstruction + /// shape isn't recoverable from the contract metadata today + /// (snapshot mode force-pushes fresh orphan commits per run; + /// ChunkedPack's chunk_size isn't stored in object_index). 
+ #[arg(long, value_name = "GIT_DIR")] + from: Option, }, } @@ -265,6 +284,7 @@ fn run(cli: Cli) -> Result<()> { only_current_tips, rescue_all, parallel_bundles, + from, } => rescue( &url, ws_url.as_deref(), @@ -272,6 +292,7 @@ fn run(cli: Cli) -> Result<()> { only_current_tips, rescue_all, parallel_bundles, + from.as_deref(), ), } } @@ -549,6 +570,7 @@ fn rescue( only_current_tips: bool, rescue_all: bool, parallel_bundles: usize, + from: Option<&std::path::Path>, ) -> Result<()> { let parallel_bundles = parallel_bundles.max(1); let parsed = url::parse(url_str).with_context(|| format!("parse {url_str}"))?; @@ -670,6 +692,31 @@ fn rescue( // is roughly `parallel_bundles + parallel_bundles*8`. drop(api); + // Pre-build the local-pack reconstruction map if --from was + // supplied. Done up-front (before bundle dispatch) so each + // per-bundle task can do a cheap HashMap lookup on its + // expected pack hash. Empty map if `--from` not set; bundles + // whose gateway GET fails will then fall through to the + // existing error path (same as pre-0.1.23 behavior). + let local_pack_map: std::sync::Arc = + if let Some(git_dir) = from { + let map = freenet_git_cli::local_pack::build_local_pack_map(git_dir, &state) + .with_context(|| { + format!( + "build local-pack reconstruction map from --from {}", + git_dir.display() + ) + })?; + eprintln!( + "==> --from {}: reconstructed {} pack(s) locally for fallback", + git_dir.display(), + map.len() + ); + std::sync::Arc::new(map) + } else { + std::sync::Arc::new(freenet_git_cli::local_pack::LocalPackMap::new()) + }; + // FuturesUnordered with admission control: keep at most // `parallel_bundles` rescue tasks in flight; backfill as // each one completes. 
The driver runs on a current-thread @@ -702,9 +749,11 @@ fn rescue( let ws = ws.clone(); let id_owned = *id; let bundle = record.bundle.clone(); + let local_pack_map = local_pack_map.clone(); in_flight.push(tokio::spawn(async move { let label = format!("bundle {}", hex::encode(id_owned)); - rescue_one_bundle(&ws, &pack_wasm, bundle, timeout, label).await + rescue_one_bundle(&ws, &pack_wasm, bundle, timeout, label, local_pack_map) + .await })); } let Some(join_result) = in_flight.next().await else { @@ -940,6 +989,7 @@ async fn rescue_one_bundle( bundle: freenet_git_types::ObjectBundle, timeout: Duration, label: String, + local_pack_map: std::sync::Arc, ) -> BundleOutcome { match bundle { freenet_git_types::ObjectBundle::SinglePack { pack_hash, .. } => { @@ -953,7 +1003,7 @@ async fn rescue_one_bundle( }; } }; - match rescue_pack(&mut api, pack_wasm, pack_hash, timeout).await { + match rescue_pack(&mut api, pack_wasm, pack_hash, timeout, &local_pack_map).await { Ok(()) => BundleOutcome::Ok { label, kind_label: "SinglePack".to_string(), @@ -990,14 +1040,47 @@ async fn rescue_pack( pack_wasm: &[u8], pack_hash: [u8; 32], timeout: Duration, + local_pack_map: &freenet_git_cli::local_pack::LocalPackMap, ) -> Result<()> { - let bytes = wsclient::get_pack(api, pack_wasm, pack_hash, timeout) - .await - .with_context(|| format!("GET pack {}", hex::encode(pack_hash)))?; - wsclient::put_pack(api, pack_wasm, bytes, timeout) - .await - .with_context(|| format!("PUT pack {}", hex::encode(pack_hash)))?; - Ok(()) + // Try the gateway first. If it has the pack, GET-then-PUT is the + // happy path and the local-reconstruction work was wasted effort + // for this bundle — which is fine, build_local_pack_map runs once + // up-front and the per-bundle cost is just a HashMap lookup. 
+ match wsclient::get_pack(api, pack_wasm, pack_hash, timeout).await { + Ok(bytes) => { + wsclient::put_pack(api, pack_wasm, bytes, timeout) + .await + .with_context(|| format!("PUT pack {}", hex::encode(pack_hash)))?; + Ok(()) + } + Err(gateway_err) => { + // Gateway can't serve this pack. Try local reconstruction + // before reporting failure. The map is empty when --from + // wasn't passed, so this lookup is a cheap no-op for the + // pre-0.1.23 invocation shape. + if let Some(local_bytes) = local_pack_map.get(&pack_hash) { + eprintln!( + "info: pack {} not available from gateway ({gateway_err}); \ + reconstructed locally via --from, re-PUTting", + hex::encode(pack_hash) + ); + // Bytes are Arc> in the map; the (*local_bytes).clone() + // deep-clones once per PUT. The Arc itself is cheap; the deep + // clone is the unavoidable cost of put_pack taking an owned + // Vec. Future: thread Arc through put_pack to skip this. + wsclient::put_pack(api, pack_wasm, (**local_bytes).clone(), timeout) + .await + .with_context(|| { + format!( + "PUT reconstructed pack {} (from local clone)", + hex::encode(pack_hash) + ) + })?; + return Ok(()); + } + Err(gateway_err).with_context(|| format!("GET pack {}", hex::encode(pack_hash))) + } + } } /// Rescue a chunked-pack bundle. Each chunk is GET'd then re-PUT;