From 28bea815e6b9789aca6b5e06a00b2ce1f3513271 Mon Sep 17 00:00:00 2001 From: Alec Thomas Date: Wed, 24 Jun 2026 12:43:22 +1000 Subject: [PATCH] feat: summarize detection strategies in --debug output --- src/agent.rs | 20 +---- src/breadcrumbs.rs | 70 ++++++++-------- src/main.rs | 197 ++++++++++++++++++++++++++++++++++----------- 3 files changed, 188 insertions(+), 99 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index c8abd1f..1419f77 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -148,28 +148,16 @@ impl Agent { }) } - pub fn find_for_process(process: &sysinfo::Process, debug: bool) -> Option<&'static Agent> { + pub fn find_for_process(process: &sysinfo::Process) -> Option<&'static Agent> { let name = process.name().to_string_lossy(); - if debug { - eprintln!(" Checking process name: {}", name); - } if let Some(agent) = Self::find_by_name(&name) { - if debug { - eprintln!(" ✓ Matched agent: {}", agent.email); - } return Some(agent); } // Check basename(argv[0]) if let Some(arg0) = process.cmd().first() { let arg0_str = arg0.to_string_lossy(); - if debug { - eprintln!(" Checking basename(argv[0]): {}", arg0_str); - } if let Some(agent) = Self::find_by_name(&arg0_str) { - if debug { - eprintln!(" ✓ Matched agent: {}", agent.email); - } return Some(agent); } } @@ -180,13 +168,7 @@ impl Agent { !arg_str.starts_with('-') }) { let arg_str = arg.to_string_lossy(); - if debug { - eprintln!(" Checking first non-flag arg from argv[1:]: {}", arg_str); - } if let Some(agent) = Self::find_by_name(&arg_str) { - if debug { - eprintln!(" ✓ Matched agent: {}", agent.email); - } return Some(agent); } } diff --git a/src/breadcrumbs.rs b/src/breadcrumbs.rs index 2b5b67d..dd005ad 100644 --- a/src/breadcrumbs.rs +++ b/src/breadcrumbs.rs @@ -39,7 +39,7 @@ fn cwd_matches_repo(cwd: &str, repo_path: &Path) -> bool { /// Read the first few lines of a file looking for a "cwd" field that /// matches the repo path. Returns true on match. -fn file_has_matching_cwd(path: &Path, repo_path: &Path, debug: bool) -> bool { +fn file_has_matching_cwd(path: &Path, repo_path: &Path) -> bool { let file = match fs::File::open(path) { Ok(f) => f, Err(_) => return false, @@ -52,9 +52,6 @@ fn file_has_matching_cwd(path: &Path, repo_path: &Path, debug: bool) -> bool { Err(_) => break, }; if let Some(cwd) = extract_cwd_from_json(&line) { - if debug { - eprintln!(" {} cwd: {}", path.display(), cwd); - } return cwd_matches_repo(cwd, repo_path); } } @@ -64,7 +61,7 @@ fn file_has_matching_cwd(path: &Path, repo_path: &Path, debug: bool) -> bool { /// Walk nested subdirectories (any depth) looking for recent files whose /// first few lines contain a "cwd" field matching the repo path. -fn find_session_file_with_cwd(dir: &Path, ext: &str, repo_path: &Path, cutoff: SystemTime, debug: bool) -> bool { +fn find_session_file_with_cwd(dir: &Path, ext: &str, repo_path: &Path, cutoff: SystemTime) -> bool { let mut dirs_to_visit = vec![dir.to_path_buf()]; while let Some(current) = dirs_to_visit.pop() { @@ -81,7 +78,7 @@ fn find_session_file_with_cwd(dir: &Path, ext: &str, repo_path: &Path, cutoff: S if !has_extension(&path, ext) || !is_recent(&path, cutoff) { continue; } - if file_has_matching_cwd(&path, repo_path, debug) { + if file_has_matching_cwd(&path, repo_path) { return true; } } @@ -90,7 +87,13 @@ fn find_session_file_with_cwd(dir: &Path, ext: &str, repo_path: &Path, cutoff: S false } -fn check_source(agent: &'static Agent, repo_path: &Path, cutoff: SystemTime, debug: bool) -> bool { +fn check_source( + agent: &'static Agent, + repo_path: &Path, + cutoff: SystemTime, + log: &mut Vec, + debug: bool, +) -> bool { let breadcrumb_dir = match agent.breadcrumb_dir { Some(d) => d, None => return false, @@ -103,40 +106,43 @@ fn check_source(agent: &'static Agent, repo_path: &Path, cutoff: SystemTime, deb }; let base = Path::new(&home).join(breadcrumb_dir); - if debug { - eprintln!(" {} breadcrumb dir: {}", agent.email, base.display()); - } - + // Only agents whose breadcrumb directory actually exists are worth + // reporting; skipping silently keeps the debug output focused. if !base.is_dir() { - if debug { - eprintln!(" Not found"); - } return false; } - let matched = find_session_file_with_cwd(&base, breadcrumb_ext, repo_path, cutoff, debug); + let matched = find_session_file_with_cwd(&base, breadcrumb_ext, repo_path, cutoff); - if !matched && debug { - eprintln!(" No match for {}", agent.email); + if debug { + if matched { + log.push(format!(" found {} ({})", agent.email, base.display())); + } else { + log.push(format!(" scanned {} (no recent session in repo)", base.display())); + } } matched } -pub fn detect_agents_from_breadcrumbs(repo_path: &Path, debug: bool) -> Vec<&'static Agent> { +pub fn detect_agents_from_breadcrumbs(repo_path: &Path, log: &mut Vec, debug: bool) -> Vec<&'static Agent> { let cutoff = SystemTime::now() - std::time::Duration::from_secs(CUTOFF_SECS); let mut agents = Vec::new(); if debug { - eprintln!("\n=== Breadcrumb Fallback ==="); + log.push("strategy: breadcrumb session files".to_string()); } for agent in KNOWN_AGENTS { - if check_source(agent, repo_path, cutoff, debug) { + if check_source(agent, repo_path, cutoff, log, debug) { agents.push(agent); } } + if debug && agents.is_empty() { + log.push(" no match".to_string()); + } + agents } @@ -173,7 +179,7 @@ mod tests { #[test] fn test_no_breadcrumbs_returns_empty() { let dir = tempfile::TempDir::new().unwrap(); - let agents = detect_agents_from_breadcrumbs(dir.path(), false); + let agents = detect_agents_from_breadcrumbs(dir.path(), &mut Vec::new(), false); assert!(agents.is_empty()); } @@ -184,8 +190,8 @@ mod tests { let mut f = fs::File::create(&path).unwrap(); writeln!(f, r#"{{"type":"session_meta","cwd":"/Users/foo/myrepo"}}"#).unwrap(); - assert!(file_has_matching_cwd(&path, Path::new("/Users/foo/myrepo"), false)); - assert!(!file_has_matching_cwd(&path, Path::new("/Users/bar/other"), false)); + assert!(file_has_matching_cwd(&path, Path::new("/Users/foo/myrepo"))); + assert!(!file_has_matching_cwd(&path, Path::new("/Users/bar/other"))); } #[test] @@ -196,8 +202,8 @@ mod tests { writeln!(f, r#"{{"type":"file-history-snapshot","messageId":"abc"}}"#).unwrap(); writeln!(f, r#"{{"type":"user","cwd":"/Users/foo/myrepo"}}"#).unwrap(); - assert!(file_has_matching_cwd(&path, Path::new("/Users/foo/myrepo"), false)); - assert!(!file_has_matching_cwd(&path, Path::new("/Users/bar/other"), false)); + assert!(file_has_matching_cwd(&path, Path::new("/Users/foo/myrepo"))); + assert!(!file_has_matching_cwd(&path, Path::new("/Users/bar/other"))); } #[test] @@ -208,7 +214,7 @@ mod tests { writeln!(f, r#"{{"type":"something","data":"value"}}"#).unwrap(); writeln!(f, r#"{{"type":"other","data":"value"}}"#).unwrap(); - assert!(!file_has_matching_cwd(&path, Path::new("/Users/foo/myrepo"), false)); + assert!(!file_has_matching_cwd(&path, Path::new("/Users/foo/myrepo"))); } #[test] @@ -229,8 +235,7 @@ mod tests { dir.path(), "jsonl", Path::new("/Users/foo/myrepo"), - cutoff, - false + cutoff )); // Non-matching repo @@ -238,8 +243,7 @@ mod tests { dir.path(), "jsonl", Path::new("/Users/bar/other"), - cutoff, - false + cutoff )); } @@ -257,8 +261,7 @@ mod tests { dir.path(), "jsonl", Path::new("/Users/foo/aittributor"), - cutoff, - false + cutoff )); } @@ -281,8 +284,7 @@ mod tests { dir.path(), "jsonl", Path::new("/Users/foo/monorepo"), - cutoff, - false + cutoff )); } } diff --git a/src/main.rs b/src/main.rs index 44b752a..729111d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ mod breadcrumbs; mod git; use clap::Parser; +use std::collections::{HashSet, VecDeque}; use std::path::PathBuf; use std::sync::mpsc; use std::time::Duration; @@ -31,19 +32,75 @@ struct Cli { debug: bool, } -fn walk_ancestry(system: &System, debug: bool) -> Vec<&'static Agent> { - let mut current_pid = Pid::from_u32(std::process::id()); - let mut agents = Vec::new(); +/// Accumulates debug information while scanning a process tree. +/// +/// All tracking is skipped unless `debug` is set, so the normal +/// (non-debug) code path stays allocation-free. +struct ScanReport { + debug: bool, + scanned: HashSet, + findings: Vec, + /// Emails already reported, to avoid duplicate "found" lines when the + /// same agent shows up under multiple siblings. + logged: HashSet<&'static str>, +} - if debug { - eprintln!("\nWalking ancestry from PID {}...", current_pid); +impl ScanReport { + fn new(debug: bool) -> Self { + Self { + debug, + scanned: HashSet::new(), + findings: Vec::new(), + logged: HashSet::new(), + } } - while let Some(process) = system.process(current_pid) { - if debug { - eprintln!(" PID {}: {:?}", current_pid, process.name()); + fn mark_scanned(&mut self, pid: Pid) { + if self.debug { + self.scanned.insert(pid); + } + } + + fn record_match(&mut self, agent: &'static Agent, pid: Pid, name: &str) { + // `insert` returns false if the email was already recorded. + if self.debug && self.logged.insert(agent.email) { + self.findings.push(format!( + " found {} (pid {}, process \"{}\", cwd matches repo)", + agent.email, pid, name + )); + } + } + + fn flush_into(self, log: &mut Vec) { + if !self.debug { + return; + } + log.push(format!(" scanned {} processes", self.scanned.len())); + if self.findings.is_empty() { + log.push(" no match".to_string()); + } else { + log.extend(self.findings); } - if let Some(agent) = Agent::find_for_process(process, debug) { + } +} + +fn walk_ancestry(system: &System, log: &mut Vec, debug: bool) -> Vec<&'static Agent> { + let mut current_pid = Pid::from_u32(std::process::id()); + let mut agents = Vec::new(); + let mut walked = 0usize; + let mut findings = Vec::new(); + + while let Some(process) = system.process(current_pid) { + walked += 1; + if let Some(agent) = Agent::find_for_process(process) { + if debug { + findings.push(format!( + " found {} (pid {}, process \"{}\")", + agent.email, + current_pid, + process.name().to_string_lossy() + )); + } agents.push(agent); } @@ -55,12 +112,26 @@ fn walk_ancestry(system: &System, debug: bool) -> Vec<&'static Agent> { } } + if debug { + log.push(format!(" walked {} processes", walked)); + if findings.is_empty() { + log.push(" no match".to_string()); + } else { + log.extend(findings); + } + } + agents } -fn check_process_tree(system: &System, root_pid: Pid, repo_path: &PathBuf, debug: bool) -> Vec<&'static Agent> { - let mut queue = std::collections::VecDeque::new(); - let mut visited = std::collections::HashSet::new(); +fn check_process_tree( + system: &System, + root_pid: Pid, + repo_path: &PathBuf, + report: &mut ScanReport, +) -> Vec<&'static Agent> { + let mut queue = VecDeque::new(); + let mut visited = HashSet::new(); let mut agents = Vec::new(); queue.push_back(root_pid); @@ -75,17 +146,13 @@ fn check_process_tree(system: &System, root_pid: Pid, repo_path: &PathBuf, debug None => continue, }; - if debug { - eprintln!(" Checking PID {}: {:?}", pid, process.name()); - } + report.mark_scanned(pid); - if let Some(agent) = Agent::find_for_process(process, debug) + if let Some(agent) = Agent::find_for_process(process) && let Some(cwd) = process.cwd() && cwd.starts_with(repo_path) { - if debug { - eprintln!(" Found agent in tree with matching cwd"); - } + report.record_match(agent, pid, &process.name().to_string_lossy()); agents.push(agent); } @@ -99,14 +166,16 @@ fn check_process_tree(system: &System, root_pid: Pid, repo_path: &PathBuf, debug agents } -fn walk_ancestry_and_descendants(system: &System, repo_path: &PathBuf, debug: bool) -> Vec<&'static Agent> { +fn walk_ancestry_and_descendants( + system: &System, + repo_path: &PathBuf, + log: &mut Vec, + debug: bool, +) -> Vec<&'static Agent> { let mut current_pid = Pid::from_u32(std::process::id()); - let mut checked_ancestors = std::collections::HashSet::new(); + let mut checked_ancestors = HashSet::new(); let mut agents = Vec::new(); - - if debug { - eprintln!("\nWalking ancestry and descendants..."); - } + let mut report = ScanReport::new(debug); while let Some(process) = system.process(current_pid) { if !checked_ancestors.insert(current_pid) { @@ -118,36 +187,46 @@ fn walk_ancestry_and_descendants(system: &System, repo_path: &PathBuf, debug: bo _ => break, }; - if debug { - eprintln!(" Checking siblings of PID {} (parent: {})", current_pid, parent_pid); - } - for sibling in system.processes().values() { if sibling.parent() != Some(parent_pid) { continue; } - agents.extend(check_process_tree(system, sibling.pid(), repo_path, debug)); + agents.extend(check_process_tree(system, sibling.pid(), repo_path, &mut report)); } current_pid = parent_pid; } + report.flush_into(log); + agents } -fn detect_agents(debug: bool) -> Vec<&'static Agent> { +fn detect_agents(log: &mut Vec, debug: bool) -> Vec<&'static Agent> { let mut agents = Vec::new(); if debug { - eprintln!("=== Agent Detection Debug ==="); - eprintln!("\nChecking environment variables..."); + log.push("strategy: environment variables".to_string()); } - if let Some(agent) = Agent::find_by_env() { - if debug { - eprintln!(" ✓ Found agent via env: {}", agent.email); + match Agent::find_by_env() { + Some(agent) => { + if debug { + let vars = agent + .env_vars + .iter() + .map(|(key, _)| *key) + .collect::>() + .join(", "); + log.push(format!(" found {} (env: {})", agent.email, vars)); + } + agents.push(agent); + } + None => { + if debug { + log.push(" no match".to_string()); + } } - agents.push(agent); } let current_dir = match std::env::current_dir() { @@ -156,8 +235,10 @@ fn detect_agents(debug: bool) -> Vec<&'static Agent> { }; let repo_path = find_git_root(¤t_dir).unwrap_or(current_dir); if debug { - eprintln!(" Repository path: {}", repo_path.display()); + log.push(format!("repository: {}", repo_path.display())); + log.push("strategy: process ancestry".to_string()); } + let system = System::new_with_specifics( RefreshKind::nothing().with_processes( ProcessRefreshKind::nothing() @@ -166,31 +247,55 @@ fn detect_agents(debug: bool) -> Vec<&'static Agent> { ), ); - agents.extend(walk_ancestry(&system, debug)); - agents.extend(walk_ancestry_and_descendants(&system, &repo_path, debug)); + agents.extend(walk_ancestry(&system, log, debug)); + + if debug { + log.push("strategy: process tree (siblings and descendants)".to_string()); + } + agents.extend(walk_ancestry_and_descendants(&system, &repo_path, log, debug)); agents } -fn breadcrumb_fallback(debug: bool) -> Vec<&'static Agent> { +fn breadcrumb_fallback(log: &mut Vec, debug: bool) -> Vec<&'static Agent> { let current_dir = std::env::current_dir().unwrap_or_default(); let repo_path = find_git_root(¤t_dir).unwrap_or(current_dir); - breadcrumbs::detect_agents_from_breadcrumbs(&repo_path, debug) + breadcrumbs::detect_agents_from_breadcrumbs(&repo_path, log, debug) } fn first_detected_agent(debug: bool) -> Option<&'static Agent> { + // The breadcrumb scan runs on a separate thread, so each strategy buffers + // its debug output into a `Vec` instead of printing directly. We + // print everything in a fixed order afterwards to keep the report readable. let (bc_tx, bc_rx) = mpsc::channel(); std::thread::spawn(move || { - let _ = bc_tx.send(breadcrumb_fallback(debug)); + let mut bc_log = Vec::new(); + let bc_agents = breadcrumb_fallback(&mut bc_log, debug); + let _ = bc_tx.send((bc_agents, bc_log)); }); - let mut agents = detect_agents(debug); + let mut log = Vec::new(); + let mut agents = detect_agents(&mut log, debug); - if let Ok(bc_agents) = bc_rx.recv() { + if let Ok((bc_agents, bc_log)) = bc_rx.recv() { + log.extend(bc_log); agents.extend(bc_agents); } - agents.into_iter().next() + let chosen = agents.into_iter().next(); + + if debug { + eprintln!("=== aittributor detection ==="); + for line in &log { + eprintln!("{}", line); + } + match chosen { + Some(agent) => eprintln!("\noutcome: attributing to {}", agent.email), + None => eprintln!("\noutcome: no agent detected"), + } + } + + chosen } fn run(cli: Cli) {