diff --git a/packages/core/schemas/webagent-event.json b/packages/core/schemas/webagent-event.json index 0eba620c..5a279620 100644 --- a/packages/core/schemas/webagent-event.json +++ b/packages/core/schemas/webagent-event.json @@ -3920,7 +3920,8 @@ "TASK_ABORTED", "MAX_ITERATIONS", "MAX_ERRORS", - "TASK_FAILED" + "TASK_FAILED", + "ITERATION_TIMEOUT" ], "type": "string" }, diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index afeb6406..b1ea19aa 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -20,6 +20,16 @@ export const DEFAULT_PLANNING_MAX_TOKENS = 1500; /** Max tokens for validation */ export const DEFAULT_VALIDATION_MAX_TOKENS = 1000; +/** + * Per-iteration watchdog timeout in milliseconds (5 minutes). + * + * Bounds a single agent loop iteration (page snapshot + LLM call + tool execution) so a + * hung operation can't freeze the task indefinitely. Generous: an iteration legitimately + * spans an LLM call (~120s) plus tool execution (extract can be another ~120s call), so + * this should only fire on a genuine hang. + */ +export const DEFAULT_ITERATION_TIMEOUT_MS = 300000; + // ============================================================================ // AI Retry Configuration // ============================================================================ diff --git a/packages/core/src/errors.ts b/packages/core/src/errors.ts index 1a8a0fd9..42c40814 100644 --- a/packages/core/src/errors.ts +++ b/packages/core/src/errors.ts @@ -162,6 +162,20 @@ export class NavigationTimeoutException extends BrowserException { } } +/** + * Thrown when a single agent iteration exceeds its watchdog timeout. + * + * Terminal: extends Error (not RecoverableError) because a hung iteration ends the + * task with a clear error rather than being retried. Distinct from a user abort + * (the caller's abortSignal is not aborted by the watchdog). + */ +export class IterationTimeoutError extends Error { + constructor(message: string) { + super(message); + this.name = "IterationTimeoutError"; + } +} + /** * Thrown when the browser connection is lost mid-task (CDP session closed). * WebAgent catches this to trigger a browser restart and execution reset rather diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index aae5f813..4eb75e8c 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -22,6 +22,7 @@ import { Logger } from "./loggers/types.js"; import { ConsoleLogger } from "./loggers/console.js"; import { BrowserDisconnectedError, + IterationTimeoutError, NoStartingUrlError, PlanningError, RecoverableError, @@ -51,6 +52,7 @@ import { nanoid } from "nanoid"; import { getConfigDefaults, type SearchProviderName } from "./config/defaults.js"; import { DEFAULT_GENERATION_MAX_TOKENS, + DEFAULT_ITERATION_TIMEOUT_MS, DEFAULT_PLANNING_MAX_TOKENS, DEFAULT_VALIDATION_MAX_TOKENS, } from "./constants.js"; @@ -127,6 +129,13 @@ export interface WebAgentOptions { unsafeMode?: boolean; /** Timeout for LLM provider calls in milliseconds (default: from config) */ llmProviderTimeoutMs?: number; + /** + * Per-iteration watchdog timeout in milliseconds (default: 300000 / 5 min). A single + * loop iteration (page snapshot + LLM call + tool execution) that exceeds this is + * aborted and the task ends with an ITERATION_TIMEOUT error, instead of hanging + * indefinitely. Generous by default so it only fires on a genuine hang. + */ + iterationTimeoutMs?: number; } export interface ExecuteOptions { @@ -148,6 +157,8 @@ export enum TaskErrorCode { MAX_ERRORS = "MAX_ERRORS", /** Generic task failure */ TASK_FAILED = "TASK_FAILED", + /** A single iteration exceeded its watchdog timeout */ + ITERATION_TIMEOUT = "ITERATION_TIMEOUT", } /** Structured error information for failed tasks */ @@ -241,6 +252,12 @@ export class WebAgent { private currentIterationId: string = ""; private data: any = null; private abortSignal: AbortSignal | undefined = undefined; + /** + * The abort signal for the current iteration: the caller's abortSignal combined with + * the per-iteration watchdog. LLM calls use this so a watchdog timeout aborts the + * in-flight request. Set while an iteration runs, cleared afterward. + */ + private currentIterationSignal: AbortSignal | undefined = undefined; // === Services === private compressor: SnapshotCompressor; @@ -267,6 +284,7 @@ export class WebAgent { private readonly taskId: string | undefined; private readonly firewall: FirewallConfig; private readonly llmProviderTimeoutMs: number; + private readonly iterationTimeoutMs: number; // Host of the caller-provided start URL (options.startingUrl), captured at // execute() time. Trusted by the firewall — navigating somewhere the caller // explicitly named is consent to interact with that host. NOT set from the @@ -310,6 +328,7 @@ export class WebAgent { unsafeMode: Boolean(options.unsafeMode), }); this.llmProviderTimeoutMs = options.llmProviderTimeoutMs ?? defaults.llm_provider_timeout_ms; + this.iterationTimeoutMs = options.iterationTimeoutMs ?? DEFAULT_ITERATION_TIMEOUT_MS; if (this.searchProvider === "parallel-api" && !this.searchApiKey) { throw new Error("parallel_api_key is required when search_provider is 'parallel-api'"); @@ -442,6 +461,53 @@ export class WebAgent { ); } + /** + * Run one iteration's work under a per-iteration watchdog. + * + * Sets `this.currentIterationSignal` to the caller's abortSignal combined with a fresh + * watchdog controller. The directly-issued iteration LLM calls (action `streamText` and + * validation) read this signal, so they abort when the watchdog fires. (The `extract` + * tool runs its own LLM call with the abort signal captured at tool-creation time — the + * external signal — so the watchdog doesn't propagate to it; the `Promise.race` below + * still bounds the iteration regardless.) + * + * Races the work against the timeout — the race is the correctness guarantee: the + * iteration is bounded even if an operation ignores the abort signal (the abort is + * best-effort cleanup of the in-flight LLM call). On timeout, throws + * `IterationTimeoutError`. + */ + private async runWithIterationWatchdog(work: () => Promise): Promise { + const watchdog = new AbortController(); + this.currentIterationSignal = this.abortSignal + ? AbortSignal.any([this.abortSignal, watchdog.signal]) + : watchdog.signal; + + let timer!: ReturnType; + const timeout = new Promise((_, reject) => { + timer = setTimeout(() => { + watchdog.abort(); + reject( + new IterationTimeoutError( + `Iteration exceeded the ${this.iterationTimeoutMs}ms watchdog timeout`, + ), + ); + }, this.iterationTimeoutMs); + }); + + try { + return await Promise.race([work(), timeout]); + } finally { + clearTimeout(timer); + // Only restore the signal on the normal path. If the watchdog fired, leave the + // (now-aborted) signal in place so any work() still running in the background + // cancels its remaining LLM calls instead of falling back to the un-aborted + // external signal. The task ends on timeout, so nothing else needs it reset. + if (!watchdog.signal.aborted) { + this.currentIterationSignal = undefined; + } + } + } + /** * The main execution loop - clean and maintainable */ @@ -551,21 +617,25 @@ export class WebAgent { // after a navigation destroys the page) is routed through // handleBrowserDisconnect rather than escaping runMainLoop as a hard failure. try { - // Add page snapshot if needed - if (needsPageSnapshot) { - // Clear approved refs when page changes: ARIA refs reset on each snapshot, - // so old ref strings may now point to different DOM elements. - // Recoverable blocked action errors deliberately keep needsPageSnapshot=false - // so a blocked submit retry remains tied to the same agent-filled refs. - if (approvedRefs) { - approvedRefs.clear(); + // Run the snapshot + action under the per-iteration watchdog so a hung + // operation (e.g. a stalled LLM stream) can't freeze the task indefinitely. + const result = await this.runWithIterationWatchdog(async () => { + // Add page snapshot if needed + if (needsPageSnapshot) { + // Clear approved refs when page changes: ARIA refs reset on each snapshot, + // so old ref strings may now point to different DOM elements. + // Recoverable blocked action errors deliberately keep needsPageSnapshot=false + // so a blocked submit retry remains tied to the same agent-filled refs. + if (approvedRefs) { + approvedRefs.clear(); + } + agentFilledRefs.clear(); + operationalRefs.clear(); + await this.addPageSnapshot(); } - agentFilledRefs.clear(); - operationalRefs.clear(); - await this.addPageSnapshot(); - } - const result = await this.generateAndProcessAction(task, allTools, executionState); + return await this.generateAndProcessAction(task, allTools, executionState); + }); // Reset error counter on success consecutiveErrors = 0; @@ -586,6 +656,23 @@ export class WebAgent { return { flow: "next" as const, needsPageSnapshot: result.pageChanged }; } catch (error) { + // Iteration watchdog fired — end the task with a clear, distinct error rather + // than retrying a step that has already hung past its bound. + if (error instanceof IterationTimeoutError) { + stepSpan.setStatus({ code: SpanStatusCode.ERROR, message: "IterationTimeoutError" }); + recordSanitizedException(stepSpan, error); + console.error(`[WebAgent] ${error.message}; ending task`); + const message = `Task failed: ${error.message}`; + return { + flow: "return" as const, + value: { + success: false, + finalAnswer: message, + error: { code: TaskErrorCode.ITERATION_TIMEOUT, message }, + }, + }; + } + // Browser disconnects handled specially — don't mark span as error when recovery succeeds if (error instanceof BrowserDisconnectedError) { // May throw if all endpoints exhausted — propagates as hard error @@ -1006,7 +1093,9 @@ export class WebAgent { tools: webActionTools, toolChoice: "required", maxOutputTokens: DEFAULT_GENERATION_MAX_TOKENS, - abortSignal: this.abortSignal, + // Per-iteration watchdog signal (falls back to the caller's signal) so a + // stalled stream is aborted when the watchdog fires. + abortSignal: this.currentIterationSignal ?? this.abortSignal, timeout: this.llmProviderTimeoutMs, }); @@ -1383,7 +1472,7 @@ export class WebAgent { tools: validationTools, toolChoice: "required", // Use "required" for compatibility with providers that don't support specific tool selection maxOutputTokens: DEFAULT_VALIDATION_MAX_TOKENS, - abortSignal: this.abortSignal, + abortSignal: this.currentIterationSignal ?? this.abortSignal, }, { maxAttempts: 2, diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts index cb475a8b..4d5868e9 100644 --- a/packages/core/test/webAgent.test.ts +++ b/packages/core/test/webAgent.test.ts @@ -691,6 +691,71 @@ describe("WebAgent", () => { }); }); + describe("iteration watchdog", () => { + function mockPlan(): void { + mockGenerateTextWithRetry.mockResolvedValueOnce({ + text: "Planning", + toolResults: [ + { + type: "tool-result", + toolCallId: "plan_1", + toolName: "create_plan", + input: { successCriteria: "Done", plan: "1. do it" }, + output: { successCriteria: "Done", plan: "1. do it" }, + }, + ], + } as any); + } + + // A streamText result whose fullStream never completes (simulates a stalled stream). + function hangingStreamResult(): any { + return { + fullStream: { + async *[Symbol.asyncIterator]() { + await new Promise(() => {}); + }, + }, + toolResults: new Promise(() => {}), + response: new Promise(() => {}), + finishReason: new Promise(() => {}), + usage: new Promise(() => {}), + warnings: new Promise(() => {}), + providerMetadata: new Promise(() => {}), + }; + } + + it("aborts an iteration whose LLM stream never completes and returns ITERATION_TIMEOUT", async () => { + // Real timers + a tiny watchdog so the stalled iteration is bounded quickly. + vi.useRealTimers(); + mockPlan(); + mockStreamText.mockReturnValueOnce(hangingStreamResult()); + + const agent = new WebAgent(mockBrowser, { ...options, iterationTimeoutMs: 50 }); + const result = await agent.execute("test task", { startingUrl: "https://example.com" }); + + expect(result.success).toBe(false); + expect(result.error?.code).toBe("ITERATION_TIMEOUT"); + + await agent.close(); + }); + + it("passes a watchdog-derived abort signal to streamText that fires on timeout", async () => { + vi.useRealTimers(); + mockPlan(); + mockStreamText.mockReturnValueOnce(hangingStreamResult()); + + const agent = new WebAgent(mockBrowser, { ...options, iterationTimeoutMs: 50 }); + await agent.execute("test task", { startingUrl: "https://example.com" }); + + const call = mockStreamText.mock.calls.at(-1)?.[0]; + expect(call?.abortSignal).toBeInstanceOf(AbortSignal); + // The watchdog fired, so the signal handed to the LLM call is now aborted. + expect(call?.abortSignal?.aborted).toBe(true); + + await agent.close(); + }); + }); + describe("planning", () => { it("should generate plan with URL when not provided", async () => { mockGenerateTextWithRetry.mockResolvedValueOnce({