From f6d5c39e4b9714a32f1156c2f1d148bd4839d364 Mon Sep 17 00:00:00 2001 From: Les Orchard Date: Thu, 4 Jun 2026 15:32:23 -0700 Subject: [PATCH] fix(core): add a per-iteration watchdog to bound the agent loop A single agent iteration had no wall-clock bound: maxIterations caps the count, not time, and the streamText fullStream consumption could stall mid-stream even with the SDK timeout. Either could freeze the whole task indefinitely. Add a per-iteration watchdog in runMainLoop: runWithIterationWatchdog wraps the snapshot + action work in Promise.race against a timeout (default 300s, via the new iterationTimeoutMs option), and exposes an AbortController combined with the caller's signal as this.currentIterationSignal. The race guarantees the iteration is bounded even if an operation ignores the abort signal; the abort is best-effort cleanup of the in-flight LLM call. The action streamText and the validation call now use the per-iteration signal (planning, which runs before the loop, keeps the external signal). On timeout the iteration throws IterationTimeoutError, handled by a dedicated branch that ends the task with the new TaskErrorCode.ITERATION_TIMEOUT, distinct from the user-abort path. Closes #515 Closes #516 Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/schemas/webagent-event.json | 3 +- packages/core/src/constants.ts | 10 ++ packages/core/src/errors.ts | 14 +++ packages/core/src/webAgent.ts | 119 +++++++++++++++++++--- packages/core/test/webAgent.test.ts | 65 ++++++++++++ 5 files changed, 195 insertions(+), 16 deletions(-) diff --git a/packages/core/schemas/webagent-event.json b/packages/core/schemas/webagent-event.json index 0eba620c..5a279620 100644 --- a/packages/core/schemas/webagent-event.json +++ b/packages/core/schemas/webagent-event.json @@ -3920,7 +3920,8 @@ "TASK_ABORTED", "MAX_ITERATIONS", "MAX_ERRORS", - "TASK_FAILED" + "TASK_FAILED", + "ITERATION_TIMEOUT" ], "type": "string" }, diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index afeb6406..b1ea19aa 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -20,6 +20,16 @@ export const DEFAULT_PLANNING_MAX_TOKENS = 1500; /** Max tokens for validation */ export const DEFAULT_VALIDATION_MAX_TOKENS = 1000; +/** + * Per-iteration watchdog timeout in milliseconds (5 minutes). + * + * Bounds a single agent loop iteration (page snapshot + LLM call + tool execution) so a + * hung operation can't freeze the task indefinitely. Generous: an iteration legitimately + * spans an LLM call (~120s) plus tool execution (extract can be another ~120s call), so + * this should only fire on a genuine hang. + */ +export const DEFAULT_ITERATION_TIMEOUT_MS = 300000; + // ============================================================================ // AI Retry Configuration // ============================================================================ diff --git a/packages/core/src/errors.ts b/packages/core/src/errors.ts index 1a8a0fd9..42c40814 100644 --- a/packages/core/src/errors.ts +++ b/packages/core/src/errors.ts @@ -162,6 +162,20 @@ export class NavigationTimeoutException extends BrowserException { } } +/** + * Thrown when a single agent iteration exceeds its watchdog timeout. + * + * Terminal: extends Error (not RecoverableError) because a hung iteration ends the + * task with a clear error rather than being retried. Distinct from a user abort + * (the caller's abortSignal is not aborted by the watchdog). + */ +export class IterationTimeoutError extends Error { + constructor(message: string) { + super(message); + this.name = "IterationTimeoutError"; + } +} + /** * Thrown when the browser connection is lost mid-task (CDP session closed). * WebAgent catches this to trigger a browser restart and execution reset rather diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index aae5f813..4eb75e8c 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -22,6 +22,7 @@ import { Logger } from "./loggers/types.js"; import { ConsoleLogger } from "./loggers/console.js"; import { BrowserDisconnectedError, + IterationTimeoutError, NoStartingUrlError, PlanningError, RecoverableError, @@ -51,6 +52,7 @@ import { nanoid } from "nanoid"; import { getConfigDefaults, type SearchProviderName } from "./config/defaults.js"; import { DEFAULT_GENERATION_MAX_TOKENS, + DEFAULT_ITERATION_TIMEOUT_MS, DEFAULT_PLANNING_MAX_TOKENS, DEFAULT_VALIDATION_MAX_TOKENS, } from "./constants.js"; @@ -127,6 +129,13 @@ export interface WebAgentOptions { unsafeMode?: boolean; /** Timeout for LLM provider calls in milliseconds (default: from config) */ llmProviderTimeoutMs?: number; + /** + * Per-iteration watchdog timeout in milliseconds (default: 300000 / 5 min). A single + * loop iteration (page snapshot + LLM call + tool execution) that exceeds this is + * aborted and the task ends with an ITERATION_TIMEOUT error, instead of hanging + * indefinitely. Generous by default so it only fires on a genuine hang. + */ + iterationTimeoutMs?: number; } export interface ExecuteOptions { @@ -148,6 +157,8 @@ export enum TaskErrorCode { MAX_ERRORS = "MAX_ERRORS", /** Generic task failure */ TASK_FAILED = "TASK_FAILED", + /** A single iteration exceeded its watchdog timeout */ + ITERATION_TIMEOUT = "ITERATION_TIMEOUT", } /** Structured error information for failed tasks */ @@ -241,6 +252,12 @@ export class WebAgent { private currentIterationId: string = ""; private data: any = null; private abortSignal: AbortSignal | undefined = undefined; + /** + * The abort signal for the current iteration: the caller's abortSignal combined with + * the per-iteration watchdog. LLM calls use this so a watchdog timeout aborts the + * in-flight request. Set while an iteration runs, cleared afterward. + */ + private currentIterationSignal: AbortSignal | undefined = undefined; // === Services === private compressor: SnapshotCompressor; @@ -267,6 +284,7 @@ export class WebAgent { private readonly taskId: string | undefined; private readonly firewall: FirewallConfig; private readonly llmProviderTimeoutMs: number; + private readonly iterationTimeoutMs: number; // Host of the caller-provided start URL (options.startingUrl), captured at // execute() time. Trusted by the firewall — navigating somewhere the caller // explicitly named is consent to interact with that host. NOT set from the @@ -310,6 +328,7 @@ export class WebAgent { unsafeMode: Boolean(options.unsafeMode), }); this.llmProviderTimeoutMs = options.llmProviderTimeoutMs ?? defaults.llm_provider_timeout_ms; + this.iterationTimeoutMs = options.iterationTimeoutMs ?? DEFAULT_ITERATION_TIMEOUT_MS; if (this.searchProvider === "parallel-api" && !this.searchApiKey) { throw new Error("parallel_api_key is required when search_provider is 'parallel-api'"); @@ -442,6 +461,53 @@ export class WebAgent { ); } + /** + * Run one iteration's work under a per-iteration watchdog. + * + * Sets `this.currentIterationSignal` to the caller's abortSignal combined with a fresh + * watchdog controller. The directly-issued iteration LLM calls (action `streamText` and + * validation) read this signal, so they abort when the watchdog fires. (The `extract` + * tool runs its own LLM call with the abort signal captured at tool-creation time — the + * external signal — so the watchdog doesn't propagate to it; the `Promise.race` below + * still bounds the iteration regardless.) + * + * Races the work against the timeout — the race is the correctness guarantee: the + * iteration is bounded even if an operation ignores the abort signal (the abort is + * best-effort cleanup of the in-flight LLM call). On timeout, throws + * `IterationTimeoutError`. + */ + private async runWithIterationWatchdog(work: () => Promise): Promise { + const watchdog = new AbortController(); + this.currentIterationSignal = this.abortSignal + ? AbortSignal.any([this.abortSignal, watchdog.signal]) + : watchdog.signal; + + let timer!: ReturnType; + const timeout = new Promise((_, reject) => { + timer = setTimeout(() => { + watchdog.abort(); + reject( + new IterationTimeoutError( + `Iteration exceeded the ${this.iterationTimeoutMs}ms watchdog timeout`, + ), + ); + }, this.iterationTimeoutMs); + }); + + try { + return await Promise.race([work(), timeout]); + } finally { + clearTimeout(timer); + // Only restore the signal on the normal path. If the watchdog fired, leave the + // (now-aborted) signal in place so any work() still running in the background + // cancels its remaining LLM calls instead of falling back to the un-aborted + // external signal. The task ends on timeout, so nothing else needs it reset. + if (!watchdog.signal.aborted) { + this.currentIterationSignal = undefined; + } + } + } + /** * The main execution loop - clean and maintainable */ @@ -551,21 +617,25 @@ export class WebAgent { // after a navigation destroys the page) is routed through // handleBrowserDisconnect rather than escaping runMainLoop as a hard failure. try { - // Add page snapshot if needed - if (needsPageSnapshot) { - // Clear approved refs when page changes: ARIA refs reset on each snapshot, - // so old ref strings may now point to different DOM elements. - // Recoverable blocked action errors deliberately keep needsPageSnapshot=false - // so a blocked submit retry remains tied to the same agent-filled refs. - if (approvedRefs) { - approvedRefs.clear(); + // Run the snapshot + action under the per-iteration watchdog so a hung + // operation (e.g. a stalled LLM stream) can't freeze the task indefinitely. + const result = await this.runWithIterationWatchdog(async () => { + // Add page snapshot if needed + if (needsPageSnapshot) { + // Clear approved refs when page changes: ARIA refs reset on each snapshot, + // so old ref strings may now point to different DOM elements. + // Recoverable blocked action errors deliberately keep needsPageSnapshot=false + // so a blocked submit retry remains tied to the same agent-filled refs. + if (approvedRefs) { + approvedRefs.clear(); + } + agentFilledRefs.clear(); + operationalRefs.clear(); + await this.addPageSnapshot(); } - agentFilledRefs.clear(); - operationalRefs.clear(); - await this.addPageSnapshot(); - } - const result = await this.generateAndProcessAction(task, allTools, executionState); + return await this.generateAndProcessAction(task, allTools, executionState); + }); // Reset error counter on success consecutiveErrors = 0; @@ -586,6 +656,23 @@ export class WebAgent { return { flow: "next" as const, needsPageSnapshot: result.pageChanged }; } catch (error) { + // Iteration watchdog fired — end the task with a clear, distinct error rather + // than retrying a step that has already hung past its bound. + if (error instanceof IterationTimeoutError) { + stepSpan.setStatus({ code: SpanStatusCode.ERROR, message: "IterationTimeoutError" }); + recordSanitizedException(stepSpan, error); + console.error(`[WebAgent] ${error.message}; ending task`); + const message = `Task failed: ${error.message}`; + return { + flow: "return" as const, + value: { + success: false, + finalAnswer: message, + error: { code: TaskErrorCode.ITERATION_TIMEOUT, message }, + }, + }; + } + // Browser disconnects handled specially — don't mark span as error when recovery succeeds if (error instanceof BrowserDisconnectedError) { // May throw if all endpoints exhausted — propagates as hard error @@ -1006,7 +1093,9 @@ export class WebAgent { tools: webActionTools, toolChoice: "required", maxOutputTokens: DEFAULT_GENERATION_MAX_TOKENS, - abortSignal: this.abortSignal, + // Per-iteration watchdog signal (falls back to the caller's signal) so a + // stalled stream is aborted when the watchdog fires. + abortSignal: this.currentIterationSignal ?? this.abortSignal, timeout: this.llmProviderTimeoutMs, }); @@ -1383,7 +1472,7 @@ export class WebAgent { tools: validationTools, toolChoice: "required", // Use "required" for compatibility with providers that don't support specific tool selection maxOutputTokens: DEFAULT_VALIDATION_MAX_TOKENS, - abortSignal: this.abortSignal, + abortSignal: this.currentIterationSignal ?? this.abortSignal, }, { maxAttempts: 2, diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts index cb475a8b..4d5868e9 100644 --- a/packages/core/test/webAgent.test.ts +++ b/packages/core/test/webAgent.test.ts @@ -691,6 +691,71 @@ describe("WebAgent", () => { }); }); + describe("iteration watchdog", () => { + function mockPlan(): void { + mockGenerateTextWithRetry.mockResolvedValueOnce({ + text: "Planning", + toolResults: [ + { + type: "tool-result", + toolCallId: "plan_1", + toolName: "create_plan", + input: { successCriteria: "Done", plan: "1. do it" }, + output: { successCriteria: "Done", plan: "1. do it" }, + }, + ], + } as any); + } + + // A streamText result whose fullStream never completes (simulates a stalled stream). + function hangingStreamResult(): any { + return { + fullStream: { + async *[Symbol.asyncIterator]() { + await new Promise(() => {}); + }, + }, + toolResults: new Promise(() => {}), + response: new Promise(() => {}), + finishReason: new Promise(() => {}), + usage: new Promise(() => {}), + warnings: new Promise(() => {}), + providerMetadata: new Promise(() => {}), + }; + } + + it("aborts an iteration whose LLM stream never completes and returns ITERATION_TIMEOUT", async () => { + // Real timers + a tiny watchdog so the stalled iteration is bounded quickly. + vi.useRealTimers(); + mockPlan(); + mockStreamText.mockReturnValueOnce(hangingStreamResult()); + + const agent = new WebAgent(mockBrowser, { ...options, iterationTimeoutMs: 50 }); + const result = await agent.execute("test task", { startingUrl: "https://example.com" }); + + expect(result.success).toBe(false); + expect(result.error?.code).toBe("ITERATION_TIMEOUT"); + + await agent.close(); + }); + + it("passes a watchdog-derived abort signal to streamText that fires on timeout", async () => { + vi.useRealTimers(); + mockPlan(); + mockStreamText.mockReturnValueOnce(hangingStreamResult()); + + const agent = new WebAgent(mockBrowser, { ...options, iterationTimeoutMs: 50 }); + await agent.execute("test task", { startingUrl: "https://example.com" }); + + const call = mockStreamText.mock.calls.at(-1)?.[0]; + expect(call?.abortSignal).toBeInstanceOf(AbortSignal); + // The watchdog fired, so the signal handed to the LLM call is now aborted. + expect(call?.abortSignal?.aborted).toBe(true); + + await agent.close(); + }); + }); + describe("planning", () => { it("should generate plan with URL when not provided", async () => { mockGenerateTextWithRetry.mockResolvedValueOnce({