-
Notifications
You must be signed in to change notification settings - Fork 4
fix(core): add a per-iteration watchdog to bound the agent loop #521
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ import { Logger } from "./loggers/types.js"; | |
| import { ConsoleLogger } from "./loggers/console.js"; | ||
| import { | ||
| BrowserDisconnectedError, | ||
| IterationTimeoutError, | ||
| NoStartingUrlError, | ||
| PlanningError, | ||
| RecoverableError, | ||
|
|
@@ -51,6 +52,7 @@ import { nanoid } from "nanoid"; | |
| import { getConfigDefaults, type SearchProviderName } from "./config/defaults.js"; | ||
| import { | ||
| DEFAULT_GENERATION_MAX_TOKENS, | ||
| DEFAULT_ITERATION_TIMEOUT_MS, | ||
| DEFAULT_PLANNING_MAX_TOKENS, | ||
| DEFAULT_VALIDATION_MAX_TOKENS, | ||
| } from "./constants.js"; | ||
|
|
@@ -127,6 +129,13 @@ export interface WebAgentOptions { | |
| unsafeMode?: boolean; | ||
| /** Timeout for LLM provider calls in milliseconds (default: from config) */ | ||
| llmProviderTimeoutMs?: number; | ||
| /** | ||
| * Per-iteration watchdog timeout in milliseconds (default: 300000 / 5 min). A single | ||
| * loop iteration (page snapshot + LLM call + tool execution) that exceeds this is | ||
| * aborted and the task ends with an ITERATION_TIMEOUT error, instead of hanging | ||
| * indefinitely. Generous by default so it only fires on a genuine hang. | ||
| */ | ||
| iterationTimeoutMs?: number; | ||
| } | ||
|
|
||
| export interface ExecuteOptions { | ||
|
|
@@ -148,6 +157,8 @@ export enum TaskErrorCode { | |
| MAX_ERRORS = "MAX_ERRORS", | ||
| /** Generic task failure */ | ||
| TASK_FAILED = "TASK_FAILED", | ||
| /** A single iteration exceeded its watchdog timeout */ | ||
| ITERATION_TIMEOUT = "ITERATION_TIMEOUT", | ||
| } | ||
|
|
||
| /** Structured error information for failed tasks */ | ||
|
|
@@ -241,6 +252,12 @@ export class WebAgent { | |
| private currentIterationId: string = ""; | ||
| private data: any = null; | ||
| private abortSignal: AbortSignal | undefined = undefined; | ||
| /** | ||
| * The abort signal for the current iteration: the caller's abortSignal combined with | ||
| * the per-iteration watchdog. LLM calls use this so a watchdog timeout aborts the | ||
| * in-flight request. Set while an iteration runs, cleared afterward. | ||
| */ | ||
| private currentIterationSignal: AbortSignal | undefined = undefined; | ||
|
|
||
| // === Services === | ||
| private compressor: SnapshotCompressor; | ||
|
|
@@ -267,6 +284,7 @@ export class WebAgent { | |
| private readonly taskId: string | undefined; | ||
| private readonly firewall: FirewallConfig; | ||
| private readonly llmProviderTimeoutMs: number; | ||
| private readonly iterationTimeoutMs: number; | ||
| // Host of the caller-provided start URL (options.startingUrl), captured at | ||
| // execute() time. Trusted by the firewall — navigating somewhere the caller | ||
| // explicitly named is consent to interact with that host. NOT set from the | ||
|
|
@@ -310,6 +328,7 @@ export class WebAgent { | |
| unsafeMode: Boolean(options.unsafeMode), | ||
| }); | ||
| this.llmProviderTimeoutMs = options.llmProviderTimeoutMs ?? defaults.llm_provider_timeout_ms; | ||
| this.iterationTimeoutMs = options.iterationTimeoutMs ?? DEFAULT_ITERATION_TIMEOUT_MS; | ||
|
|
||
| if (this.searchProvider === "parallel-api" && !this.searchApiKey) { | ||
| throw new Error("parallel_api_key is required when search_provider is 'parallel-api'"); | ||
|
|
@@ -442,6 +461,53 @@ export class WebAgent { | |
| ); | ||
| } | ||
|
|
||
| /** | ||
| * Run one iteration's work under a per-iteration watchdog. | ||
| * | ||
| * Sets `this.currentIterationSignal` to the caller's abortSignal combined with a fresh | ||
| * watchdog controller. The directly-issued iteration LLM calls (action `streamText` and | ||
| * validation) read this signal, so they abort when the watchdog fires. (The `extract` | ||
| * tool runs its own LLM call with the abort signal captured at tool-creation time — the | ||
| * external signal — so the watchdog doesn't propagate to it; the `Promise.race` below | ||
| * still bounds the iteration regardless.) | ||
| * | ||
| * Races the work against the timeout — the race is the correctness guarantee: the | ||
| * iteration is bounded even if an operation ignores the abort signal (the abort is | ||
| * best-effort cleanup of the in-flight LLM call). On timeout, throws | ||
| * `IterationTimeoutError`. | ||
| */ | ||
| private async runWithIterationWatchdog<T>(work: () => Promise<T>): Promise<T> { | ||
| const watchdog = new AbortController(); | ||
| this.currentIterationSignal = this.abortSignal | ||
| ? AbortSignal.any([this.abortSignal, watchdog.signal]) | ||
| : watchdog.signal; | ||
|
|
||
| let timer!: ReturnType<typeof setTimeout>; | ||
| const timeout = new Promise<never>((_, reject) => { | ||
| timer = setTimeout(() => { | ||
| watchdog.abort(); | ||
| reject( | ||
| new IterationTimeoutError( | ||
| `Iteration exceeded the ${this.iterationTimeoutMs}ms watchdog timeout`, | ||
| ), | ||
| ); | ||
| }, this.iterationTimeoutMs); | ||
| }); | ||
|
|
||
| try { | ||
| return await Promise.race([work(), timeout]); | ||
| } finally { | ||
| clearTimeout(timer); | ||
| // Only restore the signal on the normal path. If the watchdog fired, leave the | ||
| // (now-aborted) signal in place so any work() still running in the background | ||
| // cancels its remaining LLM calls instead of falling back to the un-aborted | ||
| // external signal. The task ends on timeout, so nothing else needs it reset. | ||
| if (!watchdog.signal.aborted) { | ||
| this.currentIterationSignal = undefined; | ||
| } | ||
| } | ||
|
Comment on lines
+485
to
+508
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in f6d5c39 — on the timeout path the finally now leaves the (already-aborted) currentIterationSignal in place (only resets it on the normal path), so any work() still running in the background cancels its remaining LLM calls instead of falling back to the un-aborted external signal. |
||
| } | ||
|
|
||
| /** | ||
| * The main execution loop - clean and maintainable | ||
| */ | ||
|
|
@@ -551,21 +617,25 @@ export class WebAgent { | |
| // after a navigation destroys the page) is routed through | ||
| // handleBrowserDisconnect rather than escaping runMainLoop as a hard failure. | ||
| try { | ||
| // Add page snapshot if needed | ||
| if (needsPageSnapshot) { | ||
| // Clear approved refs when page changes: ARIA refs reset on each snapshot, | ||
| // so old ref strings may now point to different DOM elements. | ||
| // Recoverable blocked action errors deliberately keep needsPageSnapshot=false | ||
| // so a blocked submit retry remains tied to the same agent-filled refs. | ||
| if (approvedRefs) { | ||
| approvedRefs.clear(); | ||
| // Run the snapshot + action under the per-iteration watchdog so a hung | ||
| // operation (e.g. a stalled LLM stream) can't freeze the task indefinitely. | ||
| const result = await this.runWithIterationWatchdog(async () => { | ||
| // Add page snapshot if needed | ||
| if (needsPageSnapshot) { | ||
| // Clear approved refs when page changes: ARIA refs reset on each snapshot, | ||
| // so old ref strings may now point to different DOM elements. | ||
| // Recoverable blocked action errors deliberately keep needsPageSnapshot=false | ||
| // so a blocked submit retry remains tied to the same agent-filled refs. | ||
| if (approvedRefs) { | ||
| approvedRefs.clear(); | ||
| } | ||
| agentFilledRefs.clear(); | ||
| operationalRefs.clear(); | ||
| await this.addPageSnapshot(); | ||
| } | ||
| agentFilledRefs.clear(); | ||
| operationalRefs.clear(); | ||
| await this.addPageSnapshot(); | ||
| } | ||
|
|
||
| const result = await this.generateAndProcessAction(task, allTools, executionState); | ||
| return await this.generateAndProcessAction(task, allTools, executionState); | ||
| }); | ||
|
|
||
| // Reset error counter on success | ||
| consecutiveErrors = 0; | ||
|
|
@@ -586,6 +656,23 @@ export class WebAgent { | |
|
|
||
| return { flow: "next" as const, needsPageSnapshot: result.pageChanged }; | ||
| } catch (error) { | ||
| // Iteration watchdog fired — end the task with a clear, distinct error rather | ||
| // than retrying a step that has already hung past its bound. | ||
| if (error instanceof IterationTimeoutError) { | ||
| stepSpan.setStatus({ code: SpanStatusCode.ERROR, message: "IterationTimeoutError" }); | ||
| recordSanitizedException(stepSpan, error); | ||
| console.error(`[WebAgent] ${error.message}; ending task`); | ||
| const message = `Task failed: ${error.message}`; | ||
| return { | ||
| flow: "return" as const, | ||
| value: { | ||
| success: false, | ||
| finalAnswer: message, | ||
| error: { code: TaskErrorCode.ITERATION_TIMEOUT, message }, | ||
| }, | ||
| }; | ||
| } | ||
|
|
||
| // Browser disconnects handled specially — don't mark span as error when recovery succeeds | ||
| if (error instanceof BrowserDisconnectedError) { | ||
| // May throw if all endpoints exhausted — propagates as hard error | ||
|
|
@@ -1006,7 +1093,9 @@ export class WebAgent { | |
| tools: webActionTools, | ||
| toolChoice: "required", | ||
| maxOutputTokens: DEFAULT_GENERATION_MAX_TOKENS, | ||
| abortSignal: this.abortSignal, | ||
| // Per-iteration watchdog signal (falls back to the caller's signal) so a | ||
| // stalled stream is aborted when the watchdog fires. | ||
| abortSignal: this.currentIterationSignal ?? this.abortSignal, | ||
| timeout: this.llmProviderTimeoutMs, | ||
| }); | ||
|
|
||
|
|
@@ -1383,7 +1472,7 @@ export class WebAgent { | |
| tools: validationTools, | ||
| toolChoice: "required", // Use "required" for compatibility with providers that don't support specific tool selection | ||
| maxOutputTokens: DEFAULT_VALIDATION_MAX_TOKENS, | ||
| abortSignal: this.abortSignal, | ||
| abortSignal: this.currentIterationSignal ?? this.abortSignal, | ||
| }, | ||
| { | ||
| maxAttempts: 2, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch — corrected the docstring in f6d5c39 to scope it accurately: only the action streamText and validation read the watchdog signal; the extract tool uses the external signal captured at tool-creation, and the Promise.race bounds the iteration regardless. Threading the per-iteration signal into the tools is a separable follow-up.