Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion packages/core/schemas/webagent-event.json
Original file line number Diff line number Diff line change
Expand Up @@ -3920,7 +3920,8 @@
"TASK_ABORTED",
"MAX_ITERATIONS",
"MAX_ERRORS",
"TASK_FAILED"
"TASK_FAILED",
"ITERATION_TIMEOUT"
],
"type": "string"
},
Expand Down
10 changes: 10 additions & 0 deletions packages/core/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@ export const DEFAULT_PLANNING_MAX_TOKENS = 1500;
/** Max tokens for validation */
export const DEFAULT_VALIDATION_MAX_TOKENS = 1000;

/**
* Per-iteration watchdog timeout in milliseconds (5 minutes).
*
* Bounds a single agent loop iteration (page snapshot + LLM call + tool execution) so a
* hung operation can't freeze the task indefinitely. Generous: an iteration legitimately
* spans an LLM call (~120s) plus tool execution (extract can be another ~120s call), so
* this should only fire on a genuine hang.
*/
export const DEFAULT_ITERATION_TIMEOUT_MS = 300000;

// ============================================================================
// AI Retry Configuration
// ============================================================================
Expand Down
14 changes: 14 additions & 0 deletions packages/core/src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,20 @@ export class NavigationTimeoutException extends BrowserException {
}
}

/**
* Thrown when a single agent iteration exceeds its watchdog timeout.
*
* Terminal: extends Error (not RecoverableError) because a hung iteration ends the
* task with a clear error rather than being retried. Distinct from a user abort
* (the caller's abortSignal is not aborted by the watchdog).
*/
export class IterationTimeoutError extends Error {
constructor(message: string) {
super(message);
this.name = "IterationTimeoutError";
}
}

/**
* Thrown when the browser connection is lost mid-task (CDP session closed).
* WebAgent catches this to trigger a browser restart and execution reset rather
Expand Down
119 changes: 104 additions & 15 deletions packages/core/src/webAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { Logger } from "./loggers/types.js";
import { ConsoleLogger } from "./loggers/console.js";
import {
BrowserDisconnectedError,
IterationTimeoutError,
NoStartingUrlError,
PlanningError,
RecoverableError,
Expand Down Expand Up @@ -51,6 +52,7 @@ import { nanoid } from "nanoid";
import { getConfigDefaults, type SearchProviderName } from "./config/defaults.js";
import {
DEFAULT_GENERATION_MAX_TOKENS,
DEFAULT_ITERATION_TIMEOUT_MS,
DEFAULT_PLANNING_MAX_TOKENS,
DEFAULT_VALIDATION_MAX_TOKENS,
} from "./constants.js";
Expand Down Expand Up @@ -127,6 +129,13 @@ export interface WebAgentOptions {
unsafeMode?: boolean;
/** Timeout for LLM provider calls in milliseconds (default: from config) */
llmProviderTimeoutMs?: number;
/**
* Per-iteration watchdog timeout in milliseconds (default: 300000 / 5 min). A single
* loop iteration (page snapshot + LLM call + tool execution) that exceeds this is
* aborted and the task ends with an ITERATION_TIMEOUT error, instead of hanging
* indefinitely. Generous by default so it only fires on a genuine hang.
*/
iterationTimeoutMs?: number;
}

export interface ExecuteOptions {
Expand All @@ -148,6 +157,8 @@ export enum TaskErrorCode {
MAX_ERRORS = "MAX_ERRORS",
/** Generic task failure */
TASK_FAILED = "TASK_FAILED",
/** A single iteration exceeded its watchdog timeout */
ITERATION_TIMEOUT = "ITERATION_TIMEOUT",
}

/** Structured error information for failed tasks */
Expand Down Expand Up @@ -241,6 +252,12 @@ export class WebAgent {
private currentIterationId: string = "";
private data: any = null;
private abortSignal: AbortSignal | undefined = undefined;
/**
* The abort signal for the current iteration: the caller's abortSignal combined with
* the per-iteration watchdog. LLM calls use this so a watchdog timeout aborts the
* in-flight request. Set while an iteration runs, cleared afterward.
*/
private currentIterationSignal: AbortSignal | undefined = undefined;

// === Services ===
private compressor: SnapshotCompressor;
Expand All @@ -267,6 +284,7 @@ export class WebAgent {
private readonly taskId: string | undefined;
private readonly firewall: FirewallConfig;
private readonly llmProviderTimeoutMs: number;
private readonly iterationTimeoutMs: number;
// Host of the caller-provided start URL (options.startingUrl), captured at
// execute() time. Trusted by the firewall — navigating somewhere the caller
// explicitly named is consent to interact with that host. NOT set from the
Expand Down Expand Up @@ -310,6 +328,7 @@ export class WebAgent {
unsafeMode: Boolean(options.unsafeMode),
});
this.llmProviderTimeoutMs = options.llmProviderTimeoutMs ?? defaults.llm_provider_timeout_ms;
this.iterationTimeoutMs = options.iterationTimeoutMs ?? DEFAULT_ITERATION_TIMEOUT_MS;

if (this.searchProvider === "parallel-api" && !this.searchApiKey) {
throw new Error("parallel_api_key is required when search_provider is 'parallel-api'");
Expand Down Expand Up @@ -442,6 +461,53 @@ export class WebAgent {
);
}

/**
* Run one iteration's work under a per-iteration watchdog.
*
* Sets `this.currentIterationSignal` to the caller's abortSignal combined with a fresh
* watchdog controller. The directly-issued iteration LLM calls (action `streamText` and
* validation) read this signal, so they abort when the watchdog fires. (The `extract`
* tool runs its own LLM call with the abort signal captured at tool-creation time — the
* external signal — so the watchdog doesn't propagate to it; the `Promise.race` below
* still bounds the iteration regardless.)
*
* Races the work against the timeout — the race is the correctness guarantee: the
Comment on lines +466 to +474

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — corrected the docstring in f6d5c39 to scope it accurately: only the action streamText and validation read the watchdog signal; the extract tool uses the external signal captured at tool-creation, and the Promise.race bounds the iteration regardless. Threading the per-iteration signal into the tools is a separable follow-up.

* iteration is bounded even if an operation ignores the abort signal (the abort is
* best-effort cleanup of the in-flight LLM call). On timeout, throws
* `IterationTimeoutError`.
*/
private async runWithIterationWatchdog<T>(work: () => Promise<T>): Promise<T> {
const watchdog = new AbortController();
this.currentIterationSignal = this.abortSignal
? AbortSignal.any([this.abortSignal, watchdog.signal])
: watchdog.signal;

let timer!: ReturnType<typeof setTimeout>;
const timeout = new Promise<never>((_, reject) => {
timer = setTimeout(() => {
watchdog.abort();
reject(
new IterationTimeoutError(
`Iteration exceeded the ${this.iterationTimeoutMs}ms watchdog timeout`,
),
);
}, this.iterationTimeoutMs);
});

try {
return await Promise.race([work(), timeout]);
} finally {
clearTimeout(timer);
// Only restore the signal on the normal path. If the watchdog fired, leave the
// (now-aborted) signal in place so any work() still running in the background
// cancels its remaining LLM calls instead of falling back to the un-aborted
// external signal. The task ends on timeout, so nothing else needs it reset.
if (!watchdog.signal.aborted) {
this.currentIterationSignal = undefined;
}
}
Comment on lines +485 to +508

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in f6d5c39 — on the timeout path the finally now leaves the (already-aborted) currentIterationSignal in place (only resets it on the normal path), so any work() still running in the background cancels its remaining LLM calls instead of falling back to the un-aborted external signal.

}

/**
* The main execution loop - clean and maintainable
*/
Expand Down Expand Up @@ -551,21 +617,25 @@ export class WebAgent {
// after a navigation destroys the page) is routed through
// handleBrowserDisconnect rather than escaping runMainLoop as a hard failure.
try {
// Add page snapshot if needed
if (needsPageSnapshot) {
// Clear approved refs when page changes: ARIA refs reset on each snapshot,
// so old ref strings may now point to different DOM elements.
// Recoverable blocked action errors deliberately keep needsPageSnapshot=false
// so a blocked submit retry remains tied to the same agent-filled refs.
if (approvedRefs) {
approvedRefs.clear();
// Run the snapshot + action under the per-iteration watchdog so a hung
// operation (e.g. a stalled LLM stream) can't freeze the task indefinitely.
const result = await this.runWithIterationWatchdog(async () => {
// Add page snapshot if needed
if (needsPageSnapshot) {
// Clear approved refs when page changes: ARIA refs reset on each snapshot,
// so old ref strings may now point to different DOM elements.
// Recoverable blocked action errors deliberately keep needsPageSnapshot=false
// so a blocked submit retry remains tied to the same agent-filled refs.
if (approvedRefs) {
approvedRefs.clear();
}
agentFilledRefs.clear();
operationalRefs.clear();
await this.addPageSnapshot();
}
agentFilledRefs.clear();
operationalRefs.clear();
await this.addPageSnapshot();
}

const result = await this.generateAndProcessAction(task, allTools, executionState);
return await this.generateAndProcessAction(task, allTools, executionState);
});

// Reset error counter on success
consecutiveErrors = 0;
Expand All @@ -586,6 +656,23 @@ export class WebAgent {

return { flow: "next" as const, needsPageSnapshot: result.pageChanged };
} catch (error) {
// Iteration watchdog fired — end the task with a clear, distinct error rather
// than retrying a step that has already hung past its bound.
if (error instanceof IterationTimeoutError) {
stepSpan.setStatus({ code: SpanStatusCode.ERROR, message: "IterationTimeoutError" });
recordSanitizedException(stepSpan, error);
console.error(`[WebAgent] ${error.message}; ending task`);
const message = `Task failed: ${error.message}`;
return {
flow: "return" as const,
value: {
success: false,
finalAnswer: message,
error: { code: TaskErrorCode.ITERATION_TIMEOUT, message },
},
};
}

// Browser disconnects handled specially — don't mark span as error when recovery succeeds
if (error instanceof BrowserDisconnectedError) {
// May throw if all endpoints exhausted — propagates as hard error
Expand Down Expand Up @@ -1006,7 +1093,9 @@ export class WebAgent {
tools: webActionTools,
toolChoice: "required",
maxOutputTokens: DEFAULT_GENERATION_MAX_TOKENS,
abortSignal: this.abortSignal,
// Per-iteration watchdog signal (falls back to the caller's signal) so a
// stalled stream is aborted when the watchdog fires.
abortSignal: this.currentIterationSignal ?? this.abortSignal,
timeout: this.llmProviderTimeoutMs,
});

Expand Down Expand Up @@ -1383,7 +1472,7 @@ export class WebAgent {
tools: validationTools,
toolChoice: "required", // Use "required" for compatibility with providers that don't support specific tool selection
maxOutputTokens: DEFAULT_VALIDATION_MAX_TOKENS,
abortSignal: this.abortSignal,
abortSignal: this.currentIterationSignal ?? this.abortSignal,
},
{
maxAttempts: 2,
Expand Down
65 changes: 65 additions & 0 deletions packages/core/test/webAgent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,71 @@ describe("WebAgent", () => {
});
});

describe("iteration watchdog", () => {
function mockPlan(): void {
mockGenerateTextWithRetry.mockResolvedValueOnce({
text: "Planning",
toolResults: [
{
type: "tool-result",
toolCallId: "plan_1",
toolName: "create_plan",
input: { successCriteria: "Done", plan: "1. do it" },
output: { successCriteria: "Done", plan: "1. do it" },
},
],
} as any);
}

// A streamText result whose fullStream never completes (simulates a stalled stream).
function hangingStreamResult(): any {
return {
fullStream: {
async *[Symbol.asyncIterator]() {
await new Promise(() => {});
},
},
toolResults: new Promise(() => {}),
response: new Promise(() => {}),
finishReason: new Promise(() => {}),
usage: new Promise(() => {}),
warnings: new Promise(() => {}),
providerMetadata: new Promise(() => {}),
};
}

it("aborts an iteration whose LLM stream never completes and returns ITERATION_TIMEOUT", async () => {
// Real timers + a tiny watchdog so the stalled iteration is bounded quickly.
vi.useRealTimers();
mockPlan();
mockStreamText.mockReturnValueOnce(hangingStreamResult());

const agent = new WebAgent(mockBrowser, { ...options, iterationTimeoutMs: 50 });
const result = await agent.execute("test task", { startingUrl: "https://example.com" });

expect(result.success).toBe(false);
expect(result.error?.code).toBe("ITERATION_TIMEOUT");

await agent.close();
});

it("passes a watchdog-derived abort signal to streamText that fires on timeout", async () => {
vi.useRealTimers();
mockPlan();
mockStreamText.mockReturnValueOnce(hangingStreamResult());

const agent = new WebAgent(mockBrowser, { ...options, iterationTimeoutMs: 50 });
await agent.execute("test task", { startingUrl: "https://example.com" });

const call = mockStreamText.mock.calls.at(-1)?.[0];
expect(call?.abortSignal).toBeInstanceOf(AbortSignal);
// The watchdog fired, so the signal handed to the LLM call is now aborted.
expect(call?.abortSignal?.aborted).toBe(true);

await agent.close();
});
});

describe("planning", () => {
it("should generate plan with URL when not provided", async () => {
mockGenerateTextWithRetry.mockResolvedValueOnce({
Expand Down
Loading