diff --git a/src/McpPage.ts b/src/McpPage.ts index 2fc9590b9..cd2a222c6 100644 --- a/src/McpPage.ts +++ b/src/McpPage.ts @@ -28,6 +28,7 @@ import type { import { getNetworkMultiplierFromString, WaitForHelper, + type WaitForEventsResult, } from './WaitForHelper.js'; /** @@ -132,7 +133,7 @@ export class McpPage implements ContextPage { waitForEventsAfterAction( action: () => Promise, options?: {timeout?: number; handleDialog?: 'accept' | 'dismiss' | string}, - ): Promise { + ): Promise { const helper = this.createWaitForHelper( this.cpuThrottlingRate, getNetworkMultiplierFromString(this.networkConditions), diff --git a/src/WaitForHelper.ts b/src/WaitForHelper.ts index 6a67224c3..ee2e812d7 100644 --- a/src/WaitForHelper.ts +++ b/src/WaitForHelper.ts @@ -127,7 +127,7 @@ export class WaitForHelper { async waitForEventsAfterAction( action: () => Promise, options?: {timeout?: number; handleDialog?: 'accept' | 'dismiss' | string}, - ): Promise { + ): Promise { let dialogOpened = false; if (options?.handleDialog) { const dialogHandler = (dialog: Pick) => { @@ -146,6 +146,7 @@ export class WaitForHelper { }); } + const urlBeforeAction = this.#page.url(); const navigationFinished = this.waitForNavigationStarted() .then(navigationStated => { if (navigationStated) { @@ -170,7 +171,7 @@ export class WaitForHelper { await navigationFinished; if (dialogOpened) { - return; + return {}; } // Wait for stable dom after navigation so we execute in @@ -181,6 +182,30 @@ export class WaitForHelper { } finally { this.#abortController.abort(); } + + const urlAfterAction = this.#page.url(); + return { + ...(urlAfterAction !== urlBeforeAction + ? {navigatedToUrl: urlAfterAction} + : {}), + }; + } +} + +export interface WaitForEventsResult { + /** + * The URL the page navigated to during the action, if a navigation + * occurred. + */ + navigatedToUrl?: string; +} + +export function appendWaitForResult( + response: {appendResponseLine(value: string): void}, + result: WaitForEventsResult, +): void { + if (result.navigatedToUrl) { + response.appendResponseLine(`Page navigated to ${result.navigatedToUrl}.`); } } diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index 8acc24be3..828b8097d 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -24,6 +24,7 @@ import type { ExtensionServiceWorker, } from '../types.js'; import type {PaginationOptions} from '../utils/types.js'; +import type {WaitForEventsResult} from '../WaitForHelper.js'; import type {ToolCategory} from './categories.js'; import type { @@ -260,7 +261,7 @@ export type ContextPage = Readonly<{ waitForEventsAfterAction( action: () => Promise, options?: {timeout?: number; handleDialog?: 'accept' | 'dismiss' | string}, - ): Promise; + ): Promise; getInPageTools(): ToolGroup | undefined; executeInPageTool( toolName: string, diff --git a/src/tools/input.ts b/src/tools/input.ts index 697048779..ec699a359 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -10,6 +10,10 @@ import {zod} from '../third_party/index.js'; import type {ElementHandle, KeyInput} from '../third_party/index.js'; import type {TextSnapshotNode} from '../types.js'; import {parseKey} from '../utils/keyboard.js'; +import { + appendWaitForResult, + type WaitForEventsResult, +} from '../WaitForHelper.js'; import {ToolCategory} from './categories.js'; import type {ContextPage} from './ToolDefinition.js'; @@ -63,7 +67,7 @@ export const click = definePageTool({ const uid = request.params.uid; const handle = await request.page.getElementByUid(uid); try { - await request.page.waitForEventsAfterAction(async () => { + const result = await request.page.waitForEventsAfterAction(async () => { await handle.asLocator().click({ count: request.params.dblClick ? 2 : 1, }); @@ -73,6 +77,7 @@ export const click = definePageTool({ ? `Successfully double clicked on the element` : `Successfully clicked on the element`, ); + appendWaitForResult(response, result); if (request.params.includeSnapshot) { response.includeSnapshot(); } @@ -101,7 +106,7 @@ export const clickAt = definePageTool({ blockedByDialog: true, handler: async (request, response) => { const page = request.page; - await page.waitForEventsAfterAction(async () => { + const result = await page.waitForEventsAfterAction(async () => { await page.pptrPage.mouse.click(request.params.x, request.params.y, { clickCount: request.params.dblClick ? 2 : 1, }); @@ -111,6 +116,7 @@ export const clickAt = definePageTool({ ? `Successfully double clicked at the coordinates` : `Successfully clicked at the coordinates`, ); + appendWaitForResult(response, result); if (request.params.includeSnapshot) { response.includeSnapshot(); } @@ -137,10 +143,11 @@ export const hover = definePageTool({ const uid = request.params.uid; const handle = await request.page.getElementByUid(uid); try { - await request.page.waitForEventsAfterAction(async () => { + const result = await request.page.waitForEventsAfterAction(async () => { await handle.asLocator().hover(); }); response.appendResponseLine(`Successfully hovered over the element`); + appendWaitForResult(response, result); if (request.params.includeSnapshot) { response.includeSnapshot(); } @@ -239,7 +246,7 @@ export const fill = definePageTool({ blockedByDialog: true, handler: async (request, response, context) => { const page = request.page; - await page.waitForEventsAfterAction(async () => { + const result = await page.waitForEventsAfterAction(async () => { await fillFormElement( request.params.uid, request.params.value, @@ -248,6 +255,7 @@ export const fill = definePageTool({ ); }); response.appendResponseLine(`Successfully filled out the element`); + appendWaitForResult(response, result); if (request.params.includeSnapshot) { response.includeSnapshot(); } @@ -268,7 +276,7 @@ export const typeText = definePageTool({ blockedByDialog: true, handler: async (request, response) => { const page = request.page; - await page.waitForEventsAfterAction(async () => { + const result = await page.waitForEventsAfterAction(async () => { await page.pptrPage.keyboard.type(request.params.text); if (request.params.submitKey) { await page.pptrPage.keyboard.press( @@ -279,6 +287,7 @@ export const typeText = definePageTool({ response.appendResponseLine( `Typed text "${request.params.text}${request.params.submitKey ? ` + ${request.params.submitKey}` : ''}"`, ); + appendWaitForResult(response, result); }, }); @@ -301,12 +310,13 @@ export const drag = definePageTool({ ); const toHandle = await request.page.getElementByUid(request.params.to_uid); try { - await request.page.waitForEventsAfterAction(async () => { + const result = await request.page.waitForEventsAfterAction(async () => { await fromHandle.drag(toHandle); await new Promise(resolve => setTimeout(resolve, 50)); await toHandle.drop(fromHandle); }); response.appendResponseLine(`Successfully dragged an element`); + appendWaitForResult(response, result); if (request.params.includeSnapshot) { response.includeSnapshot(); } @@ -339,8 +349,9 @@ export const fillForm = definePageTool({ blockedByDialog: true, handler: async (request, response, context) => { const page = request.page; + let lastResult: WaitForEventsResult = {}; for (const element of request.params.elements) { - await page.waitForEventsAfterAction(async () => { + lastResult = await page.waitForEventsAfterAction(async () => { await fillFormElement( element.uid, element.value, @@ -350,6 +361,7 @@ export const fillForm = definePageTool({ }); } response.appendResponseLine(`Successfully filled out the form`); + appendWaitForResult(response, lastResult); if (request.params.includeSnapshot) { response.includeSnapshot(); } @@ -429,7 +441,7 @@ export const pressKey = definePageTool({ const tokens = parseKey(request.params.key); const [key, ...modifiers] = tokens; - await page.waitForEventsAfterAction(async () => { + const result = await page.waitForEventsAfterAction(async () => { for (const modifier of modifiers) { await page.pptrPage.keyboard.down(modifier); } @@ -442,6 +454,7 @@ export const pressKey = definePageTool({ response.appendResponseLine( `Successfully pressed key: ${request.params.key}`, ); + appendWaitForResult(response, result); if (request.params.includeSnapshot) { response.includeSnapshot(); } diff --git a/src/tools/script.ts b/src/tools/script.ts index faabf5c34..7d27be5c5 100644 --- a/src/tools/script.ts +++ b/src/tools/script.ts @@ -7,6 +7,7 @@ import {zod} from '../third_party/index.js'; import type {Frame, JSHandle, Page, WebWorker} from '../third_party/index.js'; import type {ExtensionServiceWorker} from '../types.js'; +import {appendWaitForResult} from '../WaitForHelper.js'; import {ToolCategory} from './categories.js'; import type {Context, Response} from './ToolDefinition.js'; @@ -85,12 +86,15 @@ Example with arguments: \`(el) => { } const worker = await getWebWorker(context, serviceWorkerId); - await context.getSelectedMcpPage().waitForEventsAfterAction( - async () => { - await performEvaluation(worker, fnString, [], response); - }, - {handleDialog: dialogAction ?? 'accept'}, - ); + const result = await context + .getSelectedMcpPage() + .waitForEventsAfterAction( + async () => { + await performEvaluation(worker, fnString, [], response); + }, + {handleDialog: dialogAction ?? 'accept'}, + ); + appendWaitForResult(response, result); return; } @@ -110,12 +114,13 @@ Example with arguments: \`(el) => { const evaluatable = await getPageOrFrame(page, frames); - await mcpPage.waitForEventsAfterAction( + const result = await mcpPage.waitForEventsAfterAction( async () => { await performEvaluation(evaluatable, fnString, args, response); }, {handleDialog: dialogAction ?? 'accept'}, ); + appendWaitForResult(response, result); } finally { void Promise.allSettled(args.map(arg => arg.dispose())); } diff --git a/tests/tools/input.test.ts b/tests/tools/input.test.ts index b0033ec4d..420b1012d 100644 --- a/tests/tools/input.test.ts +++ b/tests/tools/input.test.ts @@ -130,6 +130,67 @@ describe('input', () => { }); }); + it('reports the new URL when click triggers a navigation', async () => { + server.addHtmlRoute( + '/start', + html`Navigate page`, + ); + server.addHtmlRoute('/after-click', html`
arrived
`); + + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.goto(server.getRoute('/start')); + context.getSelectedMcpPage().textSnapshot = await TextSnapshot.create( + context.getSelectedMcpPage(), + ); + await click.handler( + { + params: { + uid: '1_1', + }, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + const expectedUrl = server.getRoute('/after-click'); + assert.ok( + response.responseLines.some( + line => line === `Page navigated to ${expectedUrl}.`, + ), + `Expected response to mention navigation to ${expectedUrl}, got: ${response.responseLines.join(' | ')}`, + ); + }); + }); + + it('does not report navigation when click does not navigate', async () => { + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setContent( + html``, + ); + context.getSelectedMcpPage().textSnapshot = await TextSnapshot.create( + context.getSelectedMcpPage(), + ); + await click.handler( + { + params: { + uid: '1_1', + }, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + assert.ok( + !response.responseLines.some(line => + line.startsWith('Page navigated to '), + ), + `Did not expect a navigation line, got: ${response.responseLines.join(' | ')}`, + ); + }); + }); + it('waits for stable DOM', async () => { server.addHtmlRoute( '/unstable',