Skip to content

Commit 9a39deb

Browse files
DavertMikDavertMikclaude
authored
feat(mcp): per-test plugin overrides + shell session lifecycle (#5547)
* update docs * updated docs, added browser plugin * feat(mcp): per-test plugin overrides + shell session lifecycle - run_test / run_step_by_step accept a `plugins` object that mirrors the CLI `-p` flag (e.g. `{ screencast: { saveScreenshots: true }, aiTrace: { on: 'fail' }, pause: true }`). Container is re-initialized when the plugin set changes between calls. - start_browser / stop_browser now drive a full shell session like `codeceptjs shell`: bootstrap, recorder.start, suite.before / test.before on start; matching after events plus codecept.teardown on stop. - run_code / snapshot now require an active session (shell or paused test) and return a clear error pointing the agent at start_browser or run_test otherwise. Plugins and listeners that depend on suite.before / test.before now fire correctly during MCP usage. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(trace): TraceReader API + ariaDiff in run_code Move artifact-on-disk reading from mcp-server.js into a TraceReader class in lib/utils/trace.js. Python-style indexing via first / last / nth, kept generic across kinds (aria / html / screenshot / console / storage). Sort by filename — aiTrace's zero-padded step prefix means a lexical sort is chronological. run_code uses it to diff ARIA between the last aiTrace capture and the new one produced by the steps inside this call: const reader = new TraceReader(currentAiTraceDir) const before = reader.last('aria') // run code, aiTrace captures per step const after = reader.last('aria') if (before !== after) result.ariaDiff = ariaDiff(before, after) initCodecept now force-enables aiTrace whenever the MCP server initializes the container — it's the canonical per-step capture, no point in MCP doing its own grabAriaSnapshot. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * docs(mcp): add Agentic Testing guide; simplify ARIA snapshot pipeline - docs/agents.md: new top-level page covering the MCP loop (open the page → read → run a CodeceptJS command → check → commit), how the agent reads page artifacts, and where MCP fits relative to pause(). - lib/aria.js: trim INTERACTIVE_ROLES to roles that actually take user input (drop container roles like grid/tablist/menubar); remove IGNORED_ROLES unwrap, icon-button auto-naming, and bool/null coercion in attribute values. Names are always emitted; attribute values are passed through as plain strings. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: DavertMik <davert@testomat.io> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 890ca6d commit 9a39deb

5 files changed

Lines changed: 680 additions & 21 deletions

File tree

bin/mcp-server.js

Lines changed: 136 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ import {
1212
snapshotDirFor,
1313
artifactsToFileUrls,
1414
writeTraceMarkdown,
15+
TraceReader,
16+
ariaDiff,
1517
} from '../lib/utils/trace.js'
1618
import event from '../lib/event.js'
19+
import recorder from '../lib/recorder.js'
1720
import { setPauseHandler, pauseNow } from '../lib/pause.js'
1821
import { EventEmitter } from 'events'
1922
import { fileURLToPath, pathToFileURL } from 'url'
@@ -32,6 +35,93 @@ const __dirname = dirname(__filename)
3235
// CodeceptJS runner instance; assigned by initCodecept and reset to null by teardownContainer.
let codecept = null
3336
// True after initCodecept has created and started the container; cleared by teardownContainer.
let containerInitialized = false
3437
// Set together with containerInitialized; start_browser checks it before opening a session.
let browserStarted = false
38+
// True between startShellSession and endShellSession; ensureSession accepts it as an active session.
let shellSessionActive = false
39+
// Guards codecept.bootstrap() so ensureBootstrap runs it only once per container; cleared by teardownContainer.
let bootstrapDone = false
40+
// pluginsSignature() of the overrides the current container was built with; '' when there is none.
let currentPluginsSig = ''
41+
// Updated by the event.test.before listener; run_code hands it to TraceReader to find ARIA captures.
let currentAiTraceDir = null // mirrors the dir aiTrace plugin computes per test/session
42+
43+
// Track the trace directory for whichever test (or shell session) is starting,
// so later tool calls can read artifacts from it. Best effort: any failure
// while resolving the directory is ignored and the previous value is kept.
event.dispatcher.on(event.test.before, test => {
  try {
    // Prefer the full title, then the plain title, then a fixed fallback.
    let label = 'MCP Session'
    if (test) {
      const resolved = test.fullTitle ? test.fullTitle() : test.title
      if (resolved) label = resolved
    }
    currentAiTraceDir = traceDirFor(test?.file, label, outputBaseDir())
  } catch {}
})
49+
50+
const SESSION_REQUIRED_ERROR = 'No active CodeceptJS session. Call `start_browser` to open a shell session, or `run_test` (use `pause()` in the test, or set `pauseAt`) to inspect during a test run.'
51+
52+
/**
 * Run `codecept.bootstrap()` unless it has already completed.
 *
 * The `bootstrapDone` flag is only set after the bootstrap call resolves, so a
 * rejected bootstrap is attempted again on the next call.
 *
 * @returns {Promise<void>}
 */
async function ensureBootstrap() {
  if (!bootstrapDone) {
    await codecept.bootstrap()
    bootstrapDone = true
  }
}
57+
58+
/**
 * Open a shell session: bootstrap if needed, start the recorder, then emit
 * `suite.before` and `test.before` with stub suite/test objects titled
 * "MCP Session". Does nothing when a session is already active.
 *
 * @returns {Promise<void>}
 */
async function startShellSession() {
  if (!shellSessionActive) {
    await ensureBootstrap()
    recorder.start()

    // Stub suite: only the members provided here are available to listeners.
    const suiteStub = {
      fullTitle: () => 'MCP Session',
      tests: [],
      retries: () => {},
    }
    event.emit(event.suite.before, suiteStub)

    // Stub test with an empty artifacts bag.
    const testStub = {
      title: 'MCP Session',
      artifacts: {},
      retries: () => {},
    }
    event.emit(event.test.before, testStub)

    shellSessionActive = true
  }
}
74+
75+
/**
 * Close the active shell session by emitting `test.after`, `suite.after` and
 * `all.result`, each with an empty payload. Every emit is isolated: an error
 * from one does not stop the following ones. Does nothing when no session is
 * active.
 *
 * @returns {Promise<void>}
 */
async function endShellSession() {
  if (shellSessionActive) {
    // The event name is resolved inside the try so lookup errors are ignored too.
    const emitQuietly = pickEvent => {
      try {
        event.emit(pickEvent(), {})
      } catch {}
    }

    emitQuietly(() => event.test.after)
    emitQuietly(() => event.suite.after)
    emitQuietly(() => event.all.result)

    shellSessionActive = false
  }
}
82+
83+
/**
 * Assert that there is something to act on: either an active shell session or
 * a paused test run.
 *
 * @throws {Error} With SESSION_REQUIRED_ERROR when neither is present.
 */
function ensureSession() {
  if (!shellSessionActive && !pausedController) {
    throw new Error(SESSION_REQUIRED_ERROR)
  }
}
87+
88+
/**
 * Normalize the `plugins` tool argument into a `{ name: options }` map.
 *
 * - `false` drops the plugin from the result.
 * - `true`, `null` and `undefined` mean "enable with defaults" and become `{}`.
 * - Plain objects are passed through as the plugin's options.
 * - Any other value (string, number, array, ...) also becomes `{}`: such
 *   values cannot carry named options, and object-spreading them into a
 *   plugin config would create index keys ("0", "1", ...) instead.
 *
 * @param {object} [plugins] Plugin overrides keyed by plugin name.
 * @returns {object} Map of plugin name to options object; empty when
 *   `plugins` is missing, is not an object, or is an array.
 */
function normalizePluginOverrides(plugins) {
  // An array is typeof 'object' but is not a name -> options map.
  if (!plugins || typeof plugins !== 'object' || Array.isArray(plugins)) return {}

  const out = {}
  for (const [name, opts] of Object.entries(plugins)) {
    if (opts === false) continue
    const isOptionsObject = opts !== null && typeof opts === 'object' && !Array.isArray(opts)
    out[name] = isOptionsObject ? opts : {}
  }
  return out
}
97+
98+
/**
 * Merge plugin overrides into `config.plugins` in place and force every
 * overridden plugin on (`enabled: true`).
 *
 * Existing options for a plugin are kept and override options win on
 * conflict. `config.plugins` is created when missing. Override values that
 * are not plain objects contribute no options (the plugin is still enabled),
 * which avoids spreading strings/arrays into index keys.
 *
 * @param {object} config CodeceptJS config object; mutated.
 * @param {object} [plugins] Map of plugin name to options object. A missing
 *   or non-map value leaves the existing plugin entries untouched.
 * @returns {void}
 */
function applyPluginOverrides(config, plugins) {
  config.plugins = config.plugins || {}
  if (!plugins || typeof plugins !== 'object' || Array.isArray(plugins)) return

  for (const [name, opts] of Object.entries(plugins)) {
    const isOptionsObject = opts !== null && typeof opts === 'object' && !Array.isArray(opts)
    const overrides = isOptionsObject ? opts : {}
    // `enabled: true` comes last so it wins over both sources.
    config.plugins[name] = { ...(config.plugins[name] || {}), ...overrides, enabled: true }
  }
}
104+
105+
/**
 * Build a stable string signature for a normalized plugin map, used to detect
 * whether the requested plugin set differs from the one currently loaded.
 *
 * Keys are sorted at every nesting level, so two maps with the same content
 * but different key order produce the same signature. Array order is kept,
 * since it is significant.
 *
 * @param {object} plugins Map of plugin name to options object.
 * @returns {string} JSON text of `[name, options]` pairs sorted by name.
 */
function pluginsSignature(plugins) {
  const canonical = value => {
    if (Array.isArray(value)) return value.map(canonical)
    if (value === null || typeof value !== 'object') return value
    // Leave values with custom serialization (e.g. Date) to JSON.stringify.
    if (typeof value.toJSON === 'function') return value
    return Object.fromEntries(
      Object.keys(value)
        .sort()
        .map(key => [key, canonical(value[key])])
    )
  }

  const names = Object.keys(plugins).sort()
  return JSON.stringify(names.map(name => [name, canonical(plugins[name])]))
}
109+
110+
/**
 * Shut down the current container, if one is initialized: end the shell
 * session, call `_finish()` on every helper that has it, call
 * `codecept.teardown()` when available, then reset module state so the next
 * initCodecept starts from scratch.
 *
 * Errors from helper `_finish()` and from `codecept.teardown()` are ignored so
 * that one failing step does not prevent the rest of the cleanup.
 *
 * @returns {Promise<void>}
 */
async function teardownContainer() {
  if (containerInitialized) {
    await endShellSession()

    const registry = container.helpers()
    for (const key in registry) {
      const instance = registry[key]
      try {
        if (instance._finish) await instance._finish()
      } catch {}
    }

    try {
      if (codecept?.teardown) await codecept.teardown()
    } catch {}

    // Reset everything initCodecept / ensureBootstrap set up.
    codecept = null
    currentPluginsSig = ''
    bootstrapDone = false
    browserStarted = false
    containerInitialized = false
  }
}
35125

36126
let runLock = Promise.resolve()
37127
async function withLock(fn) {
@@ -318,8 +408,14 @@ function pausedPayload() {
318408
}
319409
}
320410

321-
async function initCodecept(configPath) {
322-
if (containerInitialized) return
411+
async function initCodecept(configPath, pluginOverrides) {
412+
const plugins = normalizePluginOverrides(pluginOverrides)
413+
const sig = pluginsSignature(plugins)
414+
415+
if (containerInitialized) {
416+
if (!Object.keys(plugins).length || sig === currentPluginsSig) return
417+
await teardownContainer()
418+
}
323419

324420
const testRoot = process.env.CODECEPTJS_PROJECT_DIR || process.cwd()
325421

@@ -344,15 +440,23 @@ async function initCodecept(configPath) {
344440
const { getConfig } = await import('../lib/command/utils.js')
345441
const config = await getConfig(configPath)
346442

443+
// aiTrace is the canonical per-step ARIA/HTML/screenshot capture for MCP.
444+
// Always on so run_code / continue can read the latest snapshot from disk
445+
// instead of double-capturing through grabAriaSnapshot etc.
446+
applyPluginOverrides(config, { aiTrace: {}, ...plugins })
447+
347448
codecept = new Codecept(config, {})
348449
await codecept.init(testRoot)
349450
await container.create(config, {})
350451
await container.started()
351452

352453
containerInitialized = true
353454
browserStarted = true
455+
currentPluginsSig = sig
354456
}
355457

458+
const PLUGINS_DESCRIPTION = 'Enable CodeceptJS plugins for this run, mirroring the CLI `-p` flag. Keys are plugin names (e.g. screencast, aiTrace, pause, pageInfo, heal, retryFailedStep, screenshotOnFail, autoDelay). Value `true` or `{}` enables with defaults; an object merges options, e.g. {"screencast": {"saveScreenshots": true}, "aiTrace": {"on": "fail"}}. Changing the plugin set tears down and re-initializes the container (closes the browser).'
459+
356460
const server = new Server(
357461
{ name: 'codeceptjs-mcp-server', version: '1.0.0' },
358462
{ capabilities: { tools: {} } }
@@ -394,6 +498,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
394498
timeout: { type: 'number' },
395499
config: { type: 'string' },
396500
pauseAt: { type: 'number', description: '1-based step index. Test will pause after the Nth step completes. Useful as a programmatic breakpoint without editing the test.' },
501+
plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true },
397502
},
398503
required: ['test'],
399504
},
@@ -407,6 +512,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
407512
test: { type: 'string' },
408513
timeout: { type: 'number' },
409514
config: { type: 'string' },
515+
plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true },
410516
},
411517
required: ['test'],
412518
},
@@ -497,33 +603,26 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
497603

498604
case 'start_browser': {
499605
const configPath = args?.config
500-
if (browserStarted) {
501-
return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser already started' }, null, 2) }] }
606+
if (browserStarted && shellSessionActive) {
607+
return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session already active' }, null, 2) }] }
502608
}
503609
await initCodecept(configPath)
504-
return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser started successfully' }, null, 2) }] }
610+
await startShellSession()
611+
return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session started — run_code and snapshot are now available' }, null, 2) }] }
505612
}
506613

507614
case 'stop_browser': {
508615
if (!containerInitialized) {
509616
return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser not initialized' }, null, 2) }] }
510617
}
511-
512-
const helpers = container.helpers()
513-
for (const helperName in helpers) {
514-
const helper = helpers[helperName]
515-
try { if (helper._finish) await helper._finish() } catch {}
516-
}
517-
518-
browserStarted = false
519-
containerInitialized = false
520-
618+
await teardownContainer()
521619
return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser stopped successfully' }, null, 2) }] }
522620
}
523621

524622
case 'snapshot': {
525623
const { config: configPath, fullPage = false } = args || {}
526624
await initCodecept(configPath)
625+
ensureSession()
527626

528627
const helper = pickActingHelper(container.helpers())
529628
if (!helper) throw new Error('No supported acting helper available (Playwright, Puppeteer, WebDriver).')
@@ -588,6 +687,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
588687
case 'run_code': {
589688
const { code, timeout = 60000, config: configPath, saveArtifacts = true } = args
590689
await initCodecept(configPath)
690+
ensureSession()
591691

592692
const I = container.support('I')
593693
if (!I) throw new Error('I object not available. Make sure helpers are configured.')
@@ -604,6 +704,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
604704
mkdirp.sync(traceDir)
605705
const startedAt = Date.now()
606706

707+
// Pin the latest aiTrace ARIA file before running the code, so we
708+
// can diff after. aiTrace owns per-step capture; we just read it.
709+
const reader = new TraceReader(currentAiTraceDir)
710+
const ariaBefore = reader.last('aria')
711+
607712
const MAX_LOG_ENTRIES = 100
608713
const MAX_LOG_MSG_BYTES = 2000
609714
const MAX_RETURN_BYTES = 20000
@@ -666,6 +771,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
666771
}
667772
}
668773

774+
// Diff against the latest aiTrace ARIA file produced by the steps
775+
// that just ran inside this run_code call.
776+
const ariaAfter = reader.last('aria')
777+
if (ariaBefore && ariaAfter && ariaBefore !== ariaAfter) {
778+
const diff = ariaDiff(ariaBefore, ariaAfter)
779+
if (diff) result.ariaDiff = diff
780+
}
781+
669782
const traceFile = writeTraceMarkdown({
670783
dir: traceDir,
671784
title: 'run_code',
@@ -686,8 +799,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
686799
if (pausedController) {
687800
throw new Error('A previous run_test is still paused. Call "continue" first.')
688801
}
689-
const { test, timeout = 60000, config: configPathArg, pauseAt } = args || {}
690-
await initCodecept(configPathArg)
802+
const { test, timeout = 60000, config: configPathArg, pauseAt, plugins } = args || {}
803+
await initCodecept(configPathArg, plugins)
804+
await endShellSession()
691805

692806
return await withSilencedIO(async () => {
693807
codecept.loadTests()
@@ -740,7 +854,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
740854
let runError = null
741855
const runPromise = (async () => {
742856
try {
743-
await codecept.bootstrap()
857+
await ensureBootstrap()
744858
await codecept.run(testFile)
745859
} catch (err) {
746860
runError = err
@@ -779,8 +893,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
779893
if (pausedController) {
780894
throw new Error('A previous run is still paused. Call "continue" first.')
781895
}
782-
const { test, timeout = 60000, config: configPath } = args || {}
783-
await initCodecept(configPath)
896+
const { test, timeout = 60000, config: configPath, plugins } = args || {}
897+
await initCodecept(configPath, plugins)
898+
await endShellSession()
784899

785900
return await withSilencedIO(async () => {
786901
codecept.loadTests()
@@ -832,7 +947,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
832947
let runError = null
833948
const runPromise = (async () => {
834949
try {
835-
await codecept.bootstrap()
950+
await ensureBootstrap()
836951
await codecept.run(testFile)
837952
} catch (err) {
838953
runError = err

0 commit comments

Comments
 (0)