diff --git a/bun.lock b/bun.lock index 6570d4c..88fa291 100644 --- a/bun.lock +++ b/bun.lock @@ -91,6 +91,7 @@ "dependencies": { "@aqa/adapters": "workspace:*", "@aqa/pack-loader": "workspace:*", + "@aqa/reporter": "workspace:*", "@aqa/runner": "workspace:*", "@aqa/schemas": "workspace:*", "kleur": "^4.1.5", diff --git a/packages/kit/package.json b/packages/kit/package.json index cc0ef06..659b3e6 100644 --- a/packages/kit/package.json +++ b/packages/kit/package.json @@ -20,12 +20,13 @@ "build": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs", "typecheck": "tsc -p tsconfig.json --noEmit", "pretest": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs", - "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts test/install-agent-files-cmd.test.ts", + "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts test/install-agent-files-cmd.test.ts test/report-cmd.test.ts", "clean": "node --input-type=module -e \"import { rmSync } from 'node:fs'; for (const p of ['dist','.tsbuildinfo']) { try { rmSync(p, { recursive: true, force: true }); } catch {} }\"" }, "dependencies": { "@aqa/adapters": "workspace:*", "@aqa/pack-loader": "workspace:*", + "@aqa/reporter": "workspace:*", "@aqa/runner": "workspace:*", "@aqa/schemas": "workspace:*", "kleur": "^4.1.5", diff --git a/packages/kit/src/cli/aqa.ts b/packages/kit/src/cli/aqa.ts index 4e3fc37..b8fa94f 100644 --- a/packages/kit/src/cli/aqa.ts +++ b/packages/kit/src/cli/aqa.ts @@ -4,6 +4,7 @@ import { type CheckStatus, runDoctor } from '../commands/doctor.js'; import { runInit } from '../commands/init.js'; import { runInstallAgentFiles } from '../commands/install-agent-files.js'; import { runPackNew } from '../commands/pack-new.js'; +import { runReport } from '../commands/report.js'; import { runRun } from '../commands/run.js'; import { runValidate } from '../commands/validate.js'; @@ -32,6 +33,8 @@ const VALUE_FLAGS = new Set([ 'license', 'targets', 'project-name', + 'run-id', + 'format', ]); function parseArgs(argv: string[]): ParsedArgs { @@ -93,6 +96,7 @@ ${bold('Commands')} install-agent-files --targets … Write CLAUDE.md / AGENTS.md / GEMINI.md / .github/copilot-instructions.md plus per-agent skills under .claude/ .agents/ .gemini/ .github/ run [--profile

] Execute scenarios for the given profile; write events + findings + report [--run-id ] Render the latest (or specified) run as report.md + report.json pack new Scaffold a new pack at /packs// (see the pack authoring guide: https://github.com/padosoft/agentic-qa-kit/blob/main/docs/PACK-AUTHORING.md — this path is only present in the source repo, not in the npm tarball) @@ -104,6 +108,8 @@ ${bold('Common options')} --seed (run) deterministic run_id seed — useful for replay --targets (install-agent-files) comma-separated targets: claude,codex,gemini,copilot --project-name (install-agent-files) override the slug embedded in instruction files + --run-id (report) target a specific run; default = latest + --format (report) md | json | both (default: both) --sut-type (pack new) api | web | cli | lib | agent | pipeline --description (pack new) one-line summary written into the manifest --author (pack new) manifest author field @@ -256,6 +262,37 @@ async function main(): Promise { } return 0; } + case 'report': { + printHeader('report'); + if (args.flags.has('run-id') && !args.values.has('run-id')) { + console.error(red('aqa report: --run-id requires a value')); + return 1; + } + if (args.flags.has('format') && !args.values.has('format')) { + console.error(red('aqa report: --format requires a value')); + return 1; + } + const reportOpts: Parameters[0] = { root: cwd }; + if (args.values.has('run-id')) reportOpts.runId = args.values.get('run-id') ?? ''; + if (args.values.has('format')) { + const fmt = args.values.get('format') ?? ''; + if (fmt !== 'md' && fmt !== 'json' && fmt !== 'both') { + console.error(red(`aqa report: --format must be md | json | both, got "${fmt}"`)); + return 1; + } + reportOpts.format = fmt; + } + const result = runReport(reportOpts); + if (!result.ok) { + console.error(red(` ✗ ${result.error}`)); + return 1; + } + console.info(` ${green('✓')} ${bold(result.runId)}`); + console.info(` ${dim('runDir: ')}${result.runDir}`); + console.info(` ${dim('findings: ')}${result.findingsCount}`); + for (const f of result.files) console.info(` ${green('+')} ${f}`); + return 0; + } case 'pack': { // Subcommand router for `aqa pack `. const sub = args.positionals[0]; diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts new file mode 100644 index 0000000..1a036c6 --- /dev/null +++ b/packages/kit/src/commands/report.ts @@ -0,0 +1,443 @@ +/** + * `aqa report` — renders the on-disk run artifacts (`events.jsonl` + + * `findings.jsonl`) into a Markdown summary and a stable JSON view for + * downstream dashboards. + * + * Loose contract (intentional, see also `runRun`): + * - `aqa run` writes events + findings, NOT a `run.json` (the canonical + * Run shape is reconstructed from the audit chain). `aqa report` does + * that reconstruction here from the first `run_started` and last + * `run_finished` events so reports remain replayable from the audit + * trail alone, without coupling reporter output to a new sidecar file. + * - Reports are written into the same run directory so a junior can hand + * a single `runDir` to a teammate and get the whole story (events, + * findings, replay artifacts, plus the rendered report). + */ + +import { existsSync, lstatSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { renderJson, renderMarkdown } from '@aqa/reporter'; +import { Finding, Run } from '@aqa/schemas'; + +export type ReportFormat = 'md' | 'json' | 'both'; + +// Mirrors @aqa/schemas LongSlug exactly: SlugPattern + max length 256. +// Previous v1.9 iteration used a looser /^[a-z0-9-]{1,80}$/ that both +// capped at 80 instead of 256 AND allowed leading/trailing dashes plus +// `--` runs — so a malformed --run-id could pass this guard yet later +// fail at a Finding.parse() site. Pattern + length checked separately +// so the error message can distinguish "bad characters" from "too long" +// (helpful for hash-suffixed run ids on the edge of the cap). +const RUN_ID_RE = /^[a-z0-9](?:-?[a-z0-9])*$/; +const RUN_ID_MAX_LEN = 256; + +export interface ReportOptions { + root: string; + /** Run id (== directory name under `.aqa/runs/`). Omit to use the latest run. */ + runId?: string; + /** Default 'both'. Controls which artifacts are written. */ + format?: ReportFormat; +} + +export interface ReportOk { + ok: true; + runId: string; + runDir: string; + files: string[]; + findingsCount: number; +} + +export interface ReportErr { + ok: false; + error: string; +} + +export type ReportResult = ReportOk | ReportErr; + +export function runReport(opts: ReportOptions): ReportResult { + const format: ReportFormat = opts.format ?? 'both'; + if (format !== 'md' && format !== 'json' && format !== 'both') { + return { + ok: false, + error: `report: --format must be md | json | both, got "${String(format)}"`, + }; + } + const runsRoot = join(opts.root, '.aqa', 'runs'); + if (!existsSync(runsRoot)) { + return { + ok: false, + error: `report: no runs directory at ${runsRoot} — run \`aqa run --profile smoke\` first`, + }; + } + const runIdResolved = opts.runId ?? latestRunId(runsRoot); + if (!runIdResolved) { + return { + ok: false, + error: `report: no runs found under ${runsRoot} — run \`aqa run --profile smoke\` first`, + }; + } + // Defense-in-depth: a `--run-id` of `../../../etc/passwd` (or similar) + // would otherwise resolve outside `.aqa/runs/`. The schema treats run + // IDs as LongSlug; mirror that constraint at the CLI boundary so a + // typo or malicious input can't drive writes anywhere outside the + // intended directory. + if (runIdResolved.length > RUN_ID_MAX_LEN) { + return { + ok: false, + error: `report: invalid run id — exceeds ${RUN_ID_MAX_LEN}-char LongSlug cap (got ${runIdResolved.length})`, + }; + } + if (!RUN_ID_RE.test(runIdResolved)) { + return { + ok: false, + error: `report: invalid run id "${runIdResolved}" — must match ${RUN_ID_RE.source}`, + }; + } + const runDir = join(runsRoot, runIdResolved); + if (!safeIsDir(runDir)) { + return { ok: false, error: `report: run directory not found: ${runDir}` }; + } + // Refuse symlinked run dirs. A previous run (or an attacker with FS + // write under .aqa/runs/) could leave a symlink pointing outside the + // project; `report.md` / `report.json` writes would then land + // wherever the link points. lstatSync (not statSync) so the test + // doesn't transparently follow the link. + try { + if (lstatSync(runDir).isSymbolicLink()) { + return { + ok: false, + error: `report: refusing to write into symlinked run directory ${runDir}`, + }; + } + } catch (e) { + return { + ok: false, + error: `report: cannot stat run directory ${runDir}: ${e instanceof Error ? e.message : String(e)}`, + }; + } + + const eventsPath = join(runDir, 'events.jsonl'); + // Missing artifacts → fail-fast. A silent empty report on a corrupted + // run dir hides the real problem (the run never wrote its audit trail). + if (!existsSync(eventsPath)) { + return { + ok: false, + error: `report: events.jsonl is missing at ${eventsPath} — run is incomplete or corrupted`, + }; + } + let events: ReadonlyArray>; + try { + events = readJsonl(eventsPath); + } catch (e) { + return { + ok: false, + error: `report: cannot read events.jsonl: ${e instanceof Error ? e.message : String(e)}`, + }; + } + + const findingsPath = join(runDir, 'findings.jsonl'); + if (!existsSync(findingsPath)) { + return { + ok: false, + error: `report: findings.jsonl is missing at ${findingsPath} — run is incomplete or corrupted`, + }; + } + let findings: readonly Finding.Finding[]; + try { + findings = readJsonl(findingsPath).map((raw, idx) => { + const parsed = Finding.Finding.safeParse(raw); + if (!parsed.success) { + throw new Error(`findings.jsonl line ${idx + 1}: ${parsed.error.message}`); + } + return parsed.data; + }); + } catch (e) { + return { + ok: false, + error: `report: cannot read findings.jsonl: ${e instanceof Error ? e.message : String(e)}`, + }; + } + + const runDraft = reconstructRun({ + runId: runIdResolved, + runDir, + events, + findingsCount: findings.length, + }); + // Validate the reconstructed Run against the canonical schema before + // handing it to the renderers. reconstructRun could in theory produce + // a Run.parse-incompatible shape (e.g. terminal state with missing + // finished_at if the audit chain itself is malformed) — surfacing that + // as a structured error keeps `report.json` consumers safe instead of + // shipping a JSON the admin UI silently rejects later. + const runParsed = Run.Run.safeParse(runDraft); + if (!runParsed.success) { + return { + ok: false, + error: `report: reconstructed run failed schema validation (audit chain is malformed): ${runParsed.error.message.split('\n')[0]}`, + }; + } + const run = runParsed.data; + + const written: string[] = []; + // Writes can fail (read-only FS, disk full, permission). Return a + // structured error rather than letting the exception escape into the + // CLI's top-level unhandled-error path so callers get a clean message + // plus an exit code derived from the structured result. + // Per-file symlink check: even with a non-symlinked run dir, a + // pre-existing `report.md`/`report.json` symlink would be followed + // by writeFileSync and let an attacker (or a prior run) redirect the + // writes outside the project. lstat each target before writing. + try { + if (format === 'md' || format === 'both') { + const mdPath = join(runDir, 'report.md'); + if (existsSync(mdPath) && lstatSync(mdPath).isSymbolicLink()) { + return { + ok: false, + error: `report: refusing to overwrite symlinked report file ${mdPath}`, + }; + } + writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8'); + written.push(mdPath); + } + if (format === 'json' || format === 'both') { + const jsonPath = join(runDir, 'report.json'); + if (existsSync(jsonPath) && lstatSync(jsonPath).isSymbolicLink()) { + return { + ok: false, + error: `report: refusing to overwrite symlinked report file ${jsonPath}`, + }; + } + writeFileSync( + jsonPath, + `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`, + 'utf8', + ); + written.push(jsonPath); + } + } catch (e) { + return { + ok: false, + error: `report: cannot write report file: ${e instanceof Error ? e.message : String(e)}`, + }; + } + + return { + ok: true, + runId: runIdResolved, + runDir, + files: written, + findingsCount: findings.length, + }; +} + +function latestRunId(runsRoot: string): string | undefined { + let entries: string[]; + try { + entries = readdirSync(runsRoot); + } catch { + return undefined; + } + // Pick by file mtime, not lexical name. `aqa run --seed` produces + // hash-based IDs (`run-`) that don't sort by recency. mtime is + // monotonic enough for "the most recent run" semantics — lexical name + // is only used as a deterministic tie-breaker (same-millisecond runs). + // + // Filter: only consider directories that look like actual runs + // (presence of events.jsonl, the canonical run-start marker). Without + // this, an unrelated subdirectory under `.aqa/runs/` — or a symlink + // whose target's mtime is newer than any real run — would be selected + // by mtime and either fail with a confusing error or accidentally + // generate a report for the wrong directory. Also reject symlinks at + // the dir-entry level for the same reason as the symlink check in + // runReport: writes into a symlinked dir leak outside the project. + const candidates: Array<{ name: string; mtimeMs: number }> = []; + for (const name of entries) { + if (!RUN_ID_RE.test(name) || name.length > RUN_ID_MAX_LEN) continue; + const dir = join(runsRoot, name); + try { + const lst = lstatSync(dir); + if (lst.isSymbolicLink()) continue; + if (!lst.isDirectory()) continue; + if (!existsSync(join(dir, 'events.jsonl'))) continue; + const st = statSync(dir); + candidates.push({ name, mtimeMs: st.mtimeMs }); + } catch { + // ignore entries we can't stat (broken symlinks, races) + } + } + candidates.sort((a, b) => { + if (b.mtimeMs !== a.mtimeMs) return b.mtimeMs - a.mtimeMs; + return b.name.localeCompare(a.name); + }); + return candidates[0]?.name; +} + +function safeIsDir(p: string): boolean { + try { + return statSync(p).isDirectory(); + } catch { + return false; + } +} + +function readJsonl(path: string): Array> { + // Caller has already confirmed the file exists — this helper only + // returns [] for an empty file (legitimate "zero events" case), + // never for a missing one. + // Strict: a non-empty line that parses as valid JSON but isn't a + // plain object (e.g. `null`, `[]`, `"x"`, `42`) is rejected. Silently + // dropping such lines would turn a corrupted file into a seemingly + // successful report. + const text = readFileSync(path, 'utf8'); + const out: Array> = []; + let lineNo = 0; + for (const raw of text.split('\n')) { + lineNo += 1; + const line = raw.trim(); + if (!line) continue; + let parsed: unknown; + try { + parsed = JSON.parse(line); + } catch (e) { + throw new Error(`${path} line ${lineNo}: ${e instanceof Error ? e.message : String(e)}`); + } + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + // typeof null === 'object', so explicit null branch. + const got = parsed === null ? 'null' : Array.isArray(parsed) ? 'array' : typeof parsed; + throw new Error(`${path} line ${lineNo}: expected a JSON object, got ${got}`); + } + out.push(parsed as Record); + } + return out; +} + +interface ReconstructInput { + runId: string; + runDir: string; + events: ReadonlyArray>; + findingsCount: number; +} + +function reconstructRun(input: ReconstructInput): Run.Run { + // Best-effort reconstruction from the audit chain — see file header. The + // reporter only reads a small subset of Run fields, but we still build the + // full schema-conformant object so the JSON report stays valid for the + // admin UI and any external dashboard. + const { runId, runDir, events, findingsCount } = input; + const started = pickEvent(events, 'run_started'); + const finished = pickEvent(events, 'run_finished'); + + const startedAt = readString(started, 'ts') ?? new Date(0).toISOString(); + const finishedAt = readString(finished, 'ts'); + const profile = readPayloadString(started, 'profile') ?? 'unknown'; + const project = readPayloadString(started, 'project') ?? 'unknown'; + const scenariosRun = readPayloadNumber(finished, 'scenarios_run') ?? 0; + const totalsFindings = readPayloadNumber(finished, 'findings') ?? findingsCount; + + const state: Run.Run['state'] = deriveState(finished, scenariosRun); + + const run: Run.Run = { + schema_version: '1', + id: runId, + started_at: startedAt, + ...(finishedAt ? { finished_at: finishedAt } : {}), + state, + project, + profile, + execution_mode: 'orchestrator', + config_snapshot: { + profile, + execution_mode: 'orchestrator', + packs: [], + // Synthetic placeholder: this report path doesn't recompute the + // config hash from disk (the canonical hash is computed by the + // runner at run-time). The admin UI displays config_hash in + // replay copy, so an all-zeros value will be visible — users + // viewing a CLI-rendered report.json should treat this hash as + // a "not computed" sentinel, not a real digest. Encoded as 64 + // zeros (a valid Sha256 by shape) so the JSON still passes + // Run.parse(). + config_hash: '0'.repeat(64), + }, + totals: { + scenarios: scenariosRun, + findings: totalsFindings, + probes: 0, + llm_tokens_in: 0, + llm_tokens_out: 0, + llm_cost_usd: 0, + }, + artifact_dir: runDir, + }; + return run; +} + +function deriveState( + finished: Record | undefined, + scenariosRun: number, +): Run.Run['state'] { + // `runRun` writes `run_finished` on success AND on most failure paths + // (pack errors, scenario errors, missing scenarios, unsafe paths, runtime + // errors, zero scenarios). Treat any non-zero error counter — or a run + // that completed zero scenarios — as `failed` so the report doesn't + // mislabel broken runs as successes. + if (!finished) return 'running'; + const errorKeys = [ + 'pack_errors', + 'scenario_errors', + 'missing_scenarios', + 'unsafe_paths', + 'runtime_errors', + ] as const; + for (const k of errorKeys) { + const v = readPayloadNumber(finished, k); + if (typeof v === 'number' && v > 0) return 'failed'; + } + if (scenariosRun === 0) return 'failed'; + return 'succeeded'; +} + +function pickEvent( + events: ReadonlyArray>, + kind: string, +): Record | undefined { + // run_started: first match; run_finished: last (a re-run within the same + // dir would have appended a new finalization line). Both kinds appear at + // most once today, but the lookup stays defensive. + if (kind === 'run_finished') { + for (let i = events.length - 1; i >= 0; i--) { + if (events[i]?.kind === kind) return events[i]; + } + return undefined; + } + for (const e of events) { + if (e.kind === kind) return e; + } + return undefined; +} + +function readString(obj: Record | undefined, key: string): string | undefined { + const v = obj?.[key]; + return typeof v === 'string' ? v : undefined; +} + +function readPayloadString( + obj: Record | undefined, + key: string, +): string | undefined { + const payload = obj?.payload; + if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined; + const v = (payload as Record)[key]; + return typeof v === 'string' ? v : undefined; +} + +function readPayloadNumber( + obj: Record | undefined, + key: string, +): number | undefined { + const payload = obj?.payload; + if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined; + const v = (payload as Record)[key]; + return typeof v === 'number' && Number.isFinite(v) ? v : undefined; +} diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts new file mode 100644 index 0000000..1a4444b --- /dev/null +++ b/packages/kit/test/report-cmd.test.ts @@ -0,0 +1,587 @@ +/** + * v1.9 — `aqa report` CLI verb. + * + * Renders the on-disk run artifacts into report.md + report.json. These + * tests build a synthetic run directory (events.jsonl + findings.jsonl) + * because runRun is async, network-touched in some configurations, and + * over-coupled for a focused reporter test. The synthetic dir matches the + * canonical schema shapes from @aqa/schemas Run/Finding. + */ + +import assert from 'node:assert/strict'; +import { createHash } from 'node:crypto'; +import { + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + symlinkSync, + writeFileSync, +} from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, it } from 'node:test'; +import { runReport } from '../dist/commands/report.js'; + +const RUN_ID = '20260520-000000-runabcdef'; +const STARTED_AT = '2026-05-20T00:00:00.000Z'; +const FINISHED_AT = '2026-05-20T00:00:30.000Z'; + +// Slight sleep used to put a real mtime delta between consecutive run dirs +// in the "latest run" test. Some filesystems otherwise group rapid mkdirs +// under the same mtimeMs and force the tie-breaker into action. +function sleep(ms: number): Promise { + return new Promise((r) => setTimeout(r, ms)); +} + +function makeTempRoot(): string { + return mkdtempSync(join(tmpdir(), 'aqa-report-')); +} + +function makeRunDir(root: string, runId: string): string { + const dir = join(root, '.aqa', 'runs', runId); + mkdirSync(dir, { recursive: true }); + return dir; +} + +function sha256Hex(s: string): string { + return createHash('sha256').update(s, 'utf8').digest('hex'); +} + +/** Build minimal valid hash-chained events.jsonl. */ +function writeEvents( + runDir: string, + opts: { runId: string; profile: string; project: string; findingsCount: number }, +): void { + const events: Array> = []; + let prev: string | null = null; + function append(partial: Omit, 'seq' | 'prev_hash' | 'hash'>): void { + const seq = events.length; + // Hash recomputation here is a stub — the writer's exact canonicalization + // is exercised in @aqa/runner / @aqa/compliance tests. `aqa report` + // doesn't validate the chain (it just parses fields), so any + // deterministic stub hash keeps schema.parse happy. + const body = JSON.stringify({ ...partial, seq }); + const hash = sha256Hex((prev ?? '') + body); + const evt = { schema_version: '1', seq, prev_hash: prev, hash, ...partial }; + events.push(evt); + prev = hash; + } + append({ + ts: STARTED_AT, + run_id: opts.runId, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: opts.profile, project: opts.project }, + }); + append({ + ts: FINISHED_AT, + run_id: opts.runId, + kind: 'run_finished', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { + scenarios_run: 2, + findings: opts.findingsCount, + pack_errors: 0, + scenario_errors: 0, + missing_scenarios: 0, + unsafe_paths: 0, + runtime_errors: 0, + }, + }); + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); +} + +function writeFindings(runDir: string, count: number, runId: string = RUN_ID): void { + const lines: string[] = []; + for (let i = 0; i < count; i++) { + const finding = { + schema_version: '1', + id: `AQA-2026-${String(i + 1).padStart(4, '0')}`, + run_id: runId, + risk_id: 'r-example', + scenario_id: 'scn-example-demo', + title: `Synthetic finding ${i + 1}`, + severity: i === 0 ? 'critical' : 'low', + status: 'draft', + summary: 'reporter smoke test', + evidence: [], + execution_mode: 'orchestrator', + verification_floor: 'scenario_level', + confidence: 0.5, + discovered_at: STARTED_AT, + }; + lines.push(JSON.stringify(finding)); + } + writeFileSync( + join(runDir, 'findings.jsonl'), + `${lines.join('\n')}${lines.length ? '\n' : ''}`, + 'utf8', + ); +} + +describe('aqa report — happy path', () => { + it('renders both report.md and report.json for the explicit run-id', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 2 }); + writeFindings(runDir, 2); + + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true, `expected ok, got ${JSON.stringify(result)}`); + if (!result.ok) return; + + assert.equal(result.runId, RUN_ID); + assert.equal(result.findingsCount, 2); + assert.ok(existsSync(join(runDir, 'report.md')), 'report.md must exist'); + assert.ok(existsSync(join(runDir, 'report.json')), 'report.json must exist'); + + const md = readFileSync(join(runDir, 'report.md'), 'utf8'); + assert.match(md, /# AQA report/); + assert.match(md, /demo/); + assert.match(md, /AQA-2026-0001/); + assert.match(md, /Synthetic finding 1/); + + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + schema_version: string; + run: { id: string; project: string; profile: string; state: string }; + findings: Array<{ id: string }>; + summary: { total: number; severities: Record }; + }; + assert.equal(json.schema_version, '1'); + assert.equal(json.run.id, RUN_ID); + assert.equal(json.run.project, 'demo'); + assert.equal(json.run.profile, 'smoke'); + assert.equal(json.run.state, 'succeeded'); + assert.equal(json.findings.length, 2); + assert.equal(json.summary.total, 2); + assert.equal(json.summary.severities.critical, 1); + assert.equal(json.summary.severities.low, 1); + }); + + it('defaults to the latest run by file mtime, not lexical name (Copilot iter 1 P2)', async () => { + // Critical correctness: `aqa run --seed` produces hash-based IDs + // (run-) that do NOT sort by recency. Picking by mtime keeps + // "latest" honest in mixed-naming directories. Here we intentionally + // create the lexically-EARLIER name LAST so a name-based sort would + // pick the wrong dir. + const root = makeTempRoot(); + const earlierName = 'run-aaaa-but-newer'; // lexically earlier + const olderName = 'run-zzzz-but-older'; // lexically later + const olderDir = makeRunDir(root, olderName); + writeEvents(olderDir, { + runId: olderName, + profile: 'smoke', + project: 'demo', + findingsCount: 1, + }); + writeFindings(olderDir, 1, olderName); + await sleep(20); + const newerDir = makeRunDir(root, earlierName); + writeEvents(newerDir, { + runId: earlierName, + profile: 'release-gate', + project: 'demo', + findingsCount: 3, + }); + writeFindings(newerDir, 3, earlierName); + + const result = runReport({ root }); + assert.equal(result.ok, true); + if (!result.ok) return; + // mtime-newer dir wins even though its name sorts EARLIER lexically. + assert.equal(result.runId, earlierName); + assert.equal(result.findingsCount, 3); + }); + + it('emits only report.md when format=md', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + + const result = runReport({ root, runId: RUN_ID, format: 'md' }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.ok(existsSync(join(runDir, 'report.md'))); + assert.ok(!existsSync(join(runDir, 'report.json')), 'report.json must not exist for format=md'); + assert.equal(result.files.length, 1); + }); + + it('emits only report.json when format=json', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + + const result = runReport({ root, runId: RUN_ID, format: 'json' }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.ok(!existsSync(join(runDir, 'report.md'))); + assert.ok(existsSync(join(runDir, 'report.json'))); + }); + + it('handles zero findings without crashing', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.findingsCount, 0); + const md = readFileSync(join(runDir, 'report.md'), 'utf8'); + assert.match(md, /No findings/); + }); +}); + +describe('aqa report — error cases', () => { + it('returns error when .aqa/runs does not exist', () => { + const root = makeTempRoot(); + const result = runReport({ root }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no runs directory/); + }); + + it('returns error when --run-id points to a missing dir', () => { + const root = makeTempRoot(); + makeRunDir(root, RUN_ID); + const result = runReport({ root, runId: 'does-not-exist' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /run directory not found/); + }); + + it('returns error when .aqa/runs exists but is empty', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no runs found/); + }); + + it('returns error on malformed JSONL line in events.jsonl', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeFileSync(join(runDir, 'events.jsonl'), '{not json\n', 'utf8'); + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /cannot read events\.jsonl/); + }); + + it('returns error when events.jsonl is missing (Copilot iter 1 P1)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + // findings.jsonl present, events.jsonl missing + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /events\.jsonl is missing/); + }); + + it('returns error when findings.jsonl is missing (Copilot iter 1 P1)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + // findings.jsonl intentionally not created + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /findings\.jsonl is missing/); + }); + + it('rejects a --run-id that would escape .aqa/runs via traversal (Copilot iter 1)', () => { + const root = makeTempRoot(); + // .aqa/runs has to exist or we hit the prior guard first + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root, runId: '../../etc/passwd' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /invalid run id/); + }); + + it('rejects a --run-id containing characters outside [a-z0-9-]', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root, runId: 'NOT_A_SLUG' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /invalid run id/); + }); + + it('rejects a --run-id with leading/trailing dashes (LongSlug parity — Copilot iter 2)', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + for (const bad of ['-leading', 'trailing-', 'double--dash']) { + const result = runReport({ root, runId: bad }); + assert.equal(result.ok, false, `${bad} must be rejected`); + if (result.ok) continue; + assert.match(result.error, /invalid run id/); + } + }); + + it('rejects a --run-id longer than 256 chars (LongSlug cap — Copilot iter 2)', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root, runId: 'a'.repeat(257) }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /exceeds 256-char/); + }); + + it('refuses a run dir that is actually a symlink (Copilot iter 2)', () => { + const root = makeTempRoot(); + const runsRoot = join(root, '.aqa', 'runs'); + mkdirSync(runsRoot, { recursive: true }); + const realDir = makeRunDir(root, 'real-target'); + writeEvents(realDir, { + runId: 'real-target', + profile: 'smoke', + project: 'demo', + findingsCount: 0, + }); + writeFindings(realDir, 0, 'real-target'); + const linkPath = join(runsRoot, 'sneaky-link'); + try { + symlinkSync(realDir, linkPath, 'dir'); + } catch { + // Windows without dev-mode / non-admin can't create symlinks — + // skip this guard by returning instead of asserting; the + // production behaviour is exercised on Linux CI anyway. + return; + } + const result = runReport({ root, runId: 'sneaky-link' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /symlinked run directory/); + }); + + it('rejects a JSONL line that is valid JSON but not a plain object (Copilot iter 2)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeFileSync(join(runDir, 'events.jsonl'), 'null\n', 'utf8'); + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /expected a JSON object, got null/); + }); + + it('rejects a JSONL line that parses as a JSON array (Copilot iter 2)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeFileSync(join(runDir, 'events.jsonl'), '[1, 2, 3]\n', 'utf8'); + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /expected a JSON object, got array/); + }); + + it('refuses to overwrite a symlinked report.md (Copilot iter 3)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + const outside = join(makeTempRoot(), 'evil-report.md'); + writeFileSync(outside, '# attacker controlled\n', 'utf8'); + try { + symlinkSync(outside, join(runDir, 'report.md'), 'file'); + } catch { + return; // Windows non-admin can't create symlinks; skip. + } + const result = runReport({ root, runId: RUN_ID, format: 'md' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /refusing to overwrite symlinked report file/); + // Crucial: the attacker-controlled file MUST be untouched. + assert.equal(readFileSync(outside, 'utf8'), '# attacker controlled\n'); + }); + + it('latestRunId only considers dirs with events.jsonl (Copilot iter 3)', async () => { + const root = makeTempRoot(); + // Two real run dirs (older + newer) + a non-run subdir that's newer + // than both but lacks events.jsonl. The non-run dir must NOT win. + const olderDir = makeRunDir(root, 'run-aaaa'); + writeEvents(olderDir, { + runId: 'run-aaaa', + profile: 'smoke', + project: 'demo', + findingsCount: 0, + }); + writeFindings(olderDir, 0, 'run-aaaa'); + await sleep(20); + const newerDir = makeRunDir(root, 'run-bbbb'); + writeEvents(newerDir, { + runId: 'run-bbbb', + profile: 'smoke', + project: 'demo', + findingsCount: 0, + }); + writeFindings(newerDir, 0, 'run-bbbb'); + await sleep(20); + // Pollute .aqa/runs with a NEWER non-run directory. + mkdirSync(join(root, '.aqa', 'runs', 'readme-stash'), { recursive: true }); + + const result = runReport({ root }); + assert.equal(result.ok, true); + if (!result.ok) return; + // Must be the most-recent REAL run, not the polluting subdirectory. + assert.equal(result.runId, 'run-bbbb'); + }); +}); + +describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => { + it('marks state=failed when run_finished payload has pack_errors > 0', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + // Custom events with non-zero pack_errors + const events = [ + { + schema_version: '1', + seq: 0, + prev_hash: null, + hash: '0'.repeat(64), + ts: STARTED_AT, + run_id: RUN_ID, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: 'smoke', project: 'demo' }, + }, + { + schema_version: '1', + seq: 1, + prev_hash: '0'.repeat(64), + hash: '1'.repeat(64), + ts: FINISHED_AT, + run_id: RUN_ID, + kind: 'run_finished', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { + scenarios_run: 0, + findings: 0, + pack_errors: 1, + scenario_errors: 0, + missing_scenarios: 0, + unsafe_paths: 0, + runtime_errors: 0, + }, + }, + ]; + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); + writeFindings(runDir, 0); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + run: { state: string }; + }; + assert.equal(json.run.state, 'failed'); + }); + + it('marks state=failed when scenarios_run is 0 even with no error counters', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + // writeEvents above sets scenarios_run: 2 — override with a custom file + const events = [ + { + schema_version: '1', + seq: 0, + prev_hash: null, + hash: '0'.repeat(64), + ts: STARTED_AT, + run_id: RUN_ID, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: 'smoke', project: 'demo' }, + }, + { + schema_version: '1', + seq: 1, + prev_hash: '0'.repeat(64), + hash: '1'.repeat(64), + ts: FINISHED_AT, + run_id: RUN_ID, + kind: 'run_finished', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { + scenarios_run: 0, + findings: 0, + pack_errors: 0, + scenario_errors: 0, + missing_scenarios: 0, + unsafe_paths: 0, + runtime_errors: 0, + }, + }, + ]; + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); + writeFindings(runDir, 0); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + run: { state: string }; + }; + assert.equal(json.run.state, 'failed'); + }); + + it('marks state=running when no run_finished event is present', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + const events = [ + { + schema_version: '1', + seq: 0, + prev_hash: null, + hash: '0'.repeat(64), + ts: STARTED_AT, + run_id: RUN_ID, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: 'smoke', project: 'demo' }, + }, + ]; + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); + writeFindings(runDir, 0); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + run: { state: string; finished_at?: string }; + }; + assert.equal(json.run.state, 'running'); + assert.equal(json.run.finished_at, undefined); + }); + + it('returns error on schema-invalid finding', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 }); + writeFileSync(join(runDir, 'findings.jsonl'), `${JSON.stringify({ id: 'broken' })}\n`, 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /cannot read findings\.jsonl/); + }); +});