diff --git a/bun.lock b/bun.lock
index 6570d4c..88fa291 100644
--- a/bun.lock
+++ b/bun.lock
@@ -91,6 +91,7 @@
"dependencies": {
"@aqa/adapters": "workspace:*",
"@aqa/pack-loader": "workspace:*",
+ "@aqa/reporter": "workspace:*",
"@aqa/runner": "workspace:*",
"@aqa/schemas": "workspace:*",
"kleur": "^4.1.5",
diff --git a/packages/kit/package.json b/packages/kit/package.json
index cc0ef06..659b3e6 100644
--- a/packages/kit/package.json
+++ b/packages/kit/package.json
@@ -20,12 +20,13 @@
"build": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs",
"typecheck": "tsc -p tsconfig.json --noEmit",
"pretest": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs",
- "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts test/install-agent-files-cmd.test.ts",
+ "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts test/install-agent-files-cmd.test.ts test/report-cmd.test.ts",
"clean": "node --input-type=module -e \"import { rmSync } from 'node:fs'; for (const p of ['dist','.tsbuildinfo']) { try { rmSync(p, { recursive: true, force: true }); } catch {} }\""
},
"dependencies": {
"@aqa/adapters": "workspace:*",
"@aqa/pack-loader": "workspace:*",
+ "@aqa/reporter": "workspace:*",
"@aqa/runner": "workspace:*",
"@aqa/schemas": "workspace:*",
"kleur": "^4.1.5",
diff --git a/packages/kit/src/cli/aqa.ts b/packages/kit/src/cli/aqa.ts
index 4e3fc37..b8fa94f 100644
--- a/packages/kit/src/cli/aqa.ts
+++ b/packages/kit/src/cli/aqa.ts
@@ -4,6 +4,7 @@ import { type CheckStatus, runDoctor } from '../commands/doctor.js';
import { runInit } from '../commands/init.js';
import { runInstallAgentFiles } from '../commands/install-agent-files.js';
import { runPackNew } from '../commands/pack-new.js';
+import { runReport } from '../commands/report.js';
import { runRun } from '../commands/run.js';
import { runValidate } from '../commands/validate.js';
@@ -32,6 +33,8 @@ const VALUE_FLAGS = new Set([
'license',
'targets',
'project-name',
+ 'run-id',
+ 'format',
]);
function parseArgs(argv: string[]): ParsedArgs {
@@ -93,6 +96,7 @@ ${bold('Commands')}
install-agent-files --targets … Write CLAUDE.md / AGENTS.md / GEMINI.md / .github/copilot-instructions.md
plus per-agent skills under .claude/ .agents/ .gemini/ .github/
run [--profile
] Execute scenarios for the given profile; write events + findings
+ report [--run-id ] Render the latest (or specified) run as report.md + report.json
pack new Scaffold a new pack at /packs// (see the pack authoring
guide: https://github.com/padosoft/agentic-qa-kit/blob/main/docs/PACK-AUTHORING.md
— this path is only present in the source repo, not in the npm tarball)
@@ -104,6 +108,8 @@ ${bold('Common options')}
--seed (run) deterministic run_id seed — useful for replay
--targets (install-agent-files) comma-separated targets: claude,codex,gemini,copilot
--project-name (install-agent-files) override the slug embedded in instruction files
+ --run-id (report) target a specific run; default = latest
+ --format (report) md | json | both (default: both)
--sut-type (pack new) api | web | cli | lib | agent | pipeline
--description (pack new) one-line summary written into the manifest
--author (pack new) manifest author field
@@ -256,6 +262,37 @@ async function main(): Promise {
}
return 0;
}
+ case 'report': {
+ printHeader('report');
+ if (args.flags.has('run-id') && !args.values.has('run-id')) {
+ console.error(red('aqa report: --run-id requires a value'));
+ return 1;
+ }
+ if (args.flags.has('format') && !args.values.has('format')) {
+ console.error(red('aqa report: --format requires a value'));
+ return 1;
+ }
+ const reportOpts: Parameters[0] = { root: cwd };
+ if (args.values.has('run-id')) reportOpts.runId = args.values.get('run-id') ?? '';
+ if (args.values.has('format')) {
+ const fmt = args.values.get('format') ?? '';
+ if (fmt !== 'md' && fmt !== 'json' && fmt !== 'both') {
+ console.error(red(`aqa report: --format must be md | json | both, got "${fmt}"`));
+ return 1;
+ }
+ reportOpts.format = fmt;
+ }
+ const result = runReport(reportOpts);
+ if (!result.ok) {
+ console.error(red(` ✗ ${result.error}`));
+ return 1;
+ }
+ console.info(` ${green('✓')} ${bold(result.runId)}`);
+ console.info(` ${dim('runDir: ')}${result.runDir}`);
+ console.info(` ${dim('findings: ')}${result.findingsCount}`);
+ for (const f of result.files) console.info(` ${green('+')} ${f}`);
+ return 0;
+ }
case 'pack': {
// Subcommand router for `aqa pack `.
const sub = args.positionals[0];
diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts
new file mode 100644
index 0000000..1a036c6
--- /dev/null
+++ b/packages/kit/src/commands/report.ts
@@ -0,0 +1,443 @@
+/**
+ * `aqa report` — renders the on-disk run artifacts (`events.jsonl` +
+ * `findings.jsonl`) into a Markdown summary and a stable JSON view for
+ * downstream dashboards.
+ *
+ * Loose contract (intentional, see also `runRun`):
+ * - `aqa run` writes events + findings, NOT a `run.json` (the canonical
+ * Run shape is reconstructed from the audit chain). `aqa report` does
+ * that reconstruction here from the first `run_started` and last
+ * `run_finished` events so reports remain replayable from the audit
+ * trail alone, without coupling reporter output to a new sidecar file.
+ * - Reports are written into the same run directory so a junior can hand
+ * a single `runDir` to a teammate and get the whole story (events,
+ * findings, replay artifacts, plus the rendered report).
+ */
+
+import { existsSync, lstatSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { renderJson, renderMarkdown } from '@aqa/reporter';
+import { Finding, Run } from '@aqa/schemas';
+
+export type ReportFormat = 'md' | 'json' | 'both'; // which artifact(s) runReport writes: report.md, report.json, or both
+
+// Mirrors @aqa/schemas LongSlug exactly: SlugPattern + max length 256.
+// Previous v1.9 iteration used a looser /^[a-z0-9-]{1,80}$/ that both
+// capped at 80 instead of 256 AND allowed leading/trailing dashes plus
+// `--` runs — so a malformed --run-id could pass this guard yet later
+// fail at a Finding.parse() site. Pattern + length are checked separately
+// so the error message can distinguish "bad characters" from "too long"
+// (helpful for hash-suffixed run ids on the edge of the cap).
+const RUN_ID_RE = /^[a-z0-9](?:-?[a-z0-9])*$/; // lowercase alphanumerics joined by single dashes; no leading, trailing or doubled dash
+const RUN_ID_MAX_LEN = 256; // compared against the id's string length before the pattern is tested
+
+export interface ReportOptions {
+ root: string; // project root; runs are looked up under <root>/.aqa/runs/
+ /** Run id (== directory name under `.aqa/runs/`). Omit to use the latest run. */
+ runId?: string;
+ /** Default 'both'. Controls which artifacts are written. */
+ format?: ReportFormat;
+}
+
+export interface ReportOk { // success variant of ReportResult
+ ok: true; // discriminant
+ runId: string; // the id that was rendered (explicit or resolved as latest)
+ runDir: string; // <root>/.aqa/runs/<runId>
+ files: string[]; // paths of the report files written, report.md before report.json
+ findingsCount: number; // number of entries parsed from findings.jsonl
+}
+
+export interface ReportErr { // failure variant of ReportResult
+ ok: false; // discriminant
+ error: string; // human-readable message, prefixed with "report: "
+}
+
+export type ReportResult = ReportOk | ReportErr; // discriminated on `ok`; check it before reading the other fields
+
+/**
+ * Renders `report.md` / `report.json` into `<root>/.aqa/runs/<runId>/` from
+ * that run's `events.jsonl` + `findings.jsonl`. Every failure handled here is
+ * returned as `{ ok: false, error }` rather than thrown.
+ *
+ * @param opts - project root, optional run id (default: latest run), format (default `'both'`).
+ * @returns run id, run dir, written file paths (md before json) and findings count.
+ */
+export function runReport(opts: ReportOptions): ReportResult {
+  const format: ReportFormat = opts.format ?? 'both';
+  if (format !== 'md' && format !== 'json' && format !== 'both') {
+    return {
+      ok: false,
+      error: `report: --format must be md | json | both, got "${String(format)}"`,
+    };
+  }
+  const runsRoot = join(opts.root, '.aqa', 'runs');
+  if (!existsSync(runsRoot)) {
+    return {
+      ok: false,
+      error: `report: no runs directory at ${runsRoot} — run \`aqa run --profile smoke\` first`,
+    };
+  }
+  const runIdResolved = opts.runId ?? latestRunId(runsRoot);
+  if (!runIdResolved) {
+    return {
+      ok: false,
+      error: `report: no runs found under ${runsRoot} — run \`aqa run --profile smoke\` first`,
+    };
+  }
+  // Defense-in-depth: `--run-id ../../../etc/passwd` (or similar) would
+  // otherwise resolve outside `.aqa/runs/`. Mirror the schema's LongSlug rule
+  // at the CLI boundary so no input can drive writes outside that directory.
+  if (runIdResolved.length > RUN_ID_MAX_LEN) {
+    return {
+      ok: false,
+      error: `report: invalid run id — exceeds ${RUN_ID_MAX_LEN}-char LongSlug cap (got ${runIdResolved.length})`,
+    };
+  }
+  if (!RUN_ID_RE.test(runIdResolved)) {
+    return {
+      ok: false,
+      error: `report: invalid run id "${runIdResolved}" — must match ${RUN_ID_RE.source}`,
+    };
+  }
+  const runDir = join(runsRoot, runIdResolved);
+  if (!safeIsDir(runDir)) {
+    return { ok: false, error: `report: run directory not found: ${runDir}` };
+  }
+  // Refuse symlinked run dirs: a previous run (or an attacker with FS write
+  // under .aqa/runs/) could leave a link pointing outside the project, and
+  // the report writes would land wherever it points. lstatSync (not
+  // statSync) so the check doesn't transparently follow the link.
+  try {
+    if (lstatSync(runDir).isSymbolicLink()) {
+      return {
+        ok: false,
+        error: `report: refusing to write into symlinked run directory ${runDir}`,
+      };
+    }
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot stat run directory ${runDir}: ${e instanceof Error ? e.message : String(e)}`,
+    };
+  }
+
+  const eventsPath = join(runDir, 'events.jsonl');
+  // Missing artifacts → fail-fast. A silent empty report on a corrupted
+  // run dir hides the real problem (the run never wrote its audit trail).
+  if (!existsSync(eventsPath)) {
+    return {
+      ok: false,
+      error: `report: events.jsonl is missing at ${eventsPath} — run is incomplete or corrupted`,
+    };
+  }
+  let events: ReadonlyArray<Record<string, unknown>>;
+  try {
+    events = readJsonl(eventsPath);
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot read events.jsonl: ${e instanceof Error ? e.message : String(e)}`,
+    };
+  }
+
+  const findingsPath = join(runDir, 'findings.jsonl');
+  if (!existsSync(findingsPath)) {
+    return {
+      ok: false,
+      error: `report: findings.jsonl is missing at ${findingsPath} — run is incomplete or corrupted`,
+    };
+  }
+  let findings: readonly Finding.Finding[];
+  try {
+    findings = readJsonl(findingsPath).map((raw, idx) => {
+      const parsed = Finding.Finding.safeParse(raw);
+      if (!parsed.success) {
+        throw new Error(`findings.jsonl line ${idx + 1}: ${parsed.error.message}`);
+      }
+      return parsed.data;
+    });
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot read findings.jsonl: ${e instanceof Error ? e.message : String(e)}`,
+    };
+  }
+
+  const runDraft = reconstructRun({
+    runId: runIdResolved,
+    runDir,
+    events,
+    findingsCount: findings.length,
+  });
+  // Validate the reconstructed Run against the canonical schema before
+  // rendering. A malformed audit chain can yield a Run.parse-incompatible
+  // shape (e.g. terminal state without finished_at); a structured error
+  // here beats shipping a report.json the admin UI silently rejects later.
+  const runParsed = Run.Run.safeParse(runDraft);
+  if (!runParsed.success) {
+    return {
+      ok: false,
+      error: `report: reconstructed run failed schema validation (audit chain is malformed): ${runParsed.error.message.split('\n')[0]}`,
+    };
+  }
+  const run = runParsed.data;
+
+  const written: string[] = [];
+  // Write failures (read-only FS, disk full, permissions) become a structured
+  // error instead of escaping to the CLI's unhandled-error path. Each target
+  // is lstat'ed first because writeFileSync follows links; existsSync cannot
+  // gate that check — it reports `false` for a dangling link, which would
+  // still be followed and created outside the project.
+  try {
+    if (format === 'md' || format === 'both') {
+      const mdPath = join(runDir, 'report.md');
+      if (lstatSync(mdPath, { throwIfNoEntry: false })?.isSymbolicLink()) {
+        return {
+          ok: false,
+          error: `report: refusing to overwrite symlinked report file ${mdPath}`,
+        };
+      }
+      writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8');
+      written.push(mdPath);
+    }
+    if (format === 'json' || format === 'both') {
+      const jsonPath = join(runDir, 'report.json');
+      if (lstatSync(jsonPath, { throwIfNoEntry: false })?.isSymbolicLink()) {
+        return {
+          ok: false,
+          error: `report: refusing to overwrite symlinked report file ${jsonPath}`,
+        };
+      }
+      writeFileSync(
+        jsonPath,
+        `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`,
+        'utf8',
+      );
+      written.push(jsonPath);
+    }
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot write report file: ${e instanceof Error ? e.message : String(e)}`,
+    };
+  }
+
+  return {
+    ok: true,
+    runId: runIdResolved,
+    runDir,
+    files: written,
+    findingsCount: findings.length,
+  };
+}
+
+function latestRunId(runsRoot: string): string | undefined {
+  let entries: string[];
+  try {
+    entries = readdirSync(runsRoot);
+  } catch {
+    return undefined;
+  }
+  // Returns the name of the most recent run directory under `runsRoot`, or
+  // undefined when none qualifies (or the directory cannot be listed).
+  //
+  // "Most recent" = newest mtime of the run's events.jsonl, not lexical
+  // name: `aqa run --seed` produces hash-based IDs that don't sort by
+  // recency. The audit log's mtime is used rather than the directory's
+  // because writing report.md/report.json into an older run bumps that
+  // directory's mtime and would make it masquerade as the latest run.
+  // Lexical name is only a deterministic tie-breaker.
+  //
+  // Only real (non-symlink) directories with a slug-shaped name and a
+  // regular events.jsonl are considered, so stray subdirectories or links
+  // under `.aqa/runs/` can't be picked and then written into.
+  const candidates: Array<{ name: string; mtimeMs: number }> = [];
+  for (const name of entries) {
+    if (!RUN_ID_RE.test(name) || name.length > RUN_ID_MAX_LEN) continue;
+    const dir = join(runsRoot, name);
+    try {
+      const lst = lstatSync(dir);
+      if (lst.isSymbolicLink()) continue;
+      if (!lst.isDirectory()) continue;
+      const audit = statSync(join(dir, 'events.jsonl')); // throws when absent → entry is skipped by the catch
+      if (!audit.isFile()) continue;
+      candidates.push({ name, mtimeMs: audit.mtimeMs });
+    } catch {
+      // ignore entries we can't stat (no events.jsonl, broken symlinks, races)
+    }
+  }
+  candidates.sort((a, b) => {
+    if (b.mtimeMs !== a.mtimeMs) return b.mtimeMs - a.mtimeMs;
+    return b.name.localeCompare(a.name);
+  });
+  return candidates[0]?.name;
+}
+
+function safeIsDir(p: string): boolean { // true only when `p` can be stat'ed and is a directory
+ try {
+ return statSync(p).isDirectory(); // statSync follows symlinks, so a link to a directory also counts
+ } catch {
+ return false; // any stat failure (e.g. missing path) is reported as "not a directory"
+ }
+}
+
+function readJsonl(path: string): Array> {
+ // Reads a JSONL file and returns one entry per non-blank line, in file
+ // order. Callers check existence first, so [] here only ever means an
+ // empty (or all-blank) file — the legitimate "zero events" case.
+ // Throws Error prefixed with `<path> line <n>:` (1-based, blank lines
+ // counted) when a line is not valid JSON or is valid JSON but not a plain
+ // object (`null`, `[]`, `"x"`, `42`): silently dropping such lines would
+ // turn a corrupted file into a seemingly successful report.
+ const text = readFileSync(path, 'utf8');
+ const out: Array> = [];
+ let lineNo = 0;
+ for (const raw of text.split('\n')) {
+ lineNo += 1;
+ const line = raw.trim(); // also removes the trailing \r of CRLF files
+ if (!line) continue; // blank lines are skipped but still counted in lineNo
+ let parsed: unknown;
+ try {
+ parsed = JSON.parse(line);
+ } catch (e) {
+ throw new Error(`${path} line ${lineNo}: ${e instanceof Error ? e.message : String(e)}`);
+ }
+ if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+ // typeof null === 'object', so explicit null branch.
+ const got = parsed === null ? 'null' : Array.isArray(parsed) ? 'array' : typeof parsed;
+ throw new Error(`${path} line ${lineNo}: expected a JSON object, got ${got}`);
+ }
+ out.push(parsed as Record);
+ }
+ return out;
+}
+
+interface ReconstructInput {
+ runId: string; // becomes Run.id
+ runDir: string; // becomes Run.artifact_dir
+ events: ReadonlyArray>; // parsed events.jsonl entries, in file order
+ findingsCount: number; // fallback for totals.findings when run_finished carries no `findings` number
+}
+
+function reconstructRun(input: ReconstructInput): Run.Run {
+ // Best-effort reconstruction from the audit chain — see file header. The
+ // reporter only reads a small subset of Run fields, but we still build the
+ // full schema-conformant object so the JSON report stays valid for the
+ // admin UI and any external dashboard.
+ const { runId, runDir, events, findingsCount } = input;
+ const started = pickEvent(events, 'run_started');
+ const finished = pickEvent(events, 'run_finished');
+
+ const startedAt = readString(started, 'ts') ?? new Date(0).toISOString(); // epoch placeholder when run_started (or its ts) is absent
+ const finishedAt = readString(finished, 'ts'); // stays undefined while no run_finished event exists
+ const profile = readPayloadString(started, 'profile') ?? 'unknown';
+ const project = readPayloadString(started, 'project') ?? 'unknown';
+ const scenariosRun = readPayloadNumber(finished, 'scenarios_run') ?? 0;
+ const totalsFindings = readPayloadNumber(finished, 'findings') ?? findingsCount; // event payload wins over the parsed findings count
+
+ const state: Run.Run['state'] = deriveState(finished, scenariosRun);
+
+ const run: Run.Run = {
+ schema_version: '1',
+ id: runId,
+ started_at: startedAt,
+ ...(finishedAt ? { finished_at: finishedAt } : {}), // key is omitted entirely rather than set to undefined
+ state,
+ project,
+ profile,
+ execution_mode: 'orchestrator', // constant here; not read from the events
+ config_snapshot: {
+ profile,
+ execution_mode: 'orchestrator',
+ packs: [], // not reconstructed from the events
+ // Synthetic placeholder: this report path doesn't recompute the
+ // config hash from disk (the canonical hash is computed by the
+ // runner at run-time). The admin UI displays config_hash in
+ // replay copy, so an all-zeros value will be visible — users
+ // viewing a CLI-rendered report.json should treat this hash as
+ // a "not computed" sentinel, not a real digest. Encoded as 64
+ // zeros (a valid Sha256 by shape) so the JSON still passes
+ // Run.parse().
+ config_hash: '0'.repeat(64),
+ },
+ totals: {
+ scenarios: scenariosRun,
+ findings: totalsFindings,
+ probes: 0, // the four counters below are fixed at 0; nothing in this function derives them
+ llm_tokens_in: 0,
+ llm_tokens_out: 0,
+ llm_cost_usd: 0,
+ },
+ artifact_dir: runDir,
+ };
+ return run;
+}
+
+function deriveState( // maps the run_finished event (if any) to 'running' | 'failed' | 'succeeded'
+ finished: Record | undefined,
+ scenariosRun: number,
+): Run.Run['state'] {
+ // `runRun` writes `run_finished` on success AND on most failure paths
+ // (pack errors, scenario errors, missing scenarios, unsafe paths, runtime
+ // errors, zero scenarios). Treat any non-zero error counter — or a run
+ // that completed zero scenarios — as `failed` so the report doesn't
+ // mislabel broken runs as successes.
+ if (!finished) return 'running'; // no run_finished event → nothing shows the run ended
+ const errorKeys = [
+ 'pack_errors',
+ 'scenario_errors',
+ 'missing_scenarios',
+ 'unsafe_paths',
+ 'runtime_errors',
+ ] as const;
+ for (const k of errorKeys) {
+ const v = readPayloadNumber(finished, k);
+ if (typeof v === 'number' && v > 0) return 'failed'; // absent or non-numeric counters are skipped, not treated as errors
+ }
+ if (scenariosRun === 0) return 'failed';
+ return 'succeeded';
+}
+
+function pickEvent( // finds one event by its `kind` field; undefined when no event matches
+ events: ReadonlyArray>,
+ kind: string,
+): Record | undefined {
+ // run_started: first match; run_finished: last (a re-run within the same
+ // dir would have appended a new finalization line). Both kinds appear at
+ // most once today, but the lookup stays defensive.
+ if (kind === 'run_finished') {
+ for (let i = events.length - 1; i >= 0; i--) { // scan backwards so the last match wins
+ if (events[i]?.kind === kind) return events[i];
+ }
+ return undefined;
+ }
+ for (const e of events) { // every other kind: first match wins
+ if (e.kind === kind) return e;
+ }
+ return undefined;
+}
+
+function readString(obj: Record | undefined, key: string): string | undefined { // obj[key] when it is a string, otherwise undefined
+ const v = obj?.[key]; // an undefined obj short-circuits to undefined
+ return typeof v === 'string' ? v : undefined;
+}
+
+function readPayloadString( // obj.payload[key] when it is a string, otherwise undefined
+ obj: Record | undefined,
+ key: string,
+): string | undefined {
+ const payload = obj?.payload;
+ if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined; // payload must be a plain object
+ const v = (payload as Record)[key];
+ return typeof v === 'string' ? v : undefined;
+}
+
+function readPayloadNumber( // obj.payload[key] when it is a finite number, otherwise undefined
+ obj: Record | undefined,
+ key: string,
+): number | undefined {
+ const payload = obj?.payload;
+ if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined; // payload must be a plain object
+ const v = (payload as Record)[key];
+ return typeof v === 'number' && Number.isFinite(v) ? v : undefined; // NaN and ±Infinity are rejected as well
+}
diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts
new file mode 100644
index 0000000..1a4444b
--- /dev/null
+++ b/packages/kit/test/report-cmd.test.ts
@@ -0,0 +1,587 @@
+/**
+ * v1.9 — `aqa report` CLI verb.
+ *
+ * Renders the on-disk run artifacts into report.md + report.json. These
+ * tests build a synthetic run directory (events.jsonl + findings.jsonl)
+ * because runRun is async, network-touched in some configurations, and
+ * over-coupled for a focused reporter test. The synthetic dir matches the
+ * canonical schema shapes from @aqa/schemas Run/Finding.
+ */
+
+import assert from 'node:assert/strict';
+import { createHash } from 'node:crypto';
+import {
+ existsSync,
+ mkdirSync,
+ mkdtempSync,
+ readFileSync,
+ symlinkSync,
+ writeFileSync,
+} from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { describe, it } from 'node:test';
+import { runReport } from '../dist/commands/report.js';
+
+const RUN_ID = '20260520-000000-runabcdef';
+const STARTED_AT = '2026-05-20T00:00:00.000Z';
+const FINISHED_AT = '2026-05-20T00:00:30.000Z';
+
+// Slight sleep used to put a real mtime delta between consecutive run dirs
+// in the "latest run" test. Some filesystems otherwise group rapid mkdirs
+// under the same mtimeMs and force the tie-breaker into action.
+function sleep(ms: number): Promise {
+ return new Promise((r) => setTimeout(r, ms)); // resolves (with no value) once the timer fires
+}
+
+function makeTempRoot(): string { // creates and returns a fresh, uniquely named directory under the OS temp dir
+ return mkdtempSync(join(tmpdir(), 'aqa-report-')); // NOTE(review): never removed — the fs imports above include no delete call
+}
+
+function makeRunDir(root: string, runId: string): string { // creates <root>/.aqa/runs/<runId> and returns its path
+ const dir = join(root, '.aqa', 'runs', runId);
+ mkdirSync(dir, { recursive: true }); // recursive: also creates .aqa/ and runs/, and is a no-op when the dir exists
+ return dir;
+}
+
+function sha256Hex(s: string): string { // SHA-256 of the UTF-8 bytes of `s`, as a hex string
+ return createHash('sha256').update(s, 'utf8').digest('hex');
+}
+
+/** Writes a two-event (`run_started`, `run_finished`) hash-chained `events.jsonl` into `runDir`. */
+function writeEvents(
+ runDir: string,
+ opts: { runId: string; profile: string; project: string; findingsCount: number },
+): void {
+ const events: Array> = [];
+ let prev: string | null = null; // hash of the previously appended event; null for the first one
+ function append(partial: Omit, 'seq' | 'prev_hash' | 'hash'>): void {
+ const seq = events.length; // 0-based position in the chain
+ // Hash recomputation here is a stub — the writer's exact canonicalization
+ // is exercised in @aqa/runner / @aqa/compliance tests. `aqa report`
+ // doesn't validate the chain (it just parses fields), so any
+ // deterministic stub hash keeps schema.parse happy.
+ const body = JSON.stringify({ ...partial, seq });
+ const hash = sha256Hex((prev ?? '') + body);
+ const evt = { schema_version: '1', seq, prev_hash: prev, hash, ...partial };
+ events.push(evt);
+ prev = hash;
+ }
+ append({
+ ts: STARTED_AT,
+ run_id: opts.runId,
+ kind: 'run_started',
+ actor: { type: 'orchestrator', id: 'aqa-cli' },
+ payload: { profile: opts.profile, project: opts.project },
+ });
+ append({
+ ts: FINISHED_AT,
+ run_id: opts.runId,
+ kind: 'run_finished',
+ actor: { type: 'orchestrator', id: 'aqa-cli' },
+ payload: {
+ scenarios_run: 2, // fixed and non-zero; with every error counter at 0 the run derives as 'succeeded'
+ findings: opts.findingsCount,
+ pack_errors: 0,
+ scenario_errors: 0,
+ missing_scenarios: 0,
+ unsafe_paths: 0,
+ runtime_errors: 0,
+ },
+ });
+ writeFileSync(
+ join(runDir, 'events.jsonl'),
+ `${events.map((e) => JSON.stringify(e)).join('\n')}\n`,
+ 'utf8',
+ );
+}
+
+function writeFindings(runDir: string, count: number, runId: string = RUN_ID): void { // writes `count` synthetic findings to <runDir>/findings.jsonl
+ const lines: string[] = [];
+ for (let i = 0; i < count; i++) {
+ const finding = {
+ schema_version: '1',
+ id: `AQA-2026-${String(i + 1).padStart(4, '0')}`, // AQA-2026-0001, AQA-2026-0002, …
+ run_id: runId,
+ risk_id: 'r-example',
+ scenario_id: 'scn-example-demo',
+ title: `Synthetic finding ${i + 1}`,
+ severity: i === 0 ? 'critical' : 'low', // the first finding is critical, all later ones low
+ status: 'draft',
+ summary: 'reporter smoke test',
+ evidence: [],
+ execution_mode: 'orchestrator',
+ verification_floor: 'scenario_level',
+ confidence: 0.5,
+ discovered_at: STARTED_AT,
+ };
+ lines.push(JSON.stringify(finding));
+ }
+ writeFileSync(
+ join(runDir, 'findings.jsonl'),
+ `${lines.join('\n')}${lines.length ? '\n' : ''}`,
+ 'utf8',
+ );
+}
+
+describe('aqa report — happy path', () => {
+ it('renders both report.md and report.json for the explicit run-id', () => {
+ const root = makeTempRoot();
+ const runDir = makeRunDir(root, RUN_ID);
+ writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 2 });
+ writeFindings(runDir, 2);
+
+ const result = runReport({ root, runId: RUN_ID });
+ assert.equal(result.ok, true, `expected ok, got ${JSON.stringify(result)}`);
+ if (!result.ok) return; // narrows `result` to the success variant for the checks below
+
+ assert.equal(result.runId, RUN_ID);
+ assert.equal(result.findingsCount, 2);
+ assert.ok(existsSync(join(runDir, 'report.md')), 'report.md must exist');
+ assert.ok(existsSync(join(runDir, 'report.json')), 'report.json must exist');
+
+ const md = readFileSync(join(runDir, 'report.md'), 'utf8');
+ assert.match(md, /# AQA report/);
+ assert.match(md, /demo/);
+ assert.match(md, /AQA-2026-0001/);
+ assert.match(md, /Synthetic finding 1/);
+
+ const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as {
+ schema_version: string;
+ run: { id: string; project: string; profile: string; state: string };
+ findings: Array<{ id: string }>;
+ summary: { total: number; severities: Record };
+ };
+ assert.equal(json.schema_version, '1');
+ assert.equal(json.run.id, RUN_ID);
+ assert.equal(json.run.project, 'demo');
+ assert.equal(json.run.profile, 'smoke');
+ assert.equal(json.run.state, 'succeeded'); // writeEvents emits run_finished with 2 scenarios and all error counters at 0
+ assert.equal(json.findings.length, 2);
+ assert.equal(json.summary.total, 2);
+ assert.equal(json.summary.severities.critical, 1); // writeFindings marks only the first finding critical
+ assert.equal(json.summary.severities.low, 1);
+ });
+
+ it('defaults to the latest run by file mtime, not lexical name (Copilot iter 1 P2)', async () => {
+ // Critical correctness: `aqa run --seed` produces hash-based IDs
+ // (prefixed `run-`) that do NOT sort by recency. Picking by mtime keeps
+ // "latest" honest in mixed-naming directories. Here we intentionally
+ // create the lexically-EARLIER name LAST so a name-based sort would
+ // pick the wrong dir.
+ const root = makeTempRoot();
+ const earlierName = 'run-aaaa-but-newer'; // lexically earlier
+ const olderName = 'run-zzzz-but-older'; // lexically later
+ const olderDir = makeRunDir(root, olderName);
+ writeEvents(olderDir, {
+ runId: olderName,
+ profile: 'smoke',
+ project: 'demo',
+ findingsCount: 1,
+ });
+ writeFindings(olderDir, 1, olderName);
+ await sleep(20); // real mtime gap so the name tie-breaker cannot decide the outcome
+ const newerDir = makeRunDir(root, earlierName);
+ writeEvents(newerDir, {
+ runId: earlierName,
+ profile: 'release-gate',
+ project: 'demo',
+ findingsCount: 3,
+ });
+ writeFindings(newerDir, 3, earlierName);
+
+ const result = runReport({ root });
+ assert.equal(result.ok, true);
+ if (!result.ok) return;
+ // mtime-newer dir wins even though its name sorts EARLIER lexically.
+ assert.equal(result.runId, earlierName);
+ assert.equal(result.findingsCount, 3);
+ });
+
+ it('emits only report.md when format=md', () => {
+ const root = makeTempRoot();
+ const runDir = makeRunDir(root, RUN_ID);
+ writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+ writeFindings(runDir, 0);
+
+ const result = runReport({ root, runId: RUN_ID, format: 'md' });
+ assert.equal(result.ok, true);
+ if (!result.ok) return;
+ assert.ok(existsSync(join(runDir, 'report.md')));
+ assert.ok(!existsSync(join(runDir, 'report.json')), 'report.json must not exist for format=md');
+ assert.equal(result.files.length, 1);
+ });
+
+ it('emits only report.json when format=json', () => {
+ const root = makeTempRoot();
+ const runDir = makeRunDir(root, RUN_ID);
+ writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+ writeFindings(runDir, 0);
+
+ const result = runReport({ root, runId: RUN_ID, format: 'json' });
+ assert.equal(result.ok, true);
+ if (!result.ok) return;
+ assert.ok(!existsSync(join(runDir, 'report.md')));
+ assert.ok(existsSync(join(runDir, 'report.json')));
+ });
+
+ it('handles zero findings without crashing', () => {
+ const root = makeTempRoot();
+ const runDir = makeRunDir(root, RUN_ID);
+ writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+ writeFindings(runDir, 0); // count 0 → findings.jsonl exists but is empty
+
+ const result = runReport({ root, runId: RUN_ID });
+ assert.equal(result.ok, true);
+ if (!result.ok) return;
+ assert.equal(result.findingsCount, 0);
+ const md = readFileSync(join(runDir, 'report.md'), 'utf8');
+ assert.match(md, /No findings/);
+ });
+});
+
+describe('aqa report — error cases', () => {
+ it('returns error when .aqa/runs does not exist', () => {
+ const root = makeTempRoot();
+ const result = runReport({ root });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /no runs directory/);
+ });
+
+ it('returns error when --run-id points to a missing dir', () => {
+ const root = makeTempRoot();
+ makeRunDir(root, RUN_ID);
+ const result = runReport({ root, runId: 'does-not-exist' });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /run directory not found/);
+ });
+
+ it('returns error when .aqa/runs exists but is empty', () => {
+ const root = makeTempRoot();
+ mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+ const result = runReport({ root });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /no runs found/);
+ });
+
+ it('returns error on malformed JSONL line in events.jsonl', () => {
+ const root = makeTempRoot();
+ const runDir = makeRunDir(root, RUN_ID);
+ writeFileSync(join(runDir, 'events.jsonl'), '{not json\n', 'utf8');
+ writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8');
+ const result = runReport({ root, runId: RUN_ID });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /cannot read events\.jsonl/);
+ });
+
+ it('returns error when events.jsonl is missing (Copilot iter 1 P1)', () => {
+ const root = makeTempRoot();
+ const runDir = makeRunDir(root, RUN_ID);
+ // findings.jsonl present, events.jsonl missing
+ writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8');
+ const result = runReport({ root, runId: RUN_ID });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /events\.jsonl is missing/);
+ });
+
+ it('returns error when findings.jsonl is missing (Copilot iter 1 P1)', () => {
+ const root = makeTempRoot();
+ const runDir = makeRunDir(root, RUN_ID);
+ writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+ // findings.jsonl intentionally not created
+ const result = runReport({ root, runId: RUN_ID });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /findings\.jsonl is missing/);
+ });
+
+ it('rejects a --run-id that would escape .aqa/runs via traversal (Copilot iter 1)', () => {
+ const root = makeTempRoot();
+ // .aqa/runs has to exist or we hit the prior guard first
+ mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+ const result = runReport({ root, runId: '../../etc/passwd' });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /invalid run id/);
+ });
+
+ it('rejects a --run-id containing characters outside [a-z0-9-]', () => {
+ const root = makeTempRoot();
+ mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+ const result = runReport({ root, runId: 'NOT_A_SLUG' });
+ assert.equal(result.ok, false);
+ if (result.ok) return;
+ assert.match(result.error, /invalid run id/);
+ });
+
+ it('rejects a --run-id with leading/trailing dashes (LongSlug parity — Copilot iter 2)', () => {
+   const projectRoot = makeTempRoot();
+   mkdirSync(join(projectRoot, '.aqa', 'runs'), { recursive: true });
+   // Each candidate breaks one dash rule: leading, trailing, or doubled.
+   const malformedIds = ['-leading', 'trailing-', 'double--dash'];
+   for (const candidate of malformedIds) {
+     const outcome = runReport({ root: projectRoot, runId: candidate });
+     assert.equal(outcome.ok, false, `${candidate} must be rejected`);
+     if (!outcome.ok) {
+       assert.match(outcome.error, /invalid run id/);
+     }
+   }
+ });
+
+ it('rejects a --run-id longer than 256 chars (LongSlug cap — Copilot iter 2)', () => {
+   const projectRoot = makeTempRoot();
+   mkdirSync(join(projectRoot, '.aqa', 'runs'), { recursive: true });
+   // One character past the 256-char cap named in the test title.
+   const oversizedId = 'a'.repeat(256 + 1);
+   const outcome = runReport({ root: projectRoot, runId: oversizedId });
+   assert.equal(outcome.ok, false);
+   if (!outcome.ok) {
+     assert.match(outcome.error, /exceeds 256-char/);
+   }
+ });
+
+ it('refuses a run dir that is actually a symlink (Copilot iter 2)', (t) => {
+   // Scenario: `.aqa/runs/sneaky-link` is a symlink pointing at a fully
+   // populated run directory. runReport must reject the link itself rather
+   // than follow it.
+   const root = makeTempRoot();
+   const runsRoot = join(root, '.aqa', 'runs');
+   mkdirSync(runsRoot, { recursive: true });
+   const realDir = makeRunDir(root, 'real-target');
+   writeEvents(realDir, {
+     runId: 'real-target',
+     profile: 'smoke',
+     project: 'demo',
+     findingsCount: 0,
+   });
+   writeFindings(realDir, 0, 'real-target');
+   const linkPath = join(runsRoot, 'sneaky-link');
+   try {
+     symlinkSync(realDir, linkPath, 'dir');
+   } catch (err) {
+     // Windows without dev-mode / non-admin can't create symlinks. Mark the
+     // test as skipped instead of returning silently: a bare return is
+     // reported as a pass even though nothing was asserted. skip() does not
+     // stop execution, hence the explicit return.
+     const reason = err instanceof Error ? err.message : String(err);
+     t.skip(`cannot create symlink on this platform: ${reason}`);
+     return;
+   }
+   const result = runReport({ root, runId: 'sneaky-link' });
+   assert.equal(result.ok, false);
+   if (result.ok) return;
+   assert.match(result.error, /symlinked run directory/);
+ });
+
+ it('rejects a JSONL line that is valid JSON but not a plain object (Copilot iter 2)', () => {
+   const projectRoot = makeTempRoot();
+   const runPath = makeRunDir(projectRoot, RUN_ID);
+   // `null` parses as JSON but is not an object; findings.jsonl stays empty.
+   const fixtures: ReadonlyArray<readonly [string, string]> = [
+     ['events.jsonl', 'null\n'],
+     ['findings.jsonl', ''],
+   ];
+   for (const [fileName, content] of fixtures) {
+     writeFileSync(join(runPath, fileName), content, 'utf8');
+   }
+   const outcome = runReport({ root: projectRoot, runId: RUN_ID });
+   assert.equal(outcome.ok, false);
+   if (!outcome.ok) {
+     assert.match(outcome.error, /expected a JSON object, got null/);
+   }
+ });
+
+ it('rejects a JSONL line that parses as a JSON array (Copilot iter 2)', () => {
+   const projectRoot = makeTempRoot();
+   const runPath = makeRunDir(projectRoot, RUN_ID);
+   // An array is valid JSON but not an object; findings.jsonl stays empty.
+   const fixtures: ReadonlyArray<readonly [string, string]> = [
+     ['events.jsonl', '[1, 2, 3]\n'],
+     ['findings.jsonl', ''],
+   ];
+   for (const [fileName, content] of fixtures) {
+     writeFileSync(join(runPath, fileName), content, 'utf8');
+   }
+   const outcome = runReport({ root: projectRoot, runId: RUN_ID });
+   assert.equal(outcome.ok, false);
+   if (!outcome.ok) {
+     assert.match(outcome.error, /expected a JSON object, got array/);
+   }
+ });
+
+ it('refuses to overwrite a symlinked report.md (Copilot iter 3)', (t) => {
+   // Scenario: report.md inside the run directory is a symlink to a file in
+   // a different temp root. runReport must refuse to write through the link.
+   const root = makeTempRoot();
+   const runDir = makeRunDir(root, RUN_ID);
+   writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+   writeFindings(runDir, 0);
+   const outside = join(makeTempRoot(), 'evil-report.md');
+   writeFileSync(outside, '# attacker controlled\n', 'utf8');
+   try {
+     symlinkSync(outside, join(runDir, 'report.md'), 'file');
+   } catch (err) {
+     // Windows non-admin can't create symlinks. Mark the test as skipped
+     // rather than returning silently, which would be reported as a pass
+     // with no assertion executed. skip() does not stop execution, hence
+     // the explicit return.
+     const reason = err instanceof Error ? err.message : String(err);
+     t.skip(`cannot create symlink on this platform: ${reason}`);
+     return;
+   }
+   const result = runReport({ root, runId: RUN_ID, format: 'md' });
+   assert.equal(result.ok, false);
+   if (result.ok) return;
+   assert.match(result.error, /refusing to overwrite symlinked report file/);
+   // Crucial: the attacker-controlled file MUST be untouched.
+   assert.equal(readFileSync(outside, 'utf8'), '# attacker controlled\n');
+ });
+
+ it('latestRunId only considers dirs with events.jsonl (Copilot iter 3)', async () => {
+   const root = makeTempRoot();
+   // Seeds one complete run under `root`: directory, events, then findings.
+   const seedRun = (id: string): void => {
+     const dir = makeRunDir(root, id);
+     writeEvents(dir, { runId: id, profile: 'smoke', project: 'demo', findingsCount: 0 });
+     writeFindings(dir, 0, id);
+   };
+   // Creation order, separated by short pauses: the older run, the newer run,
+   // and finally a stray directory that lacks events.jsonl and is the newest
+   // entry under .aqa/runs.
+   seedRun('run-aaaa');
+   await sleep(20);
+   seedRun('run-bbbb');
+   await sleep(20);
+   mkdirSync(join(root, '.aqa', 'runs', 'readme-stash'), { recursive: true });
+   // No runId is passed, so runReport has to pick the run itself.
+   const outcome = runReport({ root });
+   assert.equal(outcome.ok, true);
+   if (outcome.ok) {
+     // The stray directory must not win over the most recent REAL run.
+     assert.equal(outcome.runId, 'run-bbbb');
+   }
+ });
+});
+
+ describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => {
+   // Counter fields of a `run_finished` payload, all zero. Individual tests
+   // override single counters; spreading keeps this key order in the
+   // serialized JSON line.
+   const ZERO_COUNTERS = {
+     scenarios_run: 0,
+     findings: 0,
+     pack_errors: 0,
+     scenario_errors: 0,
+     missing_scenarios: 0,
+     unsafe_paths: 0,
+     runtime_errors: 0,
+   };
+
+   /**
+    * Writes a hand-built events.jsonl into `runDir`.
+    *
+    * The file always starts with a `run_started` event. A `run_finished`
+    * event is appended only when `finishedCounters` is provided; its payload
+    * is ZERO_COUNTERS with the given overrides applied. Omitting the
+    * argument yields a log without `run_finished`.
+    */
+   const writeLifecycleEvents = (
+     runDir: string,
+     finishedCounters?: Partial<typeof ZERO_COUNTERS>,
+   ): void => {
+     const events: Array<Record<string, unknown>> = [
+       {
+         schema_version: '1',
+         seq: 0,
+         prev_hash: null,
+         hash: '0'.repeat(64),
+         ts: STARTED_AT,
+         run_id: RUN_ID,
+         kind: 'run_started',
+         actor: { type: 'orchestrator', id: 'aqa-cli' },
+         payload: { profile: 'smoke', project: 'demo' },
+       },
+     ];
+     if (finishedCounters !== undefined) {
+       events.push({
+         schema_version: '1',
+         seq: 1,
+         prev_hash: '0'.repeat(64),
+         hash: '1'.repeat(64),
+         ts: FINISHED_AT,
+         run_id: RUN_ID,
+         kind: 'run_finished',
+         actor: { type: 'orchestrator', id: 'aqa-cli' },
+         payload: { ...ZERO_COUNTERS, ...finishedCounters },
+       });
+     }
+     writeFileSync(
+       join(runDir, 'events.jsonl'),
+       `${events.map((e) => JSON.stringify(e)).join('\n')}\n`,
+       'utf8',
+     );
+   };
+
+   /** Reads `report.json` from `runDir` and returns its `run` section. */
+   const readReportRun = (runDir: string): { state: string; finished_at?: string } => {
+     const report = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as {
+       run: { state: string; finished_at?: string };
+     };
+     return report.run;
+   };
+
+   it('marks state=failed when run_finished payload has pack_errors > 0', () => {
+     const root = makeTempRoot();
+     const runDir = makeRunDir(root, RUN_ID);
+     // Custom events with non-zero pack_errors
+     writeLifecycleEvents(runDir, { pack_errors: 1 });
+     writeFindings(runDir, 0);
+     const result = runReport({ root, runId: RUN_ID });
+     assert.equal(result.ok, true);
+     assert.equal(readReportRun(runDir).state, 'failed');
+   });
+
+   it('marks state=failed when scenarios_run is 0 even with no error counters', () => {
+     const root = makeTempRoot();
+     const runDir = makeRunDir(root, RUN_ID);
+     // Every counter is zero, including scenarios_run. The events file is
+     // written directly; a preceding writeEvents call would only be
+     // overwritten by it.
+     writeLifecycleEvents(runDir, {});
+     writeFindings(runDir, 0);
+     const result = runReport({ root, runId: RUN_ID });
+     assert.equal(result.ok, true);
+     assert.equal(readReportRun(runDir).state, 'failed');
+   });
+
+   it('marks state=running when no run_finished event is present', () => {
+     const root = makeTempRoot();
+     const runDir = makeRunDir(root, RUN_ID);
+     // Only run_started is logged.
+     writeLifecycleEvents(runDir);
+     writeFindings(runDir, 0);
+     const result = runReport({ root, runId: RUN_ID });
+     assert.equal(result.ok, true);
+     const run = readReportRun(runDir);
+     assert.equal(run.state, 'running');
+     assert.equal(run.finished_at, undefined);
+   });
+
+   it('returns error on schema-invalid finding', () => {
+     const root = makeTempRoot();
+     const runDir = makeRunDir(root, RUN_ID);
+     writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 });
+     // A finding carrying only an `id`; the test expects runReport to reject it.
+     writeFileSync(join(runDir, 'findings.jsonl'), `${JSON.stringify({ id: 'broken' })}\n`, 'utf8');
+     const result = runReport({ root, runId: RUN_ID });
+     assert.equal(result.ok, false);
+     if (result.ok) return;
+     assert.match(result.error, /cannot read findings\.jsonl/);
+   });
+ });