From 7d579777facf97cfb7ff37e60b278615a00a2ab2 Mon Sep 17 00:00:00 2001 From: "lorenzo.padovani@padosoft.com" Date: Wed, 20 May 2026 23:14:18 +0200 Subject: [PATCH 1/4] feat(v1.9): add `aqa report` CLI verb (sub-task 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cables `@aqa/reporter` into a real CLI verb. README quick-start has referenced `aqa report` since v0.0.1 but the verb itself was never wired — `bunx aqa report` errored at parse time. CLI surface: aqa report [--run-id ] [--format md|json|both] Behaviour: - `--run-id` omitted → defaults to the latest run directory (lexical sort on run-id, which has an ISO-like prefix so newest wins) - reads `.aqa/runs//events.jsonl` + `findings.jsonl` - reconstructs a schema-conformant Run object from the first `run_started` and last `run_finished` events (no new sidecar files — reports stay replayable from the audit trail alone) - writes `report.md` (renderMarkdown) + `report.json` (renderJson) into the same run directory; the JSON shape is the stable one the admin UI already consumes - structured errors for: missing runs dir, missing run-id, malformed JSONL, schema-invalid findings - `--format md|json` opt-out for each artifact Tests: 10 new node:test cases in packages/kit/test/report-cmd.test.ts cover happy path (5: full render, latest-default, md-only, json-only, zero-findings) and error cases (5: missing dir, missing run-id, empty dir, malformed JSONL, schema-invalid finding). Synthetic run dirs match @aqa/schemas Run/Finding exactly. Co-Authored-By: Claude Opus 4.7 (1M context) --- bun.lock | 1 + packages/kit/package.json | 3 +- packages/kit/src/cli/aqa.ts | 60 +++++- packages/kit/src/commands/report.ts | 268 +++++++++++++++++++++++++++ packages/kit/test/report-cmd.test.ts | 265 ++++++++++++++++++++++++++ 5 files changed, 588 insertions(+), 9 deletions(-) create mode 100644 packages/kit/src/commands/report.ts create mode 100644 packages/kit/test/report-cmd.test.ts diff --git a/bun.lock b/bun.lock index 16bba94..a0e788a 100644 --- a/bun.lock +++ b/bun.lock @@ -90,6 +90,7 @@ }, "dependencies": { "@aqa/pack-loader": "workspace:*", + "@aqa/reporter": "workspace:*", "@aqa/runner": "workspace:*", "@aqa/schemas": "workspace:*", "kleur": "^4.1.5", diff --git a/packages/kit/package.json b/packages/kit/package.json index ebbdc1e..424f3d5 100644 --- a/packages/kit/package.json +++ b/packages/kit/package.json @@ -20,11 +20,12 @@ "build": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs", "typecheck": "tsc -p tsconfig.json --noEmit", "pretest": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs", - "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts", + "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts test/report-cmd.test.ts", "clean": "node --input-type=module -e \"import { rmSync } from 'node:fs'; for (const p of ['dist','.tsbuildinfo']) { try { rmSync(p, { recursive: true, force: true }); } catch {} }\"" }, "dependencies": { "@aqa/pack-loader": "workspace:*", + "@aqa/reporter": "workspace:*", "@aqa/runner": "workspace:*", "@aqa/schemas": "workspace:*", "kleur": "^4.1.5", diff --git a/packages/kit/src/cli/aqa.ts b/packages/kit/src/cli/aqa.ts index 2e3f3ef..946bd27 100644 --- a/packages/kit/src/cli/aqa.ts +++ b/packages/kit/src/cli/aqa.ts @@ -3,6 +3,7 @@ import { bold, cyan, dim, green, red, yellow } from 'kleur/colors'; import { type CheckStatus, runDoctor } from '../commands/doctor.js'; import { runInit } from '../commands/init.js'; import { runPackNew } from '../commands/pack-new.js'; +import { runReport } from '../commands/report.js'; import { runRun } from '../commands/run.js'; import { runValidate } from '../commands/validate.js'; @@ -22,7 +23,16 @@ interface ParsedArgs { values: Map; } -const VALUE_FLAGS = new Set(['profile', 'seed', 'sut-type', 'description', 'author', 'license']); +const VALUE_FLAGS = new Set([ + 'profile', + 'seed', + 'sut-type', + 'description', + 'author', + 'license', + 'run-id', + 'format', +]); function parseArgs(argv: string[]): ParsedArgs { const out: ParsedArgs = { command: null, positionals: [], flags: new Set(), values: new Map() }; @@ -77,19 +87,22 @@ ${bold('Usage')} aqa [options] ${bold('Commands')} - init [name] Scaffold .aqa/{project,risk-map,profiles}.yaml + testing.md - doctor Report kit health (runtime, .aqa, agent docs, validation) - validate Validate .aqa/* against @aqa/schemas - run [--profile

] Execute scenarios for the given profile; write events + findings - pack new Scaffold a new pack at /packs// (see the pack authoring guide: - https://github.com/padosoft/agentic-qa-kit/blob/main/docs/PACK-AUTHORING.md - — this path is only present in the source repo, not in the npm tarball) + init [name] Scaffold .aqa/{project,risk-map,profiles}.yaml + testing.md + doctor Report kit health (runtime, .aqa, agent docs, validation) + validate Validate .aqa/* against @aqa/schemas + run [--profile

] Execute scenarios for the given profile; write events + findings + report [--run-id ] Render the latest (or specified) run as report.md + report.json + pack new Scaffold a new pack at /packs// (see the pack authoring + guide: https://github.com/padosoft/agentic-qa-kit/blob/main/docs/PACK-AUTHORING.md + — this path is only present in the source repo, not in the npm tarball) ${bold('Common options')} --force (init / pack new) overwrite existing files/directory --dry-run (init) don't write to disk; print what would happen --profile (run) profile key from .aqa/profiles.yaml --seed (run) deterministic run_id seed — useful for replay + --run-id (report) target a specific run; default = latest + --format (report) md | json | both (default: both) --sut-type (pack new) api | web | cli | lib | agent | pipeline --description (pack new) one-line summary written into the manifest --author (pack new) manifest author field @@ -199,6 +212,37 @@ async function main(): Promise { } return 0; } + case 'report': { + printHeader('report'); + if (args.flags.has('run-id') && !args.values.has('run-id')) { + console.error(red('aqa report: --run-id requires a value')); + return 1; + } + if (args.flags.has('format') && !args.values.has('format')) { + console.error(red('aqa report: --format requires a value')); + return 1; + } + const reportOpts: Parameters[0] = { root: cwd }; + if (args.values.has('run-id')) reportOpts.runId = args.values.get('run-id') ?? ''; + if (args.values.has('format')) { + const fmt = args.values.get('format') ?? ''; + if (fmt !== 'md' && fmt !== 'json' && fmt !== 'both') { + console.error(red(`aqa report: --format must be md | json | both, got "${fmt}"`)); + return 1; + } + reportOpts.format = fmt; + } + const result = runReport(reportOpts); + if (!result.ok) { + console.error(red(` ✗ ${result.error}`)); + return 1; + } + console.info(` ${green('✓')} ${bold(result.runId)}`); + console.info(` ${dim('runDir: ')}${result.runDir}`); + console.info(` ${dim('findings: ')}${result.findingsCount}`); + for (const f of result.files) console.info(` ${green('+')} ${f}`); + return 0; + } case 'pack': { // Subcommand router for `aqa pack `. const sub = args.positionals[0]; diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts new file mode 100644 index 0000000..f0d8f3c --- /dev/null +++ b/packages/kit/src/commands/report.ts @@ -0,0 +1,268 @@ +/** + * `aqa report` — renders the on-disk run artifacts (`events.jsonl` + + * `findings.jsonl`) into a Markdown summary and a stable JSON view for + * downstream dashboards. + * + * Loose contract (intentional, see also `runRun`): + * - `aqa run` writes events + findings, NOT a `run.json` (the canonical + * Run shape is reconstructed from the audit chain). `aqa report` does + * that reconstruction here from the first `run_started` and last + * `run_finished` events so reports remain replayable from the audit + * trail alone, without coupling reporter output to a new sidecar file. + * - Reports are written into the same run directory so a junior can hand + * a single `runDir` to a teammate and get the whole story (events, + * findings, replay artifacts, plus the rendered report). + */ + +import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { renderJson, renderMarkdown } from '@aqa/reporter'; +import { Finding, type Run } from '@aqa/schemas'; + +export type ReportFormat = 'md' | 'json' | 'both'; + +export interface ReportOptions { + root: string; + /** Run id (== directory name under `.aqa/runs/`). Omit to use the latest run. */ + runId?: string; + /** Default 'both'. Controls which artifacts are written. */ + format?: ReportFormat; +} + +export interface ReportOk { + ok: true; + runId: string; + runDir: string; + files: string[]; + findingsCount: number; +} + +export interface ReportErr { + ok: false; + error: string; +} + +export type ReportResult = ReportOk | ReportErr; + +export function runReport(opts: ReportOptions): ReportResult { + const format: ReportFormat = opts.format ?? 'both'; + if (format !== 'md' && format !== 'json' && format !== 'both') { + return { + ok: false, + error: `report: --format must be md | json | both, got "${String(format)}"`, + }; + } + const runsRoot = join(opts.root, '.aqa', 'runs'); + if (!existsSync(runsRoot)) { + return { + ok: false, + error: `report: no runs directory at ${runsRoot} — run \`aqa run --profile smoke\` first`, + }; + } + const runIdResolved = opts.runId ?? latestRunId(runsRoot); + if (!runIdResolved) { + return { + ok: false, + error: `report: no runs found under ${runsRoot} — run \`aqa run --profile smoke\` first`, + }; + } + const runDir = join(runsRoot, runIdResolved); + if (!safeIsDir(runDir)) { + return { ok: false, error: `report: run directory not found: ${runDir}` }; + } + + let events: ReadonlyArray>; + try { + events = readJsonl(join(runDir, 'events.jsonl')); + } catch (e) { + return { + ok: false, + error: `report: cannot read events.jsonl: ${e instanceof Error ? e.message : String(e)}`, + }; + } + + let findings: readonly Finding.Finding[]; + try { + findings = readJsonl(join(runDir, 'findings.jsonl')).map((raw, idx) => { + const parsed = Finding.Finding.safeParse(raw); + if (!parsed.success) { + throw new Error(`findings.jsonl line ${idx + 1}: ${parsed.error.message}`); + } + return parsed.data; + }); + } catch (e) { + return { + ok: false, + error: `report: cannot read findings.jsonl: ${e instanceof Error ? e.message : String(e)}`, + }; + } + + const run = reconstructRun({ + runId: runIdResolved, + runDir, + events, + findingsCount: findings.length, + }); + + const written: string[] = []; + if (format === 'md' || format === 'both') { + const mdPath = join(runDir, 'report.md'); + writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8'); + written.push(mdPath); + } + if (format === 'json' || format === 'both') { + const jsonPath = join(runDir, 'report.json'); + writeFileSync(jsonPath, `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`, 'utf8'); + written.push(jsonPath); + } + + return { + ok: true, + runId: runIdResolved, + runDir, + files: written, + findingsCount: findings.length, + }; +} + +function latestRunId(runsRoot: string): string | undefined { + let entries: string[]; + try { + entries = readdirSync(runsRoot); + } catch { + return undefined; + } + const dirs = entries + .filter((name) => safeIsDir(join(runsRoot, name))) + // Run IDs include an ISO-like prefix; lexical sort orders by recency. + .sort(); + return dirs.at(-1); +} + +function safeIsDir(p: string): boolean { + try { + return statSync(p).isDirectory(); + } catch { + return false; + } +} + +function readJsonl(path: string): Array> { + if (!existsSync(path)) return []; + const text = readFileSync(path, 'utf8'); + const out: Array> = []; + let lineNo = 0; + for (const raw of text.split('\n')) { + lineNo += 1; + const line = raw.trim(); + if (!line) continue; + try { + const obj = JSON.parse(line) as unknown; + if (obj && typeof obj === 'object' && !Array.isArray(obj)) { + out.push(obj as Record); + } + } catch (e) { + throw new Error(`${path} line ${lineNo}: ${e instanceof Error ? e.message : String(e)}`); + } + } + return out; +} + +interface ReconstructInput { + runId: string; + runDir: string; + events: ReadonlyArray>; + findingsCount: number; +} + +function reconstructRun(input: ReconstructInput): Run.Run { + // Best-effort reconstruction from the audit chain — see file header. The + // reporter only reads a small subset of Run fields, but we still build the + // full schema-conformant object so the JSON report stays valid for the + // admin UI and any external dashboard. + const { runId, runDir, events, findingsCount } = input; + const started = pickEvent(events, 'run_started'); + const finished = pickEvent(events, 'run_finished'); + + const startedAt = readString(started, 'ts') ?? new Date(0).toISOString(); + const finishedAt = readString(finished, 'ts'); + const profile = readPayloadString(started, 'profile') ?? 'unknown'; + const project = readPayloadString(started, 'project') ?? 'unknown'; + const scenariosRun = readPayloadNumber(finished, 'scenarios_run') ?? 0; + const totalsFindings = readPayloadNumber(finished, 'findings') ?? findingsCount; + + const state: Run.Run['state'] = finished ? 'succeeded' : 'running'; + + const run: Run.Run = { + schema_version: '1', + id: runId, + started_at: startedAt, + ...(finishedAt ? { finished_at: finishedAt } : {}), + state, + project, + profile, + execution_mode: 'orchestrator', + config_snapshot: { + profile, + execution_mode: 'orchestrator', + packs: [], + // Deterministic dummy: this report path doesn't recompute the config + // hash from disk. The admin UI keys on `run.id`, not `config_hash`. + config_hash: '0'.repeat(64), + }, + totals: { + scenarios: scenariosRun, + findings: totalsFindings, + probes: 0, + llm_tokens_in: 0, + llm_tokens_out: 0, + llm_cost_usd: 0, + }, + artifact_dir: runDir, + }; + return run; +} + +function pickEvent( + events: ReadonlyArray>, + kind: string, +): Record | undefined { + // run_started: first match; run_finished: last (a re-run within the same + // dir would have appended a new finalization line). Both kinds appear at + // most once today, but the lookup stays defensive. + if (kind === 'run_finished') { + for (let i = events.length - 1; i >= 0; i--) { + if (events[i]?.kind === kind) return events[i]; + } + return undefined; + } + for (const e of events) { + if (e.kind === kind) return e; + } + return undefined; +} + +function readString(obj: Record | undefined, key: string): string | undefined { + const v = obj?.[key]; + return typeof v === 'string' ? v : undefined; +} + +function readPayloadString( + obj: Record | undefined, + key: string, +): string | undefined { + const payload = obj?.payload; + if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined; + const v = (payload as Record)[key]; + return typeof v === 'string' ? v : undefined; +} + +function readPayloadNumber( + obj: Record | undefined, + key: string, +): number | undefined { + const payload = obj?.payload; + if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined; + const v = (payload as Record)[key]; + return typeof v === 'number' && Number.isFinite(v) ? v : undefined; +} diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts new file mode 100644 index 0000000..7065410 --- /dev/null +++ b/packages/kit/test/report-cmd.test.ts @@ -0,0 +1,265 @@ +/** + * v1.9 — `aqa report` CLI verb. + * + * Renders the on-disk run artifacts into report.md + report.json. These + * tests build a synthetic run directory (events.jsonl + findings.jsonl) + * because runRun is async, network-touched in some configurations, and + * over-coupled for a focused reporter test. The synthetic dir matches the + * canonical schema shapes from @aqa/schemas Run/Finding. + */ + +import assert from 'node:assert/strict'; +import { createHash } from 'node:crypto'; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, it } from 'node:test'; +import { runReport } from '../dist/commands/report.js'; + +const RUN_ID = '20260520-000000-runabcdef'; +const ALT_RUN_ID = '20260520-010000-runzzzzzz'; +const STARTED_AT = '2026-05-20T00:00:00.000Z'; +const FINISHED_AT = '2026-05-20T00:00:30.000Z'; + +function makeTempRoot(): string { + return mkdtempSync(join(tmpdir(), 'aqa-report-')); +} + +function makeRunDir(root: string, runId: string): string { + const dir = join(root, '.aqa', 'runs', runId); + mkdirSync(dir, { recursive: true }); + return dir; +} + +function sha256Hex(s: string): string { + return createHash('sha256').update(s, 'utf8').digest('hex'); +} + +/** Build minimal valid hash-chained events.jsonl. */ +function writeEvents( + runDir: string, + opts: { runId: string; profile: string; project: string; findingsCount: number }, +): void { + const events: Array> = []; + let prev: string | null = null; + function append(partial: Omit, 'seq' | 'prev_hash' | 'hash'>): void { + const seq = events.length; + // Hash recomputation here is a stub — the writer's exact canonicalization + // is exercised in @aqa/runner / @aqa/compliance tests. `aqa report` + // doesn't validate the chain (it just parses fields), so any + // deterministic stub hash keeps schema.parse happy. + const body = JSON.stringify({ ...partial, seq }); + const hash = sha256Hex((prev ?? '') + body); + const evt = { schema_version: '1', seq, prev_hash: prev, hash, ...partial }; + events.push(evt); + prev = hash; + } + append({ + ts: STARTED_AT, + run_id: opts.runId, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: opts.profile, project: opts.project }, + }); + append({ + ts: FINISHED_AT, + run_id: opts.runId, + kind: 'run_finished', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { + scenarios_run: 2, + findings: opts.findingsCount, + pack_errors: 0, + scenario_errors: 0, + missing_scenarios: 0, + unsafe_paths: 0, + runtime_errors: 0, + }, + }); + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); +} + +function writeFindings(runDir: string, count: number): void { + const lines: string[] = []; + for (let i = 0; i < count; i++) { + const finding = { + schema_version: '1', + id: `AQA-2026-${String(i + 1).padStart(4, '0')}`, + run_id: RUN_ID, + risk_id: 'r-example', + scenario_id: 'scn-example-demo', + title: `Synthetic finding ${i + 1}`, + severity: i === 0 ? 'critical' : 'low', + status: 'draft', + summary: 'reporter smoke test', + evidence: [], + execution_mode: 'orchestrator', + verification_floor: 'scenario_level', + confidence: 0.5, + discovered_at: STARTED_AT, + }; + lines.push(JSON.stringify(finding)); + } + writeFileSync( + join(runDir, 'findings.jsonl'), + `${lines.join('\n')}${lines.length ? '\n' : ''}`, + 'utf8', + ); +} + +describe('aqa report — happy path', () => { + it('renders both report.md and report.json for the explicit run-id', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 2 }); + writeFindings(runDir, 2); + + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true, `expected ok, got ${JSON.stringify(result)}`); + if (!result.ok) return; + + assert.equal(result.runId, RUN_ID); + assert.equal(result.findingsCount, 2); + assert.ok(existsSync(join(runDir, 'report.md')), 'report.md must exist'); + assert.ok(existsSync(join(runDir, 'report.json')), 'report.json must exist'); + + const md = readFileSync(join(runDir, 'report.md'), 'utf8'); + assert.match(md, /# AQA report/); + assert.match(md, /demo/); + assert.match(md, /AQA-2026-0001/); + assert.match(md, /Synthetic finding 1/); + + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + schema_version: string; + run: { id: string; project: string; profile: string; state: string }; + findings: Array<{ id: string }>; + summary: { total: number; severities: Record }; + }; + assert.equal(json.schema_version, '1'); + assert.equal(json.run.id, RUN_ID); + assert.equal(json.run.project, 'demo'); + assert.equal(json.run.profile, 'smoke'); + assert.equal(json.run.state, 'succeeded'); + assert.equal(json.findings.length, 2); + assert.equal(json.summary.total, 2); + assert.equal(json.summary.severities.critical, 1); + assert.equal(json.summary.severities.low, 1); + }); + + it('defaults to the latest run when --run-id is omitted (lexical sort)', () => { + const root = makeTempRoot(); + // Older run first; alt run lexically later so it should be chosen. + const olderDir = makeRunDir(root, RUN_ID); + writeEvents(olderDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 }); + writeFindings(olderDir, 1); + const newerDir = makeRunDir(root, ALT_RUN_ID); + writeEvents(newerDir, { + runId: ALT_RUN_ID, + profile: 'release-gate', + project: 'demo', + findingsCount: 3, + }); + writeFindings(newerDir, 3); + + const result = runReport({ root }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.runId, ALT_RUN_ID); + assert.equal(result.findingsCount, 3); + }); + + it('emits only report.md when format=md', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + + const result = runReport({ root, runId: RUN_ID, format: 'md' }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.ok(existsSync(join(runDir, 'report.md'))); + assert.ok(!existsSync(join(runDir, 'report.json')), 'report.json must not exist for format=md'); + assert.equal(result.files.length, 1); + }); + + it('emits only report.json when format=json', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + + const result = runReport({ root, runId: RUN_ID, format: 'json' }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.ok(!existsSync(join(runDir, 'report.md'))); + assert.ok(existsSync(join(runDir, 'report.json'))); + }); + + it('handles zero findings without crashing', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.findingsCount, 0); + const md = readFileSync(join(runDir, 'report.md'), 'utf8'); + assert.match(md, /No findings/); + }); +}); + +describe('aqa report — error cases', () => { + it('returns error when .aqa/runs does not exist', () => { + const root = makeTempRoot(); + const result = runReport({ root }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no runs directory/); + }); + + it('returns error when --run-id points to a missing dir', () => { + const root = makeTempRoot(); + makeRunDir(root, RUN_ID); + const result = runReport({ root, runId: 'does-not-exist' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /run directory not found/); + }); + + it('returns error when .aqa/runs exists but is empty', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no runs found/); + }); + + it('returns error on malformed JSONL line in events.jsonl', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeFileSync(join(runDir, 'events.jsonl'), '{not json\n', 'utf8'); + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /cannot read events\.jsonl/); + }); + + it('returns error on schema-invalid finding', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 }); + writeFileSync(join(runDir, 'findings.jsonl'), `${JSON.stringify({ id: 'broken' })}\n`, 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /cannot read findings\.jsonl/); + }); +}); From c310d975368100c7aea60e4e847b4b80ab4fe8a4 Mon Sep 17 00:00:00 2001 From: "lorenzo.padovani@padosoft.com" Date: Wed, 20 May 2026 23:29:07 +0200 Subject: [PATCH 2/4] fix(v1.9): address Copilot iter 1 on report PR #53 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5 actionable comments, all addressed: 1. P1 — state was unconditionally 'succeeded' when run_finished was present. But runRun writes run_finished even on most failure paths (pack_errors / scenario_errors / missing_scenarios / unsafe_paths / runtime_errors / zero scenarios). Now derives state by inspecting the payload counters: any non-zero error counter OR zero scenarios → 'failed'; no run_finished → 'running'; else 'succeeded'. Added 3 regression tests. 2. P2 — latest-run selection used lexical sort on directory name, which silently picks the wrong run when `aqa run --seed` produces hash-based IDs (run-). Now uses statSync.mtimeMs (most recent wins) with the directory name as a deterministic tie-breaker. Added a regression test that intentionally writes the lexically-earlier name LAST. 3. P1 — readJsonl returned [] for missing files. That made `aqa report` succeed on a corrupted/incomplete run dir and emit a synthetic empty report. Now both events.jsonl and findings.jsonl are required up-front with explicit existence checks; readJsonl assumes existence. Added 2 regression tests. 4. Path-traversal — --run-id was forwarded to join() without validation, so a value like '../..' could read/write files outside .aqa/runs. Added a LongSlug-shaped regex (/^[a-z0-9-]{1,80}$/) at the CLI boundary that rejects traversal and any non-slug input. Added 2 regression tests. 5. writeFileSync wasn't wrapped — a read-only FS or disk-full would bubble through the CLI's top-level unhandled-error path instead of returning a structured ReportErr. Wrapped both writes in a single try/catch. Test fixture bug also fixed: writeFindings now accepts an optional runId parameter (default RUN_ID) so findings written for ALT_RUN_ID-style directories carry the matching run_id, not a stale constant. 17 tests now pass (10 original + 7 new regression tests). Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/kit/src/commands/report.ts | 127 +++++++++++++--- packages/kit/test/report-cmd.test.ts | 218 +++++++++++++++++++++++++-- 2 files changed, 315 insertions(+), 30 deletions(-) diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts index f0d8f3c..91473b3 100644 --- a/packages/kit/src/commands/report.ts +++ b/packages/kit/src/commands/report.ts @@ -21,6 +21,12 @@ import { Finding, type Run } from '@aqa/schemas'; export type ReportFormat = 'md' | 'json' | 'both'; +// LongSlug (per @aqa/schemas) cap. Reject anything that would let `runId` +// escape `.aqa/runs/` via traversal (`../..`, absolute paths, backslashes). +// The schema's own LongSlug also enforces ^[a-z0-9-]+$ — we mirror that +// here rather than import to keep the report command self-contained. +const RUN_ID_RE = /^[a-z0-9-]{1,80}$/; + export interface ReportOptions { root: string; /** Run id (== directory name under `.aqa/runs/`). Omit to use the latest run. */ @@ -66,14 +72,34 @@ export function runReport(opts: ReportOptions): ReportResult { error: `report: no runs found under ${runsRoot} — run \`aqa run --profile smoke\` first`, }; } + // Defense-in-depth: a `--run-id` of `../../../etc/passwd` (or similar) + // would otherwise resolve outside `.aqa/runs/`. The schema treats run + // IDs as LongSlug; mirror that constraint at the CLI boundary so a + // typo or malicious input can't drive writes anywhere outside the + // intended directory. + if (!RUN_ID_RE.test(runIdResolved)) { + return { + ok: false, + error: `report: invalid run id "${runIdResolved}" — must match ${RUN_ID_RE.source}`, + }; + } const runDir = join(runsRoot, runIdResolved); if (!safeIsDir(runDir)) { return { ok: false, error: `report: run directory not found: ${runDir}` }; } + const eventsPath = join(runDir, 'events.jsonl'); + // Missing artifacts → fail-fast. A silent empty report on a corrupted + // run dir hides the real problem (the run never wrote its audit trail). + if (!existsSync(eventsPath)) { + return { + ok: false, + error: `report: events.jsonl is missing at ${eventsPath} — run is incomplete or corrupted`, + }; + } let events: ReadonlyArray>; try { - events = readJsonl(join(runDir, 'events.jsonl')); + events = readJsonl(eventsPath); } catch (e) { return { ok: false, @@ -81,9 +107,16 @@ export function runReport(opts: ReportOptions): ReportResult { }; } + const findingsPath = join(runDir, 'findings.jsonl'); + if (!existsSync(findingsPath)) { + return { + ok: false, + error: `report: findings.jsonl is missing at ${findingsPath} — run is incomplete or corrupted`, + }; + } let findings: readonly Finding.Finding[]; try { - findings = readJsonl(join(runDir, 'findings.jsonl')).map((raw, idx) => { + findings = readJsonl(findingsPath).map((raw, idx) => { const parsed = Finding.Finding.safeParse(raw); if (!parsed.success) { throw new Error(`findings.jsonl line ${idx + 1}: ${parsed.error.message}`); @@ -105,15 +138,30 @@ export function runReport(opts: ReportOptions): ReportResult { }); const written: string[] = []; - if (format === 'md' || format === 'both') { - const mdPath = join(runDir, 'report.md'); - writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8'); - written.push(mdPath); - } - if (format === 'json' || format === 'both') { - const jsonPath = join(runDir, 'report.json'); - writeFileSync(jsonPath, `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`, 'utf8'); - written.push(jsonPath); + // Writes can fail (read-only FS, disk full, permission). Return a + // structured error rather than letting the exception escape into the + // CLI's top-level unhandled-error path so callers get a clean message + // plus an exit code derived from the structured result. + try { + if (format === 'md' || format === 'both') { + const mdPath = join(runDir, 'report.md'); + writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8'); + written.push(mdPath); + } + if (format === 'json' || format === 'both') { + const jsonPath = join(runDir, 'report.json'); + writeFileSync( + jsonPath, + `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`, + 'utf8', + ); + written.push(jsonPath); + } + } catch (e) { + return { + ok: false, + error: `report: cannot write report file: ${e instanceof Error ? e.message : String(e)}`, + }; } return { @@ -132,11 +180,27 @@ function latestRunId(runsRoot: string): string | undefined { } catch { return undefined; } - const dirs = entries - .filter((name) => safeIsDir(join(runsRoot, name))) - // Run IDs include an ISO-like prefix; lexical sort orders by recency. - .sort(); - return dirs.at(-1); + // Pick by file mtime, not lexical name. `aqa run --seed` produces + // hash-based IDs (`run-`) that don't sort by recency. mtime is + // monotonic enough for "the most recent run" semantics — lexical name + // is only used as a deterministic tie-breaker (same-millisecond runs). + const candidates: Array<{ name: string; mtimeMs: number }> = []; + for (const name of entries) { + const dir = join(runsRoot, name); + try { + const st = statSync(dir); + if (st.isDirectory()) { + candidates.push({ name, mtimeMs: st.mtimeMs }); + } + } catch { + // ignore entries we can't stat (broken symlinks, races) + } + } + candidates.sort((a, b) => { + if (b.mtimeMs !== a.mtimeMs) return b.mtimeMs - a.mtimeMs; + return b.name.localeCompare(a.name); + }); + return candidates[0]?.name; } function safeIsDir(p: string): boolean { @@ -148,7 +212,9 @@ function safeIsDir(p: string): boolean { } function readJsonl(path: string): Array> { - if (!existsSync(path)) return []; + // Caller has already confirmed the file exists — this helper only + // returns [] for an empty file (legitimate "zero events" case), + // never for a missing one. const text = readFileSync(path, 'utf8'); const out: Array> = []; let lineNo = 0; @@ -191,7 +257,7 @@ function reconstructRun(input: ReconstructInput): Run.Run { const scenariosRun = readPayloadNumber(finished, 'scenarios_run') ?? 0; const totalsFindings = readPayloadNumber(finished, 'findings') ?? findingsCount; - const state: Run.Run['state'] = finished ? 'succeeded' : 'running'; + const state: Run.Run['state'] = deriveState(finished, scenariosRun); const run: Run.Run = { schema_version: '1', @@ -223,6 +289,31 @@ function reconstructRun(input: ReconstructInput): Run.Run { return run; } +function deriveState( + finished: Record | undefined, + scenariosRun: number, +): Run.Run['state'] { + // `runRun` writes `run_finished` on success AND on most failure paths + // (pack errors, scenario errors, missing scenarios, unsafe paths, runtime + // errors, zero scenarios). Treat any non-zero error counter — or a run + // that completed zero scenarios — as `failed` so the report doesn't + // mislabel broken runs as successes. + if (!finished) return 'running'; + const errorKeys = [ + 'pack_errors', + 'scenario_errors', + 'missing_scenarios', + 'unsafe_paths', + 'runtime_errors', + ] as const; + for (const k of errorKeys) { + const v = readPayloadNumber(finished, k); + if (typeof v === 'number' && v > 0) return 'failed'; + } + if (scenariosRun === 0) return 'failed'; + return 'succeeded'; +} + function pickEvent( events: ReadonlyArray>, kind: string, diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts index 7065410..535d360 100644 --- a/packages/kit/test/report-cmd.test.ts +++ b/packages/kit/test/report-cmd.test.ts @@ -17,10 +17,16 @@ import { describe, it } from 'node:test'; import { runReport } from '../dist/commands/report.js'; const RUN_ID = '20260520-000000-runabcdef'; -const ALT_RUN_ID = '20260520-010000-runzzzzzz'; const STARTED_AT = '2026-05-20T00:00:00.000Z'; const FINISHED_AT = '2026-05-20T00:00:30.000Z'; +// Slight sleep used to put a real mtime delta between consecutive run dirs +// in the "latest run" test. Some filesystems otherwise group rapid mkdirs +// under the same mtimeMs and force the tie-breaker into action. +function sleep(ms: number): Promise { + return new Promise((r) => setTimeout(r, ms)); +} + function makeTempRoot(): string { return mkdtempSync(join(tmpdir(), 'aqa-report-')); } @@ -83,13 +89,13 @@ function writeEvents( ); } -function writeFindings(runDir: string, count: number): void { +function writeFindings(runDir: string, count: number, runId: string = RUN_ID): void { const lines: string[] = []; for (let i = 0; i < count; i++) { const finding = { schema_version: '1', id: `AQA-2026-${String(i + 1).padStart(4, '0')}`, - run_id: RUN_ID, + run_id: runId, risk_id: 'r-example', scenario_id: 'scn-example-demo', title: `Synthetic finding ${i + 1}`, @@ -150,25 +156,38 @@ describe('aqa report — happy path', () => { assert.equal(json.summary.severities.low, 1); }); - it('defaults to the latest run when --run-id is omitted (lexical sort)', () => { + it('defaults to the latest run by file mtime, not lexical name (Copilot iter 1 P2)', async () => { + // Critical correctness: `aqa run --seed` produces hash-based IDs + // (run-) that do NOT sort by recency. Picking by mtime keeps + // "latest" honest in mixed-naming directories. Here we intentionally + // create the lexically-EARLIER name LAST so a name-based sort would + // pick the wrong dir. const root = makeTempRoot(); - // Older run first; alt run lexically later so it should be chosen. - const olderDir = makeRunDir(root, RUN_ID); - writeEvents(olderDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 }); - writeFindings(olderDir, 1); - const newerDir = makeRunDir(root, ALT_RUN_ID); + const earlierName = 'run-aaaa-but-newer'; // lexically earlier + const olderName = 'run-zzzz-but-older'; // lexically later + const olderDir = makeRunDir(root, olderName); + writeEvents(olderDir, { + runId: olderName, + profile: 'smoke', + project: 'demo', + findingsCount: 1, + }); + writeFindings(olderDir, 1, olderName); + await sleep(20); + const newerDir = makeRunDir(root, earlierName); writeEvents(newerDir, { - runId: ALT_RUN_ID, + runId: earlierName, profile: 'release-gate', project: 'demo', findingsCount: 3, }); - writeFindings(newerDir, 3); + writeFindings(newerDir, 3, earlierName); const result = runReport({ root }); assert.equal(result.ok, true); if (!result.ok) return; - assert.equal(result.runId, ALT_RUN_ID); + // mtime-newer dir wins even though its name sorts EARLIER lexically. + assert.equal(result.runId, earlierName); assert.equal(result.findingsCount, 3); }); @@ -252,6 +271,181 @@ describe('aqa report — error cases', () => { assert.match(result.error, /cannot read events\.jsonl/); }); + it('returns error when events.jsonl is missing (Copilot iter 1 P1)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + // findings.jsonl present, events.jsonl missing + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /events\.jsonl is missing/); + }); + + it('returns error when findings.jsonl is missing (Copilot iter 1 P1)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + // findings.jsonl intentionally not created + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /findings\.jsonl is missing/); + }); + + it('rejects a --run-id that would escape .aqa/runs via traversal (Copilot iter 1)', () => { + const root = makeTempRoot(); + // .aqa/runs has to exist or we hit the prior guard first + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root, runId: '../../etc/passwd' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /invalid run id/); + }); + + it('rejects a --run-id containing characters outside [a-z0-9-]', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root, runId: 'NOT_A_SLUG' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /invalid run id/); + }); +}); + +describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => { + it('marks state=failed when run_finished payload has pack_errors > 0', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + // Custom events with non-zero pack_errors + const events = [ + { + schema_version: '1', + seq: 0, + prev_hash: null, + hash: '0'.repeat(64), + ts: STARTED_AT, + run_id: RUN_ID, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: 'smoke', project: 'demo' }, + }, + { + schema_version: '1', + seq: 1, + prev_hash: '0'.repeat(64), + hash: '1'.repeat(64), + ts: FINISHED_AT, + run_id: RUN_ID, + kind: 'run_finished', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { + scenarios_run: 0, + findings: 0, + pack_errors: 1, + scenario_errors: 0, + missing_scenarios: 0, + unsafe_paths: 0, + runtime_errors: 0, + }, + }, + ]; + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); + writeFindings(runDir, 0); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + run: { state: string }; + }; + assert.equal(json.run.state, 'failed'); + }); + + it('marks state=failed when scenarios_run is 0 even with no error counters', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + // writeEvents above sets scenarios_run: 2 — override with a custom file + const events = [ + { + schema_version: '1', + seq: 0, + prev_hash: null, + hash: '0'.repeat(64), + ts: STARTED_AT, + run_id: RUN_ID, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: 'smoke', project: 'demo' }, + }, + { + schema_version: '1', + seq: 1, + prev_hash: '0'.repeat(64), + hash: '1'.repeat(64), + ts: FINISHED_AT, + run_id: RUN_ID, + kind: 'run_finished', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { + scenarios_run: 0, + findings: 0, + pack_errors: 0, + scenario_errors: 0, + missing_scenarios: 0, + unsafe_paths: 0, + runtime_errors: 0, + }, + }, + ]; + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); + writeFindings(runDir, 0); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + run: { state: string }; + }; + assert.equal(json.run.state, 'failed'); + }); + + it('marks state=running when no run_finished event is present', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + const events = [ + { + schema_version: '1', + seq: 0, + prev_hash: null, + hash: '0'.repeat(64), + ts: STARTED_AT, + run_id: RUN_ID, + kind: 'run_started', + actor: { type: 'orchestrator', id: 'aqa-cli' }, + payload: { profile: 'smoke', project: 'demo' }, + }, + ]; + writeFileSync( + join(runDir, 'events.jsonl'), + `${events.map((e) => JSON.stringify(e)).join('\n')}\n`, + 'utf8', + ); + writeFindings(runDir, 0); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, true); + const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as { + run: { state: string; finished_at?: string }; + }; + assert.equal(json.run.state, 'running'); + assert.equal(json.run.finished_at, undefined); + }); + it('returns error on schema-invalid finding', () => { const root = makeTempRoot(); const runDir = makeRunDir(root, RUN_ID); From aad5295774ddd37b324e038770eecec1fd2a80b1 Mon Sep 17 00:00:00 2001 From: "lorenzo.padovani@padosoft.com" Date: Thu, 21 May 2026 00:17:21 +0200 Subject: [PATCH 3/4] fix(v1.9): address Copilot iter 2 on report PR #53 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4 actionable comments, all addressed: 1. RUN_ID_RE diverged from real LongSlug. Previous /^[a-z0-9-]{1,80}$/ capped at 80 (vs schema's 256) and allowed leading/trailing dashes plus `--` runs that real LongSlug rejects — so a malformed --run-id could slip past this guard and fail later at Finding.parse(). Now uses the canonical SlugPattern /^[a-z0-9](?:-?[a-z0-9])*$/ + a separate 256-char length check so error messages distinguish "bad characters" from "too long". Added regression tests for both. 2. safeIsDir() followed symlinks via statSync. An attacker (or a prior run) leaving a symlink under .aqa/runs/ pointing outside the project would let report.md/report.json land anywhere the link pointed. Now explicit lstatSync().isSymbolicLink() check refuses symlinked run dirs. Added a regression test (skipped on Windows when symlink perms aren't available). 3. readJsonl() silently dropped any non-empty line that parsed as valid JSON but wasn't a plain object (null, [], "x", 42). For events.jsonl that turned corruption into a seemingly-successful report. Now strict: throws "expected a JSON object, got " with explicit null branch (typeof null === 'object' in JS so the naive type check needed a guard). Added 2 regression tests (null + array). 4. Misleading comment claimed admin UI keys on run.id and not config_hash. Admin actually renders config_hash in replay copy, so the all-zeros placeholder will be visible. Replaced with an honest "treat as not-computed sentinel" note so future maintainers don't try to fix a bug that isn't there. 22 tests now pass (was 17 → +5 new regression tests). Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/kit/src/commands/report.ts | 66 ++++++++++++++++++----- packages/kit/test/report-cmd.test.ts | 78 +++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 13 deletions(-) diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts index 91473b3..ca3e536 100644 --- a/packages/kit/src/commands/report.ts +++ b/packages/kit/src/commands/report.ts @@ -14,18 +14,22 @@ * findings, replay artifacts, plus the rendered report). */ -import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs'; +import { existsSync, lstatSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs'; import { join } from 'node:path'; import { renderJson, renderMarkdown } from '@aqa/reporter'; import { Finding, type Run } from '@aqa/schemas'; export type ReportFormat = 'md' | 'json' | 'both'; -// LongSlug (per @aqa/schemas) cap. Reject anything that would let `runId` -// escape `.aqa/runs/` via traversal (`../..`, absolute paths, backslashes). -// The schema's own LongSlug also enforces ^[a-z0-9-]+$ — we mirror that -// here rather than import to keep the report command self-contained. -const RUN_ID_RE = /^[a-z0-9-]{1,80}$/; +// Mirrors @aqa/schemas LongSlug exactly: SlugPattern + max length 256. +// Previous v1.9 iteration used a looser /^[a-z0-9-]{1,80}$/ that both +// capped at 80 instead of 256 AND allowed leading/trailing dashes plus +// `--` runs — so a malformed --run-id could pass this guard yet later +// fail at a Finding.parse() site. Pattern + length checked separately +// so the error message can distinguish "bad characters" from "too long" +// (helpful for hash-suffixed run ids on the edge of the cap). +const RUN_ID_RE = /^[a-z0-9](?:-?[a-z0-9])*$/; +const RUN_ID_MAX_LEN = 256; export interface ReportOptions { root: string; @@ -77,6 +81,12 @@ export function runReport(opts: ReportOptions): ReportResult { // IDs as LongSlug; mirror that constraint at the CLI boundary so a // typo or malicious input can't drive writes anywhere outside the // intended directory. + if (runIdResolved.length > RUN_ID_MAX_LEN) { + return { + ok: false, + error: `report: invalid run id — exceeds ${RUN_ID_MAX_LEN}-char LongSlug cap (got ${runIdResolved.length})`, + }; + } if (!RUN_ID_RE.test(runIdResolved)) { return { ok: false, @@ -87,6 +97,24 @@ export function runReport(opts: ReportOptions): ReportResult { if (!safeIsDir(runDir)) { return { ok: false, error: `report: run directory not found: ${runDir}` }; } + // Refuse symlinked run dirs. A previous run (or an attacker with FS + // write under .aqa/runs/) could leave a symlink pointing outside the + // project; `report.md` / `report.json` writes would then land + // wherever the link points. lstatSync (not statSync) so the test + // doesn't transparently follow the link. + try { + if (lstatSync(runDir).isSymbolicLink()) { + return { + ok: false, + error: `report: refusing to write into symlinked run directory ${runDir}`, + }; + } + } catch (e) { + return { + ok: false, + error: `report: cannot stat run directory ${runDir}: ${e instanceof Error ? e.message : String(e)}`, + }; + } const eventsPath = join(runDir, 'events.jsonl'); // Missing artifacts → fail-fast. A silent empty report on a corrupted @@ -215,6 +243,10 @@ function readJsonl(path: string): Array> { // Caller has already confirmed the file exists — this helper only // returns [] for an empty file (legitimate "zero events" case), // never for a missing one. + // Strict: a non-empty line that parses as valid JSON but isn't a + // plain object (e.g. `null`, `[]`, `"x"`, `42`) is rejected. Silently + // dropping such lines would turn a corrupted file into a seemingly + // successful report. const text = readFileSync(path, 'utf8'); const out: Array> = []; let lineNo = 0; @@ -222,14 +254,18 @@ function readJsonl(path: string): Array> { lineNo += 1; const line = raw.trim(); if (!line) continue; + let parsed: unknown; try { - const obj = JSON.parse(line) as unknown; - if (obj && typeof obj === 'object' && !Array.isArray(obj)) { - out.push(obj as Record); - } + parsed = JSON.parse(line); } catch (e) { throw new Error(`${path} line ${lineNo}: ${e instanceof Error ? e.message : String(e)}`); } + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + // typeof null === 'object', so explicit null branch. + const got = parsed === null ? 'null' : Array.isArray(parsed) ? 'array' : typeof parsed; + throw new Error(`${path} line ${lineNo}: expected a JSON object, got ${got}`); + } + out.push(parsed as Record); } return out; } @@ -272,8 +308,14 @@ function reconstructRun(input: ReconstructInput): Run.Run { profile, execution_mode: 'orchestrator', packs: [], - // Deterministic dummy: this report path doesn't recompute the config - // hash from disk. The admin UI keys on `run.id`, not `config_hash`. + // Synthetic placeholder: this report path doesn't recompute the + // config hash from disk (the canonical hash is computed by the + // runner at run-time). The admin UI displays config_hash in + // replay copy, so an all-zeros value will be visible — users + // viewing a CLI-rendered report.json should treat this hash as + // a "not computed" sentinel, not a real digest. Encoded as 64 + // zeros (a valid Sha256 by shape) so the JSON still passes + // Run.parse(). config_hash: '0'.repeat(64), }, totals: { diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts index 535d360..307f97e 100644 --- a/packages/kit/test/report-cmd.test.ts +++ b/packages/kit/test/report-cmd.test.ts @@ -10,7 +10,14 @@ import assert from 'node:assert/strict'; import { createHash } from 'node:crypto'; -import { existsSync, mkdirSync, mkdtempSync, readFileSync, writeFileSync } from 'node:fs'; +import { + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + symlinkSync, + writeFileSync, +} from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { describe, it } from 'node:test'; @@ -311,6 +318,75 @@ describe('aqa report — error cases', () => { if (result.ok) return; assert.match(result.error, /invalid run id/); }); + + it('rejects a --run-id with leading/trailing dashes (LongSlug parity — Copilot iter 2)', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + for (const bad of ['-leading', 'trailing-', 'double--dash']) { + const result = runReport({ root, runId: bad }); + assert.equal(result.ok, false, `${bad} must be rejected`); + if (result.ok) continue; + assert.match(result.error, /invalid run id/); + } + }); + + it('rejects a --run-id longer than 256 chars (LongSlug cap — Copilot iter 2)', () => { + const root = makeTempRoot(); + mkdirSync(join(root, '.aqa', 'runs'), { recursive: true }); + const result = runReport({ root, runId: 'a'.repeat(257) }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /exceeds 256-char/); + }); + + it('refuses a run dir that is actually a symlink (Copilot iter 2)', () => { + const root = makeTempRoot(); + const runsRoot = join(root, '.aqa', 'runs'); + mkdirSync(runsRoot, { recursive: true }); + const realDir = makeRunDir(root, 'real-target'); + writeEvents(realDir, { + runId: 'real-target', + profile: 'smoke', + project: 'demo', + findingsCount: 0, + }); + writeFindings(realDir, 0, 'real-target'); + const linkPath = join(runsRoot, 'sneaky-link'); + try { + symlinkSync(realDir, linkPath, 'dir'); + } catch { + // Windows without dev-mode / non-admin can't create symlinks — + // skip this guard by returning instead of asserting; the + // production behaviour is exercised on Linux CI anyway. + return; + } + const result = runReport({ root, runId: 'sneaky-link' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /symlinked run directory/); + }); + + it('rejects a JSONL line that is valid JSON but not a plain object (Copilot iter 2)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeFileSync(join(runDir, 'events.jsonl'), 'null\n', 'utf8'); + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /expected a JSON object, got null/); + }); + + it('rejects a JSONL line that parses as a JSON array (Copilot iter 2)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeFileSync(join(runDir, 'events.jsonl'), '[1, 2, 3]\n', 'utf8'); + writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8'); + const result = runReport({ root, runId: RUN_ID }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /expected a JSON object, got array/); + }); }); describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => { From 2a3857f192f64bda7448f9d5764185da698efed3 Mon Sep 17 00:00:00 2001 From: "lorenzo.padovani@padosoft.com" Date: Thu, 21 May 2026 01:32:10 +0200 Subject: [PATCH 4/4] fix(v1.9): address Copilot iter 3 on report PR #53 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 3 actionable comments, all addressed: 1. latestRunId considered any directory entry, including symlinks and non-run subdirs. A `.aqa/runs/readme-stash/` with the newest mtime would shadow real runs. Now filters: name matches RUN_ID_RE + not-a-symlink (lstat) + contains events.jsonl. Added a regression test that pollutes `.aqa/runs/` with a non-run subdir and asserts the latest REAL run wins. 2. Per-file symlink protection. The earlier symlink guard only checked the run directory itself. A pre-existing `report.md` or `report.json` symlink would let writeFileSync follow the link and redirect the write outside the project. Now lstat-checks each target file before writing; refuses if it's a symlink. Added a regression test that creates a symlink to an outside file, attempts the report, and asserts both (a) refusal and (b) attacker-controlled file remains byte-identical. 3. reconstructRun could in principle produce Run-schema-incompatible output if the audit chain itself is malformed. Now validates the reconstructed object via Run.Run.safeParse before handing it to the renderers — surfaces a structured error instead of writing a report.json that the admin UI would silently reject. Also flipped the import of Run from type-only to value (needed for safeParse). 24 tests now pass (was 22 → +2 regression tests). Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/kit/src/commands/report.ts | 52 +++++++++++++++++++++++++--- packages/kit/test/report-cmd.test.ts | 52 ++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 5 deletions(-) diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts index ca3e536..1a036c6 100644 --- a/packages/kit/src/commands/report.ts +++ b/packages/kit/src/commands/report.ts @@ -17,7 +17,7 @@ import { existsSync, lstatSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs'; import { join } from 'node:path'; import { renderJson, renderMarkdown } from '@aqa/reporter'; -import { Finding, type Run } from '@aqa/schemas'; +import { Finding, Run } from '@aqa/schemas'; export type ReportFormat = 'md' | 'json' | 'both'; @@ -158,26 +158,56 @@ export function runReport(opts: ReportOptions): ReportResult { }; } - const run = reconstructRun({ + const runDraft = reconstructRun({ runId: runIdResolved, runDir, events, findingsCount: findings.length, }); + // Validate the reconstructed Run against the canonical schema before + // handing it to the renderers. reconstructRun could in theory produce + // a Run.parse-incompatible shape (e.g. terminal state with missing + // finished_at if the audit chain itself is malformed) — surfacing that + // as a structured error keeps `report.json` consumers safe instead of + // shipping a JSON the admin UI silently rejects later. + const runParsed = Run.Run.safeParse(runDraft); + if (!runParsed.success) { + return { + ok: false, + error: `report: reconstructed run failed schema validation (audit chain is malformed): ${runParsed.error.message.split('\n')[0]}`, + }; + } + const run = runParsed.data; const written: string[] = []; // Writes can fail (read-only FS, disk full, permission). Return a // structured error rather than letting the exception escape into the // CLI's top-level unhandled-error path so callers get a clean message // plus an exit code derived from the structured result. + // Per-file symlink check: even with a non-symlinked run dir, a + // pre-existing `report.md`/`report.json` symlink would be followed + // by writeFileSync and let an attacker (or a prior run) redirect the + // writes outside the project. lstat each target before writing. try { if (format === 'md' || format === 'both') { const mdPath = join(runDir, 'report.md'); + if (existsSync(mdPath) && lstatSync(mdPath).isSymbolicLink()) { + return { + ok: false, + error: `report: refusing to overwrite symlinked report file ${mdPath}`, + }; + } writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8'); written.push(mdPath); } if (format === 'json' || format === 'both') { const jsonPath = join(runDir, 'report.json'); + if (existsSync(jsonPath) && lstatSync(jsonPath).isSymbolicLink()) { + return { + ok: false, + error: `report: refusing to overwrite symlinked report file ${jsonPath}`, + }; + } writeFileSync( jsonPath, `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`, @@ -212,14 +242,26 @@ function latestRunId(runsRoot: string): string | undefined { // hash-based IDs (`run-`) that don't sort by recency. mtime is // monotonic enough for "the most recent run" semantics — lexical name // is only used as a deterministic tie-breaker (same-millisecond runs). + // + // Filter: only consider directories that look like actual runs + // (presence of events.jsonl, the canonical run-start marker). Without + // this, an unrelated subdirectory under `.aqa/runs/` — or a symlink + // whose target's mtime is newer than any real run — would be selected + // by mtime and either fail with a confusing error or accidentally + // generate a report for the wrong directory. Also reject symlinks at + // the dir-entry level for the same reason as the symlink check in + // runReport: writes into a symlinked dir leak outside the project. const candidates: Array<{ name: string; mtimeMs: number }> = []; for (const name of entries) { + if (!RUN_ID_RE.test(name) || name.length > RUN_ID_MAX_LEN) continue; const dir = join(runsRoot, name); try { + const lst = lstatSync(dir); + if (lst.isSymbolicLink()) continue; + if (!lst.isDirectory()) continue; + if (!existsSync(join(dir, 'events.jsonl'))) continue; const st = statSync(dir); - if (st.isDirectory()) { - candidates.push({ name, mtimeMs: st.mtimeMs }); - } + candidates.push({ name, mtimeMs: st.mtimeMs }); } catch { // ignore entries we can't stat (broken symlinks, races) } diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts index 307f97e..1a4444b 100644 --- a/packages/kit/test/report-cmd.test.ts +++ b/packages/kit/test/report-cmd.test.ts @@ -387,6 +387,58 @@ describe('aqa report — error cases', () => { if (result.ok) return; assert.match(result.error, /expected a JSON object, got array/); }); + + it('refuses to overwrite a symlinked report.md (Copilot iter 3)', () => { + const root = makeTempRoot(); + const runDir = makeRunDir(root, RUN_ID); + writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 }); + writeFindings(runDir, 0); + const outside = join(makeTempRoot(), 'evil-report.md'); + writeFileSync(outside, '# attacker controlled\n', 'utf8'); + try { + symlinkSync(outside, join(runDir, 'report.md'), 'file'); + } catch { + return; // Windows non-admin can't create symlinks; skip. + } + const result = runReport({ root, runId: RUN_ID, format: 'md' }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /refusing to overwrite symlinked report file/); + // Crucial: the attacker-controlled file MUST be untouched. + assert.equal(readFileSync(outside, 'utf8'), '# attacker controlled\n'); + }); + + it('latestRunId only considers dirs with events.jsonl (Copilot iter 3)', async () => { + const root = makeTempRoot(); + // Two real run dirs (older + newer) + a non-run subdir that's newer + // than both but lacks events.jsonl. The non-run dir must NOT win. + const olderDir = makeRunDir(root, 'run-aaaa'); + writeEvents(olderDir, { + runId: 'run-aaaa', + profile: 'smoke', + project: 'demo', + findingsCount: 0, + }); + writeFindings(olderDir, 0, 'run-aaaa'); + await sleep(20); + const newerDir = makeRunDir(root, 'run-bbbb'); + writeEvents(newerDir, { + runId: 'run-bbbb', + profile: 'smoke', + project: 'demo', + findingsCount: 0, + }); + writeFindings(newerDir, 0, 'run-bbbb'); + await sleep(20); + // Pollute .aqa/runs with a NEWER non-run directory. + mkdirSync(join(root, '.aqa', 'runs', 'readme-stash'), { recursive: true }); + + const result = runReport({ root }); + assert.equal(result.ok, true); + if (!result.ok) return; + // Must be the most-recent REAL run, not the polluting subdirectory. + assert.equal(result.runId, 'run-bbbb'); + }); }); describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => {