From 7d579777facf97cfb7ff37e60b278615a00a2ab2 Mon Sep 17 00:00:00 2001
From: "lorenzo.padovani@padosoft.com" <lorenzo.padovani@padosoft.com>
Date: Wed, 20 May 2026 23:14:18 +0200
Subject: [PATCH 1/4] feat(v1.9): add `aqa report` CLI verb (sub-task 2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cables `@aqa/reporter` into a real CLI verb. README quick-start has
referenced `aqa report` since v0.0.1 but the verb itself was never wired
— `bunx aqa report` errored at parse time.

CLI surface:
  aqa report [--run-id <id>] [--format md|json|both]

Behaviour:
- `--run-id` omitted → defaults to the latest run directory (lexical sort
  on run-id, which has an ISO-like prefix so newest wins)
- reads `.aqa/runs/<id>/events.jsonl` + `findings.jsonl`
- reconstructs a schema-conformant Run object from the first
  `run_started` and last `run_finished` events (no new sidecar files —
  reports stay replayable from the audit trail alone)
- writes `report.md` (renderMarkdown) + `report.json` (renderJson) into
  the same run directory; the JSON shape is the stable one the admin UI
  already consumes
- structured errors for: missing runs dir, missing run-id, malformed
  JSONL, schema-invalid findings
- `--format md|json` opt-out for each artifact

Tests: 10 new node:test cases in
packages/kit/test/report-cmd.test.ts cover happy path (5: full render,
latest-default, md-only, json-only, zero-findings) and error cases (5:
missing dir, missing run-id, empty dir, malformed JSONL, schema-invalid
finding). Synthetic run dirs match @aqa/schemas Run/Finding exactly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 bun.lock                             |   1 +
 packages/kit/package.json            |   3 +-
 packages/kit/src/cli/aqa.ts          |  60 +++++-
 packages/kit/src/commands/report.ts  | 268 +++++++++++++++++++++++++++
 packages/kit/test/report-cmd.test.ts | 265 ++++++++++++++++++++++++++
 5 files changed, 588 insertions(+), 9 deletions(-)
 create mode 100644 packages/kit/src/commands/report.ts
 create mode 100644 packages/kit/test/report-cmd.test.ts
diff --git a/bun.lock b/bun.lock
index 16bba94..a0e788a 100644
--- a/bun.lock
+++ b/bun.lock
@@ -90,6 +90,7 @@
       },
       "dependencies": {
         "@aqa/pack-loader": "workspace:*",
+        "@aqa/reporter": "workspace:*",
         "@aqa/runner": "workspace:*",
         "@aqa/schemas": "workspace:*",
         "kleur": "^4.1.5",
diff --git a/packages/kit/package.json b/packages/kit/package.json
index ebbdc1e..424f3d5 100644
--- a/packages/kit/package.json
+++ b/packages/kit/package.json
@@ -20,11 +20,12 @@
     "build": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs",
     "typecheck": "tsc -p tsconfig.json --noEmit",
     "pretest": "tsc -p tsconfig.json && node scripts/bundle-packs.mjs",
-    "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts",
+    "test": "node --experimental-strip-types --no-warnings=ExperimentalWarning --test test/cli.test.ts test/profiler.test.ts test/run-cmd.test.ts test/pack-new.test.ts test/report-cmd.test.ts",
     "clean": "node --input-type=module -e \"import { rmSync } from 'node:fs'; for (const p of ['dist','.tsbuildinfo']) { try { rmSync(p, { recursive: true, force: true }); } catch {} }\""
   },
   "dependencies": {
     "@aqa/pack-loader": "workspace:*",
+    "@aqa/reporter": "workspace:*",
     "@aqa/runner": "workspace:*",
     "@aqa/schemas": "workspace:*",
     "kleur": "^4.1.5",
diff --git a/packages/kit/src/cli/aqa.ts b/packages/kit/src/cli/aqa.ts
index 2e3f3ef..946bd27 100644
--- a/packages/kit/src/cli/aqa.ts
+++ b/packages/kit/src/cli/aqa.ts
@@ -3,6 +3,7 @@ import { bold, cyan, dim, green, red, yellow } from 'kleur/colors';
 import { type CheckStatus, runDoctor } from '../commands/doctor.js';
 import { runInit } from '../commands/init.js';
 import { runPackNew } from '../commands/pack-new.js';
+import { runReport } from '../commands/report.js';
 import { runRun } from '../commands/run.js';
 import { runValidate } from '../commands/validate.js';
 
@@ -22,7 +23,16 @@ interface ParsedArgs {
   values: Map<string, string>;
 }
 
-const VALUE_FLAGS = new Set(['profile', 'seed', 'sut-type', 'description', 'author', 'license']);
+const VALUE_FLAGS = new Set([
+  'profile',
+  'seed',
+  'sut-type',
+  'description',
+  'author',
+  'license',
+  'run-id',
+  'format',
+]);
 
 function parseArgs(argv: string[]): ParsedArgs {
   const out: ParsedArgs = { command: null, positionals: [], flags: new Set(), values: new Map() };
@@ -77,19 +87,22 @@ ${bold('Usage')}
   aqa <command> [options]
 
 ${bold('Commands')}
-  init [name]            Scaffold .aqa/{project,risk-map,profiles}.yaml + testing.md
-  doctor                 Report kit health (runtime, .aqa, agent docs, validation)
-  validate               Validate .aqa/* against @aqa/schemas
-  run [--profile <p>]    Execute scenarios for the given profile; write events + findings
-  pack new <slug>        Scaffold a new pack at <cwd>/packs/<slug>/ (see the pack authoring guide:
-                         https://github.com/padosoft/agentic-qa-kit/blob/main/docs/PACK-AUTHORING.md
-                         — this path is only present in the source repo, not in the npm tarball)
+  init [name]                       Scaffold .aqa/{project,risk-map,profiles}.yaml + testing.md
+  doctor                            Report kit health (runtime, .aqa, agent docs, validation)
+  validate                          Validate .aqa/* against @aqa/schemas
+  run [--profile <p>]               Execute scenarios for the given profile; write events + findings
+  report [--run-id <id>]            Render the latest (or specified) run as report.md + report.json
+  pack new <slug>                   Scaffold a new pack at <cwd>/packs/<slug>/ (see the pack authoring
+                                    guide: https://github.com/padosoft/agentic-qa-kit/blob/main/docs/PACK-AUTHORING.md
+                                    — this path is only present in the source repo, not in the npm tarball)
 
 ${bold('Common options')}
   --force                (init / pack new) overwrite existing files/directory
   --dry-run              (init) don't write to disk; print what would happen
   --profile <name>       (run) profile key from .aqa/profiles.yaml
   --seed <string>        (run) deterministic run_id seed — useful for replay
+  --run-id <id>          (report) target a specific run; default = latest
+  --format <fmt>         (report) md | json | both (default: both)
   --sut-type <type>      (pack new) api | web | cli | lib | agent | pipeline
   --description <text>   (pack new) one-line summary written into the manifest
   --author <name>        (pack new) manifest author field
@@ -199,6 +212,37 @@ async function main(): Promise<number> {
       }
       return 0;
     }
+    case 'report': {
+      printHeader('report');
+      if (args.flags.has('run-id') && !args.values.has('run-id')) {
+        console.error(red('aqa report: --run-id requires a value'));
+        return 1;
+      }
+      if (args.flags.has('format') && !args.values.has('format')) {
+        console.error(red('aqa report: --format requires a value'));
+        return 1;
+      }
+      const reportOpts: Parameters<typeof runReport>[0] = { root: cwd };
+      if (args.values.has('run-id')) reportOpts.runId = args.values.get('run-id') ?? '';
+      if (args.values.has('format')) {
+        const fmt = args.values.get('format') ?? '';
+        if (fmt !== 'md' && fmt !== 'json' && fmt !== 'both') {
+          console.error(red(`aqa report: --format must be md | json | both, got "${fmt}"`));
+          return 1;
+        }
+        reportOpts.format = fmt;
+      }
+      const result = runReport(reportOpts);
+      if (!result.ok) {
+        console.error(red(`  ✗ ${result.error}`));
+        return 1;
+      }
+      console.info(`  ${green('✓')} ${bold(result.runId)}`);
+      console.info(`    ${dim('runDir:    ')}${result.runDir}`);
+      console.info(`    ${dim('findings:  ')}${result.findingsCount}`);
+      for (const f of result.files) console.info(`    ${green('+')} ${f}`);
+      return 0;
+    }
     case 'pack': {
       // Subcommand router for `aqa pack <subcommand>`.
       const sub = args.positionals[0];
diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts
new file mode 100644
index 0000000..f0d8f3c
--- /dev/null
+++ b/packages/kit/src/commands/report.ts
@@ -0,0 +1,268 @@
+/**
+ * `aqa report` — renders the on-disk run artifacts (`events.jsonl` +
+ * `findings.jsonl`) into a Markdown summary and a stable JSON view for
+ * downstream dashboards.
+ *
+ * Loose contract (intentional, see also `runRun`):
+ *  - `aqa run` writes events + findings, NOT a `run.json` (the canonical
+ *    Run shape is reconstructed from the audit chain). `aqa report` does
+ *    that reconstruction here from the first `run_started` and last
+ *    `run_finished` events so reports remain replayable from the audit
+ *    trail alone, without coupling reporter output to a new sidecar file.
+ *  - Reports are written into the same run directory so a junior can hand
+ *    a single `runDir` to a teammate and get the whole story (events,
+ *    findings, replay artifacts, plus the rendered report).
+ */
+
+import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { renderJson, renderMarkdown } from '@aqa/reporter';
+import { Finding, type Run } from '@aqa/schemas';
+
+export type ReportFormat = 'md' | 'json' | 'both';
+
+export interface ReportOptions {
+  root: string;
+  /** Run id (== directory name under `.aqa/runs/`). Omit to use the latest run. */
+  runId?: string;
+  /** Default 'both'. Controls which artifacts are written. */
+  format?: ReportFormat;
+}
+
+export interface ReportOk {
+  ok: true;
+  runId: string;
+  runDir: string;
+  files: string[];
+  findingsCount: number;
+}
+
+export interface ReportErr {
+  ok: false;
+  error: string;
+}
+
+export type ReportResult = ReportOk | ReportErr;
+
+export function runReport(opts: ReportOptions): ReportResult {
+  const format: ReportFormat = opts.format ?? 'both';
+  if (format !== 'md' && format !== 'json' && format !== 'both') {
+    return {
+      ok: false,
+      error: `report: --format must be md | json | both, got "${String(format)}"`,
+    };
+  }
+  const runsRoot = join(opts.root, '.aqa', 'runs');
+  if (!existsSync(runsRoot)) {
+    return {
+      ok: false,
+      error: `report: no runs directory at ${runsRoot} — run \`aqa run --profile smoke\` first`,
+    };
+  }
+  const runIdResolved = opts.runId ?? latestRunId(runsRoot);
+  if (!runIdResolved) {
+    return {
+      ok: false,
+      error: `report: no runs found under ${runsRoot} — run \`aqa run --profile smoke\` first`,
+    };
+  }
+  const runDir = join(runsRoot, runIdResolved);
+  if (!safeIsDir(runDir)) {
+    return { ok: false, error: `report: run directory not found: ${runDir}` };
+  }
+
+  let events: ReadonlyArray<Record<string, unknown>>;
+  try {
+    events = readJsonl(join(runDir, 'events.jsonl'));
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot read events.jsonl: ${e instanceof Error ? e.message : String(e)}`,
+    };
+  }
+
+  let findings: readonly Finding.Finding[];
+  try {
+    findings = readJsonl(join(runDir, 'findings.jsonl')).map((raw, idx) => {
+      const parsed = Finding.Finding.safeParse(raw);
+      if (!parsed.success) {
+        throw new Error(`findings.jsonl line ${idx + 1}: ${parsed.error.message}`);
+      }
+      return parsed.data;
+    });
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot read findings.jsonl: ${e instanceof Error ? e.message : String(e)}`,
+    };
+  }
+
+  const run = reconstructRun({
+    runId: runIdResolved,
+    runDir,
+    events,
+    findingsCount: findings.length,
+  });
+
+  const written: string[] = [];
+  if (format === 'md' || format === 'both') {
+    const mdPath = join(runDir, 'report.md');
+    writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8');
+    written.push(mdPath);
+  }
+  if (format === 'json' || format === 'both') {
+    const jsonPath = join(runDir, 'report.json');
+    writeFileSync(jsonPath, `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`, 'utf8');
+    written.push(jsonPath);
+  }
+
+  return {
+    ok: true,
+    runId: runIdResolved,
+    runDir,
+    files: written,
+    findingsCount: findings.length,
+  };
+}
+
+function latestRunId(runsRoot: string): string | undefined {
+  let entries: string[];
+  try {
+    entries = readdirSync(runsRoot);
+  } catch {
+    return undefined;
+  }
+  const dirs = entries
+    .filter((name) => safeIsDir(join(runsRoot, name)))
+    // Run IDs include an ISO-like prefix; lexical sort orders by recency.
+    .sort();
+  return dirs.at(-1);
+}
+
+function safeIsDir(p: string): boolean {
+  try {
+    return statSync(p).isDirectory();
+  } catch {
+    return false;
+  }
+}
+
+function readJsonl(path: string): Array<Record<string, unknown>> {
+  if (!existsSync(path)) return [];
+  const text = readFileSync(path, 'utf8');
+  const out: Array<Record<string, unknown>> = [];
+  let lineNo = 0;
+  for (const raw of text.split('\n')) {
+    lineNo += 1;
+    const line = raw.trim();
+    if (!line) continue;
+    try {
+      const obj = JSON.parse(line) as unknown;
+      if (obj && typeof obj === 'object' && !Array.isArray(obj)) {
+        out.push(obj as Record<string, unknown>);
+      }
+    } catch (e) {
+      throw new Error(`${path} line ${lineNo}: ${e instanceof Error ? e.message : String(e)}`);
+    }
+  }
+  return out;
+}
+
+interface ReconstructInput {
+  runId: string;
+  runDir: string;
+  events: ReadonlyArray<Record<string, unknown>>;
+  findingsCount: number;
+}
+
+function reconstructRun(input: ReconstructInput): Run.Run {
+  // Best-effort reconstruction from the audit chain — see file header. The
+  // reporter only reads a small subset of Run fields, but we still build the
+  // full schema-conformant object so the JSON report stays valid for the
+  // admin UI and any external dashboard.
+  const { runId, runDir, events, findingsCount } = input;
+  const started = pickEvent(events, 'run_started');
+  const finished = pickEvent(events, 'run_finished');
+
+  const startedAt = readString(started, 'ts') ?? new Date(0).toISOString();
+  const finishedAt = readString(finished, 'ts');
+  const profile = readPayloadString(started, 'profile') ?? 'unknown';
+  const project = readPayloadString(started, 'project') ?? 'unknown';
+  const scenariosRun = readPayloadNumber(finished, 'scenarios_run') ?? 0;
+  const totalsFindings = readPayloadNumber(finished, 'findings') ?? findingsCount;
+
+  const state: Run.Run['state'] = finished ? 'succeeded' : 'running';
+
+  const run: Run.Run = {
+    schema_version: '1',
+    id: runId,
+    started_at: startedAt,
+    ...(finishedAt ? { finished_at: finishedAt } : {}),
+    state,
+    project,
+    profile,
+    execution_mode: 'orchestrator',
+    config_snapshot: {
+      profile,
+      execution_mode: 'orchestrator',
+      packs: [],
+      // Deterministic dummy: this report path doesn't recompute the config
+      // hash from disk. The admin UI keys on `run.id`, not `config_hash`.
+      config_hash: '0'.repeat(64),
+    },
+    totals: {
+      scenarios: scenariosRun,
+      findings: totalsFindings,
+      probes: 0,
+      llm_tokens_in: 0,
+      llm_tokens_out: 0,
+      llm_cost_usd: 0,
+    },
+    artifact_dir: runDir,
+  };
+  return run;
+}
+
+function pickEvent(
+  events: ReadonlyArray<Record<string, unknown>>,
+  kind: string,
+): Record<string, unknown> | undefined {
+  // run_started: first match; run_finished: last (a re-run within the same
+  // dir would have appended a new finalization line). Both kinds appear at
+  // most once today, but the lookup stays defensive.
+  if (kind === 'run_finished') {
+    for (let i = events.length - 1; i >= 0; i--) {
+      if (events[i]?.kind === kind) return events[i];
+    }
+    return undefined;
+  }
+  for (const e of events) {
+    if (e.kind === kind) return e;
+  }
+  return undefined;
+}
+
+function readString(obj: Record<string, unknown> | undefined, key: string): string | undefined {
+  const v = obj?.[key];
+  return typeof v === 'string' ? v : undefined;
+}
+
+function readPayloadString(
+  obj: Record<string, unknown> | undefined,
+  key: string,
+): string | undefined {
+  const payload = obj?.payload;
+  if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined;
+  const v = (payload as Record<string, unknown>)[key];
+  return typeof v === 'string' ? v : undefined;
+}
+
+function readPayloadNumber(
+  obj: Record<string, unknown> | undefined,
+  key: string,
+): number | undefined {
+  const payload = obj?.payload;
+  if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return undefined;
+  const v = (payload as Record<string, unknown>)[key];
+  return typeof v === 'number' && Number.isFinite(v) ? v : undefined;
+}
diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts
new file mode 100644
index 0000000..7065410
--- /dev/null
+++ b/packages/kit/test/report-cmd.test.ts
@@ -0,0 +1,265 @@
+/**
+ * v1.9 — `aqa report` CLI verb.
+ *
+ * Renders the on-disk run artifacts into report.md + report.json. These
+ * tests build a synthetic run directory (events.jsonl + findings.jsonl)
+ * because runRun is async, network-touched in some configurations, and
+ * over-coupled for a focused reporter test. The synthetic dir matches the
+ * canonical schema shapes from @aqa/schemas Run/Finding.
+ */
+
+import assert from 'node:assert/strict';
+import { createHash } from 'node:crypto';
+import { existsSync, mkdirSync, mkdtempSync, readFileSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { describe, it } from 'node:test';
+import { runReport } from '../dist/commands/report.js';
+
+const RUN_ID = '20260520-000000-runabcdef';
+const ALT_RUN_ID = '20260520-010000-runzzzzzz';
+const STARTED_AT = '2026-05-20T00:00:00.000Z';
+const FINISHED_AT = '2026-05-20T00:00:30.000Z';
+
+function makeTempRoot(): string {
+  return mkdtempSync(join(tmpdir(), 'aqa-report-'));
+}
+
+function makeRunDir(root: string, runId: string): string {
+  const dir = join(root, '.aqa', 'runs', runId);
+  mkdirSync(dir, { recursive: true });
+  return dir;
+}
+
+function sha256Hex(s: string): string {
+  return createHash('sha256').update(s, 'utf8').digest('hex');
+}
+
+/** Build minimal valid hash-chained events.jsonl. */
+function writeEvents(
+  runDir: string,
+  opts: { runId: string; profile: string; project: string; findingsCount: number },
+): void {
+  const events: Array<Record<string, unknown>> = [];
+  let prev: string | null = null;
+  function append(partial: Omit<Record<string, unknown>, 'seq' | 'prev_hash' | 'hash'>): void {
+    const seq = events.length;
+    // Hash recomputation here is a stub — the writer's exact canonicalization
+    // is exercised in @aqa/runner / @aqa/compliance tests. `aqa report`
+    // doesn't validate the chain (it just parses fields), so any
+    // deterministic stub hash keeps schema.parse happy.
+    const body = JSON.stringify({ ...partial, seq });
+    const hash = sha256Hex((prev ?? '') + body);
+    const evt = { schema_version: '1', seq, prev_hash: prev, hash, ...partial };
+    events.push(evt);
+    prev = hash;
+  }
+  append({
+    ts: STARTED_AT,
+    run_id: opts.runId,
+    kind: 'run_started',
+    actor: { type: 'orchestrator', id: 'aqa-cli' },
+    payload: { profile: opts.profile, project: opts.project },
+  });
+  append({
+    ts: FINISHED_AT,
+    run_id: opts.runId,
+    kind: 'run_finished',
+    actor: { type: 'orchestrator', id: 'aqa-cli' },
+    payload: {
+      scenarios_run: 2,
+      findings: opts.findingsCount,
+      pack_errors: 0,
+      scenario_errors: 0,
+      missing_scenarios: 0,
+      unsafe_paths: 0,
+      runtime_errors: 0,
+    },
+  });
+  writeFileSync(
+    join(runDir, 'events.jsonl'),
+    `${events.map((e) => JSON.stringify(e)).join('\n')}\n`,
+    'utf8',
+  );
+}
+
+function writeFindings(runDir: string, count: number): void {
+  const lines: string[] = [];
+  for (let i = 0; i < count; i++) {
+    const finding = {
+      schema_version: '1',
+      id: `AQA-2026-${String(i + 1).padStart(4, '0')}`,
+      run_id: RUN_ID,
+      risk_id: 'r-example',
+      scenario_id: 'scn-example-demo',
+      title: `Synthetic finding ${i + 1}`,
+      severity: i === 0 ? 'critical' : 'low',
+      status: 'draft',
+      summary: 'reporter smoke test',
+      evidence: [],
+      execution_mode: 'orchestrator',
+      verification_floor: 'scenario_level',
+      confidence: 0.5,
+      discovered_at: STARTED_AT,
+    };
+    lines.push(JSON.stringify(finding));
+  }
+  writeFileSync(
+    join(runDir, 'findings.jsonl'),
+    `${lines.join('\n')}${lines.length ? '\n' : ''}`,
+    'utf8',
+  );
+}
+
+describe('aqa report — happy path', () => {
+  it('renders both report.md and report.json for the explicit run-id', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 2 });
+    writeFindings(runDir, 2);
+
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, true, `expected ok, got ${JSON.stringify(result)}`);
+    if (!result.ok) return;
+
+    assert.equal(result.runId, RUN_ID);
+    assert.equal(result.findingsCount, 2);
+    assert.ok(existsSync(join(runDir, 'report.md')), 'report.md must exist');
+    assert.ok(existsSync(join(runDir, 'report.json')), 'report.json must exist');
+
+    const md = readFileSync(join(runDir, 'report.md'), 'utf8');
+    assert.match(md, /# AQA report/);
+    assert.match(md, /demo/);
+    assert.match(md, /AQA-2026-0001/);
+    assert.match(md, /Synthetic finding 1/);
+
+    const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as {
+      schema_version: string;
+      run: { id: string; project: string; profile: string; state: string };
+      findings: Array<{ id: string }>;
+      summary: { total: number; severities: Record<string, number> };
+    };
+    assert.equal(json.schema_version, '1');
+    assert.equal(json.run.id, RUN_ID);
+    assert.equal(json.run.project, 'demo');
+    assert.equal(json.run.profile, 'smoke');
+    assert.equal(json.run.state, 'succeeded');
+    assert.equal(json.findings.length, 2);
+    assert.equal(json.summary.total, 2);
+    assert.equal(json.summary.severities.critical, 1);
+    assert.equal(json.summary.severities.low, 1);
+  });
+
+  it('defaults to the latest run when --run-id is omitted (lexical sort)', () => {
+    const root = makeTempRoot();
+    // Older run first; alt run lexically later so it should be chosen.
+    const olderDir = makeRunDir(root, RUN_ID);
+    writeEvents(olderDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 });
+    writeFindings(olderDir, 1);
+    const newerDir = makeRunDir(root, ALT_RUN_ID);
+    writeEvents(newerDir, {
+      runId: ALT_RUN_ID,
+      profile: 'release-gate',
+      project: 'demo',
+      findingsCount: 3,
+    });
+    writeFindings(newerDir, 3);
+
+    const result = runReport({ root });
+    assert.equal(result.ok, true);
+    if (!result.ok) return;
+    assert.equal(result.runId, ALT_RUN_ID);
+    assert.equal(result.findingsCount, 3);
+  });
+
+  it('emits only report.md when format=md', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+    writeFindings(runDir, 0);
+
+    const result = runReport({ root, runId: RUN_ID, format: 'md' });
+    assert.equal(result.ok, true);
+    if (!result.ok) return;
+    assert.ok(existsSync(join(runDir, 'report.md')));
+    assert.ok(!existsSync(join(runDir, 'report.json')), 'report.json must not exist for format=md');
+    assert.equal(result.files.length, 1);
+  });
+
+  it('emits only report.json when format=json', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+    writeFindings(runDir, 0);
+
+    const result = runReport({ root, runId: RUN_ID, format: 'json' });
+    assert.equal(result.ok, true);
+    if (!result.ok) return;
+    assert.ok(!existsSync(join(runDir, 'report.md')));
+    assert.ok(existsSync(join(runDir, 'report.json')));
+  });
+
+  it('handles zero findings without crashing', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+    writeFindings(runDir, 0);
+
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, true);
+    if (!result.ok) return;
+    assert.equal(result.findingsCount, 0);
+    const md = readFileSync(join(runDir, 'report.md'), 'utf8');
+    assert.match(md, /No findings/);
+  });
+});
+
+describe('aqa report — error cases', () => {
+  it('returns error when .aqa/runs does not exist', () => {
+    const root = makeTempRoot();
+    const result = runReport({ root });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /no runs directory/);
+  });
+
+  it('returns error when --run-id points to a missing dir', () => {
+    const root = makeTempRoot();
+    makeRunDir(root, RUN_ID);
+    const result = runReport({ root, runId: 'does-not-exist' });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /run directory not found/);
+  });
+
+  it('returns error when .aqa/runs exists but is empty', () => {
+    const root = makeTempRoot();
+    mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+    const result = runReport({ root });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /no runs found/);
+  });
+
+  it('returns error on malformed JSONL line in events.jsonl', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeFileSync(join(runDir, 'events.jsonl'), '{not json\n', 'utf8');
+    writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8');
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /cannot read events\.jsonl/);
+  });
+
+  it('returns error on schema-invalid finding', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 });
+    writeFileSync(join(runDir, 'findings.jsonl'), `${JSON.stringify({ id: 'broken' })}\n`, 'utf8');
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /cannot read findings\.jsonl/);
+  });
+});

From c310d975368100c7aea60e4e847b4b80ab4fe8a4 Mon Sep 17 00:00:00 2001
From: "lorenzo.padovani@padosoft.com" <lorenzo.padovani@padosoft.com>
Date: Wed, 20 May 2026 23:29:07 +0200
Subject: [PATCH 2/4] fix(v1.9): address Copilot iter 1 on report PR #53
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

5 actionable comments, all addressed:

1. P1 — state was unconditionally 'succeeded' when run_finished was
   present. But runRun writes run_finished even on most failure paths
   (pack_errors / scenario_errors / missing_scenarios / unsafe_paths /
   runtime_errors / zero scenarios). Now derives state by inspecting the
   payload counters: any non-zero error counter OR zero scenarios → 'failed';
   no run_finished → 'running'; else 'succeeded'. Added 3 regression tests.

2. P2 — latest-run selection used lexical sort on directory name, which
   silently picks the wrong run when `aqa run --seed` produces hash-based
   IDs (run-<sha>). Now uses statSync.mtimeMs (most recent wins) with the
   directory name as a deterministic tie-breaker. Added a regression test
   that intentionally writes the lexically-earlier name LAST.

3. P1 — readJsonl returned [] for missing files. That made `aqa report`
   succeed on a corrupted/incomplete run dir and emit a synthetic empty
   report. Now both events.jsonl and findings.jsonl are required up-front
   with explicit existence checks; readJsonl assumes existence. Added 2
   regression tests.

4. Path-traversal — --run-id was forwarded to join() without validation,
   so a value like '../..' could read/write files outside .aqa/runs.
   Added a LongSlug-shaped regex (/^[a-z0-9-]{1,80}$/) at the CLI
   boundary that rejects traversal and any non-slug input. Added 2
   regression tests.

5. writeFileSync wasn't wrapped — a read-only FS or disk-full would
   bubble through the CLI's top-level unhandled-error path instead of
   returning a structured ReportErr. Wrapped both writes in a single
   try/catch.

Test fixture bug also fixed: writeFindings now accepts an optional runId
parameter (default RUN_ID) so findings written for ALT_RUN_ID-style
directories carry the matching run_id, not a stale constant.

17 tests now pass (10 original + 7 new regression tests).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/kit/src/commands/report.ts  | 127 +++++++++++++---
 packages/kit/test/report-cmd.test.ts | 218 +++++++++++++++++++++++++--
 2 files changed, 315 insertions(+), 30 deletions(-)

diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts
index f0d8f3c..91473b3 100644
--- a/packages/kit/src/commands/report.ts
+++ b/packages/kit/src/commands/report.ts
@@ -21,6 +21,12 @@ import { Finding, type Run } from '@aqa/schemas';
 
 export type ReportFormat = 'md' | 'json' | 'both';
 
+// LongSlug (per @aqa/schemas) cap. Reject anything that would let `runId`
+// escape `.aqa/runs/` via traversal (`../..`, absolute paths, backslashes).
+// The schema's own LongSlug also enforces ^[a-z0-9-]+$ — we mirror that
+// here rather than import to keep the report command self-contained.
+const RUN_ID_RE = /^[a-z0-9-]{1,80}$/;
+
 export interface ReportOptions {
   root: string;
   /** Run id (== directory name under `.aqa/runs/`). Omit to use the latest run. */
@@ -66,14 +72,34 @@ export function runReport(opts: ReportOptions): ReportResult {
       error: `report: no runs found under ${runsRoot} — run \`aqa run --profile smoke\` first`,
     };
   }
+  // Defense-in-depth: a `--run-id` of `../../../etc/passwd` (or similar)
+  // would otherwise resolve outside `.aqa/runs/`. The schema treats run
+  // IDs as LongSlug; mirror that constraint at the CLI boundary so a
+  // typo or malicious input can't drive writes anywhere outside the
+  // intended directory.
+  if (!RUN_ID_RE.test(runIdResolved)) {
+    return {
+      ok: false,
+      error: `report: invalid run id "${runIdResolved}" — must match ${RUN_ID_RE.source}`,
+    };
+  }
   const runDir = join(runsRoot, runIdResolved);
   if (!safeIsDir(runDir)) {
     return { ok: false, error: `report: run directory not found: ${runDir}` };
   }
 
+  const eventsPath = join(runDir, 'events.jsonl');
+  // Missing artifacts → fail-fast. A silent empty report on a corrupted
+  // run dir hides the real problem (the run never wrote its audit trail).
+  if (!existsSync(eventsPath)) {
+    return {
+      ok: false,
+      error: `report: events.jsonl is missing at ${eventsPath} — run is incomplete or corrupted`,
+    };
+  }
   let events: ReadonlyArray<Record<string, unknown>>;
   try {
-    events = readJsonl(join(runDir, 'events.jsonl'));
+    events = readJsonl(eventsPath);
   } catch (e) {
     return {
       ok: false,
@@ -81,9 +107,16 @@ export function runReport(opts: ReportOptions): ReportResult {
     };
   }
 
+  const findingsPath = join(runDir, 'findings.jsonl');
+  if (!existsSync(findingsPath)) {
+    return {
+      ok: false,
+      error: `report: findings.jsonl is missing at ${findingsPath} — run is incomplete or corrupted`,
+    };
+  }
   let findings: readonly Finding.Finding[];
   try {
-    findings = readJsonl(join(runDir, 'findings.jsonl')).map((raw, idx) => {
+    findings = readJsonl(findingsPath).map((raw, idx) => {
       const parsed = Finding.Finding.safeParse(raw);
       if (!parsed.success) {
         throw new Error(`findings.jsonl line ${idx + 1}: ${parsed.error.message}`);
@@ -105,15 +138,30 @@ export function runReport(opts: ReportOptions): ReportResult {
   });
 
   const written: string[] = [];
-  if (format === 'md' || format === 'both') {
-    const mdPath = join(runDir, 'report.md');
-    writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8');
-    written.push(mdPath);
-  }
-  if (format === 'json' || format === 'both') {
-    const jsonPath = join(runDir, 'report.json');
-    writeFileSync(jsonPath, `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`, 'utf8');
-    written.push(jsonPath);
+  // Writes can fail (read-only FS, disk full, permission). Return a
+  // structured error rather than letting the exception escape into the
+  // CLI's top-level unhandled-error path so callers get a clean message
+  // plus an exit code derived from the structured result.
+  try {
+    if (format === 'md' || format === 'both') {
+      const mdPath = join(runDir, 'report.md');
+      writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8');
+      written.push(mdPath);
+    }
+    if (format === 'json' || format === 'both') {
+      const jsonPath = join(runDir, 'report.json');
+      writeFileSync(
+        jsonPath,
+        `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`,
+        'utf8',
+      );
+      written.push(jsonPath);
+    }
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot write report file: ${e instanceof Error ? e.message : String(e)}`,
+    };
   }
 
   return {
@@ -132,11 +180,27 @@ function latestRunId(runsRoot: string): string | undefined {
   } catch {
     return undefined;
   }
-  const dirs = entries
-    .filter((name) => safeIsDir(join(runsRoot, name)))
-    // Run IDs include an ISO-like prefix; lexical sort orders by recency.
-    .sort();
-  return dirs.at(-1);
+  // Pick by file mtime, not lexical name. `aqa run --seed` produces
+  // hash-based IDs (`run-<sha>`) that don't sort by recency. mtime is
+  // monotonic enough for "the most recent run" semantics — lexical name
+  // is only used as a deterministic tie-breaker (same-millisecond runs).
+  const candidates: Array<{ name: string; mtimeMs: number }> = [];
+  for (const name of entries) {
+    const dir = join(runsRoot, name);
+    try {
+      const st = statSync(dir);
+      if (st.isDirectory()) {
+        candidates.push({ name, mtimeMs: st.mtimeMs });
+      }
+    } catch {
+      // ignore entries we can't stat (broken symlinks, races)
+    }
+  }
+  candidates.sort((a, b) => {
+    if (b.mtimeMs !== a.mtimeMs) return b.mtimeMs - a.mtimeMs;
+    return b.name.localeCompare(a.name);
+  });
+  return candidates[0]?.name;
 }
 
 function safeIsDir(p: string): boolean {
@@ -148,7 +212,9 @@ function safeIsDir(p: string): boolean {
 }
 
 function readJsonl(path: string): Array<Record<string, unknown>> {
-  if (!existsSync(path)) return [];
+  // Caller has already confirmed the file exists — this helper only
+  // returns [] for an empty file (legitimate "zero events" case),
+  // never for a missing one.
   const text = readFileSync(path, 'utf8');
   const out: Array<Record<string, unknown>> = [];
   let lineNo = 0;
@@ -191,7 +257,7 @@ function reconstructRun(input: ReconstructInput): Run.Run {
   const scenariosRun = readPayloadNumber(finished, 'scenarios_run') ?? 0;
   const totalsFindings = readPayloadNumber(finished, 'findings') ?? findingsCount;
 
-  const state: Run.Run['state'] = finished ? 'succeeded' : 'running';
+  const state: Run.Run['state'] = deriveState(finished, scenariosRun);
 
   const run: Run.Run = {
     schema_version: '1',
@@ -223,6 +289,31 @@ function reconstructRun(input: ReconstructInput): Run.Run {
   return run;
 }
 
+function deriveState(
+  finished: Record<string, unknown> | undefined,
+  scenariosRun: number,
+): Run.Run['state'] {
+  // `runRun` writes `run_finished` on success AND on most failure paths
+  // (pack errors, scenario errors, missing scenarios, unsafe paths, runtime
+  // errors, zero scenarios). Treat any non-zero error counter — or a run
+  // that completed zero scenarios — as `failed` so the report doesn't
+  // mislabel broken runs as successes.
+  if (!finished) return 'running';
+  const errorKeys = [
+    'pack_errors',
+    'scenario_errors',
+    'missing_scenarios',
+    'unsafe_paths',
+    'runtime_errors',
+  ] as const;
+  for (const k of errorKeys) {
+    const v = readPayloadNumber(finished, k);
+    if (typeof v === 'number' && v > 0) return 'failed';
+  }
+  if (scenariosRun === 0) return 'failed';
+  return 'succeeded';
+}
+
 function pickEvent(
   events: ReadonlyArray<Record<string, unknown>>,
   kind: string,
diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts
index 7065410..535d360 100644
--- a/packages/kit/test/report-cmd.test.ts
+++ b/packages/kit/test/report-cmd.test.ts
@@ -17,10 +17,16 @@ import { describe, it } from 'node:test';
 import { runReport } from '../dist/commands/report.js';
 
 const RUN_ID = '20260520-000000-runabcdef';
-const ALT_RUN_ID = '20260520-010000-runzzzzzz';
 const STARTED_AT = '2026-05-20T00:00:00.000Z';
 const FINISHED_AT = '2026-05-20T00:00:30.000Z';
 
+// Slight sleep used to put a real mtime delta between consecutive run dirs
+// in the "latest run" test. Some filesystems otherwise group rapid mkdirs
+// under the same mtimeMs and force the tie-breaker into action.
+function sleep(ms: number): Promise<void> {
+  return new Promise((r) => setTimeout(r, ms));
+}
+
 function makeTempRoot(): string {
   return mkdtempSync(join(tmpdir(), 'aqa-report-'));
 }
@@ -83,13 +89,13 @@ function writeEvents(
   );
 }
 
-function writeFindings(runDir: string, count: number): void {
+function writeFindings(runDir: string, count: number, runId: string = RUN_ID): void {
   const lines: string[] = [];
   for (let i = 0; i < count; i++) {
     const finding = {
       schema_version: '1',
       id: `AQA-2026-${String(i + 1).padStart(4, '0')}`,
-      run_id: RUN_ID,
+      run_id: runId,
       risk_id: 'r-example',
       scenario_id: 'scn-example-demo',
       title: `Synthetic finding ${i + 1}`,
@@ -150,25 +156,38 @@ describe('aqa report — happy path', () => {
     assert.equal(json.summary.severities.low, 1);
   });
 
-  it('defaults to the latest run when --run-id is omitted (lexical sort)', () => {
+  it('defaults to the latest run by file mtime, not lexical name (Copilot iter 1 P2)', async () => {
+    // Critical correctness: `aqa run --seed` produces hash-based IDs
+    // (run-<sha>) that do NOT sort by recency. Picking by mtime keeps
+    // "latest" honest in mixed-naming directories. Here we intentionally
+    // create the lexically-EARLIER name LAST so a name-based sort would
+    // pick the wrong dir.
     const root = makeTempRoot();
-    // Older run first; alt run lexically later so it should be chosen.
-    const olderDir = makeRunDir(root, RUN_ID);
-    writeEvents(olderDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 1 });
-    writeFindings(olderDir, 1);
-    const newerDir = makeRunDir(root, ALT_RUN_ID);
+    const earlierName = 'run-aaaa-but-newer'; // lexically earlier
+    const olderName = 'run-zzzz-but-older'; // lexically later
+    const olderDir = makeRunDir(root, olderName);
+    writeEvents(olderDir, {
+      runId: olderName,
+      profile: 'smoke',
+      project: 'demo',
+      findingsCount: 1,
+    });
+    writeFindings(olderDir, 1, olderName);
+    await sleep(20);
+    const newerDir = makeRunDir(root, earlierName);
     writeEvents(newerDir, {
-      runId: ALT_RUN_ID,
+      runId: earlierName,
       profile: 'release-gate',
       project: 'demo',
       findingsCount: 3,
     });
-    writeFindings(newerDir, 3);
+    writeFindings(newerDir, 3, earlierName);
 
     const result = runReport({ root });
     assert.equal(result.ok, true);
     if (!result.ok) return;
-    assert.equal(result.runId, ALT_RUN_ID);
+    // mtime-newer dir wins even though its name sorts EARLIER lexically.
+    assert.equal(result.runId, earlierName);
     assert.equal(result.findingsCount, 3);
   });
 
@@ -252,6 +271,181 @@ describe('aqa report — error cases', () => {
     assert.match(result.error, /cannot read events\.jsonl/);
   });
 
+  it('returns error when events.jsonl is missing (Copilot iter 1 P1)', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    // findings.jsonl present, events.jsonl missing
+    writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8');
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /events\.jsonl is missing/);
+  });
+
+  it('returns error when findings.jsonl is missing (Copilot iter 1 P1)', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+    // findings.jsonl intentionally not created
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /findings\.jsonl is missing/);
+  });
+
+  it('rejects a --run-id that would escape .aqa/runs via traversal (Copilot iter 1)', () => {
+    const root = makeTempRoot();
+    // .aqa/runs has to exist or we hit the prior guard first
+    mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+    const result = runReport({ root, runId: '../../etc/passwd' });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /invalid run id/);
+  });
+
+  it('rejects a --run-id containing characters outside [a-z0-9-]', () => {
+    const root = makeTempRoot();
+    mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+    const result = runReport({ root, runId: 'NOT_A_SLUG' });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /invalid run id/);
+  });
+});
+
+describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => {
+  it('marks state=failed when run_finished payload has pack_errors > 0', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    // Custom events with non-zero pack_errors
+    const events = [
+      {
+        schema_version: '1',
+        seq: 0,
+        prev_hash: null,
+        hash: '0'.repeat(64),
+        ts: STARTED_AT,
+        run_id: RUN_ID,
+        kind: 'run_started',
+        actor: { type: 'orchestrator', id: 'aqa-cli' },
+        payload: { profile: 'smoke', project: 'demo' },
+      },
+      {
+        schema_version: '1',
+        seq: 1,
+        prev_hash: '0'.repeat(64),
+        hash: '1'.repeat(64),
+        ts: FINISHED_AT,
+        run_id: RUN_ID,
+        kind: 'run_finished',
+        actor: { type: 'orchestrator', id: 'aqa-cli' },
+        payload: {
+          scenarios_run: 0,
+          findings: 0,
+          pack_errors: 1,
+          scenario_errors: 0,
+          missing_scenarios: 0,
+          unsafe_paths: 0,
+          runtime_errors: 0,
+        },
+      },
+    ];
+    writeFileSync(
+      join(runDir, 'events.jsonl'),
+      `${events.map((e) => JSON.stringify(e)).join('\n')}\n`,
+      'utf8',
+    );
+    writeFindings(runDir, 0);
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, true);
+    const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as {
+      run: { state: string };
+    };
+    assert.equal(json.run.state, 'failed');
+  });
+
+  it('marks state=failed when scenarios_run is 0 even with no error counters', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+    // writeEvents above sets scenarios_run: 2 — override with a custom file
+    const events = [
+      {
+        schema_version: '1',
+        seq: 0,
+        prev_hash: null,
+        hash: '0'.repeat(64),
+        ts: STARTED_AT,
+        run_id: RUN_ID,
+        kind: 'run_started',
+        actor: { type: 'orchestrator', id: 'aqa-cli' },
+        payload: { profile: 'smoke', project: 'demo' },
+      },
+      {
+        schema_version: '1',
+        seq: 1,
+        prev_hash: '0'.repeat(64),
+        hash: '1'.repeat(64),
+        ts: FINISHED_AT,
+        run_id: RUN_ID,
+        kind: 'run_finished',
+        actor: { type: 'orchestrator', id: 'aqa-cli' },
+        payload: {
+          scenarios_run: 0,
+          findings: 0,
+          pack_errors: 0,
+          scenario_errors: 0,
+          missing_scenarios: 0,
+          unsafe_paths: 0,
+          runtime_errors: 0,
+        },
+      },
+    ];
+    writeFileSync(
+      join(runDir, 'events.jsonl'),
+      `${events.map((e) => JSON.stringify(e)).join('\n')}\n`,
+      'utf8',
+    );
+    writeFindings(runDir, 0);
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, true);
+    const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as {
+      run: { state: string };
+    };
+    assert.equal(json.run.state, 'failed');
+  });
+
+  it('marks state=running when no run_finished event is present', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    const events = [
+      {
+        schema_version: '1',
+        seq: 0,
+        prev_hash: null,
+        hash: '0'.repeat(64),
+        ts: STARTED_AT,
+        run_id: RUN_ID,
+        kind: 'run_started',
+        actor: { type: 'orchestrator', id: 'aqa-cli' },
+        payload: { profile: 'smoke', project: 'demo' },
+      },
+    ];
+    writeFileSync(
+      join(runDir, 'events.jsonl'),
+      `${events.map((e) => JSON.stringify(e)).join('\n')}\n`,
+      'utf8',
+    );
+    writeFindings(runDir, 0);
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, true);
+    const json = JSON.parse(readFileSync(join(runDir, 'report.json'), 'utf8')) as {
+      run: { state: string; finished_at?: string };
+    };
+    assert.equal(json.run.state, 'running');
+    assert.equal(json.run.finished_at, undefined);
+  });
+
   it('returns error on schema-invalid finding', () => {
     const root = makeTempRoot();
     const runDir = makeRunDir(root, RUN_ID);

From aad5295774ddd37b324e038770eecec1fd2a80b1 Mon Sep 17 00:00:00 2001
From: "lorenzo.padovani@padosoft.com" <lorenzo.padovani@padosoft.com>
Date: Thu, 21 May 2026 00:17:21 +0200
Subject: [PATCH 3/4] fix(v1.9): address Copilot iter 2 on report PR #53
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

4 actionable comments, all addressed:

1. RUN_ID_RE diverged from real LongSlug. Previous /^[a-z0-9-]{1,80}$/
   capped at 80 (vs schema's 256) and allowed leading/trailing dashes
   plus `--` runs that real LongSlug rejects — so a malformed --run-id
   could slip past this guard and fail later at Finding.parse(). Now
   uses the canonical SlugPattern /^[a-z0-9](?:-?[a-z0-9])*$/ + a
   separate 256-char length check so error messages distinguish "bad
   characters" from "too long". Added regression tests for both.

2. safeIsDir() followed symlinks via statSync. An attacker (or a prior
   run) leaving a symlink under .aqa/runs/<runId> pointing outside the
   project would let report.md/report.json land anywhere the link
   pointed. Now explicit lstatSync().isSymbolicLink() check refuses
   symlinked run dirs. Added a regression test (skipped on Windows when
   symlink perms aren't available).

3. readJsonl() silently dropped any non-empty line that parsed as valid
   JSON but wasn't a plain object (null, [], "x", 42). For events.jsonl
   that turned corruption into a seemingly-successful report. Now
   strict: throws "expected a JSON object, got <type>" with explicit
   null branch (typeof null === 'object' in JS so the naive type check
   needed a guard). Added 2 regression tests (null + array).

4. Misleading comment claimed admin UI keys on run.id and not
   config_hash. Admin actually renders config_hash in replay copy, so
   the all-zeros placeholder will be visible. Replaced with an honest
   "treat as not-computed sentinel" note so future maintainers don't
   try to fix a bug that isn't there.

22 tests now pass (was 17 → +5 new regression tests).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/kit/src/commands/report.ts  | 66 ++++++++++++++++++-----
 packages/kit/test/report-cmd.test.ts | 78 +++++++++++++++++++++++++++-
 2 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts
index 91473b3..ca3e536 100644
--- a/packages/kit/src/commands/report.ts
+++ b/packages/kit/src/commands/report.ts
@@ -14,18 +14,22 @@
  *    findings, replay artifacts, plus the rendered report).
  */
 
-import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
+import { existsSync, lstatSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { renderJson, renderMarkdown } from '@aqa/reporter';
 import { Finding, type Run } from '@aqa/schemas';
 
 export type ReportFormat = 'md' | 'json' | 'both';
 
-// LongSlug (per @aqa/schemas) cap. Reject anything that would let `runId`
-// escape `.aqa/runs/` via traversal (`../..`, absolute paths, backslashes).
-// The schema's own LongSlug also enforces ^[a-z0-9-]+$ — we mirror that
-// here rather than import to keep the report command self-contained.
-const RUN_ID_RE = /^[a-z0-9-]{1,80}$/;
+// Mirrors @aqa/schemas LongSlug exactly: SlugPattern + max length 256.
+// Previous v1.9 iteration used a looser /^[a-z0-9-]{1,80}$/ that both
+// capped at 80 instead of 256 AND allowed leading/trailing dashes plus
+// `--` runs — so a malformed --run-id could pass this guard yet later
+// fail at a Finding.parse() site. Pattern + length checked separately
+// so the error message can distinguish "bad characters" from "too long"
+// (helpful for hash-suffixed run ids on the edge of the cap).
+const RUN_ID_RE = /^[a-z0-9](?:-?[a-z0-9])*$/;
+const RUN_ID_MAX_LEN = 256;
 
 export interface ReportOptions {
   root: string;
@@ -77,6 +81,12 @@ export function runReport(opts: ReportOptions): ReportResult {
   // IDs as LongSlug; mirror that constraint at the CLI boundary so a
   // typo or malicious input can't drive writes anywhere outside the
   // intended directory.
+  if (runIdResolved.length > RUN_ID_MAX_LEN) {
+    return {
+      ok: false,
+      error: `report: invalid run id — exceeds ${RUN_ID_MAX_LEN}-char LongSlug cap (got ${runIdResolved.length})`,
+    };
+  }
   if (!RUN_ID_RE.test(runIdResolved)) {
     return {
       ok: false,
@@ -87,6 +97,24 @@ export function runReport(opts: ReportOptions): ReportResult {
   if (!safeIsDir(runDir)) {
     return { ok: false, error: `report: run directory not found: ${runDir}` };
   }
+  // Refuse symlinked run dirs. A previous run (or an attacker with FS
+  // write under .aqa/runs/) could leave a symlink pointing outside the
+  // project; `report.md` / `report.json` writes would then land
+  // wherever the link points. lstatSync (not statSync) so the test
+  // doesn't transparently follow the link.
+  try {
+    if (lstatSync(runDir).isSymbolicLink()) {
+      return {
+        ok: false,
+        error: `report: refusing to write into symlinked run directory ${runDir}`,
+      };
+    }
+  } catch (e) {
+    return {
+      ok: false,
+      error: `report: cannot stat run directory ${runDir}: ${e instanceof Error ? e.message : String(e)}`,
+    };
+  }
 
   const eventsPath = join(runDir, 'events.jsonl');
   // Missing artifacts → fail-fast. A silent empty report on a corrupted
@@ -215,6 +243,10 @@ function readJsonl(path: string): Array<Record<string, unknown>> {
   // Caller has already confirmed the file exists — this helper only
   // returns [] for an empty file (legitimate "zero events" case),
   // never for a missing one.
+  // Strict: a non-empty line that parses as valid JSON but isn't a
+  // plain object (e.g. `null`, `[]`, `"x"`, `42`) is rejected. Silently
+  // dropping such lines would turn a corrupted file into a seemingly
+  // successful report.
   const text = readFileSync(path, 'utf8');
   const out: Array<Record<string, unknown>> = [];
   let lineNo = 0;
@@ -222,14 +254,18 @@ function readJsonl(path: string): Array<Record<string, unknown>> {
     lineNo += 1;
     const line = raw.trim();
     if (!line) continue;
+    let parsed: unknown;
     try {
-      const obj = JSON.parse(line) as unknown;
-      if (obj && typeof obj === 'object' && !Array.isArray(obj)) {
-        out.push(obj as Record<string, unknown>);
-      }
+      parsed = JSON.parse(line);
     } catch (e) {
       throw new Error(`${path} line ${lineNo}: ${e instanceof Error ? e.message : String(e)}`);
     }
+    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+      // typeof null === 'object', so explicit null branch.
+      const got = parsed === null ? 'null' : Array.isArray(parsed) ? 'array' : typeof parsed;
+      throw new Error(`${path} line ${lineNo}: expected a JSON object, got ${got}`);
+    }
+    out.push(parsed as Record<string, unknown>);
   }
   return out;
 }
@@ -272,8 +308,14 @@ function reconstructRun(input: ReconstructInput): Run.Run {
       profile,
       execution_mode: 'orchestrator',
       packs: [],
-      // Deterministic dummy: this report path doesn't recompute the config
-      // hash from disk. The admin UI keys on `run.id`, not `config_hash`.
+      // Synthetic placeholder: this report path doesn't recompute the
+      // config hash from disk (the canonical hash is computed by the
+      // runner at run-time). The admin UI displays config_hash in
+      // replay copy, so an all-zeros value will be visible — users
+      // viewing a CLI-rendered report.json should treat this hash as
+      // a "not computed" sentinel, not a real digest. Encoded as 64
+      // zeros (a valid Sha256 by shape) so the JSON still passes
+      // Run.parse().
       config_hash: '0'.repeat(64),
     },
     totals: {
diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts
index 535d360..307f97e 100644
--- a/packages/kit/test/report-cmd.test.ts
+++ b/packages/kit/test/report-cmd.test.ts
@@ -10,7 +10,14 @@
 
 import assert from 'node:assert/strict';
 import { createHash } from 'node:crypto';
-import { existsSync, mkdirSync, mkdtempSync, readFileSync, writeFileSync } from 'node:fs';
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  symlinkSync,
+  writeFileSync,
+} from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import { describe, it } from 'node:test';
@@ -311,6 +318,75 @@ describe('aqa report — error cases', () => {
     if (result.ok) return;
     assert.match(result.error, /invalid run id/);
   });
+
+  it('rejects a --run-id with leading/trailing dashes (LongSlug parity — Copilot iter 2)', () => {
+    const root = makeTempRoot();
+    mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+    for (const bad of ['-leading', 'trailing-', 'double--dash']) {
+      const result = runReport({ root, runId: bad });
+      assert.equal(result.ok, false, `${bad} must be rejected`);
+      if (result.ok) continue;
+      assert.match(result.error, /invalid run id/);
+    }
+  });
+
+  it('rejects a --run-id longer than 256 chars (LongSlug cap — Copilot iter 2)', () => {
+    const root = makeTempRoot();
+    mkdirSync(join(root, '.aqa', 'runs'), { recursive: true });
+    const result = runReport({ root, runId: 'a'.repeat(257) });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /exceeds 256-char/);
+  });
+
+  it('refuses a run dir that is actually a symlink (Copilot iter 2)', () => {
+    const root = makeTempRoot();
+    const runsRoot = join(root, '.aqa', 'runs');
+    mkdirSync(runsRoot, { recursive: true });
+    const realDir = makeRunDir(root, 'real-target');
+    writeEvents(realDir, {
+      runId: 'real-target',
+      profile: 'smoke',
+      project: 'demo',
+      findingsCount: 0,
+    });
+    writeFindings(realDir, 0, 'real-target');
+    const linkPath = join(runsRoot, 'sneaky-link');
+    try {
+      symlinkSync(realDir, linkPath, 'dir');
+    } catch {
+      // Windows without dev-mode / non-admin can't create symlinks —
+      // skip this guard by returning instead of asserting; the
+      // production behaviour is exercised on Linux CI anyway.
+      return;
+    }
+    const result = runReport({ root, runId: 'sneaky-link' });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /symlinked run directory/);
+  });
+
+  it('rejects a JSONL line that is valid JSON but not a plain object (Copilot iter 2)', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeFileSync(join(runDir, 'events.jsonl'), 'null\n', 'utf8');
+    writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8');
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /expected a JSON object, got null/);
+  });
+
+  it('rejects a JSONL line that parses as a JSON array (Copilot iter 2)', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeFileSync(join(runDir, 'events.jsonl'), '[1, 2, 3]\n', 'utf8');
+    writeFileSync(join(runDir, 'findings.jsonl'), '', 'utf8');
+    const result = runReport({ root, runId: RUN_ID });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /expected a JSON object, got array/);
+  });
 });
 
 describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => {

From 2a3857f192f64bda7448f9d5764185da698efed3 Mon Sep 17 00:00:00 2001
From: "lorenzo.padovani@padosoft.com" <lorenzo.padovani@padosoft.com>
Date: Thu, 21 May 2026 01:32:10 +0200
Subject: [PATCH 4/4] fix(v1.9): address Copilot iter 3 on report PR #53
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

3 actionable comments, all addressed:

1. latestRunId considered any directory entry, including symlinks and
   non-run subdirs. A `.aqa/runs/readme-stash/` with the newest mtime
   would shadow real runs. Now filters: name matches RUN_ID_RE +
   not-a-symlink (lstat) + contains events.jsonl. Added a regression
   test that pollutes `.aqa/runs/` with a non-run subdir and asserts
   the latest REAL run wins.

2. Per-file symlink protection. The earlier symlink guard only checked
   the run directory itself. A pre-existing `report.md` or `report.json`
   symlink would let writeFileSync follow the link and redirect the
   write outside the project. Now lstat-checks each target file before
   writing; refuses if it's a symlink. Added a regression test that
   creates a symlink to an outside file, attempts the report, and
   asserts both (a) refusal and (b) attacker-controlled file remains
   byte-identical.

3. reconstructRun could in principle produce Run-schema-incompatible
   output if the audit chain itself is malformed. Now validates the
   reconstructed object via Run.Run.safeParse before handing it to the
   renderers — surfaces a structured error instead of writing a
   report.json that the admin UI would silently reject. Also flipped
   the import of Run from type-only to value (needed for safeParse).

24 tests now pass (was 22 → +2 regression tests).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/kit/src/commands/report.ts  | 52 +++++++++++++++++++++++++---
 packages/kit/test/report-cmd.test.ts | 52 ++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/packages/kit/src/commands/report.ts b/packages/kit/src/commands/report.ts
index ca3e536..1a036c6 100644
--- a/packages/kit/src/commands/report.ts
+++ b/packages/kit/src/commands/report.ts
@@ -17,7 +17,7 @@
 import { existsSync, lstatSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { renderJson, renderMarkdown } from '@aqa/reporter';
-import { Finding, type Run } from '@aqa/schemas';
+import { Finding, Run } from '@aqa/schemas';
 
 export type ReportFormat = 'md' | 'json' | 'both';
 
@@ -158,26 +158,56 @@ export function runReport(opts: ReportOptions): ReportResult {
     };
   }
 
-  const run = reconstructRun({
+  const runDraft = reconstructRun({
     runId: runIdResolved,
     runDir,
     events,
     findingsCount: findings.length,
   });
+  // Validate the reconstructed Run against the canonical schema before
+  // handing it to the renderers. reconstructRun could in theory produce
+  // a Run.parse-incompatible shape (e.g. terminal state with missing
+  // finished_at if the audit chain itself is malformed) — surfacing that
+  // as a structured error keeps `report.json` consumers safe instead of
+  // shipping a JSON the admin UI silently rejects later.
+  const runParsed = Run.Run.safeParse(runDraft);
+  if (!runParsed.success) {
+    return {
+      ok: false,
+      error: `report: reconstructed run failed schema validation (audit chain is malformed): ${runParsed.error.message.split('\n')[0]}`,
+    };
+  }
+  const run = runParsed.data;
 
   const written: string[] = [];
   // Writes can fail (read-only FS, disk full, permission). Return a
   // structured error rather than letting the exception escape into the
   // CLI's top-level unhandled-error path so callers get a clean message
   // plus an exit code derived from the structured result.
+  // Per-file symlink check: even with a non-symlinked run dir, a
+  // pre-existing `report.md`/`report.json` symlink would be followed
+  // by writeFileSync and let an attacker (or a prior run) redirect the
+  // writes outside the project. lstat each target before writing.
   try {
     if (format === 'md' || format === 'both') {
       const mdPath = join(runDir, 'report.md');
+      if (existsSync(mdPath) && lstatSync(mdPath).isSymbolicLink()) {
+        return {
+          ok: false,
+          error: `report: refusing to overwrite symlinked report file ${mdPath}`,
+        };
+      }
       writeFileSync(mdPath, renderMarkdown({ run, findings }), 'utf8');
       written.push(mdPath);
     }
     if (format === 'json' || format === 'both') {
       const jsonPath = join(runDir, 'report.json');
+      if (existsSync(jsonPath) && lstatSync(jsonPath).isSymbolicLink()) {
+        return {
+          ok: false,
+          error: `report: refusing to overwrite symlinked report file ${jsonPath}`,
+        };
+      }
       writeFileSync(
         jsonPath,
         `${JSON.stringify(renderJson({ run, findings }), null, 2)}\n`,
@@ -212,14 +242,26 @@ function latestRunId(runsRoot: string): string | undefined {
   // hash-based IDs (`run-<sha>`) that don't sort by recency. mtime is
   // monotonic enough for "the most recent run" semantics — lexical name
   // is only used as a deterministic tie-breaker (same-millisecond runs).
+  //
+  // Filter: only consider directories that look like actual runs
+  // (presence of events.jsonl, the canonical run-start marker). Without
+  // this, an unrelated subdirectory under `.aqa/runs/` — or a symlink
+  // whose target's mtime is newer than any real run — would be selected
+  // by mtime and either fail with a confusing error or accidentally
+  // generate a report for the wrong directory. Also reject symlinks at
+  // the dir-entry level for the same reason as the symlink check in
+  // runReport: writes into a symlinked dir leak outside the project.
   const candidates: Array<{ name: string; mtimeMs: number }> = [];
   for (const name of entries) {
+    if (!RUN_ID_RE.test(name) || name.length > RUN_ID_MAX_LEN) continue;
     const dir = join(runsRoot, name);
     try {
+      const lst = lstatSync(dir);
+      if (lst.isSymbolicLink()) continue;
+      if (!lst.isDirectory()) continue;
+      if (!existsSync(join(dir, 'events.jsonl'))) continue;
       const st = statSync(dir);
-      if (st.isDirectory()) {
-        candidates.push({ name, mtimeMs: st.mtimeMs });
-      }
+      candidates.push({ name, mtimeMs: st.mtimeMs });
     } catch {
       // ignore entries we can't stat (broken symlinks, races)
     }
diff --git a/packages/kit/test/report-cmd.test.ts b/packages/kit/test/report-cmd.test.ts
index 307f97e..1a4444b 100644
--- a/packages/kit/test/report-cmd.test.ts
+++ b/packages/kit/test/report-cmd.test.ts
@@ -387,6 +387,58 @@ describe('aqa report — error cases', () => {
     if (result.ok) return;
     assert.match(result.error, /expected a JSON object, got array/);
   });
+
+  it('refuses to overwrite a symlinked report.md (Copilot iter 3)', () => {
+    const root = makeTempRoot();
+    const runDir = makeRunDir(root, RUN_ID);
+    writeEvents(runDir, { runId: RUN_ID, profile: 'smoke', project: 'demo', findingsCount: 0 });
+    writeFindings(runDir, 0);
+    const outside = join(makeTempRoot(), 'evil-report.md');
+    writeFileSync(outside, '# attacker controlled\n', 'utf8');
+    try {
+      symlinkSync(outside, join(runDir, 'report.md'), 'file');
+    } catch {
+      return; // Windows non-admin can't create symlinks; skip.
+    }
+    const result = runReport({ root, runId: RUN_ID, format: 'md' });
+    assert.equal(result.ok, false);
+    if (result.ok) return;
+    assert.match(result.error, /refusing to overwrite symlinked report file/);
+    // Crucial: the attacker-controlled file MUST be untouched.
+    assert.equal(readFileSync(outside, 'utf8'), '# attacker controlled\n');
+  });
+
+  it('latestRunId only considers dirs with events.jsonl (Copilot iter 3)', async () => {
+    const root = makeTempRoot();
+    // Two real run dirs (older + newer) + a non-run subdir that's newer
+    // than both but lacks events.jsonl. The non-run dir must NOT win.
+    const olderDir = makeRunDir(root, 'run-aaaa');
+    writeEvents(olderDir, {
+      runId: 'run-aaaa',
+      profile: 'smoke',
+      project: 'demo',
+      findingsCount: 0,
+    });
+    writeFindings(olderDir, 0, 'run-aaaa');
+    await sleep(20);
+    const newerDir = makeRunDir(root, 'run-bbbb');
+    writeEvents(newerDir, {
+      runId: 'run-bbbb',
+      profile: 'smoke',
+      project: 'demo',
+      findingsCount: 0,
+    });
+    writeFindings(newerDir, 0, 'run-bbbb');
+    await sleep(20);
+    // Pollute .aqa/runs with a NEWER non-run directory.
+    mkdirSync(join(root, '.aqa', 'runs', 'readme-stash'), { recursive: true });
+
+    const result = runReport({ root });
+    assert.equal(result.ok, true);
+    if (!result.ok) return;
+    // Must be the most-recent REAL run, not the polluting subdirectory.
+    assert.equal(result.runId, 'run-bbbb');
+  });
 });
 
 describe('aqa report — state reconstruction (Copilot iter 1 P1)', () => {