diff --git a/packages/cli/src/commands/analyze.test.ts b/packages/cli/src/commands/analyze.test.ts
index a4a9f330..2ede5161 100644
--- a/packages/cli/src/commands/analyze.test.ts
+++ b/packages/cli/src/commands/analyze.test.ts
@@ -14,15 +14,19 @@
import assert from "node:assert/strict";
import { spawn } from "node:child_process";
-import { mkdtemp, writeFile } from "node:fs/promises";
+import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test } from "node:test";
import { upsertRegistry } from "../registry.js";
import {
checkFastPath,
+ detectCoverageReport,
isWorkingTreeDirty,
+ resolveCoverageEnabled,
resolveMaxSummariesCap,
+ resolveSbomEnabled,
+ resolveScanEnabled,
resolveSummariesEnabled,
} from "./analyze.js";
@@ -148,37 +152,66 @@ test("resolveMaxSummariesCap: seed of 5 yields a cap of 0 under the 10% rule", a
});
// ---------------------------------------------------------------------------
-// resolveSummariesEnabled — env kill-switch + P04 default-on contract.
+// resolveSummariesEnabled — fast-default contract: LLM summaries are opt-in.
+// A bare `codehub analyze` skips the Bedrock summarize phase entirely, so a
+// fresh invocation never spends on LLM calls or waits on a Bedrock request.
// ---------------------------------------------------------------------------
-test("resolveSummariesEnabled: default-on when both env and flag are absent (P04)", () => {
- assert.equal(resolveSummariesEnabled(undefined, {}), true);
+test("resolveSummariesEnabled: default-off when both env and flag are absent", () => {
+ assert.equal(resolveSummariesEnabled(undefined, {}), false);
});
-test("resolveSummariesEnabled: explicit --summaries keeps it on", () => {
+test("resolveSummariesEnabled: explicit --summaries opts in", () => {
assert.equal(resolveSummariesEnabled(true, {}), true);
});
-test("resolveSummariesEnabled: explicit --no-summaries turns it off", () => {
+test("resolveSummariesEnabled: explicit --no-summaries stays off", () => {
assert.equal(resolveSummariesEnabled(false, {}), false);
});
+test("resolveSummariesEnabled: CODEHUB_BEDROCK_SUMMARIES=1 opts in (env-only)", () => {
+ // Operators can enable summaries for a whole CI job without editing every
+ // invocation. Only the literal "1" triggers — anything else is treated as
+ // absent, mirroring the kill-switch semantics below.
+ assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_SUMMARIES: "1" }), true);
+ assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_SUMMARIES: "0" }), false);
+ assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_SUMMARIES: "" }), false);
+});
+
test("resolveSummariesEnabled: CODEHUB_BEDROCK_DISABLED=1 kills the phase", () => {
assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_DISABLED: "1" }), false);
});
-test("resolveSummariesEnabled: env kill-switch wins over --summaries=true", () => {
+test("resolveSummariesEnabled: kill-switch wins over --summaries=true", () => {
// Operator passed --summaries explicitly but the env var forces off.
// Required so CI / restricted environments can lock out Bedrock without
// auditing every invocation site.
assert.equal(resolveSummariesEnabled(true, { CODEHUB_BEDROCK_DISABLED: "1" }), false);
});
-test("resolveSummariesEnabled: CODEHUB_BEDROCK_DISABLED=0 does not kill the phase", () => {
- // Only the literal "1" triggers the kill-switch — anything else is a
- // no-op. This keeps operator intent unambiguous.
- assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_DISABLED: "0" }), true);
- assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_DISABLED: "" }), true);
+test("resolveSummariesEnabled: kill-switch wins over CODEHUB_BEDROCK_SUMMARIES=1", () => {
+ // Both env vars set → disable wins. This lets a CI environment pin the
+ // opt-in globally while still allowing per-job kill-switch overrides.
+ assert.equal(
+ resolveSummariesEnabled(undefined, {
+ CODEHUB_BEDROCK_SUMMARIES: "1",
+ CODEHUB_BEDROCK_DISABLED: "1",
+ }),
+ false,
+ );
+});
+
+test("resolveSummariesEnabled: --no-summaries wins over CODEHUB_BEDROCK_SUMMARIES=1", () => {
+ // Explicit CLI false beats env opt-in. Matches how --no-flag usually
+ // wins against ambient config everywhere else in the CLI.
+ assert.equal(resolveSummariesEnabled(false, { CODEHUB_BEDROCK_SUMMARIES: "1" }), false);
+});
+
+test("resolveSummariesEnabled: CODEHUB_BEDROCK_DISABLED=0 does not enable the phase", () => {
+ // Only the literal "1" on the opt-in var flips this; anything else leaves
+ // summaries in their (fast, off) default.
+ assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_DISABLED: "0" }), false);
+ assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_DISABLED: "" }), false);
});
// ---------------------------------------------------------------------------
@@ -245,3 +278,118 @@ test("isWorkingTreeDirty: returns false when the git binary is unavailable", asy
else process.env["PATH"] = originalPath;
}
});
+
+// ---------------------------------------------------------------------------
+// resolveSbomEnabled — default on, --no-sbom opts out.
+// ---------------------------------------------------------------------------
+
+test("resolveSbomEnabled: default-on when flag is absent", () => {
+ assert.equal(resolveSbomEnabled(undefined), true);
+});
+
+test("resolveSbomEnabled: explicit true keeps it on", () => {
+ assert.equal(resolveSbomEnabled(true), true);
+});
+
+test("resolveSbomEnabled: explicit false turns it off (--no-sbom)", () => {
+ assert.equal(resolveSbomEnabled(false), false);
+});
+
+// ---------------------------------------------------------------------------
+// resolveScanEnabled — default on, --no-scan opts out.
+// ---------------------------------------------------------------------------
+
+test("resolveScanEnabled: default-on when flag is absent", () => {
+ assert.equal(resolveScanEnabled(undefined), true);
+});
+
+test("resolveScanEnabled: explicit true keeps it on", () => {
+ assert.equal(resolveScanEnabled(true), true);
+});
+
+test("resolveScanEnabled: explicit false turns it off (--no-scan)", () => {
+ assert.equal(resolveScanEnabled(false), false);
+});
+
+// ---------------------------------------------------------------------------
+// detectCoverageReport + resolveCoverageEnabled — auto-detect semantics.
+// ---------------------------------------------------------------------------
+
+test("detectCoverageReport: returns undefined when no report exists", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-none-"));
+ assert.equal(await detectCoverageReport(dir), undefined);
+});
+
+test("detectCoverageReport: finds coverage/lcov.info", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-lcov-"));
+ await mkdir(join(dir, "coverage"), { recursive: true });
+ await writeFile(join(dir, "coverage", "lcov.info"), "TN:\n");
+ assert.equal(await detectCoverageReport(dir), "coverage/lcov.info");
+});
+
+test("detectCoverageReport: finds top-level lcov.info", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-lcov2-"));
+ await writeFile(join(dir, "lcov.info"), "TN:\n");
+ assert.equal(await detectCoverageReport(dir), "lcov.info");
+});
+
+test("detectCoverageReport: finds coverage.xml (cobertura)", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-xml-"));
+ await writeFile(join(dir, "coverage.xml"), "\n");
+ assert.equal(await detectCoverageReport(dir), "coverage.xml");
+});
+
+test("detectCoverageReport: finds jacoco xml at the Gradle path", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-jacoco-"));
+ await mkdir(join(dir, "build", "reports", "jacoco", "test"), { recursive: true });
+ await writeFile(join(dir, "build", "reports", "jacoco", "test", "jacocoTestReport.xml"), "");
+ assert.equal(await detectCoverageReport(dir), "build/reports/jacoco/test/jacocoTestReport.xml");
+});
+
+test("detectCoverageReport: finds coverage.json (coverage.py)", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-json-"));
+ await writeFile(join(dir, "coverage.json"), "{}\n");
+ assert.equal(await detectCoverageReport(dir), "coverage.json");
+});
+
+test("detectCoverageReport: prefers coverage/lcov.info over top-level lcov.info", async () => {
+ // Probe order matches the phase's `CANDIDATES` array so the analyze
+ // wrapper and the phase agree on which report is the "one" when a
+ // repo has both.
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-both-"));
+ await mkdir(join(dir, "coverage"), { recursive: true });
+ await writeFile(join(dir, "coverage", "lcov.info"), "TN:\n");
+ await writeFile(join(dir, "lcov.info"), "TN:\n");
+ assert.equal(await detectCoverageReport(dir), "coverage/lcov.info");
+});
+
+test("resolveCoverageEnabled: explicit true short-circuits detection", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-force-on-"));
+ // No report on disk; explicit true still returns true so the phase
+ // runs and the operator sees the "no report found" warning.
+ assert.equal(await resolveCoverageEnabled(true, dir), true);
+});
+
+test("resolveCoverageEnabled: explicit false short-circuits detection", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-force-off-"));
+ await writeFile(join(dir, "lcov.info"), "TN:\n");
+ // Report IS on disk; explicit false still returns false so the phase
+ // is a silent no-op.
+ assert.equal(await resolveCoverageEnabled(false, dir), false);
+});
+
+test("resolveCoverageEnabled: undefined + no report → undefined (silent)", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-auto-none-"));
+ // No flag, no report → plumb `undefined` through so the phase is a
+ // silent no-op. Returning `false` would behave the same from the
+ // phase's perspective, but `undefined` is the documented "auto"
+ // sentinel and round-trips through `pipelineOptions` as an
+ // omitted key.
+ assert.equal(await resolveCoverageEnabled(undefined, dir), undefined);
+});
+
+test("resolveCoverageEnabled: undefined + report found → true (auto-on)", async () => {
+ const dir = await mkdtemp(join(tmpdir(), "och-analyze-cov-auto-on-"));
+ await writeFile(join(dir, "lcov.info"), "TN:\n");
+ assert.equal(await resolveCoverageEnabled(undefined, dir), true);
+});
diff --git a/packages/cli/src/commands/analyze.ts b/packages/cli/src/commands/analyze.ts
index 4877ed07..1a7976b4 100644
--- a/packages/cli/src/commands/analyze.ts
+++ b/packages/cli/src/commands/analyze.ts
@@ -79,23 +79,40 @@ export interface AnalyzeOptions {
readonly verbose?: boolean;
readonly skipAgentsMd?: boolean;
/**
- * When true, emit `.codehub/sbom.cyclonedx.json` and
- * `.codehub/sbom.spdx.json` from Dependency nodes. Off by default so
- * `codehub analyze` stays quiet for repos where supply-chain docs are
- * out of scope.
+ * Emit `.codehub/sbom.cyclonedx.json` and `.codehub/sbom.spdx.json`
+ * from Dependency nodes. **Default: on.** Serialization is cheap, purely
+ * local, and every CI pipeline that scans artifacts wants one. Pass
+ * `false` (CLI: `--no-sbom`) to suppress.
*/
readonly sbom?: boolean;
/**
- * When true, run the coverage overlay phase which detects lcov /
- * cobertura / jacoco / coverage.py reports and populates
- * `coveragePercent` + `coveredLines` on File nodes. Off by default.
+ * Run the coverage overlay phase — detects lcov / cobertura / jacoco /
+ * coverage.py reports and populates `coveragePercent` + `coveredLines`
+ * on File nodes. **Default: auto.** When `undefined`, `runAnalyze`
+ * probes the repo for a report at the well-known paths and enables the
+ * phase only when one is found (silent no-op otherwise). Pass `true` to
+ * force-enable and surface the "no report found" warning, or `false`
+ * (CLI: `--no-coverage`) to suppress entirely.
*/
readonly coverage?: boolean;
/**
- * When true (the post-P04 default), the `summarize` phase walks LSP-
- * confirmed callable symbols and invokes Bedrock to generate structured
- * summaries within the resolved cost cap. Pass `false` (or
- * `CODEHUB_BEDROCK_DISABLED=1`) to force the phase off.
+ * Run Priority-1 security scanners at the end of `analyze` and write
+ * `.codehub/scan.sarif` + ingest findings into the graph. **Default:
+ * on.** Most scanners are local binaries (semgrep, bandit, ruff,
+ * vulture, radon, detect-secrets, betterleaks, ty); the network-backed
+ * ones (osv-scanner, grype, npm/pip audit) are silently skipped when
+ * `--offline` is set. Pass `false` (CLI: `--no-scan`) to suppress — the
+ * graph pipeline runs unchanged.
+ */
+ readonly scan?: boolean;
+ /**
+ * Opt into the `summarize` phase — walks LSP-confirmed callable symbols
+ * and invokes Bedrock to generate structured summaries within the
+ * resolved cost cap. **Off by default**: a bare `codehub analyze` is
+ * fast, local, deterministic, and never spends on LLM calls. Enable
+ * per-invocation with `true` (CLI: `--summaries`) or environment-wide
+ * with `CODEHUB_BEDROCK_SUMMARIES=1`. `CODEHUB_BEDROCK_DISABLED=1`
+ * force-disables regardless of flag state.
*/
readonly summaries?: boolean;
/**
@@ -196,12 +213,23 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi
// reports mode="full" with reason="no-prior-graph".
const incrementalFrom = opts.force === true ? undefined : await loadPreviousGraph(repoPath);
- // Resolve the effective `summaries` flag. P04 flipped the default ON, so
- // `undefined` now means "on". The `CODEHUB_BEDROCK_DISABLED=1` env kill-
- // switch forces off regardless of the flag; `offline` is enforced later
- // inside the phase itself (the phase's own invariant).
+ // Resolve the effective `summaries` flag. Summaries are opt-in: a bare
+ // `codehub analyze` runs the fast, local, deterministic pipeline
+ // (tree-sitter + SCIP + cochanges) and skips the Bedrock summarize phase
+ // entirely. Opt in via `--summaries` or `CODEHUB_BEDROCK_SUMMARIES=1`.
+ // The `CODEHUB_BEDROCK_DISABLED=1` env kill-switch forces off regardless
+ // of the flag; `offline` is enforced later inside the phase itself.
const summariesEnabled = resolveSummariesEnabled(opts.summaries, process.env);
+ // Resolve sbom/coverage/scan defaults. SBOM and scan default ON (cheap,
+ // local, and they feed the MCP surface agents actually use). Coverage
+ // auto-detects: probe the known report paths and only enable the phase
+ // when one exists — so bare `codehub analyze` on a repo with no coverage
+ // data stays silent instead of warning about a missing report.
+ const sbomEnabled = resolveSbomEnabled(opts.sbom);
+ const scanEnabled = resolveScanEnabled(opts.scan);
+ const coverageResolved = await resolveCoverageEnabled(opts.coverage, repoPath);
+
// Open a read-only store upfront so the `summarize` phase can probe the
// prior summary rows before work is queued AND so we can inspect the
// prior run's `storeMeta.stats` to resolve `--max-summaries auto`. We
@@ -250,8 +278,8 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi
...(opts.embeddingsBatchSize !== undefined
? { embeddingsBatchSize: opts.embeddingsBatchSize }
: {}),
- ...(opts.sbom !== undefined ? { sbom: opts.sbom } : {}),
- ...(opts.coverage !== undefined ? { coverage: opts.coverage } : {}),
+ sbom: sbomEnabled,
+ ...(coverageResolved !== undefined ? { coverage: coverageResolved } : {}),
summaries: summariesEnabled,
maxSummariesPerRun: resolvedMaxSummaries,
...(opts.summaryModel !== undefined ? { summaryModel: opts.summaryModel } : {}),
@@ -430,6 +458,30 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi
`graph ${result.graphHash.slice(0, 8)}, ${durationMs} ms${incrementalLine}${cacheLine}`,
);
+  // Scan phase — run Priority-1 scanners and write .codehub/scan.sarif so
+  // `verdict`, `list_findings`, and `list_findings_delta` work on day one.
+  // Run AFTER the graph + registry write so a scanner failure cannot
+  // regress the index. Network-backed scanners (osv-scanner, grype, npm/
+  // pip audit) are expected to self-skip under --offline — NOTE(review):
+  // `opts.offline` is not forwarded to `runScan` here; confirm. The scan's
+  // severity-gated exit code is NOT propagated: analyze stays the "build
+  // the graph" command; gate via `codehub verdict` or `codehub scan`.
+  if (scanEnabled) {
+    try {
+      const scanMod = await import("./scan.js");
+      const scanSummary = await scanMod.runScan(repoPath, {
+        repo: repoName,
+        ...(opts.home !== undefined ? { home: opts.home } : {}),
+      });
+      log(
+        `codehub analyze: scan — ${scanSummary.runs.length} scanner(s), ` +
+          `${scanSummary.totalFindings} finding(s), sarif=${scanSummary.outputPath}`,
+      );
+    } catch (err) {
+      log(`codehub analyze: scan skipped: ${err instanceof Error ? err.message : String(err)}`);
+    }
+  }
+
return {
repoPath,
repoName,
@@ -518,16 +570,20 @@ export async function loadPreviousGraph(
/**
* Resolve the effective `summaries` flag, honoring the
- * `CODEHUB_BEDROCK_DISABLED=1` env kill-switch and the P04 default-on
- * contract (absent flag → enabled).
+ * `CODEHUB_BEDROCK_DISABLED=1` env kill-switch.
+ *
+ * The Bedrock-backed summarize phase is opt-in via `--summaries` (or
+ * `CODEHUB_BEDROCK_SUMMARIES=1`), so a bare `codehub analyze` never spends
+ * on LLM calls, waits on a Bedrock request, or needs AWS creds for
+ * summaries. This covers the summarize phase only: the SBOM, scan, and
+ * coverage steps have their own defaults and are resolved separately.
*
- * Truth table (post-P04):
- * - env var set + flag undefined → false (kill-switch wins)
- * - env var set + flag true → false (kill-switch wins)
- * - env var set + flag false → false
- * - env var unset + flag undefined → true (default on)
- * - env var unset + flag true → true
- * - env var unset + flag false → false (explicit --no-summaries)
+ * Truth table:
+ * - env kill-switch set (any flag state) → false (kill-switch wins)
+ * - env opt-in set + flag undefined → true (env opts in)
+ * - flag true → true (explicit --summaries)
+ * - flag false → false (explicit --no-summaries)
+ * - flag undefined + no env → false (default off — fast path)
*
* Exported for unit tests; the production call site reads `process.env`.
*/
@@ -536,9 +592,98 @@ export function resolveSummariesEnabled(
env: NodeJS.ProcessEnv | Record,
): boolean {
if (env["CODEHUB_BEDROCK_DISABLED"] === "1") return false;
+ if (flag === true) return true;
+ if (flag === false) return false;
+ return env["CODEHUB_BEDROCK_SUMMARIES"] === "1";
+}
+
+/**
+ * Resolve the effective `sbom` flag. Default ON — serializing Dependency
+ * nodes to CycloneDX + SPDX is cheap, local, and every supply-chain audit
+ * wants it. Pass `false` to suppress.
+ *
+ * Exported for unit tests.
+ */
+export function resolveSbomEnabled(flag: boolean | undefined): boolean {
+ return flag !== false;
+}
+
+/**
+ * Resolve the effective `scan` flag. Default ON — Priority-1 scanners are
+ * mostly local binaries that produce the SARIF `verdict`, `list_findings`,
+ * and `list_findings_delta` all read. Pass `false` (CLI: `--no-scan`) to
+ * suppress — the scanners that need network (osv-scanner, grype, npm/pip
+ * audit) are silently skipped anyway when `--offline` is set, so the
+ * on-default stays honest under offline operation.
+ *
+ * Exported for unit tests.
+ */
+export function resolveScanEnabled(flag: boolean | undefined): boolean {
return flag !== false;
}
+/**
+ * Coverage-report candidate paths, mirrored from
+ * `packages/ingestion/src/pipeline/phases/coverage.ts:58-64`. Kept in sync
+ * by hand: the analyze wrapper needs to know whether a report exists
+ * *before* it sets `options.coverage=true`, because the phase warns when
+ * coverage is explicitly enabled but no report is found. When `undefined`
+ * is plumbed through instead, the phase is a silent no-op.
+ */
+const COVERAGE_CANDIDATE_PATHS = [
+ "coverage/lcov.info",
+ "lcov.info",
+ "coverage.xml",
+ "build/reports/jacoco/test/jacocoTestReport.xml",
+ "coverage.json",
+] as const;
+
+/**
+ * Probe the repo for a coverage report at the known candidate paths, in
+ * array order. Used by the analyze wrapper to decide whether to enable
+ * the coverage phase when no explicit flag is passed. Exported so tests
+ * can assert which paths are probed without actually running `runAnalyze`.
+ *
+ * @param repoPath - Repository root the candidate paths are resolved against.
+ * @returns The first existing candidate (relative to `repoPath`), or `undefined`.
+ */
+export async function detectCoverageReport(repoPath: string): Promise<string | undefined> {
+  const { access } = await import("node:fs/promises");
+  for (const rel of COVERAGE_CANDIDATE_PATHS) {
+    try {
+      await access(resolve(repoPath, rel));
+      return rel;
+    } catch {
+      // Intentional: we're probing; missing-file is the whole point.
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Resolve the effective `coverage` flag, honoring explicit true/false and
+ * silently auto-detecting when the flag is `undefined`. This lets a bare
+ * `codehub analyze` overlay coverage on File nodes when a report is
+ * present and stay silent otherwise (no spurious "no report found"
+ * warning on repos that don't have tests). Exported for unit tests.
+ *
+ * @param flag - Explicit setting; `undefined` requests auto-detection.
+ * @param repoPath - Repository root probed by `detectCoverageReport`.
+ * @returns What the pipeline should see:
+ * - `true` — `flag === true` (phase runs, warns if absent) or report auto-detected;
+ * - `false` — `flag === false` (phase no-op);
+ * - `undefined` — `flag === undefined` and no report found (no-op).
+ */
+export async function resolveCoverageEnabled(
+  flag: boolean | undefined,
+  repoPath: string,
+): Promise<boolean | undefined> {
+  if (flag === true) return true;
+  if (flag === false) return false;
+  const detected = await detectCoverageReport(repoPath);
+  return detected !== undefined ? true : undefined;
+}
+
/**
* Resolve `--max-summaries auto` / explicit numeric caps into a concrete
* numeric budget the pipeline can consume.
diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
index 55b24b82..253cf73b 100644
--- a/packages/cli/src/index.ts
+++ b/packages/cli/src/index.ts
@@ -48,16 +48,29 @@ program
.option("--skip-agents-md", "Do not write the AGENTS.md / CLAUDE.md stanza")
.option(
"--sbom",
- "Emit .codehub/sbom.cyclonedx.json + .codehub/sbom.spdx.json from Dependency nodes",
+ "Emit .codehub/sbom.cyclonedx.json + .codehub/sbom.spdx.json from Dependency nodes. Default ON — use --no-sbom to suppress.",
+ )
+ .option("--no-sbom", "Suppress SBOM emission. Equivalent to passing `sbom: false`.")
+ .option(
+ "--coverage",
+ "Force the coverage overlay phase on and warn when no report is found. Default AUTO — `codehub analyze` auto-detects lcov/cobertura/jacoco/coverage.py reports and silently skips when none exist.",
+ )
+ .option("--no-coverage", "Force the coverage overlay phase off even when a report is present.")
+ .option(
+ "--scan",
+ "Run Priority-1 scanners after analyze, write .codehub/scan.sarif, and ingest findings into the graph. Default ON — use --no-scan to suppress.",
+ )
+ .option(
+ "--no-scan",
+ "Skip the post-analyze scan step. The graph pipeline runs unchanged; `codehub verdict` / `list_findings` work against the last SARIF on disk.",
)
- .option("--coverage", "Overlay lcov/cobertura/jacoco/coverage.py report onto File nodes")
.option(
"--summaries",
- "Enable the summarize phase (default ON: structured Bedrock summaries per callable). Use --no-summaries to disable.",
+ "Opt into the summarize phase (structured Bedrock summaries per callable). Default OFF — `codehub analyze` is fast, local, deterministic by default. Also enabled by CODEHUB_BEDROCK_SUMMARIES=1.",
)
.option(
"--no-summaries",
- "Disable the summarize phase entirely (equivalent to CODEHUB_BEDROCK_DISABLED=1).",
+ "Explicitly disable the summarize phase (equivalent to CODEHUB_BEDROCK_DISABLED=1). Only meaningful when combined with CODEHUB_BEDROCK_SUMMARIES=1.",
)
.option(
"--max-summaries ",
@@ -93,10 +106,16 @@ program
process.env["OCH_NATIVE_PARSER"] = "1";
}
// Pass the raw flag straight through to `runAnalyze`. The env
- // kill-switch (`CODEHUB_BEDROCK_DISABLED=1`) is re-checked inside
- // `runAnalyze` via `resolveSummariesEnabled` so tests that call
- // `runAnalyze` directly honor the same truth table.
- const summaries = opts["summaries"] === false ? false : undefined;
+ // kill-switch (`CODEHUB_BEDROCK_DISABLED=1`) and the env opt-in
+ // (`CODEHUB_BEDROCK_SUMMARIES=1`) are re-checked inside `runAnalyze`
+ // via `resolveSummariesEnabled` so tests that call `runAnalyze`
+ // directly honor the same truth table. Summaries are OFF by default
+ // — the fast, local, deterministic analyze path. Pass `--summaries`
+ // or set `CODEHUB_BEDROCK_SUMMARIES=1` to opt in.
+ let summaries: boolean | undefined;
+ if (opts["summaries"] === true) summaries = true;
+ else if (opts["summaries"] === false) summaries = false;
+ else summaries = undefined;
// --max-summaries accepts either a positive integer or the literal
// string "auto". Unknown strings fall back to "auto" so the CLI never
@@ -137,9 +156,18 @@ program
offline: opts["offline"] === true,
verbose: opts["verbose"] === true,
skipAgentsMd: opts["skipAgentsMd"] === true,
- sbom: opts["sbom"] === true,
- coverage: opts["coverage"] === true,
- ...(summaries === false ? { summaries } : {}),
+ // `sbom`, `coverage`, `scan` are tri-state: true / false / omitted.
+ // commander encodes `--no-sbom` as `opts.sbom === false`, `--sbom` as
+ // `true`, and omitted as `undefined`. Forward all three verbatim —
+ // `runAnalyze` applies the resolvers (resolveSbomEnabled,
+ // resolveScanEnabled, resolveCoverageEnabled) to pick the effective value.
+ ...(opts["sbom"] === false ? { sbom: false as const } : {}),
+ ...(opts["sbom"] === true ? { sbom: true as const } : {}),
+ ...(opts["coverage"] === false ? { coverage: false as const } : {}),
+ ...(opts["coverage"] === true ? { coverage: true as const } : {}),
+ ...(opts["scan"] === false ? { scan: false as const } : {}),
+ ...(opts["scan"] === true ? { scan: true as const } : {}),
+ ...(summaries !== undefined ? { summaries } : {}),
maxSummariesPerRun,
...(typeof opts["summaryModel"] === "string" ? { summaryModel: opts["summaryModel"] } : {}),
skills: opts["skills"] === true,
diff --git a/packages/docs/src/content/docs/guides/indexing-a-repo.md b/packages/docs/src/content/docs/guides/indexing-a-repo.md
index 25582df0..46b9344e 100644
--- a/packages/docs/src/content/docs/guides/indexing-a-repo.md
+++ b/packages/docs/src/content/docs/guides/indexing-a-repo.md
@@ -96,13 +96,36 @@ On the default LadybugDB layout:
On the single-file DuckDB fallback, `graph.duckdb` replaces both
`graph.lbug` and `temporal.duckdb`.
-## Other useful flags
-
-- `--sbom` — emit a CycloneDX SBOM alongside the index.
-- `--coverage` — bridge coverage data into the graph.
+## What runs by default
+
+A bare `codehub analyze` produces a production-grade `.codehub/` folder
+in one command:
+
+- Graph pipeline (tree-sitter parse + SCIP resolution + communities +
+ processes + cochanges + ownership + dependencies + detectors).
+- SBOM emission (CycloneDX + SPDX) — **default on**; suppress with
+ `--no-sbom`.
+- Priority-1 scanners → `.codehub/scan.sarif` + findings ingested into
+ the graph — **default on**; suppress with `--no-scan`.
+ Network-backed scanners (osv-scanner, grype, npm/pip audit) self-skip
+ under `--offline`, so the on-default stays honest.
+- Coverage overlay — **default auto**: runs only when a report is
+ present at `coverage/lcov.info`, `lcov.info`, `coverage.xml`,
+ `build/reports/jacoco/test/jacocoTestReport.xml`, or `coverage.json`.
+ Silent no-op otherwise. Force with `--coverage`; force off with
+ `--no-coverage`.
+
+Everything else — embeddings, summaries, skills — is opt-in.
+
+## Opt-in flags
+
+- `--embeddings` — compute semantic vectors for queries by meaning.
+ Requires `codehub setup --embeddings` first.
- `--summaries` / `--no-summaries` — LLM-generated symbol summaries
- (default on; capped by `--max-summaries`, default auto = 10% of
- callables, hard cap 500).
+ (default off — `codehub analyze` is fast, local, deterministic by
+ default; opt in with `--summaries` or `CODEHUB_BEDROCK_SUMMARIES=1`).
+ When enabled, the budget is capped by `--max-summaries`, default
+ `auto` = 10% of callables, hard cap 500.
- `--skills` — generate Claude Code skills from the graph.
- `--native-parser` — opt into the native tree-sitter N-API addon on
Node 22 (the default runtime is `web-tree-sitter` / WASM).
diff --git a/packages/docs/src/content/docs/reference/cli.md b/packages/docs/src/content/docs/reference/cli.md
index 39d9866f..8d5e8e73 100644
--- a/packages/docs/src/content/docs/reference/cli.md
+++ b/packages/docs/src/content/docs/reference/cli.md
@@ -30,9 +30,10 @@ codehub analyze [path]
| `--offline` | off | Zero sockets. |
| `--verbose` | off | Per-phase pipeline progress. |
| `--skip-agents-md` | off | Skip the AGENTS.md / CLAUDE.md stanza. |
-| `--sbom` | off | Emit `sbom.cyclonedx.json` + `sbom.spdx.json` from `Dependency` nodes. |
-| `--coverage` | off | Overlay lcov / cobertura / jacoco / coverage.py reports onto `File` nodes. |
-| `--summaries` / `--no-summaries` | on | LLM symbol summaries (Bedrock). |
+| `--sbom` / `--no-sbom` | **on** | Emit `sbom.cyclonedx.json` + `sbom.spdx.json` from `Dependency` nodes. Use `--no-sbom` to suppress. |
+| `--scan` / `--no-scan` | **on** | Run Priority-1 scanners, write `.codehub/scan.sarif`, and ingest findings into the graph. Network-backed scanners (osv-scanner, grype, npm/pip audit) self-skip under `--offline`. Use `--no-scan` to suppress. |
+| `--coverage` / `--no-coverage` | **auto** | Overlay lcov / cobertura / jacoco / coverage.py reports onto `File` nodes. `auto` probes `coverage/lcov.info`, `lcov.info`, `coverage.xml`, `build/reports/jacoco/test/jacocoTestReport.xml`, `coverage.json` in that order and enables the phase when one exists (silent no-op otherwise). `--coverage` forces on and warns if nothing is found; `--no-coverage` forces off. |
+| `--summaries` / `--no-summaries` | off | LLM symbol summaries (Bedrock). Opt in with `--summaries` or `CODEHUB_BEDROCK_SUMMARIES=1`; kill with `--no-summaries` or `CODEHUB_BEDROCK_DISABLED=1`. |
| `--max-summaries ` | `auto` (10% of SCIP-confirmed callables, cap 500) | Summary budget. |
| `--summary-model ` | — | Override the Bedrock summary model id. |
| `--skills` | off | Emit one `SKILL.md` per Community (≥5 symbols) under `.codehub/skills/`. |
diff --git a/packages/docs/src/content/docs/reference/configuration.md b/packages/docs/src/content/docs/reference/configuration.md
index 23a232d4..3507645d 100644
--- a/packages/docs/src/content/docs/reference/configuration.md
+++ b/packages/docs/src/content/docs/reference/configuration.md
@@ -56,7 +56,8 @@ When none of the above are set, the local ONNX backend
|---|---|
| `CODEHUB_DISABLE_SCIP` | Set to `1` to make the `scip-index` ingestion phase a no-op. Heuristic edges still flow. |
| `CODEHUB_ALLOW_BUILD_SCRIPTS` | Set to `1` to allow SCIP indexers that require a build (Rust, Java) to run. Off by default for clean-room safety. |
-| `CODEHUB_BEDROCK_DISABLED` | Set to `1` to disable the LLM summarize phase. Equivalent to `--no-summaries`. |
+| `CODEHUB_BEDROCK_SUMMARIES` | Set to `1` to opt the LLM summarize phase in. Equivalent to `--summaries`. Off by default — `codehub analyze` runs fast, local, deterministic phases only. |
+| `CODEHUB_BEDROCK_DISABLED` | Set to `1` to force-disable the LLM summarize phase. Equivalent to `--no-summaries`. Wins over `CODEHUB_BEDROCK_SUMMARIES=1` and `--summaries`. |
| `NO_COLOR` | Standard convention; disables colored console output. |
## On-disk layout: `.codehub/`