diff --git a/CHANGELOG.md b/CHANGELOG.md
index 347ee4e..b50d274 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,23 @@ All notable changes to this project are documented here. The format is based on
 
 ## [Unreleased]
 
+### Added — graph foundation: edge confidence + architecture analytics (requires a one-time reindex)
+- **Edge confidence audit trail.** Every graph edge now carries a `confidence`:
+  `extracted` (exact — a same-file symbol or repo-unique name), `inferred` (a
+  heuristic resolved it, e.g. an import path-suffix match), or `ambiguous` (a named
+  target we could not pin to a unique node). `refs` and `impact` surface it so an
+  empty or short answer over `ambiguous`/`inferred` edges reads as inconclusive,
+  not as proof. Confidence is derived from *how* an edge resolved — never guessed by
+  an LLM; the index stays fully local. **Bumps `SCHEMA_VERSION` 2 → 3.** Older
+  indexes stay readable; `index`/`update` detect the mismatch and rebuild.
+- **Architecture analytics (`graph/analysis.py`), zero new dependencies.** A pure,
+  deterministic pass over the resolved edge graph computes communities (greedy
+  modularity / Louvain local-move — does not collapse cliques joined by one bridge),
+  god nodes (most-connected symbols/files), surprising connections (edges bridging
+  weakly-linked communities), auto-labelled modules, and suggested questions. The
+  summary is cached in `meta['graph_analysis']` at build time for instant reads.
+  (Surfaced via the `architecture` command and HTML export in following changes.)
+
 ### Changed — retrieval ranking & fusion (requires a one-time reindex)
 - **RRF fusion rescaled and re-keyed.** Fused scores were ~`w/k` (≈0.017), an order
   of magnitude below the reranker's bounded bonuses, so rerank silently became the
diff --git a/docs/SCHEMA.md b/docs/SCHEMA.md
index 21f2735..490dc85 100644
--- a/docs/SCHEMA.md
+++ b/docs/SCHEMA.md
@@ -75,7 +75,13 @@ CREATE TABLE edges (
     dst_name      TEXT,                         -- raw target text (for unresolved edges)
     file_id       INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
     line          INTEGER,
-    resolved      INTEGER NOT NULL DEFAULT 0
+    resolved      INTEGER NOT NULL DEFAULT 0,
+    -- Honesty audit trail: how the edge's target was determined.
+    --   extracted = exact (same-file symbol, or a repo-unique name)
+    --   inferred  = a heuristic resolved it (import path-suffix match)
+    --   ambiguous = a named target we could not pin to a unique node
+    -- Set by the global graph pass; never inferred by an LLM (the index is local).
+    confidence    TEXT NOT NULL DEFAULT 'extracted'
 );
 CREATE INDEX idx_edges_src ON edges(src_kind, src_id);
 CREATE INDEX idx_edges_dst ON edges(dst_kind, dst_id);
diff --git a/src/codebase_index/graph/analysis.py b/src/codebase_index/graph/analysis.py
new file mode 100644
index 0000000..e309b0c
--- /dev/null
+++ b/src/codebase_index/graph/analysis.py
@@ -0,0 +1,468 @@
+"""Architecture analytics over the resolved edge graph — zero external deps.
+
+This is the codebase-index take on graphify's community detection / god nodes /
+surprising connections, implemented in pure, deterministic Python so the core
+install stays dependency-free and the results are stable across runs (which
+matters for the golden-snapshot tests and CI).
+
+What it computes from the in-memory adjacency of resolved edges:
+
+  * communities  - greedy modularity (Louvain local-move) groups densely linked
+                   nodes into "modules". Deterministic: nodes are visited in a
+                   fixed key order and ties are broken by a fixed rule, so the
+                   same graph always yields the same partition.
+  * god nodes    - the most-connected nodes (weighted degree). These are the
+                   symbols/files most of the codebase leans on.
+  * surprising   - edges that bridge two otherwise weakly-connected communities.
+                   The cross-module links you would not think to look for.
+  * questions    - template-generated starting questions seeded from the god
+                   nodes and the bridges, mirroring graphify's GRAPH_REPORT.
+
+The summary is cached in meta['graph_analysis'] by refresh_analysis() at build
+time; the `architecture` command and HTML export read it back instantly.
+"""
+
+from __future__ import annotations
+
+import json
+import sqlite3
+from collections import Counter, defaultdict
+from typing import Any, Optional
+
+from ..storage import repo
+
+# How many items to keep in the cached summary. Bounded so the meta JSON stays
+# small even on very large repos.
+MAX_GOD_NODES = 20
+MAX_SURPRISING = 12
+MAX_QUESTIONS = 8
+TOP_NODES_PER_COMMUNITY = 5
+MAX_COMMUNITIES_IN_SUMMARY = 40
+# A community smaller than this is noise for reporting (isolated/leaf nodes).
+MIN_REPORTED_COMMUNITY = 2
+# A pair of communities joined by at most this many edges is a "bridge".
+BRIDGE_MAX_EDGES = 2
+# Cap on local-move passes; the partition almost always settles in 2-4.
+_LOCAL_MOVE_PASSES = 20
+
+ANALYSIS_META_KEY = "graph_analysis"
+
+Node = tuple[str, int]  # (kind, id)
+
+
+# ---------------------------------------------------------------------------
+# Graph construction
+# ---------------------------------------------------------------------------
+
+def build_adjacency(
+    edges: list[sqlite3.Row],
+    key_fn=None,
+) -> tuple[dict[Any, Counter], dict[tuple[Any, Any], int]]:
+    """Undirected weighted adjacency + per-edge multiplicity, from resolved edges.
+
+    Self-loops are dropped (they distort degree and never bridge communities).
+
+    ``key_fn(kind, id) -> hashable | None`` maps an edge endpoint to a node key
+    (returning None drops the edge). analyze() passes a *content* key (file::path
+    or symbol::path::name::line) so the partition is identical across platforms —
+    symbol ids depend on file-walk order, which differs between OSes. Without a
+    key_fn, nodes are keyed by (kind, id), as the algorithm unit tests do.
+    """
+    def kf(kind: str, nid: int):
+        return key_fn(kind, nid) if key_fn is not None else (kind, nid)
+
+    adj: dict[Any, Counter] = defaultdict(Counter)
+    edge_weight: dict[tuple[Any, Any], int] = defaultdict(int)
+    for e in edges:
+        src = kf(e["src_kind"], int(e["src_id"]))
+        dst = kf(e["dst_kind"], int(e["dst_id"]))
+        if src is None or dst is None or src == dst:
+            continue
+        adj[src][dst] += 1
+        adj[dst][src] += 1
+        edge_weight[_canonical_pair(src, dst)] += 1
+    return adj, edge_weight
+
+
+def _canonical_pair(a: Any, b: Any) -> tuple[Any, Any]:
+    return (a, b) if a <= b else (b, a)
+
+
+# The graph algorithms below are generic over the node-key type: analyze() (like
+# the HTML/interop export) calls them with string keys, while the default keying
+# of build_adjacency() yields (kind, id) tuples. Any keeps both call sites valid.
+def weighted_degree(adj: dict[Any, Counter]) -> dict[Any, int]:
+    return {node: sum(neighbors.values()) for node, neighbors in adj.items()}
+
+
+# ---------------------------------------------------------------------------
+# Community detection — deterministic greedy modularity (Louvain local move)
+# ---------------------------------------------------------------------------
+
+def detect_communities(adj: dict[Any, Counter]) -> dict[Any, int]:
+    """Partition nodes into communities by greedy modularity. Returns {node: id}.
+
+    This is the local-moving phase of the Louvain method, made deterministic:
+    every node starts alone, then in a fixed key order each node moves to the
+    neighbouring community with the largest modularity gain (on a tie it stays put,
+    else the smallest id wins). Passes repeat until no node moves, capped at
+    _LOCAL_MOVE_PASSES. Unlike label propagation it does not collapse two cliques
+    joined by one bridge — the bridge's gain cannot beat the dense intra-clique
+    structure. Labels are renumbered to dense, size-ranked ids so community 0 is largest.
+    """
+    nodes = sorted(adj.keys())
+    if not nodes:
+        return {}
+
+    deg = weighted_degree(adj)
+    two_m = sum(deg.values())  # = 2 * total edge weight
+    if two_m == 0:
+        return _renumber_by_size({node: idx for idx, node in enumerate(nodes)})
+
+    comm: dict[Any, int] = {node: idx for idx, node in enumerate(nodes)}
+    # Σ_tot per community: total weighted degree of its members.
+    sigma_tot: dict[int, int] = {idx: deg[node] for idx, node in enumerate(nodes)}
+
+    for _ in range(_LOCAL_MOVE_PASSES):
+        moved = False
+        for node in nodes:
+            ki = deg[node]
+            ci = comm[node]
+            # Detach node from its current community.
+            sigma_tot[ci] -= ki
+
+            # Weight from node into each neighbouring community.
+            links: Counter = Counter()
+            for neighbor, w in adj[node].items():
+                if neighbor != node:
+                    links[comm[neighbor]] += w
+
+            # Pick the community maximising  w_in - Σ_tot * k_i / (2m).
+            # Baseline = rejoining its previous community (gain 0 if it was alone).
+            best_c = ci
+            best_gain = links.get(ci, 0) - sigma_tot[ci] * ki / two_m
+            for c, w_in in sorted(links.items()):
+                gain = w_in - sigma_tot[c] * ki / two_m
+                if gain > best_gain + 1e-12:
+                    best_gain, best_c = gain, c
+
+            comm[node] = best_c
+            sigma_tot[best_c] += ki
+            if best_c != ci:
+                moved = True
+        if not moved:
+            break
+
+    return _renumber_by_size(comm)
+
+
+def _renumber_by_size(label: dict[Any, int]) -> dict[Any, int]:
+    """Renumber raw labels to dense ids ordered by community size (desc), then by
+    smallest member key — so the mapping is stable run to run."""
+    members: dict[int, list[Any]] = defaultdict(list)
+    for node, lbl in label.items():
+        members[lbl].append(node)
+    order = sorted(members, key=lambda lbl: (-len(members[lbl]), min(members[lbl])))
+    remap = {old: new for new, old in enumerate(order)}
+    return {node: remap[lbl] for node, lbl in label.items()}
+
+
+def modularity(adj: dict[Any, Counter], communities: dict[Any, int]) -> float:
+    """Newman modularity Q of the partition — a quality score in roughly [-0.5, 1].
+
+    Higher means the communities capture more edge density than chance. Reported
+    so the user can judge how meaningful the module split is.
+    """
+    m2 = sum(sum(neighbors.values()) for neighbors in adj.values())  # = 2 * |E|
+    if m2 == 0:
+        return 0.0
+    deg = weighted_degree(adj)
+    q = 0.0
+    for node, neighbors in adj.items():
+        ci = communities[node]
+        for neighbor, weight in neighbors.items():
+            if communities[neighbor] == ci:
+                q += weight - deg[node] * deg[neighbor] / m2
+    return round(q / m2, 4)
+
+
+# ---------------------------------------------------------------------------
+# Node labelling
+# ---------------------------------------------------------------------------
+
+def _node_index(conn: sqlite3.Connection) -> dict[Node, dict]:
+    """(kind, id) -> display metadata {kind, name, path, degree fields}."""
+    rows = repo.all_graph_nodes(conn)
+    index: dict[Node, dict] = {}
+    for f in rows["file"]:
+        index[("file", int(f["id"]))] = {
+            "kind": "file",
+            "name": f["path"].rsplit("/", 1)[-1],
+            "path": f["path"],
+        }
+    for s in rows["symbol"]:
+        index[("symbol", int(s["id"]))] = {
+            "kind": "symbol",
+            "name": s["name"],
+            "symbol_kind": s["kind"],
+            "path": s["path"],
+            "line_start": s["line_start"],
+            "in_degree": int(s["in_degree"]),
+            "out_degree": int(s["out_degree"]),
+        }
+    return index
+
+
+def _stable_key(meta: dict) -> str:
+    """A platform-stable node key from content, not from the volatile symbol id.
+
+    Symbol ids are assigned in file-walk order, which differs across OSes; keying
+    the graph by path/name/line keeps communities and god-node ranking identical
+    everywhere (so the golden snapshots hold on Linux/macOS/Windows alike).
+    """
+    if meta["kind"] == "file":
+        return f"file::{meta['path']}"
+    return f"symbol::{meta['path']}::{meta['name']}::{meta.get('line_start', '')}"
+
+
+def _dir_of(path: str) -> str:
+    return path.rsplit("/", 1)[0] if "/" in path else "(root)"
+
+
+def _is_test_path(path: str) -> bool:
+    """Test files cluster with the code they exercise; don't let them name the module."""
+    lower = path.lower()
+    parts = lower.split("/")
+    if any(p in ("test", "tests", "__tests__", "spec", "specs") for p in parts):
+        return True
+    base = parts[-1]
+    return base.startswith("test_") or base.startswith("test.") or "_test." in base or ".test." in base
+
+
+def label_community(members: list[Any], node_index: dict[Any, dict]) -> str:
+    """Name a community by the directory most of its (non-test) nodes live in.
+
+    A 2-5 word, plain-language module name is what graphify asks an LLM for; here
+    we derive it deterministically from the dominant source directory, which for
+    code is a strong proxy for "what this module is". Test paths are discounted so
+    a cluster of production symbols isn't mislabelled "tests" just because its test
+    files outnumber it; a community that is *only* tests still gets named for them.
+    """
+    prod: Counter = Counter()
+    allp: Counter = Counter()
+    for node in members:
+        meta = node_index.get(node)
+        if not (meta and meta.get("path")):
+            continue
+        d = _dir_of(meta["path"])
+        allp[d] += 1
+        if not _is_test_path(meta["path"]):
+            prod[d] += 1
+    dirs = prod or allp
+    if not dirs:
+        return "module"
+    # Most common dir; tie -> shortest then lexicographically smallest (stable).
+    top = min(dirs.items(), key=lambda kv: (-kv[1], len(kv[0]), kv[0]))
+    return top[0]
+
+
+# ---------------------------------------------------------------------------
+# God nodes / surprising connections / questions
+# ---------------------------------------------------------------------------
+
+def god_nodes(
+    adj: dict[Any, Counter],
+    communities: dict[Any, int],
+    node_index: dict[Any, dict],
+    *,
+    limit: int = MAX_GOD_NODES,
+) -> list[dict]:
+    """Most-connected nodes by weighted degree (the load-bearing ones)."""
+    deg = weighted_degree(adj)
+    ranked = sorted(deg, key=lambda n: (-deg[n], str(n)))
+    out: list[dict] = []
+    for node in ranked[:limit]:
+        meta = node_index.get(node)
+        if meta is None:
+            continue
+        out.append(
+            {
+                "kind": meta["kind"],
+                "name": meta["name"],
+                "path": meta.get("path"),
+                "degree": deg[node],
+                "community": communities.get(node, -1),
+            }
+        )
+    return out
+
+
+def surprising_connections(
+    edge_weight: dict[tuple[Any, Any], int],
+    communities: dict[Any, int],
+    node_index: dict[Any, dict],
+    *,
+    limit: int = MAX_SURPRISING,
+) -> list[dict]:
+    """Edges that bridge two communities barely connected to each other.
+
+    For each unordered community pair we count the distinct node pairs linked
+    across it (multiplicity ignored); a pair joined by at most BRIDGE_MAX_EDGES of
+    them is a surprising structural link. We surface one endpoint pair per bridge.
+    """
+    pair_edges: dict[tuple[int, int], list[tuple[Any, Any]]] = defaultdict(list)
+    for (a, b), _w in edge_weight.items():
+        ca, cb = communities.get(a, -1), communities.get(b, -1)
+        if ca == cb or ca < 0 or cb < 0:
+            continue
+        key = (ca, cb) if ca < cb else (cb, ca)
+        pair_edges[key].append((a, b))
+
+    bridges = [
+        (pair, endpoints)
+        for pair, endpoints in pair_edges.items()
+        if len(endpoints) <= BRIDGE_MAX_EDGES
+    ]
+    # Rarest bridges first (a single edge between modules is the most surprising),
+    # then by community-pair id for stability.
+    bridges.sort(key=lambda item: (len(item[1]), item[0]))
+
+    out: list[dict] = []
+    for (ca, cb), endpoints in bridges[:limit]:
+        a, b = sorted(endpoints)[0]
+        ma, mb = node_index.get(a), node_index.get(b)
+        if ma is None or mb is None:
+            continue
+        out.append(
+            {
+                "from": {"kind": ma["kind"], "name": ma["name"], "path": ma.get("path")},
+                "to": {"kind": mb["kind"], "name": mb["name"], "path": mb.get("path")},
+                "from_community": ca,
+                "to_community": cb,
+                "edge_count": len(endpoints),
+            }
+        )
+    return out
+
+
+def suggest_questions(
+    gods: list[dict],
+    surprising: list[dict],
+    community_labels: dict[int, str],
+    *,
+    limit: int = MAX_QUESTIONS,
+) -> list[str]:
+    """Starter questions seeded from the structure, like graphify's report."""
+    questions: list[str] = []
+    for g in gods[:3]:
+        if g["kind"] == "symbol":
+            questions.append(f"How does `{g['name']}` work?")
+            questions.append(f"What breaks if `{g['name']}` changes?")
+        else:
+            questions.append(f"What is the role of `{g['name']}` in the architecture?")
+    for s in surprising[:3]:
+        la = community_labels.get(s["from_community"], f"community {s['from_community']}")
+        lb = community_labels.get(s["to_community"], f"community {s['to_community']}")
+        if la != lb:
+            questions.append(f"How is `{la}` connected to `{lb}`?")
+    # De-dup, preserve order.
+    seen: set[str] = set()
+    deduped: list[str] = []
+    for q in questions:
+        if q not in seen:
+            seen.add(q)
+            deduped.append(q)
+    return deduped[:limit]
+
+
+# ---------------------------------------------------------------------------
+# Top-level entry points
+# ---------------------------------------------------------------------------
+
+def analyze(conn: sqlite3.Connection) -> dict:
+    """Compute the full architecture-analytics summary (does not persist it)."""
+    edges = repo.all_resolved_edges(conn)
+    id_index = _node_index(conn)  # (kind, id) -> meta
+
+    # Key the graph by stable content keys, not by volatile symbol ids, so the
+    # result is identical across platforms. node_index then maps that stable key
+    # back to display metadata.
+    node_index: dict[str, dict] = {}
+
+    def key_fn(kind: str, nid: int):
+        meta = id_index.get((kind, nid))
+        if meta is None:
+            return None
+        k = _stable_key(meta)
+        node_index.setdefault(k, meta)
+        return k
+
+    adj, edge_weight = build_adjacency(edges, key_fn)
+
+    communities = detect_communities(adj)
+    members: dict[int, list[str]] = defaultdict(list)
+    for node, cid in communities.items():
+        members[cid].append(node)
+
+    community_labels = {cid: label_community(nodes, node_index) for cid, nodes in members.items()}
+    deg = weighted_degree(adj)
+
+    community_summaries: list[dict] = []
+    reported = sorted(members, key=lambda cid: (-len(members[cid]), cid))
+    for cid in reported:
+        nodes = members[cid]
+        if len(nodes) < MIN_REPORTED_COMMUNITY:
+            continue
+        top = sorted(nodes, key=lambda n: (-deg.get(n, 0), str(n)))[:TOP_NODES_PER_COMMUNITY]
+        community_summaries.append(
+            {
+                "id": cid,
+                "label": community_labels[cid],
+                "size": len(nodes),
+                "top_nodes": [
+                    {
+                        "kind": node_index[n]["kind"],
+                        "name": node_index[n]["name"],
+                        "path": node_index[n].get("path"),
+                        "degree": deg.get(n, 0),
+                    }
+                    for n in top
+                    if n in node_index
+                ],
+            }
+        )
+        if len(community_summaries) >= MAX_COMMUNITIES_IN_SUMMARY:
+            break
+
+    gods = god_nodes(adj, communities, node_index)
+    surprising = surprising_connections(edge_weight, communities, node_index)
+    questions = suggest_questions(gods, surprising, community_labels)
+
+    return {
+        "node_count": len(adj),
+        "edge_count": sum(edge_weight.values()),
+        "community_count": sum(1 for nodes in members.values() if len(nodes) >= MIN_REPORTED_COMMUNITY),
+        "modularity": modularity(adj, communities),
+        "communities": community_summaries,
+        "god_nodes": gods,
+        "surprising": surprising,
+        "questions": questions,
+    }
+
+
+def refresh_analysis(conn: sqlite3.Connection) -> dict:
+    """Compute and cache the analysis summary into meta['graph_analysis']."""
+    summary = analyze(conn)
+    repo.set_meta(conn, ANALYSIS_META_KEY, json.dumps(summary, ensure_ascii=False))
+    return summary
+
+
+def load_analysis(conn: sqlite3.Connection) -> Optional[dict]:
+    """Read the cached analysis summary, or None if the build never produced one."""
+    raw = repo.get_meta(conn, ANALYSIS_META_KEY)
+    if not raw:
+        return None
+    try:
+        return json.loads(raw)
+    except (ValueError, TypeError):
+        return None
diff --git a/src/codebase_index/graph/builder.py b/src/codebase_index/graph/builder.py
index f2e342b..4547883 100644
--- a/src/codebase_index/graph/builder.py
+++ b/src/codebase_index/graph/builder.py
@@ -26,7 +26,20 @@
 def build_graph(conn: sqlite3.Connection) -> dict[str, int]:
     resolved = resolve_edges(conn)
     repo.recompute_degrees(conn)
+    # Everything still unresolved that names a target is, by definition, a target we
+    # could not pin to a unique node — record it as 'ambiguous' for the honesty trail.
+    repo.mark_ambiguous_edges(conn)
     total_unresolved = len(repo.unresolved_edges(conn))
+    # Architecture analytics (communities / god nodes / surprising bridges) are a
+    # derived view of the graph. Compute once per build and cache the JSON in meta so
+    # the `architecture` command and the HTML export read it instantly. Never let an
+    # analysis failure fail the build — the graph itself is already written.
+    try:
+        from . import analysis
+
+        analysis.refresh_analysis(conn)
+    except Exception:  # pragma: no cover - defensive; analytics are best-effort
+        pass
     return {"resolved": resolved, "unresolved": total_unresolved}
 
 
@@ -38,17 +51,20 @@ def resolve_edges(conn: sqlite3.Connection) -> int:
     unique_symbols = repo.unique_symbol_ids_by_name(conn)
     suffix_map = _path_suffix_map(repo.all_file_ids_with_paths(conn))
 
-    resolutions: list[tuple[str, int, int]] = []
+    # (dst_kind, dst_id, edge_id, confidence). A repo-unique symbol name is an exact
+    # hit -> 'extracted'; an import resolved only by path-suffix matching is a best-
+    # effort heuristic -> 'inferred'.
+    resolutions: list[tuple[str, int, int, str]] = []
     for edge in edges:
         name = edge["dst_name"]
         if edge["edge_type"] == "import":
             file_id = _module_to_file_id(suffix_map, name, lang=edge["lang"])
             if file_id is not None:
-                resolutions.append(("file", file_id, edge["id"]))
+                resolutions.append(("file", file_id, edge["id"], "inferred"))
         elif edge["edge_type"] in _SYMBOL_EDGE_TYPES:
             sym_id = unique_symbols.get(name)
             if sym_id is not None:
-                resolutions.append(("symbol", sym_id, edge["id"]))
+                resolutions.append(("symbol", sym_id, edge["id"], "extracted"))
 
     repo.resolve_edges_bulk(conn, resolutions)
     return len(resolutions)
diff --git a/src/codebase_index/graph/expand.py b/src/codebase_index/graph/expand.py
index deebed1..02ee989 100644
--- a/src/codebase_index/graph/expand.py
+++ b/src/codebase_index/graph/expand.py
@@ -51,14 +51,14 @@ def _seed_nodes(conn: sqlite3.Connection, target: str) -> list[tuple[str, int]]:
 
 
 def _neighbors(conn, kind, node_id, direction):
-    """Yield (next_kind, next_id, edge_type) for the requested direction(s)."""
+    """Yield (next_kind, next_id, edge_type, confidence) for the requested direction(s)."""
     if direction in ("up", "both"):
         for e in repo.incoming_edges(conn, kind, node_id):
-            yield e["src_kind"], int(e["src_id"]), e["edge_type"]
+            yield e["src_kind"], int(e["src_id"]), e["edge_type"], e["confidence"]
     if direction in ("down", "both"):
         for e in repo.outgoing_edges(conn, kind, node_id):
             if e["dst_id"] is not None:
-                yield e["dst_kind"], int(e["dst_id"]), e["edge_type"]
+                yield e["dst_kind"], int(e["dst_id"]), e["edge_type"], e["confidence"]
 
 
 def _node_meta(conn, kind, node_id) -> Optional[ImpactNode]:
@@ -92,7 +92,7 @@ def walk_impact(
         kind, node_id, dist = queue.popleft()
         if dist >= depth:
             continue
-        for nk, nid, etype in _neighbors(conn, kind, node_id, direction):
+        for nk, nid, etype, conf in _neighbors(conn, kind, node_id, direction):
             if (nk, nid) in visited:
                 continue
             visited.add((nk, nid))
@@ -101,6 +101,7 @@ def walk_impact(
                 continue
             meta.distance = dist + 1
             meta.via_edge = etype
+            meta.via_confidence = conf
             out.append(meta)
             queue.append((nk, nid, dist + 1))
     return out
diff --git a/src/codebase_index/models.py b/src/codebase_index/models.py
index b35d959..b87ec79 100644
--- a/src/codebase_index/models.py
+++ b/src/codebase_index/models.py
@@ -113,6 +113,9 @@ class RefSite(BaseModel):
     path: str
     line: int
     kind: str
+    # Audit trail (see edges.confidence): 'extracted' = exact match, 'inferred' =
+    # heuristic, 'ambiguous' = unresolved/non-unique. Defaults keep older callers valid.
+    confidence: str = "extracted"
 
 
 class RefsResponse(BaseModel):
@@ -129,6 +132,7 @@ class ImpactNode(BaseModel):
     line_start: Optional[int] = None
     distance: int                   # BFS hops from the target (1 = direct)
     via_edge: Optional[str] = None  # edge_type that linked it (import|call|extends|...)
+    via_confidence: Optional[str] = None  # confidence of the linking edge (audit trail)
 
 
 class ImpactResponse(BaseModel):
diff --git a/src/codebase_index/output/markdown.py b/src/codebase_index/output/markdown.py
index afd6a76..f7b4151 100644
--- a/src/codebase_index/output/markdown.py
+++ b/src/codebase_index/output/markdown.py
@@ -130,6 +130,15 @@ def _coverage_line(coverage) -> Optional[str]:
     return None
 
 
+# Audit-trail glyphs: an exact edge needs no annotation; inferred/ambiguous ones
+# warn the reader that the link is a heuristic or could not be pinned down.
+_CONF_MARK = {"extracted": "", "inferred": "~ inferred", "ambiguous": "? ambiguous"}
+
+
+def _conf_mark(confidence: Optional[str]) -> str:
+    return _CONF_MARK.get(confidence or "extracted", confidence or "")
+
+
 def render_refs(resp: RefsResponse) -> str:
     lines = [_header(resp.query, resp.index.exists, resp.index.stale)]
     lines.append("")
@@ -140,10 +149,12 @@ def render_refs(resp: RefsResponse) -> str:
             lines.append(note)
         return "\n".join(lines).rstrip() + "\n"
 
-    lines.append("| kind | path | line |")
-    lines.append("|------|------|------|")
+    lines.append("| kind | path | line | confidence |")
+    lines.append("|------|------|------|------------|")
     for site in resp.sites:
-        lines.append(f"| {site.kind} | `{site.path}` | {site.line} |")
+        lines.append(
+            f"| {site.kind} | `{site.path}` | {site.line} | {_conf_mark(site.confidence) or 'exact'} |"
+        )
     if note:
         lines.append(note)
     return "\n".join(lines).rstrip() + "\n"
@@ -171,7 +182,9 @@ def render_impact(resp: ImpactResponse) -> str:
     for n in sorted(resp.nodes, key=lambda x: (x.distance, x.path, x.line_start or 0)):
         loc = f"{n.path}:{n.line_start}" if n.line_start else n.path
         node_name = f"`{n.name}`" if n.name else "—"
-        lines.append(f"| {n.distance} | {n.via_edge or ''} | {n.kind} | {node_name} | `{loc}` |")
+        mark = _conf_mark(n.via_confidence)
+        via = f"{n.via_edge or ''} {mark}".strip()
+        lines.append(f"| {n.distance} | {via} | {n.kind} | {node_name} | `{loc}` |")
     if note:
         lines.append(note)
     return "\n".join(lines).rstrip() + "\n"
diff --git a/src/codebase_index/retrieval/searchers.py b/src/codebase_index/retrieval/searchers.py
index 138bf80..d82aca5 100644
--- a/src/codebase_index/retrieval/searchers.py
+++ b/src/codebase_index/retrieval/searchers.py
@@ -234,11 +234,17 @@ def symbol_lookup(
 def refs_lookup(conn: sqlite3.Connection, name: str, *, kind: str) -> RefsResponse:
     defs = repo.symbols_by_name(conn, name, exact=True)
     sites = [
-        RefSite(path=row["path"], line=row["line"], kind="call")
+        RefSite(
+            path=row["path"],
+            line=row["line"],
+            kind="call",
+            confidence=row["confidence"] if "confidence" in row.keys() else "extracted",
+        )
         for row in repo.refs_for_name(conn, name)
     ]
     if kind == "all":
         sites.extend(
+            # A definition is the symbol itself — exact by construction.
             RefSite(path=row["path"], line=row["line_start"], kind="definition")
             for row in defs
         )
diff --git a/src/codebase_index/storage/db.py b/src/codebase_index/storage/db.py
index 60abbcd..7cb1da1 100644
--- a/src/codebase_index/storage/db.py
+++ b/src/codebase_index/storage/db.py
@@ -8,7 +8,8 @@
 from typing import Optional
 
 # 2: chunks gained a denormalized `symbol_names` column (FTS symbol-name boost).
-SCHEMA_VERSION = 2
+# 3: edges gained a `confidence` column (extracted/inferred/ambiguous audit trail).
+SCHEMA_VERSION = 3
 
 
 class Database:
diff --git a/src/codebase_index/storage/repo.py b/src/codebase_index/storage/repo.py
index 77a54cd..6bef9eb 100644
--- a/src/codebase_index/storage/repo.py
+++ b/src/codebase_index/storage/repo.py
@@ -253,11 +253,15 @@ def replace_edges(
     conn.executemany(
         """
         INSERT INTO edges
-            (edge_type, src_kind, src_id, dst_kind, dst_id, dst_name, file_id, line, resolved)
+            (edge_type, src_kind, src_id, dst_kind, dst_id, dst_name, file_id, line,
+             resolved, confidence)
         VALUES
-            (:edge_type, :src_kind, :src_id, :dst_kind, :dst_id, :dst_name, :file_id, :line, :resolved)
+            (:edge_type, :src_kind, :src_id, :dst_kind, :dst_id, :dst_name, :file_id, :line,
+             :resolved, :confidence)
         """,
-        [{**edge, "file_id": file_id} for edge in edges],
+        # confidence defaults to 'extracted' for callers (and tests) that predate the
+        # audit-trail column; the global graph pass refines it (see graph/builder.py).
+        [{"confidence": "extracted", **edge, "file_id": file_id} for edge in edges],
     )
     return len(edges)
 
@@ -271,6 +275,7 @@ def refs_for_name(conn: sqlite3.Connection, name: str) -> list[sqlite3.Row]:
         """
         SELECT e.line AS line, f.path AS path, e.edge_type AS edge_type,
                e.resolved AS resolved, e.src_id AS src_id, e.src_kind AS src_kind,
+               e.confidence AS confidence,
                src.name AS src_name, src.qualified AS src_qualified
         FROM edges e
         JOIN files f ON f.id = e.file_id
@@ -388,15 +393,59 @@ def resolve_edge(conn: sqlite3.Connection, edge_id: int, dst_kind: str, dst_id:
 
 
 def resolve_edges_bulk(
-    conn: sqlite3.Connection, resolutions: Sequence[tuple[str, int, int]]
+    conn: sqlite3.Connection, resolutions: Sequence[tuple[str, int, int, str]]
 ) -> None:
-    """Apply (dst_kind, dst_id, edge_id) resolutions in one executemany."""
+    """Apply (dst_kind, dst_id, edge_id, confidence) resolutions in one executemany.
+
+    confidence records *how* the target was found: 'extracted' for an exact match
+    (a repo-unique symbol name), 'inferred' for a heuristic (import path-suffix).
+    """
     conn.executemany(
-        "UPDATE edges SET dst_kind = ?, dst_id = ?, resolved = 1 WHERE id = ?",
-        resolutions,
+        "UPDATE edges SET dst_kind = ?, dst_id = ?, resolved = 1, confidence = ? WHERE id = ?",
+        [(dst_kind, dst_id, confidence, edge_id) for dst_kind, dst_id, edge_id, confidence in resolutions],
     )
 
 
+def mark_ambiguous_edges(conn: sqlite3.Connection) -> int:
+    """Flag every still-unresolved edge that names a target as 'ambiguous'.
+
+    Run after the global resolution pass: an edge with a dst_name that no unique
+    symbol/file claims is one we could not pin down (a non-unique or undefined name,
+    or an import of code outside the repo). Marking it keeps refs/impact honest — an
+    empty or short answer over ambiguous edges is inconclusive, not proof of "no callers".
+    """
+    cur = conn.execute(
+        "UPDATE edges SET confidence = 'ambiguous' "
+        "WHERE resolved = 0 AND dst_name IS NOT NULL AND confidence != 'ambiguous'"
+    )
+    return cur.rowcount if cur.rowcount is not None else 0
+
+
+def all_resolved_edges(conn: sqlite3.Connection) -> list[sqlite3.Row]:
+    """Every resolved edge as (src_kind, src_id, dst_kind, dst_id, edge_type, confidence).
+
+    The in-memory adjacency the graph analysis (communities / god nodes / bridges)
+    is built from. Unresolved edges are skipped — they have no concrete endpoint.
+    """
+    return conn.execute(
+        "SELECT src_kind, src_id, dst_kind, dst_id, edge_type, confidence FROM edges "
+        "WHERE resolved = 1 AND dst_id IS NOT NULL"
+    ).fetchall()
+
+
+def all_graph_nodes(conn: sqlite3.Connection) -> dict[str, list[sqlite3.Row]]:
+    """File and symbol rows keyed by kind, for labelling graph-analysis nodes."""
+    return {
+        "file": conn.execute("SELECT id, path FROM files").fetchall(),
+        "symbol": conn.execute(
+            "SELECT s.id AS id, s.name AS name, s.kind AS kind, f.path AS path, "
+            "       s.line_start AS line_start, "
+            "       s.in_degree AS in_degree, s.out_degree AS out_degree "
+            "FROM symbols s JOIN files f ON f.id = s.file_id"
+        ).fetchall(),
+    }
+
+
 def name_ref_counts(conn: sqlite3.Connection, names: Sequence[str]) -> dict[str, int]:
     """Count edges targeting each name (any resolution state), keyed by dst_name.
 
@@ -458,7 +507,7 @@ def symbols_in_file(conn: sqlite3.Connection, file_id: int) -> list[sqlite3.Row]
 
 def incoming_edges(conn: sqlite3.Connection, kind: str, node_id: int) -> list[sqlite3.Row]:
     return conn.execute(
-        "SELECT id, edge_type, src_kind, src_id, file_id, line FROM edges "
+        "SELECT id, edge_type, src_kind, src_id, file_id, line, confidence FROM edges "
         "WHERE resolved = 1 AND dst_kind = ? AND dst_id = ?",
         (kind, node_id),
     ).fetchall()
@@ -466,7 +515,7 @@ def incoming_edges(conn: sqlite3.Connection, kind: str, node_id: int) -> list[sq
 
 def outgoing_edges(conn: sqlite3.Connection, kind: str, node_id: int) -> list[sqlite3.Row]:
     return conn.execute(
-        "SELECT id, edge_type, dst_kind, dst_id, file_id, line FROM edges "
+        "SELECT id, edge_type, dst_kind, dst_id, file_id, line, confidence FROM edges "
         "WHERE resolved = 1 AND src_kind = ? AND src_id = ?",
         (kind, node_id),
     ).fetchall()
diff --git a/src/codebase_index/storage/schema.sql b/src/codebase_index/storage/schema.sql
index 10bde93..bed17bb 100644
--- a/src/codebase_index/storage/schema.sql
+++ b/src/codebase_index/storage/schema.sql
@@ -63,7 +63,13 @@ CREATE TABLE IF NOT EXISTS edges (
     dst_name      TEXT,
     file_id       INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
     line          INTEGER,
-    resolved      INTEGER NOT NULL DEFAULT 0
+    resolved      INTEGER NOT NULL DEFAULT 0,
+    -- Honesty audit trail (see docs/SCHEMA.md). How sure are we this edge points
+    -- where it claims? 'extracted' = exact match (same-file symbol or a repo-unique
+    -- name); 'inferred' = a heuristic resolved it (import path-suffix); 'ambiguous'
+    -- = a name/import we could not pin to a unique target. Set at build time by the
+    -- global graph pass; never guessed by an LLM (the index is fully local).
+    confidence    TEXT NOT NULL DEFAULT 'extracted'
 );
 CREATE INDEX IF NOT EXISTS idx_edges_src  ON edges(src_kind, src_id);
 CREATE INDEX IF NOT EXISTS idx_edges_dst  ON edges(dst_kind, dst_id);
diff --git a/tests/golden/impact_user_model.json b/tests/golden/impact_user_model.json
index 9ec0c77..cb8d033 100644
--- a/tests/golden/impact_user_model.json
+++ b/tests/golden/impact_user_model.json
@@ -23,6 +23,7 @@
       "line_start": null,
       "name": null,
       "path": "src/api/service.py",
+      "via_confidence": "inferred",
       "via_edge": "import"
     },
     {
@@ -31,6 +32,7 @@
       "line_start": 7,
       "name": "AdminUser",
       "path": "src/api/service.py",
+      "via_confidence": "extracted",
       "via_edge": "extends"
     }
   ],
diff --git a/tests/golden/mcp_find_refs.json b/tests/golden/mcp_find_refs.json
index abdd727..13208fa 100644
--- a/tests/golden/mcp_find_refs.json
+++ b/tests/golden/mcp_find_refs.json
@@ -15,16 +15,19 @@
   "schema_version": 1,
   "sites": [
     {
+      "confidence": "extracted",
       "kind": "call",
       "line": 11,
       "path": "src/api/service.py"
     },
     {
+      "confidence": "extracted",
       "kind": "definition",
       "line": 4,
       "path": "src/auth/token.py"
     },
     {
+      "confidence": "extracted",
       "kind": "call",
       "line": 11,
       "path": "src/auth/token.py"
diff --git a/tests/golden/mcp_impact_of.json b/tests/golden/mcp_impact_of.json
index 5fc14dc..1629921 100644
--- a/tests/golden/mcp_impact_of.json
+++ b/tests/golden/mcp_impact_of.json
@@ -23,6 +23,7 @@
       "line_start": null,
       "name": null,
       "path": "src/api/service.py",
+      "via_confidence": "inferred",
       "via_edge": "import"
     },
     {
@@ -31,6 +32,7 @@
       "line_start": 7,
       "name": "AdminUser",
       "path": "src/api/service.py",
+      "via_confidence": "extracted",
       "via_edge": "extends"
     }
   ],
diff --git a/tests/golden/refs_refresh_access_token.json b/tests/golden/refs_refresh_access_token.json
index f5693df..1ce827e 100644
--- a/tests/golden/refs_refresh_access_token.json
+++ b/tests/golden/refs_refresh_access_token.json
@@ -14,16 +14,19 @@
   "query": "refresh_access_token",
   "sites": [
     {
+      "confidence": "extracted",
       "kind": "call",
       "line": 11,
       "path": "src/api/service.py"
     },
     {
+      "confidence": "extracted",
       "kind": "definition",
       "line": 4,
       "path": "src/auth/token.py"
     },
     {
+      "confidence": "extracted",
       "kind": "call",
       "line": 11,
       "path": "src/auth/token.py"
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
new file mode 100644
index 0000000..60eb90a
--- /dev/null
+++ b/tests/test_analysis.py
@@ -0,0 +1,194 @@
+"""Tests for graph.analysis — communities / god nodes / surprising bridges.
+
+The pure-Python graph functions are deterministic, so the assertions pin exact
+structure (two cliques joined by one bridge → two communities + one surprising
+link) rather than fuzzy thresholds.
+"""
+
+from __future__ import annotations
+
+from codebase_index.config import Config
+from codebase_index.graph import analysis
+from codebase_index.indexer.pipeline import build_index
+from codebase_index.parsers.base import Symbol
+from codebase_index.storage import repo
+from codebase_index.storage.db import Database
+
+
+# --- pure-Python graph algorithms (no DB) -------------------------------------
+
+def _two_cliques_with_bridge():
+    """Two triangles (A0-A1-A2) and (B0-B1-B2) joined by a single A0-B0 edge."""
+    edges = []
+
+    def edge(s, d):
+        return {"src_kind": "symbol", "src_id": s, "dst_kind": "symbol", "dst_id": d}
+
+    # clique A: ids 0,1,2 ; clique B: ids 10,11,12
+    for a, b in [(0, 1), (1, 2), (0, 2)]:
+        edges.append(edge(a, b))
+    for a, b in [(10, 11), (11, 12), (10, 12)]:
+        edges.append(edge(a, b))
+    edges.append(edge(0, 10))  # the bridge
+    return edges
+
+
+def test_detect_communities_splits_two_cliques():
+    adj, _ = analysis.build_adjacency(_two_cliques_with_bridge())
+    comm = analysis.detect_communities(adj)
+    # All of clique A share one label; all of clique B share another; they differ.
+    a_labels = {comm[("symbol", i)] for i in (0, 1, 2)}
+    b_labels = {comm[("symbol", i)] for i in (10, 11, 12)}
+    assert len(a_labels) == 1
+    assert len(b_labels) == 1
+    assert a_labels != b_labels
+
+
+def test_modularity_is_positive_for_clear_structure():
+    adj, _ = analysis.build_adjacency(_two_cliques_with_bridge())
+    comm = analysis.detect_communities(adj)
+    assert analysis.modularity(adj, comm) > 0.0
+
+
+def test_god_nodes_rank_by_degree():
+    # Make node 0 a hub: connect it to many leaves.
+    edges = [
+        {"src_kind": "symbol", "src_id": 0, "dst_kind": "symbol", "dst_id": leaf}
+        for leaf in range(1, 6)
+    ]
+    adj, _ = analysis.build_adjacency(edges)
+    comm = analysis.detect_communities(adj)
+    node_index = {
+        ("symbol", i): {"kind": "symbol", "name": f"sym{i}", "path": "src/x.py"}
+        for i in range(6)
+    }
+    gods = analysis.god_nodes(adj, comm, node_index, limit=3)
+    assert gods[0]["name"] == "sym0"
+    assert gods[0]["degree"] == 5
+
+
+def test_surprising_connection_finds_the_bridge():
+    adj, edge_weight = analysis.build_adjacency(_two_cliques_with_bridge())
+    comm = analysis.detect_communities(adj)
+    node_index = {
+        ("symbol", i): {"kind": "symbol", "name": f"sym{i}", "path": "src/a.py"}
+        for i in (0, 1, 2)
+    }
+    node_index.update(
+        {
+            ("symbol", i): {"kind": "symbol", "name": f"sym{i}", "path": "src/b.py"}
+            for i in (10, 11, 12)
+        }
+    )
+    surprising = analysis.surprising_connections(edge_weight, comm, node_index)
+    assert len(surprising) == 1
+    names = {surprising[0]["from"]["name"], surprising[0]["to"]["name"]}
+    assert names == {"sym0", "sym10"}
+    assert surprising[0]["edge_count"] == 1
+
+
+def test_label_community_uses_dominant_directory():
+    node_index = {
+        ("symbol", 1): {"kind": "symbol", "name": "a", "path": "src/auth/token.py"},
+        ("symbol", 2): {"kind": "symbol", "name": "b", "path": "src/auth/login.py"},
+        ("symbol", 3): {"kind": "symbol", "name": "c", "path": "src/db/conn.py"},
+    }
+    label = analysis.label_community([("symbol", 1), ("symbol", 2), ("symbol", 3)], node_index)
+    assert label == "src/auth"
+
+
+def test_label_community_discounts_test_paths():
+    # Two production symbols in src/storage and three test files that exercise them:
+    # tests outnumber prod, but the module should still be named for the prod code.
+    node_index = {
+        ("symbol", 1): {"kind": "symbol", "name": "a", "path": "src/storage/db.py"},
+        ("symbol", 2): {"kind": "symbol", "name": "b", "path": "src/storage/repo.py"},
+        ("symbol", 3): {"kind": "symbol", "name": "t1", "path": "tests/test_db.py"},
+        ("symbol", 4): {"kind": "symbol", "name": "t2", "path": "tests/test_repo.py"},
+        ("symbol", 5): {"kind": "symbol", "name": "t3", "path": "tests/test_x.py"},
+    }
+    members = [("symbol", i) for i in range(1, 6)]
+    assert analysis.label_community(members, node_index) == "src/storage"
+    # A community that is *only* tests still gets named for them.
+    only_tests = [("symbol", i) for i in (3, 4, 5)]
+    assert analysis.label_community(only_tests, node_index) == "tests"
+
+
+def test_suggest_questions_seeds_from_structure():
+    gods = [{"kind": "symbol", "name": "Engine", "path": "x", "degree": 9, "community": 0}]
+    surprising = [
+        {
+            "from": {"kind": "symbol", "name": "a", "path": "x"},
+            "to": {"kind": "symbol", "name": "b", "path": "y"},
+            "from_community": 0,
+            "to_community": 1,
+            "edge_count": 1,
+        }
+    ]
+    qs = analysis.suggest_questions(gods, surprising, {0: "core", 1: "io"})
+    assert any("Engine" in q for q in qs)
+    assert any("core" in q and "io" in q for q in qs)
+
+
+# --- integration against a real built index -----------------------------------
+
+def _seed_two_modules(db: Database) -> None:
+    """auth module (token<-login) and db module (exec<-query), bridged login->query."""
+    auth = repo.upsert_file(
+        db.conn, path="src/auth/token.py", lang="python", size_bytes=1, sha256="a",
+        mtime_ns=1, git_status=None, parser="treesitter", indexed_at="t", is_generated=False,
+    )
+    db_f = repo.upsert_file(
+        db.conn, path="src/db/query.py", lang="python", size_bytes=1, sha256="b",
+        mtime_ns=1, git_status=None, parser="treesitter", indexed_at="t", is_generated=False,
+    )
+    a = repo.replace_symbols(db.conn, auth, [
+        Symbol(name="make_token", kind="function", line_start=1, line_end=2),
+        Symbol(name="login", kind="function", line_start=3, line_end=4),
+    ])
+    b = repo.replace_symbols(db.conn, db_f, [
+        Symbol(name="run_query", kind="function", line_start=1, line_end=2),
+        Symbol(name="exec_stmt", kind="function", line_start=3, line_end=4),
+    ])
+    repo.replace_edges(db.conn, auth, [
+        {"edge_type": "call", "src_kind": "symbol", "src_id": a[1],
+         "dst_kind": None, "dst_id": None, "dst_name": "make_token", "line": 3, "resolved": 0},
+        {"edge_type": "call", "src_kind": "symbol", "src_id": a[1],
+         "dst_kind": None, "dst_id": None, "dst_name": "run_query", "line": 4, "resolved": 0},
+    ])
+    repo.replace_edges(db.conn, db_f, [
+        {"edge_type": "call", "src_kind": "symbol", "src_id": b[0],
+         "dst_kind": None, "dst_id": None, "dst_name": "exec_stmt", "line": 2, "resolved": 0},
+    ])
+
+
+def test_analyze_and_cache_roundtrip(tmp_path):
+    from codebase_index.graph.builder import build_graph
+
+    db = Database(tmp_path / "index.sqlite").open()
+    _seed_two_modules(db)
+    build_graph(db.conn)  # resolves edges + refresh_analysis caches the summary
+
+    cached = analysis.load_analysis(db.conn)
+    assert cached is not None
+    assert cached["node_count"] > 0
+    assert cached["god_nodes"], "expected at least one god node"
+    # Recomputing directly matches the cached summary's headline numbers.
+    fresh = analysis.analyze(db.conn)
+    assert fresh["node_count"] == cached["node_count"]
+    assert fresh["edge_count"] == cached["edge_count"]
+    db.close()
+
+
+def test_analyze_on_sample_repo(sample_repo, tmp_path):
+    cfg = Config()
+    cfg.root = str(sample_repo)
+    db = Database(tmp_path / "index.sqlite").open()
+    build_index(cfg, db, root=sample_repo)
+
+    summary = analysis.load_analysis(db.conn)
+    assert summary is not None
+    assert summary["node_count"] >= 1
+    assert isinstance(summary["communities"], list)
+    assert isinstance(summary["questions"], list)
+    db.close()
diff --git a/tests/test_graph.py b/tests/test_graph.py
index f8e5110..f18c0ba 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -63,6 +63,22 @@ def test_build_graph_resolves_symbol_and_import_edges(tmp_path):
     db.close()
 
 
+def test_build_graph_sets_edge_confidence(tmp_path):
+    db = _db(tmp_path)
+    _seed(db)
+    build_graph(db.conn)
+    conf = {
+        (r["edge_type"], r["dst_name"]): r["confidence"]
+        for r in db.conn.execute("SELECT edge_type, dst_name, confidence FROM edges")
+    }
+    # exact unique-name symbol match, import resolved by path-suffix heuristic,
+    # and a callee no symbol defines.
+    assert conf[("call", "refresh_access_token")] == "extracted"
+    assert conf[("import", "auth.token")] == "inferred"
+    assert conf[("call", "does_not_exist")] == "ambiguous"
+    db.close()
+
+
 def _file(db, path, sha="x"):
     return repo.upsert_file(
         db.conn, path=path, lang="python", size_bytes=1, sha256=sha,