diff --git a/CHANGELOG.md b/CHANGELOG.md index 02f1bb5..e31b3b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,17 @@ All notable changes to this project are documented here. The format is based on ## [Unreleased] +### Added — graph navigation: `path` and `describe` +- **`codebase-index path `** — shortest undirected dependency/call path + between two symbols or files ("how is X connected to Y"). Renders the node chain + annotated with each link's edge type and confidence; `inferred`/`ambiguous` hops + are marked, so a path is only as trustworthy as its weakest edge. +- **`codebase-index describe `** — a node card: definition(s), direct + callers and callees (with confidence), in/out degree, the symbol's module, and + its god-node rank if it has one. The graphify `explain Symbol` idea, named + `describe` so it doesn't collide with the existing how-it-works `explain`. +- **`path_between` and `describe_symbol` MCP tools** expose both to agents. + ### Added — `architecture` command + `architecture_overview` MCP tool - **`codebase-index architecture`** prints a high-level map of the codebase from the analytics cached at index time: detected modules (with auto-derived labels), diff --git a/README.md b/README.md index 1fc48f6..658a4c5 100644 --- a/README.md +++ b/README.md @@ -100,8 +100,8 @@ See [CHANGELOG.md](CHANGELOG.md) and MCP is now available as a stdio server via `codebase-index mcp --root `. It exposes `healthcheck`, `search_code`, `find_symbol`, `find_refs`, -`impact_of`, `explain_code`, `architecture_overview`, and `index_stats`; -see [docs/MCP.md](docs/MCP.md). +`impact_of`, `explain_code`, `architecture_overview`, `path_between`, +`describe_symbol`, and `index_stats`; see [docs/MCP.md](docs/MCP.md). ``` You: "Where is user authentication implemented?" 
@@ -391,6 +391,12 @@ codebase-index impact "src/auth/AuthService.ts" # Map the codebase: modules, god nodes, surprising links, suggested questions codebase-index architecture +# How are two symbols/files connected? Shortest dependency/call path +codebase-index path "renew" "refresh_access_token" + +# Node card: definition, callers, callees, centrality, module +codebase-index describe "Database" + # View index statistics codebase-index stats diff --git a/docs/MCP.md b/docs/MCP.md index c68ced9..033f5e8 100644 --- a/docs/MCP.md +++ b/docs/MCP.md @@ -38,6 +38,8 @@ The MCP server exposes the same retrieval contract as the CLI. | `impact_of` | Return affected files/symbols from graph expansion | `impact` | | `explain_code` | Intent-aware retrieval packet for a natural-language question | `explain` | | `architecture_overview` | Modules, god nodes, surprising connections, suggested questions | `architecture` | +| `path_between` | Shortest dependency/call path between two symbols or files | `path` | +| `describe_symbol` | Node card: definition, callers, callees, centrality, module | `describe` | | `index_stats` | Return counts, language coverage, graph stats, freshness | `stats` | ## Output contract @@ -66,7 +68,7 @@ branch on the contract without sniffing the shape: version. The current version is **1**. - `tool` (string) — the emitting tool name (`search_code`, `find_symbol`, `find_refs`, `impact_of`, `explain_code`, `architecture_overview`, - `index_stats`, `healthcheck`). + `path_between`, `describe_symbol`, `index_stats`, `healthcheck`). - The no-index / error path carries the same envelope plus an `"error"` field. Rules: @@ -158,7 +160,7 @@ same trust boundaries: - Done: `src/codebase_index/mcp/server.py` thin adapter over retrieval/storage code. - Done: `codebase-index mcp --root ` CLI entrypoint. - Done: `healthcheck`, `search_code`, `find_symbol`, `find_refs`, `impact_of`, `explain_code`, - `architecture_overview`, and `index_stats` tools. 
@app.command("path")
def path_between(
    ctx: typer.Context,
    source: str = typer.Argument(..., help="File path or symbol name to start from."),
    target: str = typer.Argument(..., help="File path or symbol name to reach."),
    json_flag: bool = typer.Option(False, "--json", help="Emit machine-readable JSON."),
) -> None:
    """Shortest dependency/call path between two symbols or files (how are they connected)."""
    # NOTE: the docstring above is left untouched on purpose; Typer surfaces a
    # command's docstring as its help text, so it is user-facing output.

    # Heavy imports stay local to the command body, as in the original.
    from .graph.navigate import path_payload
    from .output import json as json_renderer
    from .output import markdown as md_renderer
    from .storage.db import Database

    # JSON output is requested either by this command's own --json flag or by
    # a "json" entry stored on the shared context object.
    wants_json = json_flag
    if not wants_json:
        wants_json = bool(ctx.obj and ctx.obj.get("json"))

    index_path, _ = _ensure_index(ctx)
    with Database(index_path) as db:
        result = path_payload(db.conn, source, target)

    # Only the selected renderer is invoked.
    if wants_json:
        rendered = json_renderer.render(result)
    else:
        rendered = md_renderer.render_path(result)
    typer.echo(rendered)
ctx.obj.get("json")) + db_path, _cfg = _ensure_index(ctx) + with Database(db_path) as db: + payload = describe_payload(db.conn, symbol) + typer.echo(json_renderer.render(payload) if is_json else md_renderer.render_describe(payload)) + + @app.command("graph") def graph_view( ctx: typer.Context, diff --git a/src/codebase_index/graph/navigate.py b/src/codebase_index/graph/navigate.py new file mode 100644 index 0000000..086e9a6 --- /dev/null +++ b/src/codebase_index/graph/navigate.py @@ -0,0 +1,201 @@ +"""Graph navigation: shortest path between two nodes, and a node "card". + +graphify ships `path A B` (how are two things connected?) and `explain Symbol` +(what is this node?). codebase-index already uses `explain` for how-it-works +retrieval, so the node card lives under `describe` to avoid colliding with it. + +Both walk the *resolved* edge graph and carry the Phase-1 confidence trail, so a +path through an `inferred`/`ambiguous` edge is visibly less certain than one +through `extracted` edges. +""" + +from __future__ import annotations + +import sqlite3 +from collections import deque +from typing import Optional + +from ..storage import repo + +# BFS safety valve: stop exploring after this many nodes so `path` stays cheap on +# very large graphs (the shortest path, if short, is found long before this). 
def _node_ref(conn: sqlite3.Connection, kind: str, node_id: int) -> Optional[dict]:
    """Build the display reference for one graph node, or None if its row is gone.

    Args:
        conn: Open index connection. Rows are read by column name, so the
            connection needs a mapping-style row factory (e.g. ``sqlite3.Row``).
        kind: ``"file"`` selects the ``files`` table; any other value is
            looked up as a symbol.
        node_id: Primary key of the file or symbol row.

    Returns:
        For a file: ``kind``, ``name`` (last path component), ``path`` and a
        ``line_start`` of None. For a symbol: ``kind``, ``name``,
        ``symbol_kind``, ``path`` and ``line_start``. None when no row matches.
    """
    params = (node_id,)

    if kind != "file":
        sym = conn.execute(
            "SELECT s.name AS name, s.kind AS kind, s.line_start AS line_start, f.path AS path "
            "FROM symbols s JOIN files f ON f.id = s.file_id WHERE s.id = ?",
            params,
        ).fetchone()
        if sym is None:
            return None
        return {
            "kind": "symbol",
            "name": sym["name"],
            "symbol_kind": sym["kind"],
            "path": sym["path"],
            "line_start": sym["line_start"],
        }

    file_row = conn.execute("SELECT path FROM files WHERE id = ?", params).fetchone()
    if file_row is None:
        return None
    file_path = file_row["path"]
    # Text after the last "/" (the whole path when there is no "/").
    basename = file_path.rpartition("/")[2]
    return {"kind": "file", "name": basename, "path": file_path, "line_start": None}
def path_payload(conn: sqlite3.Connection, src: str, dst: str) -> dict:
    """Shortest undirected path between two nodes, with the edge audit trail.

    Both tokens are resolved with ``_resolve_targets``; a token may map to
    several nodes, so this runs a multi-source breadth-first search from every
    source node and stops at the first target node it reaches. Edge direction
    is ignored while searching but recorded on every step.

    Args:
        conn: Open index connection.
        src: File path or symbol name to start from.
        dst: File path or symbol name to reach.

    Returns:
        A dict that always carries ``src``, ``dst``, ``index``, ``nodes``,
        ``steps`` and ``found``. When ``found`` is True it also carries
        ``hops`` (number of steps); each step holds ``from``, ``to``,
        ``edge_type``, ``confidence`` and ``direction``. When ``found`` is
        False it carries a ``reason``, which distinguishes an unresolved
        token, an exhausted search, and a search cut short by ``_MAX_VISITS``.
    """
    src_seeds = _resolve_targets(conn, src)
    dst_seeds = set(_resolve_targets(conn, dst))
    base = {"src": src, "dst": dst, "index": _freshness(conn), "nodes": [], "steps": []}
    if not src_seeds or not dst_seeds:
        missing = src if not src_seeds else dst
        return {**base, "found": False, "reason": f"Could not resolve `{missing}` to an indexed node."}

    # parent doubles as the visited set; via[n] is the edge used to reach n.
    parent: dict[Node, Optional[Node]] = {seed: None for seed in src_seeds}
    via: dict[Node, tuple] = {}
    queue: deque[Node] = deque(parent)  # dict keys: seeds, de-duplicated, in order

    # A source node that is also a target is a zero-hop path.
    found: Optional[Node] = next((seed for seed in src_seeds if seed in dst_seeds), None)
    visits = 0
    while found is None and queue and visits < _MAX_VISITS:
        node = queue.popleft()
        visits += 1
        for nk, nid, etype, conf, direction in _undirected_neighbors(conn, *node):
            nxt = (nk, nid)
            if nxt in parent:
                continue
            parent[nxt] = node
            via[nxt] = (etype, conf, direction)
            # Test on discovery rather than on dequeue: BFS discovers nodes in
            # the order it would dequeue them, so the same target (and path)
            # wins, without expanding a whole extra level against the budget.
            if nxt in dst_seeds:
                found = nxt
                break
            queue.append(nxt)

    if found is None:
        if queue:
            # The safety valve tripped with nodes still unexplored, so "no
            # path" would be a false claim; report the truncation instead.
            reason = (f"Search stopped after exploring {visits} nodes without reaching "
                      "the target; the nodes may still be connected by a longer path.")
        else:
            reason = "No path found between the two nodes in the resolved graph."
        return {**base, "found": False, "reason": reason}

    # Reconstruct from `found` back to a src seed.
    chain: list[Node] = []
    cur: Optional[Node] = found
    while cur is not None:
        chain.append(cur)
        cur = parent[cur]
    chain.reverse()

    # Look every node up once; the refs feed both `nodes` and `steps`.
    refs = {n: _node_ref(conn, *n) for n in chain}
    nodes = [refs[n] for n in chain if refs[n] is not None]
    steps = []
    for prev, nxt in zip(chain, chain[1:]):
        etype, conf, direction = via[nxt]
        a, b = refs[prev], refs[nxt]
        if a is not None and b is not None:
            # Copy so the step entries do not alias the dicts in `nodes`.
            steps.append({"from": dict(a), "to": dict(b), "edge_type": etype,
                          "confidence": conf, "direction": direction})
    return {**base, "found": True, "hops": len(steps), "nodes": nodes, "steps": steps}
def _god_rank(conn: sqlite3.Connection, name: str, path: str) -> Optional[int]:
    """Return the symbol's 1-based position in the cached god-node list.

    Args:
        conn: Open index connection, handed to ``analysis.load_analysis``.
        name: Symbol name to match against each entry's ``name``.
        path: File path to match against each entry's ``path``.

    Returns:
        The 1-based index of the first entry matching both name and path, or
        None when no analysis summary is available or nothing matches.
    """
    from . import analysis

    cached = analysis.load_analysis(conn)
    god_nodes = cached.get("god_nodes", []) if cached else []
    return next(
        (
            rank
            for rank, entry in enumerate(god_nodes, start=1)
            if entry.get("name") == name and entry.get("path") == path
        ),
        None,
    )
def _node_label(ref: dict) -> str:
    """Format a node reference for Markdown output.

    A ref whose ``kind`` is ``"symbol"`` and that has a non-empty ``name`` is
    shown as the name followed by its path in parentheses; every other ref is
    shown as its path alone. A missing or empty path renders as an empty string.
    """
    location = ref.get("path") or ""
    symbol_name = ref.get("name")
    if ref.get("kind") != "symbol" or not symbol_name:
        return f"`{location}`"
    return f"`{symbol_name}` ({location})"
def render_describe(payload: dict) -> str:
    """Render a symbol node card: definition, centrality, callers, callees.

    Args:
        payload: A describe payload. ``query`` is required. When ``found`` is
            falsy only ``reason`` is used; otherwise ``primary``,
            ``definitions``, ``callers`` and ``callees`` are read, each
            optional.

    Returns:
        Markdown ending in a single newline. The caller and callee sections
        list at most 20 entries each; when more exist, a trailing
        "… and N more" line says so, so the count in the section header never
        silently disagrees with the entries shown.
    """
    head = f"**describe:** `{payload['query']}`"
    if not payload.get("found"):
        return f"{head}\n\n_{payload.get('reason', 'Not found.')}_\n"

    shown_limit = 20  # per-section cap that keeps the card compact

    p = payload.get("primary", {})
    god = f" · god node #{p['god_rank']}" if p.get("god_rank") else ""
    lines = [
        f"{head} · module `{p.get('module', '?')}` · "
        f"in {p.get('in_degree', 0)} / out {p.get('out_degree', 0)}{god}",
        "",
    ]

    defs = payload.get("definitions", [])
    if defs:
        lines.append("**definition(s):**")
        for d in defs:
            sig = f" — `{d['signature']}`" if d.get("signature") else ""
            lines.append(f"- {d['kind']} `{d.get('qualified') or d['name']}` "
                         f"at `{d['path']}:{d['line_start']}`{sig}")
        lines.append("")

    callers = payload.get("callers", [])
    if callers:
        lines.append(f"**callers ({len(callers)}):**")
        for c in callers[:shown_limit]:
            mark = _conf_mark(c.get("confidence"))
            lines.append(f"- `{c['path']}:{c['line']}`{' · ' + mark if mark else ''}")
        if len(callers) > shown_limit:
            lines.append(f"- _… and {len(callers) - shown_limit} more_")
        lines.append("")

    callees = payload.get("callees", [])
    if callees:
        lines.append(f"**callees ({len(callees)}):**")
        for c in callees[:shown_limit]:
            mark = _conf_mark(c.get("confidence"))
            lines.append(f"- {_node_label(c)} _{c.get('edge_type', '')}_"
                         f"{' · ' + mark if mark else ''}")
        if len(callees) > shown_limit:
            lines.append(f"- _… and {len(callees) - shown_limit} more_")
        lines.append("")

    # Drop the trailing blank entry so the output ends in exactly one newline.
    return "\n".join(lines).rstrip() + "\n"
refresh_access_token(refresh_token: str) -> str:" + } + ], + "found": true, + "index": { + "built_at": "", + "exists": true, + "head_commit": "", + "stale": false + }, + "primary": { + "god_rank": 2, + "in_degree": 2, + "module": "src/auth", + "name": "refresh_access_token", + "out_degree": 0, + "path": "src/auth/token.py" + }, + "query": "refresh_access_token", + "schema_version": 1, + "tool": "describe_symbol" +} diff --git a/tests/golden/mcp_path_between.json b/tests/golden/mcp_path_between.json new file mode 100644 index 0000000..c4e02af --- /dev/null +++ b/tests/golden/mcp_path_between.json @@ -0,0 +1,51 @@ +{ + "dst": "refresh_access_token", + "found": true, + "hops": 1, + "index": { + "built_at": "", + "exists": true, + "head_commit": "", + "stale": false + }, + "nodes": [ + { + "kind": "symbol", + "line_start": 10, + "name": "renew", + "path": "src/api/service.py", + "symbol_kind": "method" + }, + { + "kind": "symbol", + "line_start": 4, + "name": "refresh_access_token", + "path": "src/auth/token.py", + "symbol_kind": "function" + } + ], + "schema_version": 1, + "src": "renew", + "steps": [ + { + "confidence": "extracted", + "direction": "out", + "edge_type": "call", + "from": { + "kind": "symbol", + "line_start": 10, + "name": "renew", + "path": "src/api/service.py", + "symbol_kind": "method" + }, + "to": { + "kind": "symbol", + "line_start": 4, + "name": "refresh_access_token", + "path": "src/auth/token.py", + "symbol_kind": "function" + } + } + ], + "tool": "path_between" +} diff --git a/tests/golden/path_renew_to_token.json b/tests/golden/path_renew_to_token.json new file mode 100644 index 0000000..1bcfa0a --- /dev/null +++ b/tests/golden/path_renew_to_token.json @@ -0,0 +1,49 @@ +{ + "dst": "refresh_access_token", + "found": true, + "hops": 1, + "index": { + "built_at": "", + "exists": true, + "head_commit": "", + "stale": false + }, + "nodes": [ + { + "kind": "symbol", + "line_start": 10, + "name": "renew", + "path": "src/api/service.py", + 
"symbol_kind": "method" + }, + { + "kind": "symbol", + "line_start": 4, + "name": "refresh_access_token", + "path": "src/auth/token.py", + "symbol_kind": "function" + } + ], + "src": "renew", + "steps": [ + { + "confidence": "extracted", + "direction": "out", + "edge_type": "call", + "from": { + "kind": "symbol", + "line_start": 10, + "name": "renew", + "path": "src/api/service.py", + "symbol_kind": "method" + }, + "to": { + "kind": "symbol", + "line_start": 4, + "name": "refresh_access_token", + "path": "src/auth/token.py", + "symbol_kind": "function" + } + } + ] +} diff --git a/tests/test_cli_golden.py b/tests/test_cli_golden.py index 602e51e..1319d7f 100644 --- a/tests/test_cli_golden.py +++ b/tests/test_cli_golden.py @@ -65,6 +65,8 @@ def indexed_repo(tmp_path_factory): ("impact_user_model", ["impact", "src/models/user.py", "--direction", "up"]), ("explain_auth", ["explain", "how does authentication work"]), ("architecture", ["architecture"]), + ("path_renew_to_token", ["path", "renew", "refresh_access_token"]), + ("describe_refresh", ["describe", "refresh_access_token"]), ("stats", ["stats"]), ] diff --git a/tests/test_mcp_golden.py b/tests/test_mcp_golden.py index a134051..8a1a608 100644 --- a/tests/test_mcp_golden.py +++ b/tests/test_mcp_golden.py @@ -69,6 +69,8 @@ def _call(indexed_repo, tool_fn, **kwargs): "mcp_impact_of": (lambda: mcp_server.impact_of, {"target": "src/models/user.py", "direction": "up"}), "mcp_explain_code": (lambda: mcp_server.explain_code, {"query": "how does authentication work"}), "mcp_architecture": (lambda: mcp_server.architecture_overview, {}), + "mcp_path_between": (lambda: mcp_server.path_between, {"source": "renew", "target": "refresh_access_token"}), + "mcp_describe_symbol": (lambda: mcp_server.describe_symbol, {"symbol": "refresh_access_token"}), "mcp_index_stats": (lambda: mcp_server.index_stats, {}), } @@ -81,6 +83,8 @@ def _call(indexed_repo, tool_fn, **kwargs): "mcp_impact_of": "impact_of", "mcp_explain_code": 
"explain_code", "mcp_architecture": "architecture_overview", + "mcp_path_between": "path_between", + "mcp_describe_symbol": "describe_symbol", "mcp_index_stats": "index_stats", } diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 418ffab..2a22b15 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -37,6 +37,8 @@ def test_mcp_server_has_expected_tools(): "impact_of", "explain_code", "architecture_overview", + "path_between", + "describe_symbol", "index_stats", } @@ -71,6 +73,8 @@ def test_search_code_no_index(): "impact_of": lambda: _call(mcp_server.impact_of, target="foo.py"), "explain_code": lambda: _call(mcp_server.explain_code, query="how does foo work"), "architecture_overview": lambda: _call(mcp_server.architecture_overview), + "path_between": lambda: _call(mcp_server.path_between, source="a", target="b"), + "describe_symbol": lambda: _call(mcp_server.describe_symbol, symbol="foo"), "index_stats": lambda: _call(mcp_server.index_stats), } diff --git a/tests/test_navigate.py b/tests/test_navigate.py new file mode 100644 index 0000000..3188e80 --- /dev/null +++ b/tests/test_navigate.py @@ -0,0 +1,102 @@ +"""Tests for graph.navigate — shortest path and the describe node card.""" + +from __future__ import annotations + +from codebase_index.graph.builder import build_graph +from codebase_index.graph.navigate import describe_payload, path_payload +from codebase_index.output import markdown +from codebase_index.parsers.base import Symbol +from codebase_index.storage import repo +from codebase_index.storage.db import Database + + +def _graph(tmp_path) -> Database: + """login -> make_token, login -> run_query (bridge), run_query -> exec_stmt.""" + db = Database(tmp_path / "index.sqlite").open() + auth = repo.upsert_file( + db.conn, path="src/auth/token.py", lang="python", size_bytes=1, sha256="a", + mtime_ns=1, git_status=None, parser="treesitter", indexed_at="t", is_generated=False, + ) + dbf = repo.upsert_file( + db.conn, 
path="src/db/query.py", lang="python", size_bytes=1, sha256="b", + mtime_ns=1, git_status=None, parser="treesitter", indexed_at="t", is_generated=False, + ) + a = repo.replace_symbols(db.conn, auth, [ + Symbol(name="make_token", kind="function", line_start=1, line_end=2), + Symbol(name="login", kind="function", line_start=3, line_end=4), + ]) + b = repo.replace_symbols(db.conn, dbf, [ + Symbol(name="run_query", kind="function", line_start=1, line_end=2), + Symbol(name="exec_stmt", kind="function", line_start=3, line_end=4), + ]) + repo.replace_edges(db.conn, auth, [ + {"edge_type": "call", "src_kind": "symbol", "src_id": a[1], + "dst_kind": None, "dst_id": None, "dst_name": "make_token", "line": 3, "resolved": 0}, + {"edge_type": "call", "src_kind": "symbol", "src_id": a[1], + "dst_kind": None, "dst_id": None, "dst_name": "run_query", "line": 4, "resolved": 0}, + ]) + repo.replace_edges(db.conn, dbf, [ + {"edge_type": "call", "src_kind": "symbol", "src_id": b[0], + "dst_kind": None, "dst_id": None, "dst_name": "exec_stmt", "line": 2, "resolved": 0}, + ]) + build_graph(db.conn) + return db + + +def test_path_finds_chain(tmp_path): + db = _graph(tmp_path) + payload = path_payload(db.conn, "login", "exec_stmt") + assert payload["found"] is True + names = [n["name"] for n in payload["nodes"]] + assert names[0] == "login" and names[-1] == "exec_stmt" + assert "run_query" in names # the bridge node + assert payload["hops"] == 2 + # Markdown renders without error and mentions both ends. + md = markdown.render_path(payload) + assert "login" in md and "exec_stmt" in md + db.close() + + +def test_path_unresolved_source(tmp_path): + db = _graph(tmp_path) + payload = path_payload(db.conn, "no_such_symbol", "exec_stmt") + assert payload["found"] is False + assert "no_such_symbol" in payload["reason"] + db.close() + + +def test_path_no_connection(tmp_path): + db = _graph(tmp_path) + # make_token is a leaf callee; nothing connects it to a fresh isolated file. 
def test_describe_unknown_symbol(tmp_path):
    """An unknown symbol yields a not-found payload whose reason reaches the rendered card."""
    db = _graph(tmp_path)
    payload = describe_payload(db.conn, "does_not_exist")
    assert payload["found"] is False
    assert "does_not_exist" in payload["reason"]
    # Check the rendered card itself: asserting only on the payload would
    # still pass if the renderer dropped or mangled the reason.
    md = markdown.render_describe(payload)
    assert "does_not_exist" in md
    assert payload["reason"] in md
    db.close()