Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 85 additions & 30 deletions mcp_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ class NodeRef(BaseModel):
id: str
kind: Literal["symbol", "route", "client", "producer", "unresolved_call_site"]
fqn: str
name: str | None = None
symbol_kind: str | None = None
microservice: str | None = None
module: str | None = None
Expand Down Expand Up @@ -517,6 +518,11 @@ class FindOutput(BaseModel):
default=None,
description="Echoed from the request — the page offset the server applied. None on success=False.",
)
has_more_results: bool | None = Field(
default=None,
description="True when additional pages remain beyond offset+limit (more matches exist). "
"None when unset (e.g. success=False).",
)
advisories: list[str] = Field(default_factory=list, description="Pure informational text with no tool call suggestion")
hints_structured: list[StructuredHint] = Field(default_factory=list, description=MCP_HINTS_STRUCTURED_FIELD_DESCRIPTION)

Expand All @@ -537,6 +543,12 @@ class NeighborsOutput(BaseModel):
default_factory=list,
description="Echo of neighbors(edge_types=...) from the request; empty when success=False.",
)
has_more_results: bool | None = Field(
default=None,
description="True when additional pages remain beyond offset+limit. None when unset or "
"when the single-origin CALLS path paginated in SQL (use unfiltered_calls_count / "
"calls_row_count there).",
)
advisories: list[str] = Field(default_factory=list, description="Pure informational text with no tool call suggestion")
hints_structured: list[StructuredHint] = Field(default_factory=list, description=MCP_HINTS_STRUCTURED_FIELD_DESCRIPTION)

Expand Down Expand Up @@ -958,6 +970,17 @@ def search_v2(
model_name=model_name,
device=device,
model=model,
# Push the NodeFilter structural predicates into the LanceDB query so
# they apply BEFORE pagination (issue #353) — previously they were only
# a post-filter on the already-paginated page, which could shrink or
# empty filtered pages even when many matches existed deeper in the
# ranking. _node_matches_filter below still re-checks every row (it
# covers the non-pushdownable fields and is the contract guarantee).
role=nf.role if nf else None,
module=nf.module if nf else None,
microservice=nf.microservice if nf else None,
capability=nf.capability if nf else None,
exclude_roles=nf.exclude_roles if nf else None,
)
hits: list[SearchHit] = []
for row in rows:
Expand Down Expand Up @@ -1059,7 +1082,12 @@ def find_v2(
hint_payload: dict[str, Any] = {
"success": True,
"kind": kind,
"results": [r.model_dump() for r in refs],
# exclude_none: this dict feeds generate_hints (which reads fields
# defensively via .get), not the tool result (FindOutput below holds the
# pydantic objects). Drop null fields -- including the NodeRef.name field
# that is None for every structured ref -- to match filter_dump above and
# avoid spurious "name": null noise in the hint input.
"results": [r.model_dump(exclude_none=True) for r in refs],
"limit": limit,
"offset": offset,
"filter": filter_dump,
Expand All @@ -1071,6 +1099,7 @@ def find_v2(
results=refs,
limit=limit,
offset=offset,
has_more_results=has_more_results,
advisories=raw_advisories,
hints_structured=_to_structured_hints(raw_struct),
)
Expand Down Expand Up @@ -1534,6 +1563,28 @@ def resolve_v2(
return out


# Per-edge-type attribute columns selected by the generic (flat-label) neighbors
# query (issue #356). RETURNing a fixed superset of columns regardless of which
# edge type matched is the typed-union RETURN anti-pattern: a stricter binder
# (e.g. Kùzu) errors when a RETURNed column does not exist on the matched type.
# Selecting columns per edge type keeps the query portable; _neighbor_edge_attrs
# still drops None/"" so each edge exposes only the attrs that exist for its type.
# Aligned with the REL TABLE schemas in build_ast_graph.py.
_FLAT_EDGE_ATTR_COLUMNS: dict[str, tuple[str, ...]] = {
    # CALLS selects the widest column set, including the call-site position
    # and argument-count columns.
    "CALLS": (
        "confidence",
        "strategy",
        "source",
        "call_site_line",
        "call_site_byte",
        "arg_count",
        "resolved",
    ),
    # Edge types that select the same columns are grouped; the shared tuples
    # are immutable, so reusing one object per group is safe.
    **dict.fromkeys(("HTTP_CALLS", "ASYNC_CALLS"), ("confidence", "strategy", "match")),
    **dict.fromkeys(("EXPOSES", "DECLARES_CLIENT", "DECLARES_PRODUCER"), ("confidence", "strategy")),
    "INJECTS": ("mechanism", "annotation", "field_or_param", "resolved"),
    **dict.fromkeys(("EXTENDS", "IMPLEMENTS"), ("resolved",)),
    # No attribute columns are selected for these edge types.
    **dict.fromkeys(("DECLARES", "OVERRIDES"), ()),
}


def _neighbor_edge_attrs(row: dict[str, Any]) -> dict[str, Any]:
attrs = {
k: v
Expand Down Expand Up @@ -1896,33 +1947,25 @@ def neighbors_v2(
results.extend(origin_edges)
continue
if flat_labels:
# Some Cypher binders can drop `label(e) IN $list` in WHERE; use OR of scalar equalities.
label_params = [f"l{i}" for i in range(len(flat_labels))]
label_predicate = "(" + " OR ".join(f"label(e) = ${name}" for name in label_params) + ")"
q_params = {"id": origin_id, **dict(zip(label_params, flat_labels, strict=True))}
if direction == "out":
rows = g._rows( # noqa: SLF001
"MATCH (a)-[e]->(b) WHERE a.id = $id AND "
f"{label_predicate} "
"RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
"e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
"e.annotation AS annotation, e.field_or_param AS field_or_param, "
"e.source AS source, e.call_site_line AS call_site_line, "
"e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
"e.resolved AS resolved",
q_params,
)
else:
rows = g._rows( # noqa: SLF001
"MATCH (a)<-[e]-(b) WHERE a.id = $id AND "
f"{label_predicate} "
"RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
"e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
"e.annotation AS annotation, e.field_or_param AS field_or_param, "
"e.source AS source, e.call_site_line AS call_site_line, "
"e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
"e.resolved AS resolved",
q_params,
# Select attribute columns per edge type (issue #356). A single
# multi-label query RETURNing a fixed column superset references
# columns that don't exist on every matched type — the typed-union
# RETURN anti-pattern, which errors on stricter binders (e.g. Kùzu).
# Run one single-label query per type, RETURNing only that type's
# columns, and merge the rows. `label(e) = $label` scalar equality
# (not `label(e) IN [...]`) per the AGENTS.md Cypher note.
rows: list[dict[str, Any]] = []
match_clause = "MATCH (a)-[e]->(b)" if direction == "out" else "MATCH (a)<-[e]-(b)"
for label in flat_labels:
cols = _FLAT_EDGE_ATTR_COLUMNS.get(label, ())
select = "b.id AS other_id, label(e) AS edge_type"
if cols:
select += ", " + ", ".join(f"e.{c} AS {c}" for c in cols)
rows.extend(
g._rows( # noqa: SLF001
f"{match_clause} WHERE a.id = $id AND label(e) = $label RETURN {select}",
{"id": origin_id, "label": label},
)
)
for row in rows:
other_id = str(row.get("other_id") or "")
Expand Down Expand Up @@ -1979,14 +2022,25 @@ def neighbors_v2(
)
if use_calls_path and len(origins) > 1:
sliced = results[offset : offset + limit]
neighbors_has_more = len(results) > offset + limit
elif use_calls_path:
# Single-origin CALLS path. When paginate_in_sql is True the SQL did
# the OFFSET/LIMIT and the row/unfiltered counts carry the has-more
# signal, so this field stays None (unknown). When paginate_in_sql is
# False (a node_filter is set, include_unresolved, or dedup_calls) we
# loaded the FULL matching set with no pushdown, so the client already
# has every edge -> False (not None), so a paging client need not probe.
sliced = results
neighbors_has_more = None if paginate_in_sql else False
else:
sliced = results if use_calls_path else results[offset : offset + limit]
sliced = results[offset : offset + limit]
neighbors_has_more = len(results) > offset + limit
first_origin = origins[0]
origin_kind = _resolve_node_kind(g, first_origin)
subject_record = _load_node_record(g, first_origin, origin_kind)
neigh_payload = {
"success": True,
"results": [e.model_dump() for e in sliced],
"results": [e.model_dump(exclude_none=True) for e in sliced],
"requested_edge_types": requested_edge_types,
"requested_direction": direction,
"offset": offset,
Expand All @@ -2006,6 +2060,7 @@ def neighbors_v2(
success=True,
results=sliced,
requested_edge_types=requested_edge_types,
has_more_results=neighbors_has_more,
advisories=raw_advisories,
hints_structured=_to_structured_hints(raw_struct),
)
Expand Down
134 changes: 134 additions & 0 deletions tests/test_mcp_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,10 +451,14 @@ def test_neighbors_route_in_exposes_returns_handler(ladybug_graph) -> None:
def test_neighbors_route_in_http_calls_returns_callers(ladybug_graph) -> None:
class FakeGraph:
def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
# The generic flat-label path now queries one edge type at a time and
# passes label(e) = $label (issue #356); model that only HTTP_CALLS
# callers exist in this fixture so the ASYNC_CALLS label returns none.
if (
"MATCH (a)<-[e]-(b)" in query
and "WHERE a.id" in query
and "RETURN b.id AS other_id" in query
and (params or {}).get("label") == "HTTP_CALLS"
):
return [{"other_id": "client:caller", "edge_type": "HTTP_CALLS", "confidence": 0.8, "match": "cross_service"}]
if "MATCH (n:Client)" in query:
Expand Down Expand Up @@ -584,6 +588,110 @@ def test_search_unknown_filter_key_returns_failure(monkeypatch, ladybug_graph) -
assert "typo_key" in out.message


def test_search_pushes_nodefilter_into_run_search(monkeypatch, ladybug_graph) -> None:
    """Check that search_v2 hands the structural filter fields to run_search.

    run_search is replaced with a recorder that captures its keyword
    arguments; every structural field supplied in ``filter`` must arrive
    there unchanged (issue #353: the filter has to apply before pagination
    rather than only as a post-filter on the already-paginated page).
    """
    seen_kwargs: dict[str, Any] = {}

    def recording_run_search(query, **kwargs):
        # Record what search_v2 forwarded, then answer with the canned rows.
        seen_kwargs.update(kwargs)
        return _fake_search_rows()

    monkeypatch.setattr("mcp_v2.run_search", recording_run_search)

    out = search_v2(
        "ChatService",
        filter={
            "role": "SERVICE",
            "module": "chat-assign",
            "microservice": "chat-assign",
            "capability": "c",
            "exclude_roles": ["CONTROLLER"],
        },
        graph=ladybug_graph,
    )
    assert out.success is True

    # Kept separate from the dict passed above so the expectation cannot be
    # affected by anything search_v2 does with its input.
    expected_pushdown = {
        "role": "SERVICE",
        "module": "chat-assign",
        "microservice": "chat-assign",
        "capability": "c",
        "exclude_roles": ["CONTROLLER"],
    }
    for field_name, expected_value in expected_pushdown.items():
        assert seen_kwargs.get(field_name) == expected_value


def test_unresolved_call_site_noderef_carries_callee_name() -> None:
    """Check that an unresolved-call-site ref exposes the callee via ``name``.

    Regression for issue #354: the edge built from an unresolved call-site
    row must report the callee identifier on ``edge.other.name`` while still
    carrying it under ``attrs["callee_simple"]`` for clients that read attrs.
    """
    from mcp_v2 import _unresolved_site_to_edge

    callee = "Foo.bar"
    site_row = {
        "id": "ucs:1",
        "callee_simple": callee,
        "call_site_line": 42,
        "call_site_byte": 7,
        "arg_count": 2,
        "reason": "phantom",
        "receiver_expr": "x",
    }

    edge = _unresolved_site_to_edge("origin:1", site_row)

    other = edge.other
    assert other.kind == "unresolved_call_site"
    assert other.name == callee
    # The callee remains available in attrs as well.
    assert edge.attrs["callee_simple"] == callee


def test_find_exposes_has_more_results(ladybug_graph) -> None:
    """Check that FindOutput reports whether another page exists (issue #355).

    With ``limit=1`` the first page of method symbols must report more
    results; a page requested far past the end must report none.
    """

    def method_page(offset: int):
        # A fresh filter dict per call keeps the two requests independent.
        return find_v2("symbol", {"symbol_kind": "method"}, limit=1, offset=offset, graph=ladybug_graph)

    first_page = method_page(0)
    assert first_page.success is True
    # The fixture graph is expected to contain more than one method.
    assert first_page.has_more_results is True

    # Past the end: nothing remains, so the flag is False.
    beyond_end = method_page(1_000_000)
    assert beyond_end.success is True
    assert beyond_end.has_more_results is False


def test_neighbors_flat_labels_select_columns_per_edge_type() -> None:
    """Check the per-edge-type column selection of the flat-label query (issue #356).

    A recording graph captures every query that RETURNs ``b.id AS other_id``.
    Each captured query must carry a ``label`` parameter, and the ``e.<col>``
    columns it references must be a subset of the columns registered for that
    label, never a fixed superset that stricter binders such as Kuzu reject.
    """
    from mcp_v2 import _FLAT_EDGE_ATTR_COLUMNS

    requested_types = ["CALLS", "DECLARES", "INJECTS", "EXPOSES"]
    recorded: list[tuple[str, dict[str, Any]]] = []

    class RecordingGraph:
        def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
            # Only the neighbor-selection queries are of interest; every query
            # is answered with an empty result set.
            if "RETURN b.id AS other_id" in query:
                recorded.append((query, params or {}))
            return []

    out = neighbors_v2(
        "sym:origin",
        direction="out",
        edge_types=list(requested_types),
        graph=RecordingGraph(),  # type: ignore[arg-type]
    )
    assert out.success is True

    # Every requested edge type shows up as the label parameter of a query.
    seen_labels = {params.get("label") for _, params in recorded}
    assert seen_labels == set(requested_types)

    column_ref = re.compile(r"e\.(\w+) AS ")
    for query, params in recorded:
        label = params["label"]
        allowed = set(_FLAT_EDGE_ATTR_COLUMNS.get(label, ()))
        referenced = set(column_ref.findall(query))
        assert referenced <= allowed, (
            f"label {label}: RETURN references {referenced}, only {allowed} are valid"
        )


def test_search_cross_kind_filter_returns_failure(monkeypatch, ladybug_graph) -> None:
monkeypatch.setattr("mcp_v2.run_search", lambda *args, **kwargs: _fake_search_rows())
out = search_v2("ChatService", filter={"path_prefix": "/api"}, graph=ladybug_graph)
Expand Down Expand Up @@ -647,6 +755,32 @@ def test_neighbors_filter_accepts_json_string(ladybug_graph) -> None:
assert out_dict.results == out_str.results


def test_neighbors_calls_has_more_results_reflects_pagination_mode(ladybug_graph) -> None:
    """Check has_more_results on the single-origin CALLS path (issue #355).

    With a node filter the call is expected to return the full filtered set,
    so has_more_results must be False rather than None ("unknown"). Without a
    filter the field is expected to stay None.
    """
    origin = _method_id_with_calls(ladybug_graph, "out")

    def outgoing_calls(**extra):
        # Same single-origin CALLS request each time; only the extras vary.
        return neighbors_v2(origin, direction="out", edge_types=["CALLS"], graph=ladybug_graph, **extra)

    # Filtered request: the whole matching set comes back, so nothing remains.
    with_filter = outgoing_calls(filter={"role": "SERVICE"})
    assert with_filter.success is True
    assert with_filter.has_more_results is False, (
        "non-SQL-paginated CALLS returned the full set; has_more_results must be "
        "False so a paging client does not issue a redundant probe (#355)"
    )

    # Unfiltered request: the field is left unset.
    without_filter = outgoing_calls()
    assert without_filter.success is True
    assert without_filter.has_more_results is None


def test_neighbors_filter_unknown_key_returns_failure(ladybug_graph) -> None:
mid = _method_id_with_calls(ladybug_graph, "out")
out = neighbors_v2(mid, direction="out", edge_types=["CALLS"], filter={"typo_key": "x"}, graph=ladybug_graph)
Expand Down
Loading