From 42c5c024c641074042155d59a04a498216b43157 Mon Sep 17 00:00:00 2001
From: Dmitry Teryaev <doudmitry@gmail.com>
Date: Fri, 3 Jul 2026 23:31:17 +0300
Subject: [PATCH 1/3] fix(mcp): search filter pushdown, NodeRef.name,
 has_more_results, per-type neighbor columns (#353-356)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four correctness/contract fixes in the search/find/neighbors family:

#353 — search now forwards role/module/microservice/capability/exclude_roles
from NodeFilter into run_search so the filter applies BEFORE pagination, not as
a post-filter on the already-paginated page (which could empty a filtered page
even when many matches existed deeper in the ranking).

#354 — NodeRef gains `name`; the unresolved-call-site ref (other=NodeRef(...,
name=callee)) no longer silently drops the callee identifier (pydantic
extra='ignore' discarded it, leaving fqn='' and no readable callee).

#355 — FindOutput and NeighborsOutput gain `has_more_results`. find_v2 computed
it for the hint payload but never put it on the model; clients had to probe to
tell if a next page existed. neighbors_v2 computes it where it paginates in
Python (None on the single-origin CALLS SQL-paginated path, where
calls_row_count is the signal).

#356 — the generic flat-label neighbors query no longer RETURNs a fixed column
superset regardless of matched edge type (typed-union RETURN anti-pattern that
errors on stricter binders). It now issues one single-label query per edge type,
RETURNing only that type's columns (map aligned with build_ast_graph REL schemas).

Co-Authored-By: Claude <noreply@anthropic.com>
---
 mcp_v2.py            | 102 +++++++++++++++++++++++++++++-----------
 tests/test_mcp_v2.py | 108 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+), 28 deletions(-)

diff --git a/mcp_v2.py b/mcp_v2.py
index 86807ed3..90058ddb 100644
--- a/mcp_v2.py
+++ b/mcp_v2.py
@@ -450,6 +450,7 @@ class NodeRef(BaseModel):
     id: str
     kind: Literal["symbol", "route", "client", "producer", "unresolved_call_site"]
     fqn: str
+    name: str | None = None
     symbol_kind: str | None = None
     microservice: str | None = None
     module: str | None = None
@@ -517,6 +518,11 @@ class FindOutput(BaseModel):
         default=None,
         description="Echoed from the request — the page offset the server applied. None on success=False.",
     )
+    has_more_results: bool | None = Field(
+        default=None,
+        description="True when additional pages remain beyond offset+limit (more matches exist). "
+        "None when unset (e.g. success=False).",
+    )
     advisories: list[str] = Field(default_factory=list, description="Pure informational text with no tool call suggestion")
     hints_structured: list[StructuredHint] = Field(default_factory=list, description=MCP_HINTS_STRUCTURED_FIELD_DESCRIPTION)
 
@@ -537,6 +543,12 @@ class NeighborsOutput(BaseModel):
         default_factory=list,
         description="Echo of neighbors(edge_types=...) from the request; empty when success=False.",
     )
+    has_more_results: bool | None = Field(
+        default=None,
+        description="True when additional pages remain beyond offset+limit. None when unset or "
+        "when the single-origin CALLS path paginated in SQL (use unfiltered_calls_count / "
+        "calls_row_count there).",
+    )
     advisories: list[str] = Field(default_factory=list, description="Pure informational text with no tool call suggestion")
     hints_structured: list[StructuredHint] = Field(default_factory=list, description=MCP_HINTS_STRUCTURED_FIELD_DESCRIPTION)
 
@@ -958,6 +970,17 @@ def search_v2(
             model_name=model_name,
             device=device,
             model=model,
+            # Push the NodeFilter structural predicates into the LanceDB query so
+            # they apply BEFORE pagination (issue #353) — previously they were only
+            # a post-filter on the already-paginated page, which could shrink or
+            # empty filtered pages even when many matches existed deeper in the
+            # ranking. _node_matches_filter below still re-checks every row (it
+            # covers the non-pushdownable fields and is the contract guarantee).
+            role=nf.role if nf else None,
+            module=nf.module if nf else None,
+            microservice=nf.microservice if nf else None,
+            capability=nf.capability if nf else None,
+            exclude_roles=nf.exclude_roles if nf else None,
         )
         hits: list[SearchHit] = []
         for row in rows:
@@ -1071,6 +1094,7 @@ def find_v2(
             results=refs,
             limit=limit,
             offset=offset,
+            has_more_results=has_more_results,
             advisories=raw_advisories,
             hints_structured=_to_structured_hints(raw_struct),
         )
@@ -1534,6 +1558,28 @@ def resolve_v2(
         return out
 
 
+# Per-edge-type attribute columns selected by the generic (flat-label) neighbors
+# query (issue #356). RETURNing a fixed superset of columns regardless of which
+# edge type matched is the typed-union RETURN anti-pattern: a stricter binder
+# (e.g. Kùzu) errors when a RETURNed column does not exist on the matched type.
+# Selecting columns per edge type keeps the query portable; _neighbor_edge_attrs
+# still drops None/"" so each edge exposes only the attrs that exist for its type.
+# Aligned with the REL TABLE schemas in build_ast_graph.py.
+_FLAT_EDGE_ATTR_COLUMNS: dict[str, tuple[str, ...]] = {
+    "CALLS": ("confidence", "strategy", "source", "call_site_line", "call_site_byte", "arg_count", "resolved"),
+    "HTTP_CALLS": ("confidence", "strategy", "match"),
+    "ASYNC_CALLS": ("confidence", "strategy", "match"),
+    "EXPOSES": ("confidence", "strategy"),
+    "DECLARES_CLIENT": ("confidence", "strategy"),
+    "DECLARES_PRODUCER": ("confidence", "strategy"),
+    "INJECTS": ("mechanism", "annotation", "field_or_param", "resolved"),
+    "EXTENDS": ("resolved",),
+    "IMPLEMENTS": ("resolved",),
+    "DECLARES": (),
+    "OVERRIDES": (),
+}
+
+
 def _neighbor_edge_attrs(row: dict[str, Any]) -> dict[str, Any]:
     attrs = {
         k: v
@@ -1896,33 +1942,25 @@ def neighbors_v2(
                 results.extend(origin_edges)
                 continue
             if flat_labels:
-                # Some Cypher binders can drop `label(e) IN $list` in WHERE; use OR of scalar equalities.
-                label_params = [f"l{i}" for i in range(len(flat_labels))]
-                label_predicate = "(" + " OR ".join(f"label(e) = ${name}" for name in label_params) + ")"
-                q_params = {"id": origin_id, **dict(zip(label_params, flat_labels, strict=True))}
-                if direction == "out":
-                    rows = g._rows(  # noqa: SLF001
-                        "MATCH (a)-[e]->(b) WHERE a.id = $id AND "
-                        f"{label_predicate} "
-                        "RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
-                        "e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
-                        "e.annotation AS annotation, e.field_or_param AS field_or_param, "
-                        "e.source AS source, e.call_site_line AS call_site_line, "
-                        "e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
-                        "e.resolved AS resolved",
-                        q_params,
-                    )
-                else:
-                    rows = g._rows(  # noqa: SLF001
-                        "MATCH (a)<-[e]-(b) WHERE a.id = $id AND "
-                        f"{label_predicate} "
-                        "RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
-                        "e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
-                        "e.annotation AS annotation, e.field_or_param AS field_or_param, "
-                        "e.source AS source, e.call_site_line AS call_site_line, "
-                        "e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
-                        "e.resolved AS resolved",
-                        q_params,
+                # Select attribute columns per edge type (issue #356). A single
+                # multi-label query RETURNing a fixed column superset references
+                # columns that don't exist on every matched type — the typed-union
+                # RETURN anti-pattern, which errors on stricter binders (e.g. Kùzu).
+                # Run one single-label query per type, RETURNing only that type's
+                # columns, and merge the rows. `label(e) = $label` scalar equality
+                # (not `label(e) IN [...]`) per the AGENTS.md Cypher note.
+                rows: list[dict[str, Any]] = []
+                match_clause = "MATCH (a)-[e]->(b)" if direction == "out" else "MATCH (a)<-[e]-(b)"
+                for label in flat_labels:
+                    cols = _FLAT_EDGE_ATTR_COLUMNS.get(label, ())
+                    select = "b.id AS other_id, label(e) AS edge_type"
+                    if cols:
+                        select += ", " + ", ".join(f"e.{c} AS {c}" for c in cols)
+                    rows.extend(
+                        g._rows(  # noqa: SLF001
+                            f"{match_clause} WHERE a.id = $id AND label(e) = $label RETURN {select}",
+                            {"id": origin_id, "label": label},
+                        )
                     )
                 for row in rows:
                     other_id = str(row.get("other_id") or "")
@@ -1979,8 +2017,15 @@ def neighbors_v2(
                     )
         if use_calls_path and len(origins) > 1:
             sliced = results[offset : offset + limit]
+            neighbors_has_more = len(results) > offset + limit
+        elif use_calls_path:
+            # Single-origin CALLS path already paginated in SQL; the row/unfiltered
+            # counts are the pagination signal there, so has_more stays None.
+            sliced = results
+            neighbors_has_more = None
         else:
-            sliced = results if use_calls_path else results[offset : offset + limit]
+            sliced = results[offset : offset + limit]
+            neighbors_has_more = len(results) > offset + limit
         first_origin = origins[0]
         origin_kind = _resolve_node_kind(g, first_origin)
         subject_record = _load_node_record(g, first_origin, origin_kind)
@@ -2006,6 +2051,7 @@ def neighbors_v2(
             success=True,
             results=sliced,
             requested_edge_types=requested_edge_types,
+            has_more_results=neighbors_has_more,
             advisories=raw_advisories,
             hints_structured=_to_structured_hints(raw_struct),
         )
diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py
index 4ceb829a..7a729710 100644
--- a/tests/test_mcp_v2.py
+++ b/tests/test_mcp_v2.py
@@ -451,10 +451,14 @@ def test_neighbors_route_in_exposes_returns_handler(ladybug_graph) -> None:
 def test_neighbors_route_in_http_calls_returns_callers(ladybug_graph) -> None:
     class FakeGraph:
         def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
+            # The generic flat-label path now queries one edge type at a time and
+            # passes label(e) = $label (issue #356); model that only HTTP_CALLS
+            # callers exist in this fixture so the ASYNC_CALLS label returns none.
             if (
                 "MATCH (a)<-[e]-(b)" in query
                 and "WHERE a.id" in query
                 and "RETURN b.id AS other_id" in query
+                and (params or {}).get("label") == "HTTP_CALLS"
             ):
                 return [{"other_id": "client:caller", "edge_type": "HTTP_CALLS", "confidence": 0.8, "match": "cross_service"}]
             if "MATCH (n:Client)" in query:
@@ -584,6 +588,110 @@ def test_search_unknown_filter_key_returns_failure(monkeypatch, ladybug_graph) -
     assert "typo_key" in out.message
 
 
+def test_search_pushes_nodefilter_into_run_search(monkeypatch, ladybug_graph) -> None:
+    """search forwards NodeFilter structural fields into run_search so the filter
+    applies BEFORE pagination, not as a post-filter on the already-paginated page
+    (issue #353) — previously a filtered page could shrink to 0-2 results even
+    when many matches existed deeper in the ranking."""
+    captured: dict[str, Any] = {}
+
+    def fake_run_search(query, **kwargs):
+        captured.update(kwargs)
+        return _fake_search_rows()
+
+    monkeypatch.setattr("mcp_v2.run_search", fake_run_search)
+    out = search_v2(
+        "ChatService",
+        filter={
+            "role": "SERVICE",
+            "module": "chat-assign",
+            "microservice": "chat-assign",
+            "capability": "c",
+            "exclude_roles": ["CONTROLLER"],
+        },
+        graph=ladybug_graph,
+    )
+    assert out.success is True
+    assert captured.get("role") == "SERVICE"
+    assert captured.get("module") == "chat-assign"
+    assert captured.get("microservice") == "chat-assign"
+    assert captured.get("capability") == "c"
+    assert captured.get("exclude_roles") == ["CONTROLLER"]
+
+
+def test_unresolved_call_site_noderef_carries_callee_name() -> None:
+    """The unresolved-call-site NodeRef must carry the callee identifier in `name`
+    (issue #354) — NodeRef previously had no `name` field, so pydantic's default
+    extra='ignore' silently dropped `name=callee`, leaving the structured ref with
+    fqn='' and no human-readable callee (clients had to dig into attrs)."""
+    from mcp_v2 import _unresolved_site_to_edge
+
+    edge = _unresolved_site_to_edge(
+        "origin:1",
+        {
+            "id": "ucs:1",
+            "callee_simple": "Foo.bar",
+            "call_site_line": 42,
+            "call_site_byte": 7,
+            "arg_count": 2,
+            "reason": "phantom",
+            "receiver_expr": "x",
+        },
+    )
+    assert edge.other.kind == "unresolved_call_site"
+    assert edge.other.name == "Foo.bar"
+    # callee is also still carried in attrs for clients that read attrs
+    assert edge.attrs["callee_simple"] == "Foo.bar"
+
+
+def test_find_exposes_has_more_results(ladybug_graph) -> None:
+    """find surfaces has_more_results on FindOutput so a paging client can tell
+    whether another page exists without a probe call (issue #355). The value was
+    computed and placed in the hint payload but absent from the output model."""
+    out = find_v2("symbol", {"symbol_kind": "method"}, limit=1, offset=0, graph=ladybug_graph)
+    assert out.success is True
+    assert out.has_more_results is True  # bank-chat has more than one method
+
+    # Past the end: no rows remain, so has_more is False.
+    out_last = find_v2("symbol", {"symbol_kind": "method"}, limit=1, offset=1_000_000, graph=ladybug_graph)
+    assert out_last.success is True
+    assert out_last.has_more_results is False
+
+
+def test_neighbors_flat_labels_select_columns_per_edge_type() -> None:
+    """The generic flat-label neighbors query issues one Cypher per edge type and
+    RETURNs only that type's columns (issue #356) — never a fixed superset that
+    references columns absent on some matched type (the typed-union RETURN
+    anti-pattern that errors on stricter binders like Kuzu)."""
+    from mcp_v2 import _FLAT_EDGE_ATTR_COLUMNS
+
+    issued: list[tuple[str, dict[str, Any]]] = []
+
+    class FakeGraph:
+        def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
+            if "RETURN b.id AS other_id" in query:
+                issued.append((query, params or {}))
+            return []
+
+    out = neighbors_v2(
+        "sym:origin",
+        direction="out",
+        edge_types=["CALLS", "DECLARES", "INJECTS", "EXPOSES"],
+        graph=FakeGraph(),  # type: ignore[arg-type]
+    )
+    assert out.success is True
+    # One flat-label query per edge type, each tagged with its label param.
+    labels = [p.get("label") for _, p in issued]
+    assert set(labels) == {"CALLS", "DECLARES", "INJECTS", "EXPOSES"}
+    for query, params in issued:
+        label = params["label"]
+        allowed = set(_FLAT_EDGE_ATTR_COLUMNS.get(label, ()))
+        referenced = set(re.findall(r"e\.(\w+) AS ", query))
+        assert referenced <= allowed, (
+            f"label {label}: RETURN references {referenced}, only {allowed} are valid"
+        )
+
+
 def test_search_cross_kind_filter_returns_failure(monkeypatch, ladybug_graph) -> None:
     monkeypatch.setattr("mcp_v2.run_search", lambda *args, **kwargs: _fake_search_rows())
     out = search_v2("ChatService", filter={"path_prefix": "/api"}, graph=ladybug_graph)

From b9818ba6051675510ab2012f81483c312686bee0 Mon Sep 17 00:00:00 2001
From: Dmitry Teryaev <doudmitry@gmail.com>
Date: Sat, 4 Jul 2026 08:21:45 +0300
Subject: [PATCH 2/3] fix(mcp): set has_more_results=False when the CALLS path
 returns the full set

On the single-origin CALLS neighbors path, has_more_results was None
unconditionally. When paginate_in_sql is False (a node_filter is set,
include_unresolved, or dedup_calls), _neighbors_calls_for_origin loads
the FULL matching set (offset=0, limit=None), so the client already has
every edge -- has_more must be False, not None ("unknown"), so a paging
client need not issue a redundant probe (#355). Keep None only for the
SQL-paginated case where the row/unfiltered counts carry the signal.

Adds a regression test covering both modes (filtered -> False,
unfiltered single-origin -> None). Verified red->green.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 mcp_v2.py            | 10 +++++++---
 tests/test_mcp_v2.py | 26 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/mcp_v2.py b/mcp_v2.py
index 90058ddb..400d914c 100644
--- a/mcp_v2.py
+++ b/mcp_v2.py
@@ -2019,10 +2019,14 @@ def neighbors_v2(
             sliced = results[offset : offset + limit]
             neighbors_has_more = len(results) > offset + limit
         elif use_calls_path:
-            # Single-origin CALLS path already paginated in SQL; the row/unfiltered
-            # counts are the pagination signal there, so has_more stays None.
+            # Single-origin CALLS path. When paginate_in_sql is True the SQL did
+            # the OFFSET/LIMIT and the row/unfiltered counts carry the has-more
+            # signal, so this field stays None (unknown). When paginate_in_sql is
+            # False (a node_filter is set, include_unresolved, or dedup_calls) we
+            # loaded the FULL matching set with no pushdown, so the client already
+            # has every edge -> False (not None), so a paging client need not probe.
             sliced = results
-            neighbors_has_more = None
+            neighbors_has_more = None if paginate_in_sql else False
         else:
             sliced = results[offset : offset + limit]
             neighbors_has_more = len(results) > offset + limit
diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py
index 7a729710..c9eb7187 100644
--- a/tests/test_mcp_v2.py
+++ b/tests/test_mcp_v2.py
@@ -755,6 +755,32 @@ def test_neighbors_filter_accepts_json_string(ladybug_graph) -> None:
     assert out_dict.results == out_str.results
 
 
+def test_neighbors_calls_has_more_results_reflects_pagination_mode(ladybug_graph) -> None:
+    """Single-origin CALLS has_more_results depends on whether SQL paginated.
+
+    Regression for the #355 has_more_results field on NeighborsOutput. When a
+    node_filter forces the in-memory (non-SQL-paginated) path, the full filtered
+    CALLS set is returned, so has_more_results must be False (the client has
+    everything and need not probe) -- not None ("unknown"). With no filter the
+    single-origin SQL path paginates and the row/unfiltered counts carry the
+    signal, so the field stays None.
+    """
+    mid = _method_id_with_calls(ladybug_graph, "out")
+    # node_filter set -> paginate_in_sql False -> full set returned -> has_more False
+    filtered = neighbors_v2(
+        mid, direction="out", edge_types=["CALLS"], filter={"role": "SERVICE"}, graph=ladybug_graph
+    )
+    assert filtered.success is True
+    assert filtered.has_more_results is False, (
+        "non-SQL-paginated CALLS returned the full set; has_more_results must be "
+        "False so a paging client does not issue a redundant probe (#355)"
+    )
+    # No filter, single origin -> SQL-paginated -> has-more signal is the row count.
+    paginated = neighbors_v2(mid, direction="out", edge_types=["CALLS"], graph=ladybug_graph)
+    assert paginated.success is True
+    assert paginated.has_more_results is None
+
+
 def test_neighbors_filter_unknown_key_returns_failure(ladybug_graph) -> None:
     mid = _method_id_with_calls(ladybug_graph, "out")
     out = neighbors_v2(mid, direction="out", edge_types=["CALLS"], filter={"typo_key": "x"}, graph=ladybug_graph)

From 5ec979a8bcc1b1e0d97a642085261bbfee3f1d40 Mon Sep 17 00:00:00 2001
From: Dmitry Teryaev <doudmitry@gmail.com>
Date: Sat, 4 Jul 2026 08:55:45 +0300
Subject: [PATCH 3/3] fix(mcp): fail-loud on missing edge-type attrs; drop null
 fields from hint input

- _FLAT_EDGE_ATTR_COLUMNS now asserts at import that it covers every EdgeType
  exactly. A future edge type added to EdgeType + EDGE_SCHEMA + a REL TABLE but
  missing here would otherwise silently SELECT only (id, label) and yield edges
  with empty attrs via the .get(label, ()) fallback; the assert trips on both a
  missing and a stale entry.
- The find/neighbors hint payloads used bare model_dump(), so every structured
  NodeRef carried name: null (and other null fields) into generate_hints. Switch
  to exclude_none=True -- these dicts feed generate_hints (which reads fields
  defensively via .get), not the tool result, and it matches the adjacent filter
  dumps. Verified against test_mcp_v2 / test_mcp_hints / test_client_hint_recovery.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 mcp_v2.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/mcp_v2.py b/mcp_v2.py
index 400d914c..60761da9 100644
--- a/mcp_v2.py
+++ b/mcp_v2.py
@@ -1082,7 +1082,12 @@ def find_v2(
         hint_payload: dict[str, Any] = {
             "success": True,
             "kind": kind,
-            "results": [r.model_dump() for r in refs],
+            # exclude_none: this dict feeds generate_hints (which reads fields
+            # defensively via .get), not the tool result (FindOutput below holds the
+            # pydantic objects). Drop null fields -- including the NodeRef.name field
+            # that is None for every structured ref -- to match filter_dump above and
+            # avoid spurious "name": null noise in the hint input.
+            "results": [r.model_dump(exclude_none=True) for r in refs],
             "limit": limit,
             "offset": offset,
             "filter": filter_dump,
@@ -2035,7 +2040,7 @@ def neighbors_v2(
         subject_record = _load_node_record(g, first_origin, origin_kind)
         neigh_payload = {
             "success": True,
-            "results": [e.model_dump() for e in sliced],
+            "results": [e.model_dump(exclude_none=True) for e in sliced],
             "requested_edge_types": requested_edge_types,
             "requested_direction": direction,
             "offset": offset,