From 393cef46b9c6134cf61fd14f84282d3159b0866f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 17:57:15 -0500 Subject: [PATCH 01/10] mcp(refactor[instructions]): surface call-site rules in tool descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to 6432646 which split ``_BASE_INSTRUCTIONS`` into named gap-explainer / positive-guidance segments and named the "prefer the tool description" decision. Phase 1 of the slim-down acts on it: per-tool rules move from the global card (or invisible module docstrings) into tool descriptions an agent sees on every ``list_tools`` call. * ``show_hooks``: docstring now carries the no-set_hook rationale (write-hooks survive process death, so they belong in the tmux config file, not a transient MCP session). Previously only in the ``hook_tools`` module docstring — FastMCP doesn't surface those. * ``load_buffer``: docstring carries the no-list_buffers / clipboard-privacy rationale. Same module-docstring-only problem. * ``capture_pane``: registered with a ``description=`` override pointing at ``snapshot_pane``, ``wait_for_text``, and ``search_panes``. The function docstring stays focused on parameters for Sphinx; the override carries the agent-facing cross-references without bloating the human docstring. * ``send_keys``: explicit anti-poll guidance naming ``wait_for_text`` as the server-side blocking primitive. * ``list_panes`` / ``list_windows``: sharpened metadata-vs-content phrasing with the user-trigger language ("panes that contain X"). New parametrized ``test_tool_description_includes`` asserts each tool is registered AND its description carries the cross-reference, so a future rename that drops the rule fails loudly instead of silently. Pure addition — ``_BASE_INSTRUCTIONS`` is unchanged. The redundant card-level segments come out in a later phase once the call-site copies have shipped. --- src/libtmux_mcp/tools/buffer_tools.py | 5 ++ src/libtmux_mcp/tools/hook_tools.py | 5 ++ src/libtmux_mcp/tools/pane_tools/__init__.py | 17 +++++- src/libtmux_mcp/tools/pane_tools/io.py | 8 ++- src/libtmux_mcp/tools/session_tools.py | 5 +- src/libtmux_mcp/tools/window_tools.py | 6 +- tests/test_server.py | 58 ++++++++++++++++++++ 7 files changed, 93 insertions(+), 11 deletions(-) diff --git a/src/libtmux_mcp/tools/buffer_tools.py b/src/libtmux_mcp/tools/buffer_tools.py index 108119d..6fb35ae 100644 --- a/src/libtmux_mcp/tools/buffer_tools.py +++ b/src/libtmux_mcp/tools/buffer_tools.py @@ -176,6 +176,11 @@ def load_buffer( ) -> BufferRef: """Load text into a new agent-namespaced tmux paste buffer. + Track the returned BufferRef on subsequent paste_buffer / show_buffer + / delete_buffer calls — there is no list_buffers tool, because tmux + buffers may include OS clipboard history (passwords, private + snippets) and a blanket enumeration would leak that to the agent. + Each call allocates a fresh buffer name — two concurrent calls will land in distinct buffers even if they pass the same ``logical_name``. Agents MUST use the returned :attr:`BufferRef.buffer_name` on diff --git a/src/libtmux_mcp/tools/hook_tools.py b/src/libtmux_mcp/tools/hook_tools.py index f9e8b48..b7e91fa 100644 --- a/src/libtmux_mcp/tools/hook_tools.py +++ b/src/libtmux_mcp/tools/hook_tools.py @@ -166,6 +166,11 @@ def show_hooks( ) -> HookListResult: """List configured tmux hooks at the given scope. + Hooks are read-only by design: tmux hooks survive process death + (kill -9, OOM, etc.), so write-hooks belong in your tmux config file, + not a transient MCP session. No set_hook / unset_hook tool is exposed + for that reason. Use this to inspect what is configured. + ``scope="server"`` enumerates hooks installed via ``tmux set-hook -g ...``. tmux splits those globals across two options trees by hook category: session-level hooks diff --git a/src/libtmux_mcp/tools/pane_tools/__init__.py b/src/libtmux_mcp/tools/pane_tools/__init__.py index a3787ef..8f38d8b 100644 --- a/src/libtmux_mcp/tools/pane_tools/__init__.py +++ b/src/libtmux_mcp/tools/pane_tools/__init__.py @@ -80,9 +80,20 @@ def register(mcp: FastMCP) -> None: mcp.tool(title="Send Keys", annotations=ANNOTATIONS_SHELL, tags={TAG_MUTATING})( send_keys ) - mcp.tool(title="Capture Pane", annotations=ANNOTATIONS_RO, tags={TAG_READONLY})( - capture_pane - ) + mcp.tool( + title="Capture Pane", + annotations=ANNOTATIONS_RO, + tags={TAG_READONLY}, + description=( + "Capture the visible contents of a tmux pane (tail-preserving " + "truncation at max_lines, default 500). For pane content + " + "cursor + mode + scroll state in one call, use snapshot_pane. " + "For 'send_keys then wait for output' flows, use wait_for_text " + "or wait_for_content_change instead of a capture_pane retry " + "loop — server-side blocking is dramatically cheaper in agent " + "turns. To find text across many panes, use search_panes." + ), + )(capture_pane) mcp.tool( title="Resize Pane", annotations=ANNOTATIONS_MUTATING, tags={TAG_MUTATING} )(resize_pane) diff --git a/src/libtmux_mcp/tools/pane_tools/io.py b/src/libtmux_mcp/tools/pane_tools/io.py index d4483ea..d84296e 100644 --- a/src/libtmux_mcp/tools/pane_tools/io.py +++ b/src/libtmux_mcp/tools/pane_tools/io.py @@ -32,9 +32,11 @@ def send_keys( ) -> str: """Send keys (commands or text) to a tmux pane. - After sending, use wait_for_text to block until the command completes, - or capture_pane to read the result. Do not capture_pane immediately — - there is a race condition. + After sending, use wait_for_text to block until the command completes + (server-side, turn-cheap) or capture_pane once you know it has + finished. Do not capture_pane in a tight loop — that races with + command execution and burns agent turns; wait_for_text is the + server-side blocking primitive built for this flow. Parameters ---------- diff --git a/src/libtmux_mcp/tools/session_tools.py b/src/libtmux_mcp/tools/session_tools.py index c80ad8f..cfc7c1f 100644 --- a/src/libtmux_mcp/tools/session_tools.py +++ b/src/libtmux_mcp/tools/session_tools.py @@ -39,8 +39,9 @@ def list_windows( ) -> list[WindowInfo]: """List windows in a tmux session, or all windows across sessions. - Only searches window metadata (name, index, layout). To search - the actual text visible in terminal panes, use search_panes instead. + Searches window metadata only (name, index, layout). For text + visible IN terminals — when users say "panes that contain/mention/show X" + — use search_panes instead. Parameters ---------- diff --git a/src/libtmux_mcp/tools/window_tools.py b/src/libtmux_mcp/tools/window_tools.py index d9e86a1..294e5e0 100644 --- a/src/libtmux_mcp/tools/window_tools.py +++ b/src/libtmux_mcp/tools/window_tools.py @@ -50,9 +50,9 @@ def list_panes( ) -> list[PaneInfo]: """List panes in a tmux window, session, or across the entire server. - Only searches pane metadata (current command, title, working directory). - To search the actual text visible in terminal panes, use search_panes - instead. + Searches pane metadata only (current command, title, working + directory). For text visible IN terminals — when users say "panes + that contain/mention/show X" — use search_panes instead. Parameters ---------- diff --git a/tests/test_server.py b/tests/test_server.py index a56bf70..44fee66 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -247,6 +247,64 @@ def test_base_instructions_document_buffer_lifecycle() -> None: assert "clipboard history" in _BASE_INSTRUCTIONS +@pytest.mark.parametrize( + ("tool_name", "must_include"), + [ + ("capture_pane", "snapshot_pane"), + ("capture_pane", "wait_for_text"), + ("capture_pane", "search_panes"), + ("show_hooks", "tmux config file"), + ("load_buffer", "list_buffers"), + ("load_buffer", "clipboard history"), + ("send_keys", "wait_for_text"), + ("list_panes", "search_panes"), + ("list_windows", "search_panes"), + ], +) +def test_tool_description_includes(tool_name: str, must_include: str) -> None: + """Tool descriptions carry cross-references the agent needs at the call site. + + Phase 1 of the BASE_INSTRUCTIONS slim-down: rules that are tool-specific + live in tool descriptions (surfaced by FastMCP at every ``list_tools`` + call), not in the global card or in module docstrings (which FastMCP + does not surface). The asserted phrases are the ones an agent would + look for when deciding which tool to call: + + * ``capture_pane`` cross-references richer alternatives + (``snapshot_pane``, ``wait_for_text``) and the parallel-search tool + (``search_panes``). + * ``show_hooks`` carries the no-set_hook rationale ("tmux config + file") that previously lived only in ``hook_tools``' module + docstring. + * ``load_buffer`` carries the no-list_buffers / clipboard-privacy + rationale that previously lived only in ``buffer_tools``' module + docstring. + * ``send_keys`` points at ``wait_for_text`` instead of a poll loop. + * ``list_panes`` / ``list_windows`` point at ``search_panes`` for + content (vs. metadata-only) queries. + + The "tool exists" assertion is a strict upgrade over substring tests + on ``_BASE_INSTRUCTIONS``: a future rename that drops the rule fails + here instead of silently losing agent-relevant guidance. + """ + import asyncio + + from fastmcp import FastMCP + + from libtmux_mcp.tools import register_tools + + mcp = FastMCP(name="tool-description-contract") + register_tools(mcp) + + tools = asyncio.run(mcp.list_tools()) + by_name = {tool.name: tool for tool in tools} + assert tool_name in by_name, f"{tool_name!r} is not registered" + description = by_name[tool_name].description or "" + assert must_include in description, ( + f"{tool_name!r} description missing {must_include!r}; got {description!r}" + ) + + def test_build_instructions_documents_is_caller_workflow_inside_tmux( monkeypatch: pytest.MonkeyPatch, ) -> None: From 5ef52c99e6dda91f32f3696aec1ec1c8b44cf5ac Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 18:00:35 -0500 Subject: [PATCH 02/10] mcp(feat[resources]): add tmux://reference/format-strings cheat sheet Static markdown reference resource cataloging the tmux format strings agents most commonly encounter via ``display_message`` and friends: pane / window / session / server fields, plus the conditional and string-operation forms (``#{?cond,then,else}``, ``#{=N:expr}``, ``#{s/from/to/:expr}``, etc.). Why a resource and not a tool: tmux format strings are a closed catalog, not a query. Agents that hit a ``#{...}`` field they don't recognize need a fixed lookup, not a server round-trip. Pulling the reference is cheaper than guessing a name and burning a ``display_message`` call to confirm it. The new module follows the ``hierarchy.register(mcp)`` shape: closures decorated with ``@mcp.resource(...)`` and a trailing ``_ = (fn,)`` tuple to silence type-checker unused-name warnings without exporting the local name. Concrete URI (no template params), so it registers under ``mcp.list_resources()`` rather than ``list_resource_templates()``. Tests cover both layers: a closure-capture fixture verifies the body contains the spot-check format strings (``#{pane_id}``, ``#{window_id}``, ``#{session_id}``, ``#{?cond,then,else}``), and a real-FastMCP test verifies the registered resource advertises ``text/markdown`` so clients render it correctly. --- src/libtmux_mcp/resources/__init__.py | 3 +- src/libtmux_mcp/resources/reference.py | 107 +++++++++++++++++++++++++ tests/test_resources.py | 72 +++++++++++++++++ 3 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 src/libtmux_mcp/resources/reference.py diff --git a/src/libtmux_mcp/resources/__init__.py b/src/libtmux_mcp/resources/__init__.py index 950defe..39164d8 100644 --- a/src/libtmux_mcp/resources/__init__.py +++ b/src/libtmux_mcp/resources/__init__.py @@ -10,6 +10,7 @@ def register_resources(mcp: FastMCP) -> None: """Register all resource modules with the FastMCP instance.""" - from libtmux_mcp.resources import hierarchy + from libtmux_mcp.resources import hierarchy, reference hierarchy.register(mcp) + reference.register(mcp) diff --git a/src/libtmux_mcp/resources/reference.py b/src/libtmux_mcp/resources/reference.py new file mode 100644 index 0000000..b4d4a80 --- /dev/null +++ b/src/libtmux_mcp/resources/reference.py @@ -0,0 +1,107 @@ +"""Static reference resources for tmux primitives. + +Why a resource and not a tool: tmux format strings (``#{pane_id}``, +``#{pane_in_mode}``, ``#{?cond,then,else}``) are a closed reference +catalog, not a query. Agents that hit a ``#{...}`` field they don't +recognize need a fixed lookup, not a tool round-trip. Exposing this as +an MCP resource lets clients pull it on demand and lets the agent +recover from an unknown-format-name guess without paying a +``display_message`` round-trip just to discover what's available. +""" + +from __future__ import annotations + +import textwrap +import typing as t + +if t.TYPE_CHECKING: + from fastmcp import FastMCP + + +#: MIME type for the format-string reference resource. Markdown gives +#: clients enough structure to render headings and code spans without +#: requiring a richer content type. +_MARKDOWN_MIME = "text/markdown" + + +_FORMAT_STRING_REFERENCE = textwrap.dedent("""\ + # tmux format strings + + Pass via ``display_message(format_string="#{...}")`` or any other + tool that accepts a tmux format expression. + + ## Pane + + - ``#{pane_id}`` — globally unique pane identifier (e.g. ``%1``) + - ``#{pane_index}`` — index within the window + - ``#{pane_current_command}`` — foreground command name + - ``#{pane_current_path}`` — current working directory + - ``#{pane_pid}`` — pane process PID + - ``#{pane_dead}`` — ``1`` when the pane's process has exited + - ``#{pane_in_mode}`` — ``1`` when the pane is in copy/scroll mode + - ``#{pane_mode}`` — current pane mode name when in mode + - ``#{pane_active}`` — ``1`` for the active pane in its window + - ``#{pane_width}`` / ``#{pane_height}`` — pane dimensions in cells + - ``#{cursor_x}`` / ``#{cursor_y}`` — cursor position within the pane + - ``#{scroll_position}`` — scrollback position when in copy mode + + ## Window + + - ``#{window_id}`` — globally unique window identifier (e.g. ``@1``) + - ``#{window_index}`` — window index within the session + - ``#{window_name}`` — window name + - ``#{window_zoomed_flag}`` — ``1`` when a pane is zoomed + - ``#{window_layout}`` — current layout string + - ``#{window_panes}`` — number of panes in the window + - ``#{window_active}`` — ``1`` for the active window in its session + + ## Session + + - ``#{session_id}`` — globally unique session identifier (e.g. ``$1``) + - ``#{session_name}`` — session name + - ``#{session_attached}`` — ``1`` when at least one client is attached + - ``#{session_windows}`` — number of windows in the session + + ## Server / client + + - ``#{host}`` — hostname running the tmux server + - ``#{client_tty}`` — TTY of the client (when evaluated client-side) + - ``#{socket_path}`` — server socket path + + ## Conditionals and string operations + + - ``#{?cond,then,else}`` — emit ``then`` if ``cond`` is truthy, else + ``else`` + - ``#{C/i:pattern}`` — case-insensitive search inside the result + - ``#{=N:expr}`` — truncate ``expr`` to ``N`` characters + - ``#{s/from/to/:expr}`` — substitution + - ``#{T:expr}`` — recursively expand format strings within ``expr`` + + See ``man tmux`` (FORMATS section) for the complete catalog. +""") + + +def register(mcp: FastMCP) -> None: + """Register reference resources with the FastMCP instance.""" + + @mcp.resource( + "tmux://reference/format-strings", + title="tmux Format String Reference", + mime_type=_MARKDOWN_MIME, + ) + def get_format_string_reference() -> str: + """Return the tmux format-string cheat sheet as Markdown. + + Static reference content. Use this when an agent encounters + an unfamiliar ``#{...}`` field — pulling the resource is + cheaper than a ``display_message`` round-trip and avoids + hallucinated format names. + """ + return _FORMAT_STRING_REFERENCE + + # Type checkers: list the function to silence unused-name warnings + # without exposing it outside this closure. + _ = (get_format_string_reference,) + + +__all__ = ["register"] diff --git a/tests/test_resources.py b/tests/test_resources.py index 2fedfd3..cd54c0a 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -8,6 +8,7 @@ import pytest from libtmux_mcp.resources.hierarchy import register +from libtmux_mcp.resources.reference import register as register_reference if t.TYPE_CHECKING: from libtmux.pane import Pane @@ -184,3 +185,74 @@ def test_hierarchy_resources_advertise_mime_type(uri: str, expected_mime: str) - candidate = by_uri.get(uri) assert candidate is not None, f"resource {uri!r} not registered" assert candidate.mime_type == expected_mime + + +# --------------------------------------------------------------------------- +# Reference resources (static catalogs, no tmux server interaction) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def reference_resource_functions() -> dict[str, t.Any]: + """Capture reference-module resource closures by URI. + + Mirrors ``resource_functions`` but registers + :mod:`libtmux_mcp.resources.reference` instead of ``hierarchy``. + Reference resources are static and need no tmux fixtures. + """ + functions: dict[str, t.Any] = {} + + class MockMCP: + def resource(self, uri: str, **kwargs: t.Any) -> t.Any: + def decorator(fn: t.Any) -> t.Any: + functions[uri] = fn + return fn + + return decorator + + register_reference(MockMCP()) # type: ignore[arg-type] + return functions + + +def test_format_string_reference_returns_markdown( + reference_resource_functions: dict[str, t.Any], +) -> None: + """tmux://reference/format-strings returns non-empty Markdown. + + The agent's reason for pulling this resource is to recover from an + unfamiliar ``#{...}`` token without burning a ``display_message`` + round-trip. The body must therefore (a) be present and (b) name + the format strings most likely to confuse — pane / window / + session ID forms and the ``#{?cond,then,else}`` conditional. + """ + fn = reference_resource_functions["tmux://reference/format-strings"] + body = fn() + assert isinstance(body, str) + assert body.strip(), "format-string reference body is empty" + # Spot-check the highest-traffic catalog entries — if any of these + # vanish, the reference is failing at its job. + assert "#{pane_id}" in body + assert "#{window_id}" in body + assert "#{session_id}" in body + assert "#{?cond,then,else}" in body + + +def test_format_string_reference_advertises_markdown_mime() -> None: + """tmux://reference/format-strings is registered with text/markdown. + + Concrete URIs (no ``{...}`` template params) register as resources, + not resource templates — they show up under ``mcp.list_resources()`` + rather than ``mcp.list_resource_templates()``. + """ + import asyncio + + from fastmcp import FastMCP + + mcp = FastMCP(name="test-reference-mime") + register_reference(mcp) + + resources = asyncio.run(mcp.list_resources()) + by_uri = {str(getattr(r, "uri", "")): r for r in resources} + target = by_uri.get("tmux://reference/format-strings") + assert target is not None, "format-strings reference not registered" + assert target.mime_type == "text/markdown" From 72abdae721ed9d59fbe12d69baf0cdf6e338f0cd Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 18:06:11 -0500 Subject: [PATCH 03/10] mcp(refactor[instructions]): collapse base instructions to a 117-word card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3 of the BASE_INSTRUCTIONS slim-down: delete the six gap-explainer / positive-guidance segments now that their content lives in tool descriptions (Phase 1). The card shrinks from a 305-word, 6-paragraph monolith to a 117-word "three handles" overview that answers (1) what is this server and (2) where do I look for the rest. Server.py changes ----------------- * Delete ``_INSTR_HIERARCHY``, ``_INSTR_METADATA_VS_CONTENT``, ``_INSTR_READ_TOOLS``, ``_INSTR_WAIT_NOT_POLL``, ``_INSTR_HOOKS_GAP``, ``_INSTR_BUFFERS_GAP``. * Add ``_INSTR_CARD`` (server identity, hierarchy, socket_name exception) and ``_INSTR_HANDLES`` (Tools / Resources / Prompts). * Rewrite the module-level comment to name the cross-cutting vs. tool-specific boundary explicitly, pointing future contributors at tool descriptions for any rule that names a specific tool. Live ``_build_instructions(mutating)`` output drops from 357 words to 215 — comfortably inside MCP's 150-300 word recommendation for global instructions. Tests ----- * Delete the six standalone ``test_base_instructions_*`` functions — every substring they pinned has either moved into a tool description (covered by Phase 1's ``test_tool_description_includes``) or been deliberately removed from the card. * Add ``CardContract`` ``NamedTuple`` + parametrized ``test_card_contracts`` with three rows: ``server_identity``, ``socket_name_exception`` (with ``must_exclude=("All tools accept",)`` guarding against drift back to the pre-refactor lie), and ``three_handles``. * Add ``test_card_length_budget`` enforcing a soft 200-word ceiling so the card cannot regress into the monolith it just shrank from. * The ``BuildInstructionsFixture`` matrix is unchanged — its assertions cover the dynamic safety / agent-context blocks of ``_build_instructions``, not ``_BASE_INSTRUCTIONS``, and those still build identically. Docs ---- ``docs/topics/prompting.md`` previously excerpted only the first two old segments; updated to mirror the new slim card so readers see what agents actually receive. --- docs/topics/prompting.md | 29 ++++---- src/libtmux_mcp/server.py | 110 ++++++++--------------------- tests/test_server.py | 145 ++++++++++++++++++++------------------ 3 files changed, 125 insertions(+), 159 deletions(-) diff --git a/docs/topics/prompting.md b/docs/topics/prompting.md index 74250ff..450d412 100644 --- a/docs/topics/prompting.md +++ b/docs/topics/prompting.md @@ -11,19 +11,22 @@ Every MCP client receives these instructions when connecting to the libtmux-mcp ```{code-block} text :class: server-prompt -libtmux MCP server for programmatic tmux control. tmux hierarchy: -Server > Session > Window > Pane. Use pane_id (e.g. '%1') as the -preferred targeting method - it is globally unique within a tmux server. -Use send_keys to execute commands and capture_pane to read output. All -tools accept an optional socket_name parameter for multi-server support -(defaults to LIBTMUX_SOCKET env var). - -IMPORTANT — metadata vs content: list_windows, list_panes, and -list_sessions only search metadata (names, IDs, current command). To -find text that is actually visible in terminals — when users ask what -panes 'contain', 'mention', 'show', or 'have' — use search_panes to -search across all pane contents, or list_panes + capture_pane on each -pane for manual inspection. +libtmux MCP server: programmatic tmux control. tmux hierarchy is +Server > Session > Window > Pane; every pane has a globally unique +pane_id like %1 — prefer it over name/index for targeting. Targeted +tools accept an optional socket_name (defaults to LIBTMUX_SOCKET); +list_servers discovers sockets via TMUX_TMPDIR / extra_socket_paths +and is the documented socket_name exception. + +Three handles cover everything the agent needs: +- Tools — call list_tools; per-tool descriptions tell you which to + prefer (e.g. snapshot_pane over capture_pane + get_pane_info, + wait_for_text over capture_pane in a retry loop, search_panes over + list_panes when the user says "panes that contain X"). +- Resources (tmux://) — browseable hierarchy plus reference cards + (format strings). +- Prompts — packaged workflows: run_and_wait, diagnose_failing_pane, + build_dev_workspace, interrupt_gracefully. ``` The server also dynamically adds: diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index ecf87a5..7025fba 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -43,93 +43,45 @@ _ServerCacheKey: t.TypeAlias = tuple[str | None, str | None, str | None] # --------------------------------------------------------------------------- -# _BASE_INSTRUCTIONS — composed from named segments. +# _BASE_INSTRUCTIONS — slim "three handles" card. # -# The string handed to FastMCP grew organically from "what does this server -# do?" toward a hybrid of positive guidance (HIERARCHY, READ_TOOLS, -# WAIT_NOT_POLL) and *gap-explainers* (HOOKS_GAP, BUFFERS_GAP) that document -# why a tool the agent might expect is absent. Splitting into named -# constants keeps additions deliberate: when a new ``_GAP`` segment feels -# tempting, prefer first to push the explanation into the relevant tool's -# docstring/description (where the agent encounters it at call time) and -# only fall back to a server-level segment when the gap is *server-shaped* -# (e.g. an entire tool family is intentionally missing). +# The card answers two questions the agent has at session start: +# (1) what is this server, and (2) where do I look for the rest? It points +# at tools, resources, and prompts — and that's it. Tool-specific rules +# (which tool to prefer, what's intentionally not exposed and why) live in +# the relevant tool's docstring or ``description=`` override, where the +# agent reads them on every ``list_tools`` call instead of parsing them +# out of a one-shot prompt that has long since rolled out of context. # -# Output text is byte-identical to the previous monolith; tests assert on -# substrings of ``_BASE_INSTRUCTIONS``, so keeping the join shape stable -# matters. +# When in doubt about adding text here, ask: is this rule cross-cutting +# (about the server as a whole) or tool-specific (about when to call X +# vs Y)? Cross-cutting belongs in the card; tool-specific belongs in the +# tool description. ``test_card_length_budget`` enforces a soft 200-word +# ceiling against creeping re-bloat. # --------------------------------------------------------------------------- -_INSTR_HIERARCHY = ( - "libtmux MCP server for programmatic tmux control. " - "tmux hierarchy: Server > Session > Window > Pane. " - "Use pane_id (e.g. '%1') as the preferred targeting method - " - "it is globally unique within a tmux server. " - "Use send_keys to execute commands and capture_pane to read output. " - "Targeted tmux tools accept an optional socket_name parameter " - "(defaults to LIBTMUX_SOCKET env var); list_servers discovers " - "sockets via TMUX_TMPDIR plus optional extra_socket_paths instead." +_INSTR_CARD = ( + "libtmux MCP server: programmatic tmux control. tmux hierarchy is " + "Server > Session > Window > Pane; every pane has a globally unique " + "pane_id like %1 — prefer it over name/index for targeting. Targeted " + "tools accept an optional socket_name (defaults to LIBTMUX_SOCKET); " + "list_servers discovers sockets via TMUX_TMPDIR / extra_socket_paths " + "and is the documented socket_name exception." ) -_INSTR_METADATA_VS_CONTENT = ( - "IMPORTANT — metadata vs content: list_windows, list_panes, and " - "list_sessions only search metadata (names, IDs, current command). " - "To find text that is actually visible in terminals — when users ask " - "what panes 'contain', 'mention', 'show', or 'have' — use " - "search_panes to search across all pane contents, or list_panes + " - "capture_pane on each pane for manual inspection." +_INSTR_HANDLES = ( + "Three handles cover everything the agent needs:\n" + "- Tools — call list_tools; per-tool descriptions tell you which to " + "prefer (e.g. snapshot_pane over capture_pane + get_pane_info, " + "wait_for_text over capture_pane in a retry loop, search_panes over " + 'list_panes when the user says "panes that contain X").\n' + "- Resources (tmux://) — browseable hierarchy plus reference cards " + "(format strings).\n" + "- Prompts — packaged workflows: run_and_wait, diagnose_failing_pane, " + "build_dev_workspace, interrupt_gracefully." ) -_INSTR_READ_TOOLS = ( - "READ TOOLS TO PREFER: snapshot_pane returns pane content plus " - "cursor position, mode, and scroll state in one call — use it " - "instead of capture_pane + get_pane_info when you need context. " - "display_message evaluates a tmux format string (e.g. " - "'#{pane_current_command}', '#{session_name}') against a target " - "and returns the expanded value — cheaper than parsing captured " - "output. (The tool is named after the tmux 'display-message -p' " - "verb it wraps; its MCP title is 'Evaluate tmux Format String'.)" -) - -_INSTR_WAIT_NOT_POLL = ( - "WAIT, DON'T POLL: for 'run command, wait for output' workflows " - "use wait_for_text (matches text/regex on a pane) or " - "wait_for_content_change (waits for any change). These block " - "server-side until the condition is met or the timeout expires, " - "which is dramatically cheaper in agent turns than capture_pane " - "in a retry loop." -) - -#: Gap-explainer: write-hook tools are intentionally absent. See module -#: comment above for when to add another ``_GAP`` segment vs. push the -#: explanation into a tool description. -_INSTR_HOOKS_GAP = ( - "HOOKS ARE READ-ONLY: inspect via show_hooks / show_hook. Write-hook " - "tools are intentionally not exposed — tmux hooks survive process " - "death, so they belong in your tmux config file, not a transient " - "MCP session." -) - -#: Gap-explainer: ``list_buffers`` is intentionally absent because tmux -#: buffers can include OS clipboard history. See module comment above. -_INSTR_BUFFERS_GAP = ( - "BUFFERS: load_buffer stages content, paste_buffer delivers it into " - "a pane, delete_buffer removes the staged buffer. Track owned " - "buffers via the BufferRef returned from load_buffer — there is no " - "list_buffers tool because tmux buffers may include OS clipboard " - "history (passwords, private snippets)." -) - -_BASE_INSTRUCTIONS = "\n\n".join( - ( - _INSTR_HIERARCHY, - _INSTR_METADATA_VS_CONTENT, - _INSTR_READ_TOOLS, - _INSTR_WAIT_NOT_POLL, - _INSTR_HOOKS_GAP, - _INSTR_BUFFERS_GAP, - ) -) +_BASE_INSTRUCTIONS = "\n\n".join((_INSTR_CARD, _INSTR_HANDLES)) def _build_instructions(safety_level: str = TAG_MUTATING) -> str: diff --git a/tests/test_server.py b/tests/test_server.py index 44fee66..32e192d 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -129,65 +129,93 @@ def test_build_instructions( assert f"Safety level: {expect_safety_in_text}" in result -def test_base_instructions_content() -> None: - """_BASE_INSTRUCTIONS contains key guidance for the LLM.""" - assert "tmux hierarchy" in _BASE_INSTRUCTIONS - assert "pane_id" in _BASE_INSTRUCTIONS - assert "search_panes" in _BASE_INSTRUCTIONS - assert "metadata vs content" in _BASE_INSTRUCTIONS - - -def test_base_instructions_surface_flagship_read_tools() -> None: - """_BASE_INSTRUCTIONS mentions the richer read tools by name. - - ``display_message`` (tmux format queries) and ``snapshot_pane`` - (content + metadata in one call) are strictly more expressive than - ``capture_pane`` for most contexts, but agents that never see them - named in the instructions default to ``capture_pane`` + a follow-up - ``get_pane_info``. Naming both explicitly changes that default. +class CardContract(t.NamedTuple): + """Contract about what ``_BASE_INSTRUCTIONS`` must / must not contain. + + The slim card is the public-facing server prompt — every MCP client + that connects gets it. ``must_include`` pins the substrings agents + rely on to orient (server identity, socket_name exception, three + handles); ``must_exclude`` pins the deleted-pre-refactor phrasing so + a future drift back to the lie ("All tools accept socket_name") fails + loudly here instead of silently shipping. """ - assert "display_message" in _BASE_INSTRUCTIONS - assert "snapshot_pane" in _BASE_INSTRUCTIONS + test_id: str + must_include: tuple[str, ...] + must_exclude: tuple[str, ...] = () + + +CARD_CONTRACTS: list[CardContract] = [ + CardContract( + test_id="server_identity", + must_include=( + "tmux hierarchy", + "Server > Session > Window > Pane", + "pane_id", + ), + ), + CardContract( + test_id="socket_name_exception", + # ``list_servers`` does NOT accept socket_name (it's the discovery + # tool — see ``server_tools.py`` SOCKET_NAME_EXEMPT). The pre-refactor + # wording "All tools accept socket_name" was a lie; the new card + # qualifies "Targeted tools" and names list_servers explicitly. + must_include=("Targeted tools", "list_servers", "extra_socket_paths"), + must_exclude=("All tools accept",), + ), + CardContract( + test_id="three_handles", + # The card's job is to point at where the rest of the answer lives + # (tools / resources / prompts), not to inline tool-specific rules. + must_include=("Tools", "Resources (tmux://)", "Prompts"), + ), +] -def test_base_instructions_prefer_wait_over_poll() -> None: - """_BASE_INSTRUCTIONS names wait_for_text and wait_for_content_change. - The wait tools block server-side, which is dramatically cheaper in - agent turns than ``capture_pane`` in a retry loop. Making them - discoverable from the instructions is a no-cost UX win. +@pytest.mark.parametrize( + CardContract._fields, + CARD_CONTRACTS, + ids=[c.test_id for c in CARD_CONTRACTS], +) +def test_card_contracts( + test_id: str, + must_include: tuple[str, ...], + must_exclude: tuple[str, ...], +) -> None: + """``_BASE_INSTRUCTIONS`` is the slim "three handles" server card. + + Tool-specific rules live in tool descriptions — Phase 1 of the + instructions slim-down moved them there. The card carries only + cross-cutting orientation: server identity, the socket_name + exception, and pointers to the Tools / Resources / Prompts handles. + Anything naming a specific tool's preference rule belongs at the + call site, not here. """ - assert "wait_for_text" in _BASE_INSTRUCTIONS - assert "wait_for_content_change" in _BASE_INSTRUCTIONS + for needle in must_include: + assert needle in _BASE_INSTRUCTIONS, ( + f"[{test_id}] missing required substring {needle!r}" + ) + for needle in must_exclude: + assert needle not in _BASE_INSTRUCTIONS, ( + f"[{test_id}] forbidden substring {needle!r} crept back in" + ) -def test_base_instructions_document_hook_boundary() -> None: - """_BASE_INSTRUCTIONS explains hooks are read-only by design. +def test_card_length_budget() -> None: + """``_BASE_INSTRUCTIONS`` stays under the ~200-word budget. - Without this sentence agents waste a turn asking for ``set_hook`` or - trying to write hooks through a nonexistent tool. Naming the - boundary heads off the exploratory call. + Per-tool rules belong in tool descriptions (visible at every + ``list_tools`` call), not in this card. This guard fails loudly if a + future contributor reaches for the card to add a tool-specific rule, + pointing them at the right home before the card grows back into the + 305-word monolith it just shrank from. """ - assert "HOOKS ARE READ-ONLY" in _BASE_INSTRUCTIONS - assert "show_hooks" in _BASE_INSTRUCTIONS - assert "tmux config file" in _BASE_INSTRUCTIONS - - -def test_base_instructions_document_socket_name_contract() -> None: - """_BASE_INSTRUCTIONS frames the socket_name promise precisely. - - list_servers does NOT accept socket_name (it's the discovery tool — - see server_tools.py:263-264 where the signature is - ``list_servers(extra_socket_paths=...)``), so the previous "All - tools accept socket_name" wording was a lie. The instruction now - qualifies "Targeted tmux tools" and explicitly names list_servers - as the documented exception, matching what - test_registered_tools_accept_socket_name asserts at the schema - level. - """ - assert "Targeted tmux tools accept" in _BASE_INSTRUCTIONS - assert "list_servers" in _BASE_INSTRUCTIONS - assert "extra_socket_paths" in _BASE_INSTRUCTIONS + word_count = len(_BASE_INSTRUCTIONS.split()) + assert word_count <= 200, ( + f"_BASE_INSTRUCTIONS grew to {word_count} words; per-tool rules " + f"belong in tool descriptions, not the card. See the module-level " + f"comment in server.py for the boundary." + ) def test_registered_tools_accept_socket_name() -> None: @@ -230,23 +258,6 @@ def test_registered_tools_accept_socket_name() -> None: ) -def test_base_instructions_document_buffer_lifecycle() -> None: - """_BASE_INSTRUCTIONS explains the buffer lifecycle + no list_buffers. - - The load/paste/delete triple is non-obvious, and agents otherwise - expect a ``list_buffers`` affordance. The instruction prevents both - confusions and surfaces the clipboard-privacy reason so the - omission reads as deliberate, not missing. - """ - assert "BUFFERS" in _BASE_INSTRUCTIONS - assert "load_buffer" in _BASE_INSTRUCTIONS - assert "paste_buffer" in _BASE_INSTRUCTIONS - assert "delete_buffer" in _BASE_INSTRUCTIONS - assert "BufferRef" in _BASE_INSTRUCTIONS - assert "list_buffers" in _BASE_INSTRUCTIONS - assert "clipboard history" in _BASE_INSTRUCTIONS - - @pytest.mark.parametrize( ("tool_name", "must_include"), [ From 883cd805de9b6951c61ce8e96c1074970f8e1ea5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 18:10:16 -0500 Subject: [PATCH 04/10] mcp(feat[instructions]): make the card visibility-aware via run_server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4 of the BASE_INSTRUCTIONS slim-down. The card names specific tools by hint phrase (``snapshot_pane over capture_pane + get_pane_info``, ``wait_for_text over capture_pane in a retry loop``, ``search_panes over list_panes...``); under ``LIBTMUX_SAFETY=readonly`` some of those tools are hidden by ``mcp.enable(tags=..., only=True)``, so naming them in the card was misleading. Composition ----------- * New ``_HANDLE_HINTS: tuple[tuple[str, str], ...]`` keyed by tool name. * New ``_format_handles_section(visible_tool_names)`` renders the three-bullet handles list, filtering the Tools-handle hints by visibility. ``visible_tool_names=None`` keeps every hint (backward-compat for tests that build instructions without invoking ``mcp.enable``, and for the module-import-time placeholder). * ``_INSTR_HANDLES`` is now ``_format_handles_section(None)`` — the unfiltered baseline used by ``_BASE_INSTRUCTIONS``. Wiring ------ * ``_build_instructions`` gains a ``visible_tool_names: set[str] | None`` parameter. When provided it rebuilds the base from ``_INSTR_CARD`` + filtered handles; otherwise it uses ``_BASE_INSTRUCTIONS`` directly. Env-driven ``is_caller`` block is unchanged. * ``run_server`` now collects visible tool names via ``asyncio.run(mcp.list_tools())`` after ``mcp.enable(tags=..., only=True)`` has applied the safety-tier filter, then overwrites ``mcp.instructions`` with the visibility-filtered card. The module-import-time ``instructions=`` set on the FastMCP constructor is now an explicitly documented placeholder. Tests ----- * ``test_card_omits_invisible_tools`` (parametrized) — drops one tool from the visible set and asserts its hint phrase disappears, with a paired sanity check that the phrase IS present when the tool is visible. * ``test_build_instructions_default_visible_tool_names_emits_full_card`` — pins the backward-compat behavior so a future contributor doesn't silently break the placeholder by changing the default. End-to-end smoke: simulating ``LIBTMUX_SAFETY=readonly`` (wait_for_text hidden), the card drops the wait_for_text hint while keeping snapshot_pane and search_panes. Default placeholder still emits all three at 215 total words. --- src/libtmux_mcp/server.py | 98 ++++++++++++++++++++++++++++++++++----- tests/test_server.py | 61 ++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 12 deletions(-) diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index 7025fba..de26452 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -69,22 +69,69 @@ "and is the documented socket_name exception." ) -_INSTR_HANDLES = ( - "Three handles cover everything the agent needs:\n" - "- Tools — call list_tools; per-tool descriptions tell you which to " - "prefer (e.g. snapshot_pane over capture_pane + get_pane_info, " - "wait_for_text over capture_pane in a retry loop, search_panes over " - 'list_panes when the user says "panes that contain X").\n' - "- Resources (tmux://) — browseable hierarchy plus reference cards " - "(format strings).\n" - "- Prompts — packaged workflows: run_and_wait, diagnose_failing_pane, " - "build_dev_workspace, interrupt_gracefully." +#: Tool-prefer hints in the Tools handle, keyed by the tool that +#: motivates them. ``_format_handles_section`` filters these by +#: ``visible_tool_names`` so the card never names a tool the agent +#: cannot call (e.g. ``send_keys``-only hints under +#: ``LIBTMUX_SAFETY=readonly``). +_HANDLE_HINTS: tuple[tuple[str, str], ...] = ( + ("snapshot_pane", "snapshot_pane over capture_pane + get_pane_info"), + ("wait_for_text", "wait_for_text over capture_pane in a retry loop"), + ( + "search_panes", + 'search_panes over list_panes when the user says "panes that contain X"', + ), ) + +def _format_handles_section(visible_tool_names: set[str] | None) -> str: + """Render the three-handles bullet list, optionally visibility-filtered. + + When ``visible_tool_names`` is ``None`` every hint is included + (backward-compat for tests that build instructions without first + invoking ``mcp.enable``). Otherwise hints whose tool is not in the + visible set are dropped — naming a tool the agent cannot call would + be misleading. + """ + if visible_tool_names is None: + hints = [hint for _, hint in _HANDLE_HINTS] + else: + hints = [hint for tool, hint in _HANDLE_HINTS if tool in visible_tool_names] + + tools_line = ( + "- Tools — call list_tools; per-tool descriptions tell you which to prefer" + ) + if hints: + tools_line += " (e.g. " + ", ".join(hints) + ")." + else: + tools_line += "." + + return "\n".join( + ( + "Three handles cover everything the agent needs:", + tools_line, + ( + "- Resources (tmux://) — browseable hierarchy plus reference " + "cards (format strings)." + ), + ( + "- Prompts — packaged workflows: run_and_wait, " + "diagnose_failing_pane, build_dev_workspace, " + "interrupt_gracefully." + ), + ) + ) + + +_INSTR_HANDLES = _format_handles_section(visible_tool_names=None) + _BASE_INSTRUCTIONS = "\n\n".join((_INSTR_CARD, _INSTR_HANDLES)) -def _build_instructions(safety_level: str = TAG_MUTATING) -> str: +def _build_instructions( + safety_level: str = TAG_MUTATING, + visible_tool_names: set[str] | None = None, +) -> str: """Build server instructions with agent context and safety level. When the MCP server process runs inside a tmux pane, ``TMUX_PANE`` and @@ -95,13 +142,25 @@ def _build_instructions(safety_level: str = TAG_MUTATING) -> str: ---------- safety_level : str Active safety tier (readonly, mutating, or destructive). + visible_tool_names : set of str, optional + When provided, the handles section drops hints for tools not in + the set so the card never names a tool the agent cannot call. + ``run_server`` populates this from ``mcp.list_tools()`` after + ``mcp.enable(tags=..., only=True)`` has applied the safety-tier + filter. Defaults to ``None`` (backward-compat: all hints emitted), + which is what the module-import-time placeholder uses before + ``run_server`` runs. Returns ------- str Server instructions string, optionally with agent tmux context. """ - parts: list[str] = [_BASE_INSTRUCTIONS] + if visible_tool_names is None: + base = _BASE_INSTRUCTIONS + else: + base = "\n\n".join((_INSTR_CARD, _format_handles_section(visible_tool_names))) + parts: list[str] = [base] # Safety tier context parts.append( @@ -273,6 +332,8 @@ def _register_all() -> None: def run_server() -> None: """Run the MCP server.""" + import asyncio + _register_all() # Use FastMCP's native visibility system as primary gate, @@ -284,4 +345,17 @@ def run_server() -> None: allowed_tags.add(TAG_DESTRUCTIVE) mcp.enable(tags=allowed_tags, only=True) + # Rebuild instructions now that ``mcp.enable`` has hidden tools + # outside the active safety tier. The card mentions specific tools + # by name (snapshot_pane, wait_for_text, search_panes); naming a + # tool the agent cannot call would be misleading. The + # module-import-time ``instructions=`` set on the FastMCP + # constructor was a placeholder built without a visibility filter — + # this overwrite is the authoritative version. + visible_tool_names = {tool.name for tool in asyncio.run(mcp.list_tools())} + mcp.instructions = _build_instructions( + safety_level=_safety_level, + visible_tool_names=visible_tool_names, + ) + mcp.run() diff --git a/tests/test_server.py b/tests/test_server.py index 32e192d..45e92cb 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -218,6 +218,67 @@ def test_card_length_budget() -> None: ) +# Phrases the handles section emits when the corresponding tool is in +# ``visible_tool_names``. Phase 4: ``_build_instructions`` filters the +# Tools handle by visibility so the card never names a tool the agent +# cannot call. Keep this aligned with ``_HANDLE_HINTS`` in server.py. +_VISIBILITY_FILTER_CASES: list[tuple[str, str]] = [ + ("snapshot_pane", "snapshot_pane over capture_pane"), + ("wait_for_text", "wait_for_text over capture_pane"), + ("search_panes", "search_panes over list_panes"), +] + + +@pytest.mark.parametrize( + ("omitted_tool", "phrase_that_should_drop"), + _VISIBILITY_FILTER_CASES, + ids=[t for t, _ in _VISIBILITY_FILTER_CASES], +) +def test_card_omits_invisible_tools( + omitted_tool: str, + phrase_that_should_drop: str, +) -> None: + """The handles section drops hints for tools outside ``visible_tool_names``. + + Each row asserts that removing one tool from the visible set drops + its tool-prefer hint from the card. Sanity check: the same phrase IS + present when the tool IS visible — guards against the test passing + accidentally because the phrase was never there. + """ + full_visibility = {tool for tool, _ in _VISIBILITY_FILTER_CASES} + visible = full_visibility - {omitted_tool} + + filtered = _build_instructions(TAG_MUTATING, visible_tool_names=visible) + assert phrase_that_should_drop not in filtered, ( + f"phrase {phrase_that_should_drop!r} stayed when {omitted_tool} " + f"was filtered out — visibility filter is broken" + ) + + full = _build_instructions(TAG_MUTATING, visible_tool_names=full_visibility) + assert phrase_that_should_drop in full, ( + f"phrase {phrase_that_should_drop!r} missing even when " + f"{omitted_tool} is visible — sanity check failed" + ) + + +def test_build_instructions_default_visible_tool_names_emits_full_card() -> None: + """``visible_tool_names=None`` is the backward-compat default. + + The module-import-time ``instructions=`` placeholder builds without + invoking ``mcp.enable``, so it has no visibility set to consult. In + that case ``_build_instructions`` must emit every handle hint as if + the agent could call any tool — the alternative would be silently + dropping hints during the (brief) window before ``run_server`` + overwrites the placeholder. + """ + full = _build_instructions(TAG_MUTATING) + for _, phrase in _VISIBILITY_FILTER_CASES: + assert phrase in full, ( + f"phrase {phrase!r} missing from default _build_instructions; " + f"visible_tool_names=None should emit the full hint set" + ) + + def test_registered_tools_accept_socket_name() -> None: """All registered tools (except list_servers) accept ``socket_name``. From de39b7a3921a7547300b3a86dce0869472b21f3b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 20:18:10 -0500 Subject: [PATCH 05/10] mcp(test[server]): guard _HANDLE_HINTS against tool-name drift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``_format_handles_section`` filters card hints by ``tool in visible_tool_names`` — a tool name in ``_HANDLE_HINTS`` that no longer matches a registered tool produces the worst of both worlds: agents see a phantom tool name during the brief import-time placeholder window (where ``visible_tool_names is None`` and every hint is emitted unconditionally), then lose the guidance entirely once ``run_server`` filters by real visibility. Phase 1's ``test_tool_description_includes`` already catches drift in tool descriptions. This test catches drift in the parallel ``_HANDLE_HINTS`` table — the two together close the rename-without- update loophole. Pure addition; no source changes. --- tests/test_server.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/test_server.py b/tests/test_server.py index 45e92cb..cc43add 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -279,6 +279,38 @@ def test_build_instructions_default_visible_tool_names_emits_full_card() -> None ) +def test_handle_hints_keys_are_registered_tools() -> None: + """Every key in ``_HANDLE_HINTS`` must be an actual registered tool name. + + ``_format_handles_section`` filters hints by ``tool in visible_tool_names``, + so a stale key (e.g. left behind after a tool rename) silently disappears + from the filtered card while still being emitted in the unfiltered + placeholder — the worst of both worlds: agents would see a phantom tool + name during the brief import-time window, then lose the guidance entirely + once ``run_server`` filters by real visibility. + + Phase 1's ``test_tool_description_includes`` catches drift in tool + *descriptions*; this test catches drift in the parallel ``_HANDLE_HINTS`` + table. The two together close the rename-without-update loophole. + """ + import asyncio + + from fastmcp import FastMCP + + from libtmux_mcp.server import _HANDLE_HINTS + from libtmux_mcp.tools import register_tools + + mcp = FastMCP(name="hint-key-contract") + register_tools(mcp) + + registered = {tool.name for tool in asyncio.run(mcp.list_tools())} + for tool_name, _hint in _HANDLE_HINTS: + assert tool_name in registered, ( + f"_HANDLE_HINTS key {tool_name!r} is not a registered tool name; " + f"update _HANDLE_HINTS in server.py when renaming or removing tools" + ) + + def test_registered_tools_accept_socket_name() -> None: """All registered tools (except list_servers) accept ``socket_name``. From 5770fdb2e58544c5ce8a1346689ffe7ba17c4b37 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 20:20:24 -0500 Subject: [PATCH 06/10] mcp(test[server]): cover _format_handles_section empty-visibility branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``_format_handles_section`` has two output branches: with hints (``"... prefer (e.g. ...)."``) and without (``"... prefer."``). The no-hints branch fires when ``visible_tool_names`` is non-None but no hint tool is in it. With every current hint tool tagged ``TAG_READONLY`` this branch is unreachable by the existing tool set — even ``LIBTMUX_SAFETY=readonly`` keeps ``snapshot_pane`` / ``wait_for_text`` / ``search_panes`` visible. Codecov flagged it as uncovered, and the upcoming Phase 5 (``LIBTMUX_TOOLSETS``) makes it reachable: a user setting ``LIBTMUX_TOOLSETS=server,session`` would hide every pane tool, leaving ``visible_tool_names`` disjoint from every ``_HANDLE_HINTS`` key. Pin the branch's output now so a regression that emits broken syntax (trailing ", ).", duplicate periods, etc.) fails loudly. Pure addition; no source changes. Pulls the empty-set branch from 0% to 100% coverage. --- tests/test_server.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_server.py b/tests/test_server.py index cc43add..56c237b 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -311,6 +311,42 @@ def test_handle_hints_keys_are_registered_tools() -> None: ) +def test_format_handles_section_empty_visible_set_emits_no_examples() -> None: + """``_format_handles_section`` renders a bare Tools handle when no hint matches. + + With every current hint tool tagged ``TAG_READONLY`` this branch is + unreachable by the existing tool set — even ``LIBTMUX_SAFETY=readonly`` + keeps ``snapshot_pane`` / ``wait_for_text`` / ``search_panes`` visible. + But a future hint tagged mutating/destructive-only would land here under + a readonly safety tier, and so would a future ``LIBTMUX_TOOLSETS`` filter + that hides every pane tool. Pin the branch behavior now so a regression + that emits broken syntax (e.g. trailing ", )." from a half-formatted + hint list, or duplicate periods) fails loudly. + """ + from libtmux_mcp.server import _format_handles_section + + result = _format_handles_section(set()) + + # No hint phrases emitted — no "e.g." preamble, no individual hint + # tool names mentioned in the Tools-line. + assert "e.g." not in result + for tool_name, _ in _VISIBILITY_FILTER_CASES: + assert tool_name not in result, ( + f"hint tool name {tool_name!r} leaked into card with empty visible set" + ) + + # Tools-line still present and well-formed (ends with a single period, + # no dangling comma or open paren). + expected_tools_line = ( + "- Tools — call list_tools; per-tool descriptions tell you which to prefer." + ) + assert expected_tools_line in result + + # The other two handles are unaffected — they don't depend on visibility. + assert "- Resources (tmux://)" in result + assert "- Prompts — packaged workflows" in result + + def test_registered_tools_accept_socket_name() -> None: """All registered tools (except list_servers) accept ``socket_name``. From d14d48f718a3b73b960a744c7529173f6f1d48df Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 20:41:49 -0500 Subject: [PATCH 07/10] mcp(test[server]): clarify socket_name contract test runs unfiltered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``test_registered_tools_accept_socket_name`` constructs a fresh ``FastMCP("socket-name-contract")`` and asserts every registered tool (except ``SOCKET_NAME_EXEMPT``) takes a ``socket_name`` parameter. This exercises the unfiltered registered set, not the production singleton at ``server.py:222`` (which has middleware + tier filtering applied by ``run_server``). The behavior is correct: any subset of an all-pass set is also all-pass for the same property, so verifying on the unfiltered set covers every tier-filtered subset by implication. Adding the scope note to the docstring prevents a future maintainer from misreading the test as covering the singleton — and flags that tier-specific socket_name behavior, if ever added, would need a parallel test. Documentation only; no test logic changes. --- tests/test_server.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_server.py b/tests/test_server.py index 56c237b..bb3a012 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -356,6 +356,15 @@ def test_registered_tools_accept_socket_name() -> None: If a future tool registration drops ``socket_name``, this test catches the regression instead of silently making the agent-facing instructions a lie. + + **Scope**: this test runs against an unfiltered fresh ``FastMCP`` — + it does NOT exercise the production singleton at ``server.py`` + (which has middleware + ``mcp.enable(tags=..., only=True)`` applied + by ``run_server``). The unfiltered set is a strict superset of any + tier-filtered visible set, so a positive result here implies the + same property on every filtered subset. A future maintainer + adding tier-specific socket_name behavior should add a parallel + test against the singleton. """ import asyncio import inspect From b20d651b5958fc48748085dec263254db89db4ec Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 20:43:39 -0500 Subject: [PATCH 08/10] mcp(fix[pane_tools]): document capture_pane truncation marker for agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ``capture_pane`` truncates, the returned string is prefixed with a literal ``[... truncated K lines ...]`` first line so the caller can detect the cap. This is documented in the function docstring at ``pane_tools/io.py:174-178``, but FastMCP only surfaces the ``description=`` override registered at ``pane_tools/__init__.py:83-95``, not the docstring — and the override only said "tail-preserving truncation at max_lines, default 500". Agents reading the description had no signal that the first line of a truncated response is a marker rather than terminal content, and could mishandle the output. Extends the override to name the marker and tell the agent it's a literal prefix line to skip when parsing. Also surfaces the ``max_lines=None`` opt-out. Test: adds a row to ``test_tool_description_includes`` asserting ``capture_pane``'s description contains ``"truncated"``. The contract suite already pinned ``snapshot_pane`` / ``wait_for_text`` / ``search_panes`` cross-references; this extends the same drift guard to the truncation behavior. --- src/libtmux_mcp/tools/pane_tools/__init__.py | 18 +++++++++++------- tests/test_server.py | 5 +++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/__init__.py b/src/libtmux_mcp/tools/pane_tools/__init__.py index 8f38d8b..52c8901 100644 --- a/src/libtmux_mcp/tools/pane_tools/__init__.py +++ b/src/libtmux_mcp/tools/pane_tools/__init__.py @@ -85,13 +85,17 @@ def register(mcp: FastMCP) -> None: annotations=ANNOTATIONS_RO, tags={TAG_READONLY}, description=( - "Capture the visible contents of a tmux pane (tail-preserving " - "truncation at max_lines, default 500). For pane content + " - "cursor + mode + scroll state in one call, use snapshot_pane. " - "For 'send_keys then wait for output' flows, use wait_for_text " - "or wait_for_content_change instead of a capture_pane retry " - "loop — server-side blocking is dramatically cheaper in agent " - "turns. To find text across many panes, use search_panes." + "Capture the visible contents of a tmux pane. Output is " + "tail-preserving truncated at max_lines (default 500); when " + "truncation occurs, the first line of the returned string is a " + "literal '[... truncated K lines ...]' marker — skip it when " + "parsing terminal content. Pass max_lines=None to disable " + "truncation. For pane content + cursor + mode + scroll state in " + "one call, use snapshot_pane. For 'send_keys then wait for " + "output' flows, use wait_for_text or wait_for_content_change " + "instead of a capture_pane retry loop — server-side blocking " + "is dramatically cheaper in agent turns. To find text across " + "many panes, use search_panes." ), )(capture_pane) mcp.tool( diff --git a/tests/test_server.py b/tests/test_server.py index bb3a012..e6145f0 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -402,6 +402,11 @@ def test_registered_tools_accept_socket_name() -> None: ("capture_pane", "snapshot_pane"), ("capture_pane", "wait_for_text"), ("capture_pane", "search_panes"), + # Truncation marker — agents need to know the first line of a + # truncated capture is a literal '[... truncated K lines ...]' + # header, not terminal content. Without this in the description, + # an agent might treat the marker as the most recent output. + ("capture_pane", "truncated"), ("show_hooks", "tmux config file"), ("load_buffer", "list_buffers"), ("load_buffer", "clipboard history"), From 258ecfbbba85750f90c07a8790bc5d95b2029451 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 20:45:46 -0500 Subject: [PATCH 09/10] mcp(fix[resources]): lead format-strings reference with subset disclaimer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ``tmux://reference/format-strings`` resource lists ~25 of tmux's ~200 format variables. Without a clear "this is a subset" signal at the top, an agent that reads the body and doesn't find e.g. ``#{history_size}`` (one of the 175 omitted strings) may erroneously conclude that string is unsupported and skip a ``display_message`` call that would have worked. The closing ``man tmux`` line existed but came after the substantive content — too easy to miss in a top-down skim. Adds a leading note declaring the resource a curated cheat sheet, not the complete catalog, and points readers at ``man tmux`` for anything not listed. Also softens the ``get_format_string_reference`` docstring's "avoids hallucinated format names" claim — the resource is now honest about being a subset, so the docstring framing matches. Test: ``test_format_string_reference_leads_with_subset_disclaimer`` asserts both that the disclaimer keywords are present AND that the disclaimer precedes the first format-variable example, so a future edit can't quietly move the disclaimer to the bottom (recreating the original problem). --- src/libtmux_mcp/resources/reference.py | 21 ++++++++++----- tests/test_resources.py | 37 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/libtmux_mcp/resources/reference.py b/src/libtmux_mcp/resources/reference.py index b4d4a80..e15d293 100644 --- a/src/libtmux_mcp/resources/reference.py +++ b/src/libtmux_mcp/resources/reference.py @@ -27,6 +27,12 @@ _FORMAT_STRING_REFERENCE = textwrap.dedent("""\ # tmux format strings + > **Note:** This is a curated cheat sheet, not the complete catalog. + > tmux supports ~200 format variables; this lists the ones agents + > most commonly need. Omission here does NOT mean a string is + > unsupported — fall back to ``man tmux`` (FORMATS section) for + > anything not listed. + Pass via ``display_message(format_string="#{...}")`` or any other tool that accepts a tmux format expression. @@ -90,12 +96,15 @@ def register(mcp: FastMCP) -> None: mime_type=_MARKDOWN_MIME, ) def get_format_string_reference() -> str: - """Return the tmux format-string cheat sheet as Markdown. - - Static reference content. Use this when an agent encounters - an unfamiliar ``#{...}`` field — pulling the resource is - cheaper than a ``display_message`` round-trip and avoids - hallucinated format names. + """Return a curated subset of the tmux format-string catalog as Markdown. + + Covers the format variables agents most commonly encounter; for + less-common ones, the body itself directs the agent at + ``man tmux`` (FORMATS section). The resource is intentionally + a subset rather than a mirror — it stays small enough to be + cheap to pull, and the disclaimer at the top prevents the + false-negative trap where an agent assumes an omitted string + is unsupported. """ return _FORMAT_STRING_REFERENCE diff --git a/tests/test_resources.py b/tests/test_resources.py index cd54c0a..817bb74 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -237,6 +237,43 @@ def test_format_string_reference_returns_markdown( assert "#{?cond,then,else}" in body +def test_format_string_reference_leads_with_subset_disclaimer( + reference_resource_functions: dict[str, t.Any], +) -> None: + """The body declares itself a curated subset BEFORE the substantive content. + + Without a leading disclaimer, an agent that reads the resource and + doesn't find a format string it knows is valid (e.g. + ``#{history_size}``, one of the ~175 omitted strings) may + erroneously conclude the string is unsupported and skip a + ``display_message`` call that would have worked. The disclaimer + must precede the catalog content so an agent skim-reading the top + of the body cannot miss it. + """ + fn = reference_resource_functions["tmux://reference/format-strings"] + body = fn() + + # Disclaimer keywords must appear in the body... + assert "curated" in body.lower(), ( + "format-string reference must declare itself a curated subset" + ) + assert "man tmux" in body, ( + "format-string reference must point readers at man tmux for the " + "complete catalog" + ) + + # ...AND must appear *before* the first format-variable example. + # Otherwise a top-down skim hits the catalog before the caveat. + first_example = body.find("#{pane_id}") + disclaimer = body.lower().find("curated") + assert disclaimer != -1, "disclaimer absent" + assert disclaimer < first_example, ( + f"disclaimer at offset {disclaimer} comes after first format-variable " + f"example at offset {first_example}; readers will see the catalog " + f"before the 'this is a subset' caveat" + ) + + def test_format_string_reference_advertises_markdown_mime() -> None: """tmux://reference/format-strings is registered with text/markdown. From 2b985dde9ca03f7d27bde8e95fed24643a3a5657 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 26 Apr 2026 20:47:52 -0500 Subject: [PATCH 10/10] mcp(test[server]): integration test card hints against real registered tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``test_card_omits_invisible_tools`` proves the visibility filter's *logic* using a synthetic 3-element ``visible_tool_names`` set. The production wiring uses ~20+ names returned by ``asyncio.run(mcp.list_tools())`` after ``mcp.enable(tags=..., only=True)`` runs in ``run_server``. The two paths share code but are not the same exercise. Adds an integration test that registers the real tool surface, applies the same ``mcp.enable(tags={TAG_READONLY}, only=True)`` call ``run_server`` makes, collects the visible names from ``mcp.list_tools()``, and asserts every hint phrase still appears in the card. All three current hint tools (``snapshot_pane``, ``wait_for_text``, ``search_panes``) carry ``TAG_READONLY``, so even the most restrictive tier keeps them visible — the test pins this invariant. A future change that moves a hint tool out of the readonly tier will fail this test and force a deliberate decision: retag the tool, or drop the hint from ``_HANDLE_HINTS``. The check is paired: each iteration asserts both that the hint tool is actually in the visible set (catches an unexpected tier change) and that the corresponding phrase is in the produced card (catches a wiring regression). Pure addition; no source changes. --- tests/test_server.py | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test_server.py b/tests/test_server.py index e6145f0..f592b1d 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -347,6 +347,51 @@ def test_format_handles_section_empty_visible_set_emits_no_examples() -> None: assert "- Prompts — packaged workflows" in result +def test_card_emits_hints_against_real_registered_tools() -> None: + """End-to-end: register_tools → mcp.enable → list_tools → _build_instructions. + + ``test_card_omits_invisible_tools`` proves the filter *logic* using a + 3-element synthetic ``visible_tool_names``. This test proves the + production *wiring*: it registers the real tool surface, applies the + same ``mcp.enable(tags=..., only=True)`` call ``run_server`` makes, + collects the visible names from ``mcp.list_tools()``, and confirms + every hint phrase still appears in the produced card. + + All three current hint tools (``snapshot_pane``, ``wait_for_text``, + ``search_panes``) carry ``TAG_READONLY``, so even the most + restrictive safety tier keeps them visible — the test pins this + invariant. If a future change moves a hint tool out of the readonly + tier, this test will fail and force a deliberate decision about + whether to retain or drop the corresponding card hint. + """ + import asyncio + + from fastmcp import FastMCP + + from libtmux_mcp.tools import register_tools + + mcp = FastMCP(name="card-wiring-integration") + register_tools(mcp) + # Mirror run_server's gating call exactly — readonly is the most + # restrictive tier, so a hint phrase surviving here implies it + # survives every higher tier too. + mcp.enable(tags={TAG_READONLY}, only=True) + + visible = {tool.name for tool in asyncio.run(mcp.list_tools())} + card = _build_instructions(TAG_READONLY, visible_tool_names=visible) + + for hint_tool, phrase in _VISIBILITY_FILTER_CASES: + assert hint_tool in visible, ( + f"hint tool {hint_tool!r} unexpectedly hidden under readonly " + f"tier; either retag the tool or drop the hint from " + f"_HANDLE_HINTS" + ) + assert phrase in card, ( + f"hint phrase {phrase!r} missing from card built against the " + f"real registered tool set; production wiring is broken" + ) + + def test_registered_tools_accept_socket_name() -> None: """All registered tools (except list_servers) accept ``socket_name``.