diff --git a/docs/topics/prompting.md b/docs/topics/prompting.md index 74250ff..450d412 100644 --- a/docs/topics/prompting.md +++ b/docs/topics/prompting.md @@ -11,19 +11,22 @@ Every MCP client receives these instructions when connecting to the libtmux-mcp ```{code-block} text :class: server-prompt -libtmux MCP server for programmatic tmux control. tmux hierarchy: -Server > Session > Window > Pane. Use pane_id (e.g. '%1') as the -preferred targeting method - it is globally unique within a tmux server. -Use send_keys to execute commands and capture_pane to read output. All -tools accept an optional socket_name parameter for multi-server support -(defaults to LIBTMUX_SOCKET env var). - -IMPORTANT — metadata vs content: list_windows, list_panes, and -list_sessions only search metadata (names, IDs, current command). To -find text that is actually visible in terminals — when users ask what -panes 'contain', 'mention', 'show', or 'have' — use search_panes to -search across all pane contents, or list_panes + capture_pane on each -pane for manual inspection. +libtmux MCP server: programmatic tmux control. tmux hierarchy is +Server > Session > Window > Pane; every pane has a globally unique +pane_id like %1 — prefer it over name/index for targeting. Targeted +tools accept an optional socket_name (defaults to LIBTMUX_SOCKET); +list_servers discovers sockets via TMUX_TMPDIR / extra_socket_paths +and is the documented socket_name exception. + +Three handles cover everything the agent needs: +- Tools — call list_tools; per-tool descriptions tell you which to + prefer (e.g. snapshot_pane over capture_pane + get_pane_info, + wait_for_text over capture_pane in a retry loop, search_panes over + list_panes when the user says "panes that contain X"). +- Resources (tmux://) — browseable hierarchy plus reference cards + (format strings). +- Prompts — packaged workflows: run_and_wait, diagnose_failing_pane, + build_dev_workspace, interrupt_gracefully. ``` The server also dynamically adds: diff --git a/src/libtmux_mcp/resources/__init__.py b/src/libtmux_mcp/resources/__init__.py index 950defe..39164d8 100644 --- a/src/libtmux_mcp/resources/__init__.py +++ b/src/libtmux_mcp/resources/__init__.py @@ -10,6 +10,7 @@ def register_resources(mcp: FastMCP) -> None: """Register all resource modules with the FastMCP instance.""" - from libtmux_mcp.resources import hierarchy + from libtmux_mcp.resources import hierarchy, reference hierarchy.register(mcp) + reference.register(mcp) diff --git a/src/libtmux_mcp/resources/reference.py b/src/libtmux_mcp/resources/reference.py new file mode 100644 index 0000000..e15d293 --- /dev/null +++ b/src/libtmux_mcp/resources/reference.py @@ -0,0 +1,116 @@ +"""Static reference resources for tmux primitives. + +Why a resource and not a tool: tmux format strings (``#{pane_id}``, +``#{pane_in_mode}``, ``#{?cond,then,else}``) are a closed reference +catalog, not a query. Agents that hit a ``#{...}`` field they don't +recognize need a fixed lookup, not a tool round-trip. Exposing this as +an MCP resource lets clients pull it on demand and lets the agent +recover from an unknown-format-name guess without paying a +``display_message`` round-trip just to discover what's available. +""" + +from __future__ import annotations + +import textwrap +import typing as t + +if t.TYPE_CHECKING: + from fastmcp import FastMCP + + +#: MIME type for the format-string reference resource. Markdown gives +#: clients enough structure to render headings and code spans without +#: requiring a richer content type. +_MARKDOWN_MIME = "text/markdown" + + +_FORMAT_STRING_REFERENCE = textwrap.dedent("""\ + # tmux format strings + + > **Note:** This is a curated cheat sheet, not the complete catalog. + > tmux supports ~200 format variables; this lists the ones agents + > most commonly need. Omission here does NOT mean a string is + > unsupported — fall back to ``man tmux`` (FORMATS section) for + > anything not listed. + + Pass via ``display_message(format_string="#{...}")`` or any other + tool that accepts a tmux format expression. + + ## Pane + + - ``#{pane_id}`` — globally unique pane identifier (e.g. ``%1``) + - ``#{pane_index}`` — index within the window + - ``#{pane_current_command}`` — foreground command name + - ``#{pane_current_path}`` — current working directory + - ``#{pane_pid}`` — pane process PID + - ``#{pane_dead}`` — ``1`` when the pane's process has exited + - ``#{pane_in_mode}`` — ``1`` when the pane is in copy/scroll mode + - ``#{pane_mode}`` — current pane mode name when in mode + - ``#{pane_active}`` — ``1`` for the active pane in its window + - ``#{pane_width}`` / ``#{pane_height}`` — pane dimensions in cells + - ``#{cursor_x}`` / ``#{cursor_y}`` — cursor position within the pane + - ``#{scroll_position}`` — scrollback position when in copy mode + + ## Window + + - ``#{window_id}`` — globally unique window identifier (e.g. ``@1``) + - ``#{window_index}`` — window index within the session + - ``#{window_name}`` — window name + - ``#{window_zoomed_flag}`` — ``1`` when a pane is zoomed + - ``#{window_layout}`` — current layout string + - ``#{window_panes}`` — number of panes in the window + - ``#{window_active}`` — ``1`` for the active window in its session + + ## Session + + - ``#{session_id}`` — globally unique session identifier (e.g. ``$1``) + - ``#{session_name}`` — session name + - ``#{session_attached}`` — ``1`` when at least one client is attached + - ``#{session_windows}`` — number of windows in the session + + ## Server / client + + - ``#{host}`` — hostname running the tmux server + - ``#{client_tty}`` — TTY of the client (when evaluated client-side) + - ``#{socket_path}`` — server socket path + + ## Conditionals and string operations + + - ``#{?cond,then,else}`` — emit ``then`` if ``cond`` is truthy, else + ``else`` + - ``#{C/i:pattern}`` — case-insensitive search inside the result + - ``#{=N:expr}`` — truncate ``expr`` to ``N`` characters + - ``#{s/from/to/:expr}`` — substitution + - ``#{T:expr}`` — recursively expand format strings within ``expr`` + + See ``man tmux`` (FORMATS section) for the complete catalog. +""") + + +def register(mcp: FastMCP) -> None: + """Register reference resources with the FastMCP instance.""" + + @mcp.resource( + "tmux://reference/format-strings", + title="tmux Format String Reference", + mime_type=_MARKDOWN_MIME, + ) + def get_format_string_reference() -> str: + """Return a curated subset of the tmux format-string catalog as Markdown. + + Covers the format variables agents most commonly encounter; for + less-common ones, the body itself directs the agent at + ``man tmux`` (FORMATS section). The resource is intentionally + a subset rather than a mirror — it stays small enough to be + cheap to pull, and the disclaimer at the top prevents the + false-negative trap where an agent assumes an omitted string + is unsupported. + """ + return _FORMAT_STRING_REFERENCE + + # Type checkers: list the function to silence unused-name warnings + # without exposing it outside this closure. + _ = (get_format_string_reference,) + + +__all__ = ["register"] diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index ecf87a5..de26452 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -43,96 +43,95 @@ _ServerCacheKey: t.TypeAlias = tuple[str | None, str | None, str | None] # --------------------------------------------------------------------------- -# _BASE_INSTRUCTIONS — composed from named segments. +# _BASE_INSTRUCTIONS — slim "three handles" card. # -# The string handed to FastMCP grew organically from "what does this server -# do?" toward a hybrid of positive guidance (HIERARCHY, READ_TOOLS, -# WAIT_NOT_POLL) and *gap-explainers* (HOOKS_GAP, BUFFERS_GAP) that document -# why a tool the agent might expect is absent. Splitting into named -# constants keeps additions deliberate: when a new ``_GAP`` segment feels -# tempting, prefer first to push the explanation into the relevant tool's -# docstring/description (where the agent encounters it at call time) and -# only fall back to a server-level segment when the gap is *server-shaped* -# (e.g. an entire tool family is intentionally missing). +# The card answers two questions the agent has at session start: +# (1) what is this server, and (2) where do I look for the rest? It points +# at tools, resources, and prompts — and that's it. Tool-specific rules +# (which tool to prefer, what's intentionally not exposed and why) live in +# the relevant tool's docstring or ``description=`` override, where the +# agent reads them on every ``list_tools`` call instead of parsing them +# out of a one-shot prompt that has long since rolled out of context. # -# Output text is byte-identical to the previous monolith; tests assert on -# substrings of ``_BASE_INSTRUCTIONS``, so keeping the join shape stable -# matters. +# When in doubt about adding text here, ask: is this rule cross-cutting +# (about the server as a whole) or tool-specific (about when to call X +# vs Y)? Cross-cutting belongs in the card; tool-specific belongs in the +# tool description. ``test_card_length_budget`` enforces a soft 200-word +# ceiling against creeping re-bloat. # --------------------------------------------------------------------------- -_INSTR_HIERARCHY = ( - "libtmux MCP server for programmatic tmux control. " - "tmux hierarchy: Server > Session > Window > Pane. " - "Use pane_id (e.g. '%1') as the preferred targeting method - " - "it is globally unique within a tmux server. " - "Use send_keys to execute commands and capture_pane to read output. " - "Targeted tmux tools accept an optional socket_name parameter " - "(defaults to LIBTMUX_SOCKET env var); list_servers discovers " - "sockets via TMUX_TMPDIR plus optional extra_socket_paths instead." +_INSTR_CARD = ( + "libtmux MCP server: programmatic tmux control. tmux hierarchy is " + "Server > Session > Window > Pane; every pane has a globally unique " + "pane_id like %1 — prefer it over name/index for targeting. Targeted " + "tools accept an optional socket_name (defaults to LIBTMUX_SOCKET); " + "list_servers discovers sockets via TMUX_TMPDIR / extra_socket_paths " + "and is the documented socket_name exception." ) -_INSTR_METADATA_VS_CONTENT = ( - "IMPORTANT — metadata vs content: list_windows, list_panes, and " - "list_sessions only search metadata (names, IDs, current command). " - "To find text that is actually visible in terminals — when users ask " - "what panes 'contain', 'mention', 'show', or 'have' — use " - "search_panes to search across all pane contents, or list_panes + " - "capture_pane on each pane for manual inspection." +#: Tool-prefer hints in the Tools handle, keyed by the tool that +#: motivates them. ``_format_handles_section`` filters these by +#: ``visible_tool_names`` so the card never names a tool the agent +#: cannot call (e.g. ``send_keys``-only hints under +#: ``LIBTMUX_SAFETY=readonly``). +_HANDLE_HINTS: tuple[tuple[str, str], ...] = ( + ("snapshot_pane", "snapshot_pane over capture_pane + get_pane_info"), + ("wait_for_text", "wait_for_text over capture_pane in a retry loop"), + ( + "search_panes", + 'search_panes over list_panes when the user says "panes that contain X"', + ), ) -_INSTR_READ_TOOLS = ( - "READ TOOLS TO PREFER: snapshot_pane returns pane content plus " - "cursor position, mode, and scroll state in one call — use it " - "instead of capture_pane + get_pane_info when you need context. " - "display_message evaluates a tmux format string (e.g. " - "'#{pane_current_command}', '#{session_name}') against a target " - "and returns the expanded value — cheaper than parsing captured " - "output. (The tool is named after the tmux 'display-message -p' " - "verb it wraps; its MCP title is 'Evaluate tmux Format String'.)" -) -_INSTR_WAIT_NOT_POLL = ( - "WAIT, DON'T POLL: for 'run command, wait for output' workflows " - "use wait_for_text (matches text/regex on a pane) or " - "wait_for_content_change (waits for any change). These block " - "server-side until the condition is met or the timeout expires, " - "which is dramatically cheaper in agent turns than capture_pane " - "in a retry loop." -) - -#: Gap-explainer: write-hook tools are intentionally absent. See module -#: comment above for when to add another ``_GAP`` segment vs. push the -#: explanation into a tool description. -_INSTR_HOOKS_GAP = ( - "HOOKS ARE READ-ONLY: inspect via show_hooks / show_hook. Write-hook " - "tools are intentionally not exposed — tmux hooks survive process " - "death, so they belong in your tmux config file, not a transient " - "MCP session." -) +def _format_handles_section(visible_tool_names: set[str] | None) -> str: + """Render the three-handles bullet list, optionally visibility-filtered. -#: Gap-explainer: ``list_buffers`` is intentionally absent because tmux -#: buffers can include OS clipboard history. See module comment above. -_INSTR_BUFFERS_GAP = ( - "BUFFERS: load_buffer stages content, paste_buffer delivers it into " - "a pane, delete_buffer removes the staged buffer. Track owned " - "buffers via the BufferRef returned from load_buffer — there is no " - "list_buffers tool because tmux buffers may include OS clipboard " - "history (passwords, private snippets)." -) + When ``visible_tool_names`` is ``None`` every hint is included + (backward-compat for tests that build instructions without first + invoking ``mcp.enable``). Otherwise hints whose tool is not in the + visible set are dropped — naming a tool the agent cannot call would + be misleading. + """ + if visible_tool_names is None: + hints = [hint for _, hint in _HANDLE_HINTS] + else: + hints = [hint for tool, hint in _HANDLE_HINTS if tool in visible_tool_names] -_BASE_INSTRUCTIONS = "\n\n".join( - ( - _INSTR_HIERARCHY, - _INSTR_METADATA_VS_CONTENT, - _INSTR_READ_TOOLS, - _INSTR_WAIT_NOT_POLL, - _INSTR_HOOKS_GAP, - _INSTR_BUFFERS_GAP, + tools_line = ( + "- Tools — call list_tools; per-tool descriptions tell you which to prefer" ) -) + if hints: + tools_line += " (e.g. " + ", ".join(hints) + ")." + else: + tools_line += "." + + return "\n".join( + ( + "Three handles cover everything the agent needs:", + tools_line, + ( + "- Resources (tmux://) — browseable hierarchy plus reference " + "cards (format strings)." + ), + ( + "- Prompts — packaged workflows: run_and_wait, " + "diagnose_failing_pane, build_dev_workspace, " + "interrupt_gracefully." + ), + ) + ) + + +_INSTR_HANDLES = _format_handles_section(visible_tool_names=None) +_BASE_INSTRUCTIONS = "\n\n".join((_INSTR_CARD, _INSTR_HANDLES)) -def _build_instructions(safety_level: str = TAG_MUTATING) -> str: + +def _build_instructions( + safety_level: str = TAG_MUTATING, + visible_tool_names: set[str] | None = None, +) -> str: """Build server instructions with agent context and safety level. When the MCP server process runs inside a tmux pane, ``TMUX_PANE`` and @@ -143,13 +142,25 @@ def _build_instructions(safety_level: str = TAG_MUTATING) -> str: ---------- safety_level : str Active safety tier (readonly, mutating, or destructive). + visible_tool_names : set of str, optional + When provided, the handles section drops hints for tools not in + the set so the card never names a tool the agent cannot call. + ``run_server`` populates this from ``mcp.list_tools()`` after + ``mcp.enable(tags=..., only=True)`` has applied the safety-tier + filter. Defaults to ``None`` (backward-compat: all hints emitted), + which is what the module-import-time placeholder uses before + ``run_server`` runs. Returns ------- str Server instructions string, optionally with agent tmux context. """ - parts: list[str] = [_BASE_INSTRUCTIONS] + if visible_tool_names is None: + base = _BASE_INSTRUCTIONS + else: + base = "\n\n".join((_INSTR_CARD, _format_handles_section(visible_tool_names))) + parts: list[str] = [base] # Safety tier context parts.append( @@ -321,6 +332,8 @@ def _register_all() -> None: def run_server() -> None: """Run the MCP server.""" + import asyncio + _register_all() # Use FastMCP's native visibility system as primary gate, @@ -332,4 +345,17 @@ def run_server() -> None: allowed_tags.add(TAG_DESTRUCTIVE) mcp.enable(tags=allowed_tags, only=True) + # Rebuild instructions now that ``mcp.enable`` has hidden tools + # outside the active safety tier. The card mentions specific tools + # by name (snapshot_pane, wait_for_text, search_panes); naming a + # tool the agent cannot call would be misleading. The + # module-import-time ``instructions=`` set on the FastMCP + # constructor was a placeholder built without a visibility filter — + # this overwrite is the authoritative version. + visible_tool_names = {tool.name for tool in asyncio.run(mcp.list_tools())} + mcp.instructions = _build_instructions( + safety_level=_safety_level, + visible_tool_names=visible_tool_names, + ) + mcp.run() diff --git a/src/libtmux_mcp/tools/buffer_tools.py b/src/libtmux_mcp/tools/buffer_tools.py index 108119d..6fb35ae 100644 --- a/src/libtmux_mcp/tools/buffer_tools.py +++ b/src/libtmux_mcp/tools/buffer_tools.py @@ -176,6 +176,11 @@ def load_buffer( ) -> BufferRef: """Load text into a new agent-namespaced tmux paste buffer. + Track the returned BufferRef on subsequent paste_buffer / show_buffer + / delete_buffer calls — there is no list_buffers tool, because tmux + buffers may include OS clipboard history (passwords, private + snippets) and a blanket enumeration would leak that to the agent. + Each call allocates a fresh buffer name — two concurrent calls will land in distinct buffers even if they pass the same ``logical_name``. Agents MUST use the returned :attr:`BufferRef.buffer_name` on diff --git a/src/libtmux_mcp/tools/hook_tools.py b/src/libtmux_mcp/tools/hook_tools.py index f9e8b48..b7e91fa 100644 --- a/src/libtmux_mcp/tools/hook_tools.py +++ b/src/libtmux_mcp/tools/hook_tools.py @@ -166,6 +166,11 @@ def show_hooks( ) -> HookListResult: """List configured tmux hooks at the given scope. + Hooks are read-only by design: tmux hooks survive process death + (kill -9, OOM, etc.), so write-hooks belong in your tmux config file, + not a transient MCP session. No set_hook / unset_hook tool is exposed + for that reason. Use this to inspect what is configured. + ``scope="server"`` enumerates hooks installed via ``tmux set-hook -g ...``. tmux splits those globals across two options trees by hook category: session-level hooks diff --git a/src/libtmux_mcp/tools/pane_tools/__init__.py b/src/libtmux_mcp/tools/pane_tools/__init__.py index a3787ef..52c8901 100644 --- a/src/libtmux_mcp/tools/pane_tools/__init__.py +++ b/src/libtmux_mcp/tools/pane_tools/__init__.py @@ -80,9 +80,24 @@ def register(mcp: FastMCP) -> None: mcp.tool(title="Send Keys", annotations=ANNOTATIONS_SHELL, tags={TAG_MUTATING})( send_keys ) - mcp.tool(title="Capture Pane", annotations=ANNOTATIONS_RO, tags={TAG_READONLY})( - capture_pane - ) + mcp.tool( + title="Capture Pane", + annotations=ANNOTATIONS_RO, + tags={TAG_READONLY}, + description=( + "Capture the visible contents of a tmux pane. Output is " + "tail-preserving truncated at max_lines (default 500); when " + "truncation occurs, the first line of the returned string is a " + "literal '[... truncated K lines ...]' marker — skip it when " + "parsing terminal content. Pass max_lines=None to disable " + "truncation. For pane content + cursor + mode + scroll state in " + "one call, use snapshot_pane. For 'send_keys then wait for " + "output' flows, use wait_for_text or wait_for_content_change " + "instead of a capture_pane retry loop — server-side blocking " + "is dramatically cheaper in agent turns. To find text across " + "many panes, use search_panes." + ), + )(capture_pane) mcp.tool( title="Resize Pane", annotations=ANNOTATIONS_MUTATING, tags={TAG_MUTATING} )(resize_pane) diff --git a/src/libtmux_mcp/tools/pane_tools/io.py b/src/libtmux_mcp/tools/pane_tools/io.py index d4483ea..d84296e 100644 --- a/src/libtmux_mcp/tools/pane_tools/io.py +++ b/src/libtmux_mcp/tools/pane_tools/io.py @@ -32,9 +32,11 @@ def send_keys( ) -> str: """Send keys (commands or text) to a tmux pane. - After sending, use wait_for_text to block until the command completes, - or capture_pane to read the result. Do not capture_pane immediately — - there is a race condition. + After sending, use wait_for_text to block until the command completes + (server-side, turn-cheap) or capture_pane once you know it has + finished. Do not capture_pane in a tight loop — that races with + command execution and burns agent turns; wait_for_text is the + server-side blocking primitive built for this flow. Parameters ---------- diff --git a/src/libtmux_mcp/tools/session_tools.py b/src/libtmux_mcp/tools/session_tools.py index c80ad8f..cfc7c1f 100644 --- a/src/libtmux_mcp/tools/session_tools.py +++ b/src/libtmux_mcp/tools/session_tools.py @@ -39,8 +39,9 @@ def list_windows( ) -> list[WindowInfo]: """List windows in a tmux session, or all windows across sessions. - Only searches window metadata (name, index, layout). To search - the actual text visible in terminal panes, use search_panes instead. + Searches window metadata only (name, index, layout). For text + visible IN terminals — when users say "panes that contain/mention/show X" + — use search_panes instead. Parameters ---------- diff --git a/src/libtmux_mcp/tools/window_tools.py b/src/libtmux_mcp/tools/window_tools.py index d9e86a1..294e5e0 100644 --- a/src/libtmux_mcp/tools/window_tools.py +++ b/src/libtmux_mcp/tools/window_tools.py @@ -50,9 +50,9 @@ def list_panes( ) -> list[PaneInfo]: """List panes in a tmux window, session, or across the entire server. - Only searches pane metadata (current command, title, working directory). - To search the actual text visible in terminal panes, use search_panes - instead. + Searches pane metadata only (current command, title, working + directory). For text visible IN terminals — when users say "panes + that contain/mention/show X" — use search_panes instead. Parameters ---------- diff --git a/tests/test_resources.py b/tests/test_resources.py index 2fedfd3..817bb74 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -8,6 +8,7 @@ import pytest from libtmux_mcp.resources.hierarchy import register +from libtmux_mcp.resources.reference import register as register_reference if t.TYPE_CHECKING: from libtmux.pane import Pane @@ -184,3 +185,111 @@ def test_hierarchy_resources_advertise_mime_type(uri: str, expected_mime: str) - candidate = by_uri.get(uri) assert candidate is not None, f"resource {uri!r} not registered" assert candidate.mime_type == expected_mime + + +# --------------------------------------------------------------------------- +# Reference resources (static catalogs, no tmux server interaction) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def reference_resource_functions() -> dict[str, t.Any]: + """Capture reference-module resource closures by URI. + + Mirrors ``resource_functions`` but registers + :mod:`libtmux_mcp.resources.reference` instead of ``hierarchy``. + Reference resources are static and need no tmux fixtures. + """ + functions: dict[str, t.Any] = {} + + class MockMCP: + def resource(self, uri: str, **kwargs: t.Any) -> t.Any: + def decorator(fn: t.Any) -> t.Any: + functions[uri] = fn + return fn + + return decorator + + register_reference(MockMCP()) # type: ignore[arg-type] + return functions + + +def test_format_string_reference_returns_markdown( + reference_resource_functions: dict[str, t.Any], +) -> None: + """tmux://reference/format-strings returns non-empty Markdown. + + The agent's reason for pulling this resource is to recover from an + unfamiliar ``#{...}`` token without burning a ``display_message`` + round-trip. The body must therefore (a) be present and (b) name + the format strings most likely to confuse — pane / window / + session ID forms and the ``#{?cond,then,else}`` conditional. + """ + fn = reference_resource_functions["tmux://reference/format-strings"] + body = fn() + assert isinstance(body, str) + assert body.strip(), "format-string reference body is empty" + # Spot-check the highest-traffic catalog entries — if any of these + # vanish, the reference is failing at its job. + assert "#{pane_id}" in body + assert "#{window_id}" in body + assert "#{session_id}" in body + assert "#{?cond,then,else}" in body + + +def test_format_string_reference_leads_with_subset_disclaimer( + reference_resource_functions: dict[str, t.Any], +) -> None: + """The body declares itself a curated subset BEFORE the substantive content. + + Without a leading disclaimer, an agent that reads the resource and + doesn't find a format string it knows is valid (e.g. + ``#{history_size}``, one of the ~175 omitted strings) may + erroneously conclude the string is unsupported and skip a + ``display_message`` call that would have worked. The disclaimer + must precede the catalog content so an agent skim-reading the top + of the body cannot miss it. + """ + fn = reference_resource_functions["tmux://reference/format-strings"] + body = fn() + + # Disclaimer keywords must appear in the body... + assert "curated" in body.lower(), ( + "format-string reference must declare itself a curated subset" + ) + assert "man tmux" in body, ( + "format-string reference must point readers at man tmux for the " + "complete catalog" + ) + + # ...AND must appear *before* the first format-variable example. + # Otherwise a top-down skim hits the catalog before the caveat. + first_example = body.find("#{pane_id}") + disclaimer = body.lower().find("curated") + assert disclaimer != -1, "disclaimer absent" + assert disclaimer < first_example, ( + f"disclaimer at offset {disclaimer} comes after first format-variable " + f"example at offset {first_example}; readers will see the catalog " + f"before the 'this is a subset' caveat" + ) + + +def test_format_string_reference_advertises_markdown_mime() -> None: + """tmux://reference/format-strings is registered with text/markdown. + + Concrete URIs (no ``{...}`` template params) register as resources, + not resource templates — they show up under ``mcp.list_resources()`` + rather than ``mcp.list_resource_templates()``. + """ + import asyncio + + from fastmcp import FastMCP + + mcp = FastMCP(name="test-reference-mime") + register_reference(mcp) + + resources = asyncio.run(mcp.list_resources()) + by_uri = {str(getattr(r, "uri", "")): r for r in resources} + target = by_uri.get("tmux://reference/format-strings") + assert target is not None, "format-strings reference not registered" + assert target.mime_type == "text/markdown" diff --git a/tests/test_server.py b/tests/test_server.py index a56bf70..f592b1d 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -129,65 +129,267 @@ def test_build_instructions( assert f"Safety level: {expect_safety_in_text}" in result -def test_base_instructions_content() -> None: - """_BASE_INSTRUCTIONS contains key guidance for the LLM.""" - assert "tmux hierarchy" in _BASE_INSTRUCTIONS - assert "pane_id" in _BASE_INSTRUCTIONS - assert "search_panes" in _BASE_INSTRUCTIONS - assert "metadata vs content" in _BASE_INSTRUCTIONS - - -def test_base_instructions_surface_flagship_read_tools() -> None: - """_BASE_INSTRUCTIONS mentions the richer read tools by name. - - ``display_message`` (tmux format queries) and ``snapshot_pane`` - (content + metadata in one call) are strictly more expressive than - ``capture_pane`` for most contexts, but agents that never see them - named in the instructions default to ``capture_pane`` + a follow-up - ``get_pane_info``. Naming both explicitly changes that default. +class CardContract(t.NamedTuple): + """Contract about what ``_BASE_INSTRUCTIONS`` must / must not contain. + + The slim card is the public-facing server prompt — every MCP client + that connects gets it. ``must_include`` pins the substrings agents + rely on to orient (server identity, socket_name exception, three + handles); ``must_exclude`` pins the deleted-pre-refactor phrasing so + a future drift back to the lie ("All tools accept socket_name") fails + loudly here instead of silently shipping. """ - assert "display_message" in _BASE_INSTRUCTIONS - assert "snapshot_pane" in _BASE_INSTRUCTIONS + test_id: str + must_include: tuple[str, ...] + must_exclude: tuple[str, ...] = () + + +CARD_CONTRACTS: list[CardContract] = [ + CardContract( + test_id="server_identity", + must_include=( + "tmux hierarchy", + "Server > Session > Window > Pane", + "pane_id", + ), + ), + CardContract( + test_id="socket_name_exception", + # ``list_servers`` does NOT accept socket_name (it's the discovery + # tool — see ``server_tools.py`` SOCKET_NAME_EXEMPT). The pre-refactor + # wording "All tools accept socket_name" was a lie; the new card + # qualifies "Targeted tools" and names list_servers explicitly. + must_include=("Targeted tools", "list_servers", "extra_socket_paths"), + must_exclude=("All tools accept",), + ), + CardContract( + test_id="three_handles", + # The card's job is to point at where the rest of the answer lives + # (tools / resources / prompts), not to inline tool-specific rules. + must_include=("Tools", "Resources (tmux://)", "Prompts"), + ), +] -def test_base_instructions_prefer_wait_over_poll() -> None: - """_BASE_INSTRUCTIONS names wait_for_text and wait_for_content_change. - The wait tools block server-side, which is dramatically cheaper in - agent turns than ``capture_pane`` in a retry loop. Making them - discoverable from the instructions is a no-cost UX win. +@pytest.mark.parametrize( + CardContract._fields, + CARD_CONTRACTS, + ids=[c.test_id for c in CARD_CONTRACTS], +) +def test_card_contracts( + test_id: str, + must_include: tuple[str, ...], + must_exclude: tuple[str, ...], +) -> None: + """``_BASE_INSTRUCTIONS`` is the slim "three handles" server card. + + Tool-specific rules live in tool descriptions — Phase 1 of the + instructions slim-down moved them there. The card carries only + cross-cutting orientation: server identity, the socket_name + exception, and pointers to the Tools / Resources / Prompts handles. + Anything naming a specific tool's preference rule belongs at the + call site, not here. """ - assert "wait_for_text" in _BASE_INSTRUCTIONS - assert "wait_for_content_change" in _BASE_INSTRUCTIONS + for needle in must_include: + assert needle in _BASE_INSTRUCTIONS, ( + f"[{test_id}] missing required substring {needle!r}" + ) + for needle in must_exclude: + assert needle not in _BASE_INSTRUCTIONS, ( + f"[{test_id}] forbidden substring {needle!r} crept back in" + ) + + +def test_card_length_budget() -> None: + """``_BASE_INSTRUCTIONS`` stays under the ~200-word budget. + + Per-tool rules belong in tool descriptions (visible at every + ``list_tools`` call), not in this card. This guard fails loudly if a + future contributor reaches for the card to add a tool-specific rule, + pointing them at the right home before the card grows back into the + 305-word monolith it just shrank from. + """ + word_count = len(_BASE_INSTRUCTIONS.split()) + assert word_count <= 200, ( + f"_BASE_INSTRUCTIONS grew to {word_count} words; per-tool rules " + f"belong in tool descriptions, not the card. See the module-level " + f"comment in server.py for the boundary." + ) + + +# Phrases the handles section emits when the corresponding tool is in +# ``visible_tool_names``. Phase 4: ``_build_instructions`` filters the +# Tools handle by visibility so the card never names a tool the agent +# cannot call. Keep this aligned with ``_HANDLE_HINTS`` in server.py. +_VISIBILITY_FILTER_CASES: list[tuple[str, str]] = [ + ("snapshot_pane", "snapshot_pane over capture_pane"), + ("wait_for_text", "wait_for_text over capture_pane"), + ("search_panes", "search_panes over list_panes"), +] + + +@pytest.mark.parametrize( + ("omitted_tool", "phrase_that_should_drop"), + _VISIBILITY_FILTER_CASES, + ids=[t for t, _ in _VISIBILITY_FILTER_CASES], +) +def test_card_omits_invisible_tools( + omitted_tool: str, + phrase_that_should_drop: str, +) -> None: + """The handles section drops hints for tools outside ``visible_tool_names``. + + Each row asserts that removing one tool from the visible set drops + its tool-prefer hint from the card. Sanity check: the same phrase IS + present when the tool IS visible — guards against the test passing + accidentally because the phrase was never there. + """ + full_visibility = {tool for tool, _ in _VISIBILITY_FILTER_CASES} + visible = full_visibility - {omitted_tool} + + filtered = _build_instructions(TAG_MUTATING, visible_tool_names=visible) + assert phrase_that_should_drop not in filtered, ( + f"phrase {phrase_that_should_drop!r} stayed when {omitted_tool} " + f"was filtered out — visibility filter is broken" + ) + + full = _build_instructions(TAG_MUTATING, visible_tool_names=full_visibility) + assert phrase_that_should_drop in full, ( + f"phrase {phrase_that_should_drop!r} missing even when " + f"{omitted_tool} is visible — sanity check failed" + ) + + +def test_build_instructions_default_visible_tool_names_emits_full_card() -> None: + """``visible_tool_names=None`` is the backward-compat default. + + The module-import-time ``instructions=`` placeholder builds without + invoking ``mcp.enable``, so it has no visibility set to consult. In + that case ``_build_instructions`` must emit every handle hint as if + the agent could call any tool — the alternative would be silently + dropping hints during the (brief) window before ``run_server`` + overwrites the placeholder. + """ + full = _build_instructions(TAG_MUTATING) + for _, phrase in _VISIBILITY_FILTER_CASES: + assert phrase in full, ( + f"phrase {phrase!r} missing from default _build_instructions; " + f"visible_tool_names=None should emit the full hint set" + ) + + +def test_handle_hints_keys_are_registered_tools() -> None: + """Every key in ``_HANDLE_HINTS`` must be an actual registered tool name. + + ``_format_handles_section`` filters hints by ``tool in visible_tool_names``, + so a stale key (e.g. left behind after a tool rename) silently disappears + from the filtered card while still being emitted in the unfiltered + placeholder — the worst of both worlds: agents would see a phantom tool + name during the brief import-time window, then lose the guidance entirely + once ``run_server`` filters by real visibility. + + Phase 1's ``test_tool_description_includes`` catches drift in tool + *descriptions*; this test catches drift in the parallel ``_HANDLE_HINTS`` + table. The two together close the rename-without-update loophole. + """ + import asyncio + from fastmcp import FastMCP + + from libtmux_mcp.server import _HANDLE_HINTS + from libtmux_mcp.tools import register_tools -def test_base_instructions_document_hook_boundary() -> None: - """_BASE_INSTRUCTIONS explains hooks are read-only by design. + mcp = FastMCP(name="hint-key-contract") + register_tools(mcp) + + registered = {tool.name for tool in asyncio.run(mcp.list_tools())} + for tool_name, _hint in _HANDLE_HINTS: + assert tool_name in registered, ( + f"_HANDLE_HINTS key {tool_name!r} is not a registered tool name; " + f"update _HANDLE_HINTS in server.py when renaming or removing tools" + ) - Without this sentence agents waste a turn asking for ``set_hook`` or - trying to write hooks through a nonexistent tool. Naming the - boundary heads off the exploratory call. + +def test_format_handles_section_empty_visible_set_emits_no_examples() -> None: + """``_format_handles_section`` renders a bare Tools handle when no hint matches. + + With every current hint tool tagged ``TAG_READONLY`` this branch is + unreachable by the existing tool set — even ``LIBTMUX_SAFETY=readonly`` + keeps ``snapshot_pane`` / ``wait_for_text`` / ``search_panes`` visible. + But a future hint tagged mutating/destructive-only would land here under + a readonly safety tier, and so would a future ``LIBTMUX_TOOLSETS`` filter + that hides every pane tool. Pin the branch behavior now so a regression + that emits broken syntax (e.g. trailing ", )." from a half-formatted + hint list, or duplicate periods) fails loudly. """ - assert "HOOKS ARE READ-ONLY" in _BASE_INSTRUCTIONS - assert "show_hooks" in _BASE_INSTRUCTIONS - assert "tmux config file" in _BASE_INSTRUCTIONS - - -def test_base_instructions_document_socket_name_contract() -> None: - """_BASE_INSTRUCTIONS frames the socket_name promise precisely. - - list_servers does NOT accept socket_name (it's the discovery tool — - see server_tools.py:263-264 where the signature is - ``list_servers(extra_socket_paths=...)``), so the previous "All - tools accept socket_name" wording was a lie. The instruction now - qualifies "Targeted tmux tools" and explicitly names list_servers - as the documented exception, matching what - test_registered_tools_accept_socket_name asserts at the schema - level. + from libtmux_mcp.server import _format_handles_section + + result = _format_handles_section(set()) + + # No hint phrases emitted — no "e.g." preamble, no individual hint + # tool names mentioned in the Tools-line. + assert "e.g." not in result + for tool_name, _ in _VISIBILITY_FILTER_CASES: + assert tool_name not in result, ( + f"hint tool name {tool_name!r} leaked into card with empty visible set" + ) + + # Tools-line still present and well-formed (ends with a single period, + # no dangling comma or open paren). + expected_tools_line = ( + "- Tools — call list_tools; per-tool descriptions tell you which to prefer." + ) + assert expected_tools_line in result + + # The other two handles are unaffected — they don't depend on visibility. + assert "- Resources (tmux://)" in result + assert "- Prompts — packaged workflows" in result + + +def test_card_emits_hints_against_real_registered_tools() -> None: + """End-to-end: register_tools → mcp.enable → list_tools → _build_instructions. + + ``test_card_omits_invisible_tools`` proves the filter *logic* using a + 3-element synthetic ``visible_tool_names``. This test proves the + production *wiring*: it registers the real tool surface, applies the + same ``mcp.enable(tags=..., only=True)`` call ``run_server`` makes, + collects the visible names from ``mcp.list_tools()``, and confirms + every hint phrase still appears in the produced card. + + All three current hint tools (``snapshot_pane``, ``wait_for_text``, + ``search_panes``) carry ``TAG_READONLY``, so even the most + restrictive safety tier keeps them visible — the test pins this + invariant. If a future change moves a hint tool out of the readonly + tier, this test will fail and force a deliberate decision about + whether to retain or drop the corresponding card hint. """ - assert "Targeted tmux tools accept" in _BASE_INSTRUCTIONS - assert "list_servers" in _BASE_INSTRUCTIONS - assert "extra_socket_paths" in _BASE_INSTRUCTIONS + import asyncio + + from fastmcp import FastMCP + + from libtmux_mcp.tools import register_tools + + mcp = FastMCP(name="card-wiring-integration") + register_tools(mcp) + # Mirror run_server's gating call exactly — readonly is the most + # restrictive tier, so a hint phrase surviving here implies it + # survives every higher tier too. + mcp.enable(tags={TAG_READONLY}, only=True) + + visible = {tool.name for tool in asyncio.run(mcp.list_tools())} + card = _build_instructions(TAG_READONLY, visible_tool_names=visible) + + for hint_tool, phrase in _VISIBILITY_FILTER_CASES: + assert hint_tool in visible, ( + f"hint tool {hint_tool!r} unexpectedly hidden under readonly " + f"tier; either retag the tool or drop the hint from " + f"_HANDLE_HINTS" + ) + assert phrase in card, ( + f"hint phrase {phrase!r} missing from card built against the " + f"real registered tool set; production wiring is broken" + ) def test_registered_tools_accept_socket_name() -> None: @@ -199,6 +401,15 @@ def test_registered_tools_accept_socket_name() -> None: If a future tool registration drops ``socket_name``, this test catches the regression instead of silently making the agent-facing instructions a lie. + + **Scope**: this test runs against an unfiltered fresh ``FastMCP`` — + it does NOT exercise the production singleton at ``server.py`` + (which has middleware + ``mcp.enable(tags=..., only=True)`` applied + by ``run_server``). The unfiltered set is a strict superset of any + tier-filtered visible set, so a positive result here implies the + same property on every filtered subset. A future maintainer + adding tier-specific socket_name behavior should add a parallel + test against the singleton. """ import asyncio import inspect @@ -230,21 +441,67 @@ def test_registered_tools_accept_socket_name() -> None: ) -def test_base_instructions_document_buffer_lifecycle() -> None: - """_BASE_INSTRUCTIONS explains the buffer lifecycle + no list_buffers. - - The load/paste/delete triple is non-obvious, and agents otherwise - expect a ``list_buffers`` affordance. The instruction prevents both - confusions and surfaces the clipboard-privacy reason so the - omission reads as deliberate, not missing. +@pytest.mark.parametrize( + ("tool_name", "must_include"), + [ + ("capture_pane", "snapshot_pane"), + ("capture_pane", "wait_for_text"), + ("capture_pane", "search_panes"), + # Truncation marker — agents need to know the first line of a + # truncated capture is a literal '[... truncated K lines ...]' + # header, not terminal content. Without this in the description, + # an agent might treat the marker as the most recent output. + ("capture_pane", "truncated"), + ("show_hooks", "tmux config file"), + ("load_buffer", "list_buffers"), + ("load_buffer", "clipboard history"), + ("send_keys", "wait_for_text"), + ("list_panes", "search_panes"), + ("list_windows", "search_panes"), + ], +) +def test_tool_description_includes(tool_name: str, must_include: str) -> None: + """Tool descriptions carry cross-references the agent needs at the call site. + + Phase 1 of the BASE_INSTRUCTIONS slim-down: rules that are tool-specific + live in tool descriptions (surfaced by FastMCP at every ``list_tools`` + call), not in the global card or in module docstrings (which FastMCP + does not surface). The asserted phrases are the ones an agent would + look for when deciding which tool to call: + + * ``capture_pane`` cross-references richer alternatives + (``snapshot_pane``, ``wait_for_text``) and the parallel-search tool + (``search_panes``). + * ``show_hooks`` carries the no-set_hook rationale ("tmux config + file") that previously lived only in ``hook_tools``' module + docstring. + * ``load_buffer`` carries the no-list_buffers / clipboard-privacy + rationale that previously lived only in ``buffer_tools``' module + docstring. + * ``send_keys`` points at ``wait_for_text`` instead of a poll loop. + * ``list_panes`` / ``list_windows`` point at ``search_panes`` for + content (vs. metadata-only) queries. + + The "tool exists" assertion is a strict upgrade over substring tests + on ``_BASE_INSTRUCTIONS``: a future rename that drops the rule fails + here instead of silently losing agent-relevant guidance. """ - assert "BUFFERS" in _BASE_INSTRUCTIONS - assert "load_buffer" in _BASE_INSTRUCTIONS - assert "paste_buffer" in _BASE_INSTRUCTIONS - assert "delete_buffer" in _BASE_INSTRUCTIONS - assert "BufferRef" in _BASE_INSTRUCTIONS - assert "list_buffers" in _BASE_INSTRUCTIONS - assert "clipboard history" in _BASE_INSTRUCTIONS + import asyncio + + from fastmcp import FastMCP + + from libtmux_mcp.tools import register_tools + + mcp = FastMCP(name="tool-description-contract") + register_tools(mcp) + + tools = asyncio.run(mcp.list_tools()) + by_name = {tool.name: tool for tool in tools} + assert tool_name in by_name, f"{tool_name!r} is not registered" + description = by_name[tool_name].description or "" + assert must_include in description, ( + f"{tool_name!r} description missing {must_include!r}; got {description!r}" + ) def test_build_instructions_documents_is_caller_workflow_inside_tmux(