Skip to content

Commit 017eaef

Browse files
authored
fix(wait_for_tools): unblock event loop via asyncio.to_thread (#21)
why: `wait_for_channel` and `signal_channel` were sync `def` MCP tools running blocking `subprocess.run(timeout=N)` on FastMCP's event-loop thread. For the duration of a wait the server could not service other tool calls, MCP pings, or client cancellations; clients whose stdio keepalive was shorter than the timeout could disconnect mid-wait. The 0.1.0a2 release applied the same fix to `wait_for_text` / `wait_for_content_change`, but left the channel pair behind. what: - **Async conversion.** Both tools become `async def` and swap `@handle_tool_errors` → `@handle_tool_errors_async`. FastMCP auto-detects coroutine handlers at registration; no registration change needed. - **`asyncio.to_thread` offload.** The blocking `subprocess.run` now runs in the default executor so the event loop stays free to service concurrent tool calls and cancellation signals. - **Cancellation propagation.** `handle_tool_errors_async` catches `Exception` (not `BaseException`); the narrow `subprocess.*` except-blocks in the tool body cannot swallow `asyncio.CancelledError` either — so client-initiated cancels surface cleanly through the decorator. - **Regression tests.** `test_channel_tools_are_coroutines` pins the async surface; `test_wait_for_channel_does_not_block_event_loop` fires a 10 ms ticker concurrent with a 500 ms unsignalled wait and asserts ≥ 20 ticks (≈ 0 before the fix); `test_wait_for_channel_propagates_cancellation` mirrors the pattern added for the sibling pane-wait tools in 0.1.0a2. Fixes #18 See also: codex and gemini CLI harnesses empirically confirmed the fix — `list_sessions` returned in 20 ms while `wait_for_channel` was pending a 2 s timeout (pre-fix the second call would block for the full timeout)
2 parents 22323a2 + 9546080 commit 017eaef

4 files changed

Lines changed: 161 additions & 28 deletions

File tree

CHANGES

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ _Notes on upcoming releases will be added here_
2626
`_caller_is_on_server` (the same socket-scoped comparator used by
2727
the self-kill guard) via the new `_compute_is_caller` helper
2828
(#22, fixes #19).
29+
- {tooliconl}`wait-for-channel` and {tooliconl}`signal-channel` no
30+
longer block the FastMCP event loop. Both were sync `def` handlers
31+
running `subprocess.run(timeout=N)` on the main loop — for the
32+
duration of the wait the server could not service other tool calls,
33+
MCP pings, or client cancellations. Ported to `async def` +
34+
{func}`asyncio.to_thread`, matching the pattern already used by
35+
{tooliconl}`wait-for-text` / {tooliconl}`wait-for-content-change`
36+
(#21, fixes #18).
2937

3038
## libtmux-mcp 0.1.0a2 (2026-04-19)
3139

src/libtmux_mcp/tools/wait_for_tools.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from __future__ import annotations
2626

27+
import asyncio
2728
import re
2829
import subprocess
2930
import typing as t
@@ -35,7 +36,7 @@
3536
TAG_MUTATING,
3637
_get_server,
3738
_tmux_argv,
38-
handle_tool_errors,
39+
handle_tool_errors_async,
3940
)
4041

4142
if t.TYPE_CHECKING:
@@ -96,8 +97,8 @@ def _validate_channel_name(name: str) -> str:
9697
return name
9798

9899

99-
@handle_tool_errors
100-
def wait_for_channel(
100+
@handle_tool_errors_async
101+
async def wait_for_channel(
101102
channel: str,
102103
timeout: float = 30.0,
103104
socket_name: str | None = None,
@@ -143,7 +144,15 @@ def wait_for_channel(
143144
cname = _validate_channel_name(channel)
144145
argv = _tmux_argv(server, "wait-for", cname)
145146
try:
146-
subprocess.run(argv, check=True, capture_output=True, timeout=timeout)
147+
# FastMCP direct-awaits async tools on its event loop. ``tmux
148+
# wait-for`` blocks for the full timeout (up to 30 s by default)
149+
# so the synchronous ``subprocess.run`` must run off the loop —
150+
# otherwise no other tool call, MCP ping, or cancellation can
151+
# be serviced for the duration of the wait. Mirror the pattern
152+
# used by :func:`~libtmux_mcp.tools.pane_tools.wait.wait_for_text`.
153+
await asyncio.to_thread(
154+
subprocess.run, argv, check=True, capture_output=True, timeout=timeout
155+
)
147156
except subprocess.TimeoutExpired as e:
148157
msg = f"wait-for timeout: channel {cname!r} was not signalled within {timeout}s"
149158
raise ToolError(msg) from e
@@ -154,8 +163,8 @@ def wait_for_channel(
154163
return f"Channel {cname!r} was signalled"
155164

156165

157-
@handle_tool_errors
158-
def signal_channel(
166+
@handle_tool_errors_async
167+
async def signal_channel(
159168
channel: str,
160169
socket_name: str | None = None,
161170
) -> str:
@@ -180,8 +189,12 @@ def signal_channel(
180189
cname = _validate_channel_name(channel)
181190
argv = _tmux_argv(server, "wait-for", "-S", cname)
182191
try:
183-
subprocess.run(
184-
argv, check=True, capture_output=True, timeout=_SIGNAL_TIMEOUT_SECONDS
192+
await asyncio.to_thread(
193+
subprocess.run,
194+
argv,
195+
check=True,
196+
capture_output=True,
197+
timeout=_SIGNAL_TIMEOUT_SECONDS,
185198
)
186199
except subprocess.TimeoutExpired as e:
187200
msg = (

tests/test_pane_tools.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,7 @@ def test_search_panes_per_pane_matched_lines_cap(
742742
``capture_pane`` (command-line plus output-line for each), well
743743
past the truncation threshold of three.
744744
"""
745+
import asyncio
745746
import uuid
746747

747748
from libtmux_mcp.tools.wait_for_tools import wait_for_channel
@@ -753,7 +754,11 @@ def test_search_panes_per_pane_matched_lines_cap(
753754
f"tmux wait-for -S {channel}"
754755
)
755756
mcp_pane.send_keys(payload, enter=True)
756-
wait_for_channel(channel=channel, timeout=5.0, socket_name=mcp_server.socket_name)
757+
asyncio.run(
758+
wait_for_channel(
759+
channel=channel, timeout=5.0, socket_name=mcp_server.socket_name
760+
)
761+
)
757762

758763
result = search_panes(
759764
pattern=marker,

tests/test_wait_for_tools.py

Lines changed: 126 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import asyncio
56
import threading
67
import time
78
import typing as t
@@ -38,6 +39,20 @@ def test_validate_channel_name_rejects_invalid(name: str) -> None:
3839
_validate_channel_name(name)
3940

4041

42+
def test_channel_tools_are_coroutines() -> None:
43+
"""Both tools must be ``async def`` so FastMCP awaits them.
44+
45+
Regression guard for tmux-python/libtmux-mcp#18: sync ``def`` tools
46+
were direct-called on FastMCP's event loop and the internal
47+
``subprocess.run`` blocked stdio for the full timeout window. The
48+
fix converts both to ``async def`` + ``asyncio.to_thread``; this
49+
assertion pins the async surface so a silent revert doesn't sneak
50+
through.
51+
"""
52+
assert asyncio.iscoroutinefunction(wait_for_channel)
53+
assert asyncio.iscoroutinefunction(signal_channel)
54+
55+
4156
@pytest.mark.usefixtures("mcp_session")
4257
def test_signal_channel_no_waiter_is_noop(mcp_server: Server) -> None:
4358
"""``tmux wait-for -S`` on an unwaited channel returns successfully.
@@ -47,9 +62,11 @@ def test_signal_channel_no_waiter_is_noop(mcp_server: Server) -> None:
4762
unstarted Server instance, so ``mcp_session`` is what actually
4863
boots the tmux process.
4964
"""
50-
result = signal_channel(
51-
channel="wf_test_noop",
52-
socket_name=mcp_server.socket_name,
65+
result = asyncio.run(
66+
signal_channel(
67+
channel="wf_test_noop",
68+
socket_name=mcp_server.socket_name,
69+
)
5370
)
5471
assert "signalled" in result
5572

@@ -61,15 +78,17 @@ def test_wait_for_channel_returns_when_signalled(mcp_server: Server) -> None:
6178

6279
def _signal_after_delay() -> None:
6380
time.sleep(0.3)
64-
signal_channel(channel=channel, socket_name=mcp_server.socket_name)
81+
asyncio.run(signal_channel(channel=channel, socket_name=mcp_server.socket_name))
6582

6683
thread = threading.Thread(target=_signal_after_delay)
6784
thread.start()
6885
try:
69-
result = wait_for_channel(
70-
channel=channel,
71-
timeout=5.0,
72-
socket_name=mcp_server.socket_name,
86+
result = asyncio.run(
87+
wait_for_channel(
88+
channel=channel,
89+
timeout=5.0,
90+
socket_name=mcp_server.socket_name,
91+
)
7392
)
7493
assert "signalled" in result
7594
finally:
@@ -81,10 +100,12 @@ def test_wait_for_channel_times_out(mcp_server: Server) -> None:
81100
"""Unsignalled channel raises a timeout ``ToolError`` within the cap."""
82101
start = time.monotonic()
83102
with pytest.raises(ToolError, match="wait-for timeout"):
84-
wait_for_channel(
85-
channel="wf_timeout_test",
86-
timeout=0.5,
87-
socket_name=mcp_server.socket_name,
103+
asyncio.run(
104+
wait_for_channel(
105+
channel="wf_timeout_test",
106+
timeout=0.5,
107+
socket_name=mcp_server.socket_name,
108+
)
88109
)
89110
elapsed = time.monotonic() - start
90111
# Allow generous slack for tmux subprocess spawn overhead.
@@ -94,17 +115,103 @@ def test_wait_for_channel_times_out(mcp_server: Server) -> None:
94115
def test_wait_for_channel_rejects_invalid_name(mcp_server: Server) -> None:
95116
"""Invalid channel names are rejected before spawning tmux."""
96117
with pytest.raises(ToolError, match="Invalid channel name"):
97-
wait_for_channel(
98-
channel="has space",
99-
timeout=1.0,
100-
socket_name=mcp_server.socket_name,
118+
asyncio.run(
119+
wait_for_channel(
120+
channel="has space",
121+
timeout=1.0,
122+
socket_name=mcp_server.socket_name,
123+
)
101124
)
102125

103126

104127
def test_signal_channel_rejects_invalid_name(mcp_server: Server) -> None:
105128
"""Invalid channel names are rejected before spawning tmux."""
106129
with pytest.raises(ToolError, match="Invalid channel name"):
107-
signal_channel(
108-
channel="has/slash",
109-
socket_name=mcp_server.socket_name,
130+
asyncio.run(
131+
signal_channel(
132+
channel="has/slash",
133+
socket_name=mcp_server.socket_name,
134+
)
110135
)
136+
137+
138+
@pytest.mark.usefixtures("mcp_session")
139+
def test_wait_for_channel_does_not_block_event_loop(mcp_server: Server) -> None:
140+
"""Concurrent coroutines must make progress while the wait is pending.
141+
142+
Regression guard for tmux-python/libtmux-mcp#18. Before the fix,
143+
``subprocess.run`` blocked the FastMCP event loop for the full
144+
timeout; the ticker below would advance only between poll iterations
145+
(which there aren't any of — the subprocess is a single blocking
146+
call). With ``asyncio.to_thread`` the ticker must fire many times
147+
while the tmux subprocess waits for its signal.
148+
149+
Discriminator: the wait is set to 0.5 s on an unsignalled channel.
150+
The ticker samples at 10 ms. With the fix we expect ≥ 20 ticks
151+
(500 ms / 10 ms = 50 nominal, halved to guard against CI jitter);
152+
without the fix we expect 0 — the event loop is pinned in
153+
``subprocess.run`` until it times out.
154+
"""
155+
156+
async def _drive() -> int:
157+
ticks = 0
158+
stop = asyncio.Event()
159+
160+
async def _ticker() -> None:
161+
nonlocal ticks
162+
while not stop.is_set():
163+
ticks += 1
164+
await asyncio.sleep(0.01)
165+
166+
async def _waiter() -> None:
167+
try:
168+
with pytest.raises(ToolError, match="wait-for timeout"):
169+
await wait_for_channel(
170+
channel="wf_evtloop_test",
171+
timeout=0.5,
172+
socket_name=mcp_server.socket_name,
173+
)
174+
finally:
175+
stop.set()
176+
177+
await asyncio.gather(_ticker(), _waiter())
178+
return ticks
179+
180+
ticks = asyncio.run(_drive())
181+
assert ticks >= 20, (
182+
f"ticker advanced only {ticks} times — wait_for_channel is blocking "
183+
f"the event loop instead of running the subprocess in a thread"
184+
)
185+
186+
187+
@pytest.mark.usefixtures("mcp_session")
188+
def test_wait_for_channel_propagates_cancellation(mcp_server: Server) -> None:
189+
"""``wait_for_channel`` raises ``CancelledError`` (not ``ToolError``).
190+
191+
MCP cancellation semantics: when a client cancels an in-flight tool
192+
call, the awaiting ``asyncio.Task`` receives ``CancelledError``.
193+
``handle_tool_errors_async`` catches ``Exception`` (not
194+
``BaseException``), and the function's narrow ``subprocess.*``
195+
except-blocks cannot swallow ``CancelledError`` either — so the
196+
cancellation propagates through the decorator naturally. This test
197+
locks that contract in so a future broadening of the catch
198+
(e.g. ``except BaseException``) trips immediately.
199+
200+
Uses ``task.cancel()`` rather than ``asyncio.wait_for`` so the
201+
raised exception is the inner ``CancelledError`` directly.
202+
"""
203+
204+
async def _runner() -> None:
205+
task = asyncio.create_task(
206+
wait_for_channel(
207+
channel="wf_cancel_test",
208+
timeout=10.0,
209+
socket_name=mcp_server.socket_name,
210+
)
211+
)
212+
await asyncio.sleep(0.1) # let the to_thread handoff start
213+
task.cancel()
214+
await task
215+
216+
with pytest.raises(asyncio.CancelledError):
217+
asyncio.run(_runner())

0 commit comments

Comments
 (0)