Skip to content

Commit 35821bb

Browse files
committed
feat(sprites): close control connections after configurable idle window
Adds an idle-close watcher to SpritesSandboxSession that closes pooled control connections after ``idle_close_seconds`` of no I/O. Once the last WS closes, the sprite drops back to ``warm`` and stops accruing running-state cost; the next I/O reopens a connection and the platform auto-wakes the sprite on traffic arrival (~1s wake-up).

The watcher is a single asyncio Task scheduled on first activity. Each I/O hook calls _touch_activity(), which (a) updates the last-activity timestamp and (b) respawns the watcher if it has previously exited. The loop sleeps until the deadline, re-checks (since activity may have shifted the deadline forward), and exits after firing the close so the next activity gets a fresh task.

PTY operations skip the close (their connections must stay open). Shutdown cancels and awaits the watcher before tearing down.

New public knob:
- SpritesSandboxClientOptions.idle_close_seconds (default 60.0)
- SpritesSandboxSessionState.idle_close_seconds

Set to 0 (or negative) to disable. The field is appended at the end of both classes; the compat-guard parametrize entries are updated.

4 new unit tests cover the watcher closing, the disabled mode, the PTY-active skip, and the activity-resets-deadline behavior.
1 parent b80c6eb commit 35821bb

3 files changed

Lines changed: 183 additions & 5 deletions

File tree

src/agents/extensions/sandbox/sprites/sandbox.py

Lines changed: 92 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@
100100
DEFAULT_SPRITES_API_URL = "https://api.sprites.dev"
101101
DEFAULT_SPRITES_WORKSPACE_ROOT = "/workspace"
102102
DEFAULT_SPRITES_WAIT_FOR_RUNNING_TIMEOUT_S = 45.0
103+
DEFAULT_SPRITES_IDLE_CLOSE_SECONDS = 60.0
104+
"""Default idle threshold after which control connections are closed so the
105+
sprite can drop back to ``warm`` and stop accruing running-state cost. The
106+
next I/O reopens a control connection; the platform auto-wakes the sprite on
107+
traffic arrival, so the cost is just the WS reconnect (~1s)."""
103108

104109
# The upstream sprite status enum is not exported from sprites-py; values are
105110
# defined by the API. A sprite that has finished provisioning reports either
@@ -192,6 +197,12 @@ class SpritesSandboxClientOptions(BaseSandboxClientOptions):
192197
workspace_persistence: WorkspacePersistenceMode = "tar"
193198
"""Workspace persistence mode. v1 supports only ``"tar"``."""
194199

200+
idle_close_seconds: float = DEFAULT_SPRITES_IDLE_CLOSE_SECONDS
201+
"""Seconds of inactivity after which the session closes its control
202+
connections so the sprite can drop back to ``warm``. Set to ``0`` (or
203+
any negative value) to disable — connections stay open until shutdown.
204+
Default ``60.0`` matches Sprites' running-state idle billing window."""
205+
195206
def __init__(
196207
self,
197208
sprite_name: str | None = None,
@@ -205,6 +216,7 @@ def __init__(
205216
timeout_ms: int | None = None,
206217
wait_for_running_timeout_s: float = DEFAULT_SPRITES_WAIT_FOR_RUNNING_TIMEOUT_S,
207218
workspace_persistence: WorkspacePersistenceMode = "tar",
219+
idle_close_seconds: float = DEFAULT_SPRITES_IDLE_CLOSE_SECONDS,
208220
*,
209221
type: Literal["sprites"] = "sprites",
210222
) -> None:
@@ -221,6 +233,7 @@ def __init__(
221233
timeout_ms=timeout_ms,
222234
wait_for_running_timeout_s=wait_for_running_timeout_s,
223235
workspace_persistence=workspace_persistence,
236+
idle_close_seconds=idle_close_seconds,
224237
)
225238

226239

@@ -243,6 +256,7 @@ class SpritesSandboxSessionState(SandboxSessionState):
243256
env: dict[str, str] | None = None
244257
timeout_ms: int | None = None
245258
workspace_persistence: WorkspacePersistenceMode = "tar"
259+
idle_close_seconds: float = DEFAULT_SPRITES_IDLE_CLOSE_SECONDS
246260

247261

248262
class SpritesSandboxSession(BaseSandboxSession):
@@ -258,6 +272,9 @@ class SpritesSandboxSession(BaseSandboxSession):
258272
_pty_processes: dict[int, _SpritePtyProcessEntry]
259273
_reserved_pty_process_ids: set[int]
260274
_warmth_verified: bool
275+
_last_activity_at: float
276+
_idle_close_seconds: float
277+
_idle_watch_task: asyncio.Task[None] | None
261278

262279
def __init__(
263280
self,
@@ -277,12 +294,15 @@ def __init__(
277294
self._pty_lock = asyncio.Lock()
278295
self._pty_processes = {}
279296
self._reserved_pty_process_ids = set()
280-
# ``_warmth_verified`` tracks whether we've already confirmed the sprite
281-
# is warm/running. Set when a fresh sprite is provisioned (we have to
282-
# poll anyway), or when the first I/O operation drives a successful
283-
# control-plane connect. Resetting via ``_invalidate_warmth`` after a
284-
# transport error forces a re-check on the next operation.
285297
self._warmth_verified = False
298+
# Idle-close: when an I/O operation hasn't run for ``idle_close_seconds``,
299+
# the watcher closes the control-connection pool so the sprite can drop
300+
# to ``warm`` and stop accruing running-state cost. The next I/O
301+
# operation reopens a connection; the platform auto-wakes the sprite on
302+
# traffic arrival.
303+
self._last_activity_at = time.monotonic()
304+
self._idle_close_seconds = float(state.idle_close_seconds)
305+
self._idle_watch_task = None
286306

287307
@classmethod
288308
def from_state(
@@ -370,6 +390,7 @@ async def _ensure_warm(self) -> None:
370390
out from under us and we have to re-attach in a recovery flow).
371391
"""
372392

393+
self._touch_activity()
373394
if self._warmth_verified:
374395
return
375396
await self._wait_for_sprite_running()
@@ -380,6 +401,62 @@ def _invalidate_warmth(self) -> None:
380401

381402
self._warmth_verified = False
382403

404+
def _touch_activity(self) -> None:
    """Record the current monotonic time as the latest I/O activity.

    Also asks ``_maybe_start_idle_watch`` to (re)start the idle watcher;
    that call does nothing while a watcher task is still running.
    """
    now = time.monotonic()
    self._last_activity_at = now
    self._maybe_start_idle_watch()
410+
411+
def _maybe_start_idle_watch(self) -> None:
    """Start the idle-close watcher task unless one is already running.

    Does nothing when idle-close is disabled (``_idle_close_seconds <= 0``)
    or when the current watcher task has not finished yet. When called
    without a running event loop, no task is created and
    ``_idle_watch_task`` is reset to ``None`` so that a later call made
    from inside a loop can start the watcher.
    """
    if self._idle_close_seconds <= 0:
        return
    task = self._idle_watch_task
    if task is not None and not task.done():
        return
    # Resolve the loop *before* building the coroutine. With
    # ``asyncio.create_task(self._idle_watch_loop())`` the coroutine object is
    # created first and then orphaned when ``create_task`` raises, which
    # produces "RuntimeWarning: coroutine ... was never awaited".
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running event loop (e.g. unit-test fixture creating a session
        # outside an asyncio context). The watcher will start on the next
        # I/O call from inside an active loop.
        self._idle_watch_task = None
        return
    self._idle_watch_task = loop.create_task(self._idle_watch_loop())
424+
425+
async def _idle_watch_loop(self) -> None:
    """Wait until the session has been idle long enough, then close once.

    The deadline is ``_last_activity_at + _idle_close_seconds``. It is
    re-evaluated after every sleep because activity recorded in the
    meantime moves it forward. Once the deadline has passed, the loop
    calls ``_close_idle_control_connections`` a single time and returns;
    the next ``_touch_activity`` call is expected to spawn a fresh watcher.

    Cancellation is not swallowed: ``asyncio.CancelledError`` propagates so
    the task ends in the cancelled state rather than looking like a normal
    completion. Code that cancels and then awaits this task must handle
    ``CancelledError``.
    """
    while True:
        idle_for = time.monotonic() - self._last_activity_at
        remaining = self._idle_close_seconds - idle_for
        if remaining <= 0:
            break
        await asyncio.sleep(remaining)
    await self._close_idle_control_connections()
442+
443+
async def _close_idle_control_connections(self) -> None:
    """Close pooled control connections so the sprite can drop to ``warm``.

    Skipped when ``_pty_processes`` is non-empty (active PTY operations need
    their connections kept alive) or when no sprite is attached.

    The close is best-effort: any exception raised by
    ``close_control_connection`` is logged at debug level and suppressed,
    so this method never raises because of a failed close.

    NOTE(review): only PTY entries are checked here; whether a long-running
    non-PTY operation can still be using a pooled connection at this point
    is not visible from this method -- confirm against the I/O call sites.
    """
    import logging

    if self._pty_processes:
        return
    sprite = self._sprite
    if sprite is None:
        return
    try:
        await sprite.close_control_connection()
    except Exception:
        # Failing to close an idle connection must not break the session,
        # but leave a trace so a recurring failure can be diagnosed.
        logging.getLogger(__name__).debug(
            "Failed to close idle Sprites control connections", exc_info=True
        )
459+
383460
def _build_sprite_config(self) -> sprites.SpriteConfig | None:
384461
if (
385462
self.state.ram_mb is None
@@ -559,6 +636,16 @@ async def running(self) -> bool:
559636
return bool(refreshed.status in _SPRITE_READY_STATUSES)
560637

561638
async def shutdown(self) -> None:
639+
# Stop the idle watcher first so it doesn't race with our cleanup.
640+
watcher = self._idle_watch_task
641+
if watcher is not None and not watcher.done():
642+
watcher.cancel()
643+
try:
644+
await watcher
645+
except (asyncio.CancelledError, Exception):
646+
pass
647+
self._idle_watch_task = None
648+
562649
# Tear down any in-flight PTY operations first so their control connections
563650
# are released back to the pool before the sprite is deleted.
564651
try:

tests/extensions/test_sandbox_sprites.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,3 +1154,92 @@ async def test_lazy_warm_invalidate_forces_repoll(patched_sprites: dict[str, Any
11541154
inner._invalidate_warmth()
11551155
await inner._exec_internal("echo", "2")
11561156
assert len(fake_client.get_sprite_calls) == 2
1157+
1158+
1159+
# ---------- 18. Idle-close watcher ----------
1160+
1161+
1162+
@pytest.mark.asyncio
async def test_idle_watch_closes_control_connections_after_threshold(
    patched_sprites: dict[str, Any],
) -> None:
    """The watcher closes the control connection once the idle window elapses."""
    client = patched_sprites["client"]
    fake_sprite = _FakeSprite(name=SPRITE_NAME)
    client._sprites_by_name[SPRITE_NAME] = fake_sprite
    session = SpritesSandboxSession.from_state(_make_state(), token="tok")
    _attach(session, client=client, sprite=fake_sprite)
    # A tiny idle window keeps the test fast.
    session._idle_close_seconds = 0.01

    # The first touch spawns the watcher; awaiting the task lets the idle
    # threshold elapse and the close fire.
    session._touch_activity()
    watcher = session._idle_watch_task
    assert watcher is not None
    await asyncio.wait_for(watcher, timeout=1.0)
    assert fake_sprite.close_control_connection_calls == 1
1181+
1182+
1183+
@pytest.mark.asyncio
async def test_idle_watch_disabled_when_seconds_is_zero(
    patched_sprites: dict[str, Any],
) -> None:
    """With ``idle_close_seconds == 0`` no watcher starts and nothing is closed."""
    client = patched_sprites["client"]
    fake_sprite = _FakeSprite(name=SPRITE_NAME)
    client._sprites_by_name[SPRITE_NAME] = fake_sprite
    disabled_state = _make_state(idle_close_seconds=0)
    session = SpritesSandboxSession.from_state(disabled_state, token="tok")
    _attach(session, client=client, sprite=fake_sprite)
    session._idle_close_seconds = 0  # belt-and-braces

    session._touch_activity()
    assert session._idle_watch_task is None
    # Give a would-be watcher time to run; the close must never fire.
    await asyncio.sleep(0.05)
    assert fake_sprite.close_control_connection_calls == 0
1200+
1201+
1202+
@pytest.mark.asyncio
async def test_idle_watch_skipped_when_pty_active(
    patched_sprites: dict[str, Any],
) -> None:
    """An idle close is refused while a PTY entry is registered."""
    client = patched_sprites["client"]
    fake_sprite = _FakeSprite(name=SPRITE_NAME)
    client._sprites_by_name[SPRITE_NAME] = fake_sprite
    session = SpritesSandboxSession.from_state(_make_state(), token="tok")
    _attach(session, client=client, sprite=fake_sprite)
    session._idle_close_seconds = 0.01
    # Any registered entry counts as an active PTY for the close guard.
    session._pty_processes[123] = object()  # type: ignore[assignment]

    await session._close_idle_control_connections()
    assert fake_sprite.close_control_connection_calls == 0
1218+
1219+
1220+
@pytest.mark.asyncio
async def test_activity_during_idle_window_keeps_connection_open(
    patched_sprites: dict[str, Any],
) -> None:
    """Activity inside the idle window pushes the close deadline forward.

    Timeline (seconds after the first touch), chosen so that each assertion
    sits 50 ms away from the nearest deadline and tolerates scheduler jitter:

    * 0.00  first touch    -> deadline 0.20
    * 0.10  second touch   -> deadline 0.30
    * 0.25  check          -> past the original deadline, before the new one
    """
    fake_client = patched_sprites["client"]
    sprite = _FakeSprite(name=SPRITE_NAME)
    fake_client._sprites_by_name[SPRITE_NAME] = sprite
    state = _make_state()
    inner = SpritesSandboxSession.from_state(state, token="tok")
    _attach(inner, client=fake_client, sprite=sprite)
    inner._idle_close_seconds = 0.2

    inner._touch_activity()
    # Half the window: nudge activity forward so the deadline shifts.
    await asyncio.sleep(0.1)
    inner._touch_activity()
    # Wait long enough for the original deadline to have passed had we not
    # touched activity, but short of the new deadline.
    await asyncio.sleep(0.15)
    # The connection should still be open at this point.
    assert sprite.close_control_connection_calls == 0
    # Now actually let it idle out fully.
    watcher = inner._idle_watch_task
    assert watcher is not None
    await asyncio.wait_for(watcher, timeout=1.0)
    assert sprite.close_control_connection_calls == 1

tests/sandbox/test_compatibility_guards.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ def test_optional_sandbox_dataclass_constructor_field_order_is_stable(
535535
"timeout_ms",
536536
"wait_for_running_timeout_s",
537537
"workspace_persistence",
538+
"idle_close_seconds",
538539
),
539540
),
540541
],
@@ -794,6 +795,7 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable(
794795
"env",
795796
"timeout_ms",
796797
"workspace_persistence",
798+
"idle_close_seconds",
797799
),
798800
),
799801
],

0 commit comments

Comments
 (0)