diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml
new file mode 100644
index 0000000..fc176b3
--- /dev/null
+++ b/.github/workflows/canary.yml
@@ -0,0 +1,48 @@
+name: Canary — plugin vs Claude Code latest
+
+on:
+  schedule:
+    # 05:13 & 17:13 America/Los_Angeles while PDT (UTC-7) is in effect; cron is
+    # UTC-only, so under PST (UTC-8) the runs land at 04:13 & 16:13. Leads the
+    # fleet's hour-staggered canary waves so upstream breakage surfaces here first.
+    - cron: "13 12 * * *"
+    - cron: "13 0 * * *"
+  workflow_dispatch:
+
+jobs:
+  contract:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Install bridge + latest SDK
+        run: |
+          pip install -e . pytest
+          pip install -U claude-agent-sdk
+
+      - name: Install latest Claude Code CLI
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Contract tests vs latest host
+        run: pytest -q tests/contract
+
+      # Unattended runs page the team; manual dispatches don't.
+      - name: Notify Google Chat on failure
+        if: failure() && github.event_name == 'schedule'
+        env:
+          WEBHOOK_URL: ${{ secrets.GOOGLE_CHAT_WEBHOOK_URL }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          [ -n "$WEBHOOK_URL" ] || exit 0  # no webhook configured: skip quietly
+          curl -sS -X POST -H 'Content-Type: application/json' "$WEBHOOK_URL" \
+            -d "{\"text\": \"🚨 claude-code-plugin canary FAILED against the latest Claude Code host: $RUN_URL\"}" || true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index e12f42a..0000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: CI
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-
-      # inkbox is mocked in the tests, so install only what they import.
-      - name: Install test deps
-        run: pip install pytest aiohttp segno claude-agent-sdk
-
-      - name: Test
-        run: pytest -q
diff --git a/.github/workflows/live-channels.yml b/.github/workflows/live-channels.yml
new file mode 100644
index 0000000..72d2fc9
--- /dev/null
+++ b/.github/workflows/live-channels.yml
@@ -0,0 +1,165 @@
+name: Live channels e2e
+
+# Boots a REAL bridge (tunnel + webhooks + Claude Code sessions) with this
+# checkout, then drives it from a remote Inkbox identity over email + SMS.
+# Two legs, serialized (they share the AUT identity):
+#   * mock — the sessions "think" against a local deterministic model server
+#            (tests/live/mock_anthropic.py): free, proves the whole pipe.
+#   * real — a real Claude model: proves reasoning + tool use end to end.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+  workflow_dispatch:
+    inputs:
+      timeout_s:
+        description: "Per-question reply timeout (seconds)"
+        required: false
+        default: "150"
+  workflow_run:
+    workflows: ["Canary — plugin vs Claude Code latest"]
+    types: [completed]
+
+# One holder of the AUT identity's Inkbox tunnel at a time; the voice suite shares
+# this group. Runs wait their turn, but GitHub keeps only the newest pending one.
+concurrency:
+  group: inkbox-live-aut-tunnel
+  cancel-in-progress: false
+
+jobs:
+  live:
+    # Draft PRs and fork PRs (no secrets) skip; chained runs only follow a
+    # PASSING canary.
+    if: >
+      (github.event_name == 'pull_request' &&
+       github.event.pull_request.draft == false &&
+       github.event.pull_request.head.repo.full_name == github.repository) ||
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    strategy:
+      max-parallel: 1
+      fail-fast: false
+      matrix:
+        leg: [mock, real]
+    env:
+      INKBOX_API_KEY: ${{ secrets.CLAUDE_CODE_INKBOX_API_KEY }}
+      INKBOX_SIGNING_KEY: ${{ secrets.CLAUDE_CODE_INKBOX_SIGNING_KEY }}
+      CLAUDE_PROJECT_DIR: ${{ github.workspace }}
+      # A stray permission escalation should fail a test fast, not park the
+      # session for the default 10 minutes.
+      INKBOX_PERMISSION_TIMEOUT_S: "30"
+      DISABLE_AUTOUPDATER: "1"
+      CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1"
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Install bridge
+        run: pip install -e . pytest
+
+      - name: Install Claude Code CLI
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Derive AUT identity handle
+        run: |
+          HANDLE=$(python3 - <<'PY'
+          import os
+          from inkbox import Inkbox
+          c = Inkbox(api_key=os.environ["INKBOX_API_KEY"])
+          print(c.mailboxes.list()[0].email_address.split("@", 1)[0])
+          PY
+          )
+          echo "INKBOX_IDENTITY=$HANDLE" >> "$GITHUB_ENV"
+          echo "AUT handle: $HANDLE"
+
+      - name: Start mock model server
+        if: matrix.leg == 'mock'
+        run: |
+          nohup python3 tests/live/mock_anthropic.py 8089 > /tmp/mock_anthropic.log 2>&1 &
+          for i in $(seq 1 10); do
+            curl -fsS http://127.0.0.1:8089/ > /dev/null 2>&1 && exit 0
+            sleep 1
+          done
+          echo "mock model server never came up"; exit 1
+
+      - name: Point sessions at the mock model
+        if: matrix.leg == 'mock'
+        run: printf '%s\n' "ANTHROPIC_BASE_URL=http://127.0.0.1:8089" "ANTHROPIC_API_KEY=sk-mock-not-used" >> "$GITHUB_ENV"
+
+      - name: Point sessions at the real model
+        if: matrix.leg == 'real'
+        env:  # handed over via env so the key's characters are never parsed as shell
+          REAL_ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: echo "ANTHROPIC_API_KEY=$REAL_ANTHROPIC_API_KEY" >> "$GITHUB_ENV"
+
+      - name: Start bridge gateway
+        run: |
+          nohup inkbox-claude run > /tmp/gateway.log 2>&1 &
+          echo $! > /tmp/gateway.pid
+          for i in $(seq 1 36); do  # 36 polls with a 5 s sleep: up to ~3 minutes
+            if grep -q "\[bridge\] ready" /tmp/gateway.log; then
+              echo "gateway ready"; exit 0
+            fi
+            if ! kill -0 "$(cat /tmp/gateway.pid)" 2>/dev/null; then  # signal 0 only probes the PID
+              echo "gateway process died during startup"; tail -n 150 /tmp/gateway.log; exit 1
+            fi
+            sleep 5
+          done
+          echo "gateway never became ready"; tail -n 150 /tmp/gateway.log; exit 1
+
+      - name: Live channel tests (${{ matrix.leg }} model)
+        env:
+          REMOTE_INKBOX_API_KEY: ${{ secrets.REMOTE_INKBOX_API_KEY }}
+          CLAUDE_CODE_INKBOX_API_KEY: ${{ secrets.CLAUDE_CODE_INKBOX_API_KEY }}
+          LIVE_EMAIL_TIMEOUT: ${{ github.event.inputs.timeout_s || '150' }}
+          LIVE_REAL_MODEL: ${{ matrix.leg == 'real' && '1' || '' }}
+          LIVE_CONTACT_CRUD: ${{ matrix.leg == 'real' && '1' || '' }}
+        run: python3 -m pytest tests/live -v
+
+      # Logs can carry live message content — surface them only when needed.
+      - name: Dump logs on failure
+        if: failure()
+        run: |
+          echo "=== gateway.log ==="; tail -n 300 /tmp/gateway.log 2>/dev/null || true
+          echo "=== mock_anthropic.log ==="; tail -n 100 /tmp/mock_anthropic.log 2>/dev/null || true
+
+      - name: Upload logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: live-channels-${{ matrix.leg }}-logs
+          path: |
+            /tmp/gateway.log
+            /tmp/mock_anthropic.log
+          if-no-files-found: ignore
+
+      - name: Stop bridge gateway
+        if: always()
+        run: |
+          [ -f /tmp/gateway.pid ] && kill "$(cat /tmp/gateway.pid)" 2>/dev/null || true
+
+  # Page the team only when an UNATTENDED run breaks (the canary chain);
+  # PR authors and manual dispatchers are already watching.
+  notify:
+    runs-on: ubuntu-latest
+    needs: live
+    if: always() && needs.live.result == 'failure' && github.event_name == 'workflow_run'
+    steps:
+      - name: Notify Google Chat
+        env:
+          WEBHOOK_URL: ${{ secrets.GOOGLE_CHAT_WEBHOOK_URL }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          [ -n "$WEBHOOK_URL" ] || exit 0  # no webhook configured: skip quietly
+          curl -sS -X POST -H 'Content-Type: application/json' "$WEBHOOK_URL" \
+            -d "{\"text\": \"🚨 claude-code-plugin live channels e2e FAILED (chained off the canary): $RUN_URL\"}" || true
diff --git a/.github/workflows/live-voice.yml b/.github/workflows/live-voice.yml
new file mode 100644
index 0000000..8b60708
--- /dev/null
+++ b/.github/workflows/live-voice.yml
@@ -0,0 +1,147 @@
+name: Live voice e2e
+
+# Real phone calls against a real bridge, one scenario per job:
+#   * inbound_inkbox    — driver calls the agent; Inkbox STT/TTS answers.
+#   * outbound_realtime — driver texts "call me"; the agent dials back powered
+#                         by the realtime voice API.
+# A driver process (tests/live/voice_driver.py) is the peer on the other end
+# of the call, bridged over the driver identity's own Inkbox tunnel.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+  workflow_dispatch:
+
+# Shares the AUT tunnel lock with the channels suite.
+concurrency:
+  group: inkbox-live-aut-tunnel
+  cancel-in-progress: false
+
+jobs:
+  voice:
+    if: >
+      (github.event_name == 'pull_request' &&
+       github.event.pull_request.draft == false &&
+       github.event.pull_request.head.repo.full_name == github.repository) ||
+      github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    strategy:
+      max-parallel: 1
+      fail-fast: false
+      matrix:
+        scenario: [inbound_inkbox, outbound_realtime]
+    env:
+      INKBOX_API_KEY: ${{ secrets.CLAUDE_CODE_INKBOX_API_KEY }}
+      INKBOX_SIGNING_KEY: ${{ secrets.CLAUDE_CODE_INKBOX_SIGNING_KEY }}
+      CLAUDE_PROJECT_DIR: ${{ github.workspace }}
+      INKBOX_PERMISSION_TIMEOUT_S: "30"
+      DISABLE_AUTOUPDATER: "1"
+      CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1"
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+      VOICE_DRIVER_STATE: /tmp/voice_driver_state.json
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      # uvicorn[standard] matters: the bare install can't accept WebSocket
+      # upgrades, and the driver's call-media endpoint is a WebSocket.
+      - name: Install bridge + driver deps
+        run: pip install -e . pytest fastapi 'uvicorn[standard]'
+
+      - name: Install Claude Code CLI
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Derive AUT identity handle
+        run: |
+          HANDLE=$(python3 - <<'PY'
+          import os
+          from inkbox import Inkbox
+          c = Inkbox(api_key=os.environ["INKBOX_API_KEY"])
+          print(c.mailboxes.list()[0].email_address.split("@", 1)[0])
+          PY
+          )
+          echo "INKBOX_IDENTITY=$HANDLE" >> "$GITHUB_ENV"
+          echo "AUT handle: $HANDLE"
+
+      - name: Configure speech mode (${{ matrix.scenario }})
+        env:
+          REALTIME_ENABLED: ${{ matrix.scenario == 'outbound_realtime' }}
+          REALTIME_API_KEY: ${{ secrets.OPENAI_API_KEY }}  # via env, never spliced into the script
+        run: |
+          echo "INKBOX_REALTIME_ENABLED=$REALTIME_ENABLED" >> "$GITHUB_ENV"
+          [ "$REALTIME_ENABLED" = "true" ] || exit 0  # Inkbox STT/TTS scenario: nothing more to set
+          echo "INKBOX_REALTIME_MODEL=gpt-realtime-2" >> "$GITHUB_ENV"
+          echo "INKBOX_REALTIME_API_KEY=$REALTIME_API_KEY" >> "$GITHUB_ENV"
+
+      - name: Start voice driver
+        env:
+          REMOTE_INKBOX_API_KEY: ${{ secrets.REMOTE_INKBOX_API_KEY }}
+        run: |
+          nohup python3 tests/live/voice_driver.py > /tmp/voice_driver.log 2>&1 &
+          echo $! > /tmp/voice_driver.pid
+          for i in $(seq 1 30); do  # 30 polls with a 3 s sleep: up to ~90 seconds
+            [ -f "$VOICE_DRIVER_STATE" ] && { echo "driver ready"; exit 0; }  # state file present = ready
+            if ! kill -0 "$(cat /tmp/voice_driver.pid)" 2>/dev/null; then  # signal 0 only probes the PID
+              echo "voice driver died during startup"; tail -n 100 /tmp/voice_driver.log; exit 1
+            fi
+            sleep 3
+          done
+          echo "voice driver never became ready"; tail -n 100 /tmp/voice_driver.log; exit 1
+
+      - name: Start bridge gateway
+        run: |
+          nohup inkbox-claude run > /tmp/gateway.log 2>&1 &
+          echo $! > /tmp/gateway.pid
+          for i in $(seq 1 36); do  # 36 polls with a 5 s sleep: up to ~3 minutes
+            if grep -q "\[bridge\] ready" /tmp/gateway.log; then
+              echo "gateway ready"; exit 0
+            fi
+            if ! kill -0 "$(cat /tmp/gateway.pid)" 2>/dev/null; then  # signal 0 only probes the PID
+              echo "gateway process died during startup"; tail -n 150 /tmp/gateway.log; exit 1
+            fi
+            sleep 5
+          done
+          echo "gateway never became ready"; tail -n 150 /tmp/gateway.log; exit 1
+
+      - name: Live voice test (${{ matrix.scenario }})
+        env:
+          REMOTE_INKBOX_API_KEY: ${{ secrets.REMOTE_INKBOX_API_KEY }}
+          CLAUDE_CODE_INKBOX_API_KEY: ${{ secrets.CLAUDE_CODE_INKBOX_API_KEY }}
+          VOICE_SCENARIO: ${{ matrix.scenario }}
+          LIVE_REAL_MODEL: "1"
+        run: python3 -m pytest tests/live/test_voice.py -v
+
+      # Logs can carry live call transcripts — surface them only when needed.
+      - name: Dump logs on failure
+        if: failure()
+        run: |
+          echo "=== gateway.log ==="; tail -n 300 /tmp/gateway.log 2>/dev/null || true
+          echo "=== voice_driver.log ==="; tail -n 150 /tmp/voice_driver.log 2>/dev/null || true
+
+      - name: Upload logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: live-voice-${{ matrix.scenario }}-logs
+          path: |
+            /tmp/gateway.log
+            /tmp/voice_driver.log
+          if-no-files-found: ignore
+
+      # Driver first (SIGINT so its cleanup reverts the number's auto-accept),
+      # then a beat for the revert to land, then the gateway.
+      - name: Teardown
+        if: always()
+        run: |
+          [ -f /tmp/voice_driver.pid ] && kill -INT "$(cat /tmp/voice_driver.pid)" 2>/dev/null || true
+          sleep 3
+          [ -f /tmp/gateway.pid ] && kill "$(cat /tmp/gateway.pid)" 2>/dev/null || true
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..7594643
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,60 @@
+name: Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+jobs:
+  # Offline unit suite — inkbox is mocked in the tests, so install only what
+  # they import. Runs on every push/PR, drafts included.
+  unit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install test deps
+        run: pip install pytest aiohttp segno claude-agent-sdk
+
+      # tests/contract runs in its own job against the LATEST host, not here.
+      # tests/live is collected but self-skips without the live API keys.
+      - name: Test
+        run: pytest -q --ignore=tests/contract
+
+  # Contract suite — the bridge's view of the host interface, checked against
+  # the latest published claude-agent-sdk + Claude Code CLI so upstream drift
+  # fails a PR here instead of a live gateway later.
+  contract-pr:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Install bridge + latest SDK
+        run: |
+          pip install -e . pytest
+          pip install -U claude-agent-sdk
+
+      - name: Install latest Claude Code CLI
+        run: npm install -g @anthropic-ai/claude-code
+
+      - name: Contract tests vs latest host
+        run: pytest -q tests/contract
diff --git a/tests/contract/test_host_interface.py b/tests/contract/test_host_interface.py
new file mode 100644
index 0000000..a6b3ba1
--- /dev/null
+++ b/tests/contract/test_host_interface.py
@@ -0,0 +1,100 @@
+"""Contract tests: the host interface this bridge depends on, against the
+INSTALLED claude-agent-sdk + Claude Code CLI.
+
+Run in CI with the latest published SDK/CLI (not the pinned dev versions) so an
+upstream rename, signature change, or removal fails HERE — before it takes down
+a live gateway. Everything asserted is something the bridge actually imports,
+constructs, or invokes.
+"""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from unittest.mock import MagicMock
+
+import pytest
+
+
+def test_sdk_exports_every_symbol_the_bridge_imports():
+    # Mirrors the imports in sessions.py and tools.py, 1:1; the import itself is the assertion.
+    from claude_agent_sdk import (  # noqa: F401
+        AssistantMessage,
+        ClaudeAgentOptions,
+        ClaudeSDKClient,
+        PermissionResultAllow,
+        PermissionResultDeny,
+        ResultMessage,
+        TextBlock,
+        create_sdk_mcp_server,
+        tool,
+    )
+
+
+def test_options_accept_the_kwargs_the_bridge_passes():
+    """Constructing ClaudeAgentOptions with the exact kwargs sessions.py uses
+    fails loudly if the SDK renames or drops any of them."""
+    from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
+
+    async def _can_use_tool(tool_name, input_data, context):  # signature stand-in
+        raise NotImplementedError  # only passed as a callback; this test never awaits it
+
+    options = ClaudeAgentOptions(
+        cwd="/tmp",
+        model=None,
+        system_prompt={"type": "preset", "preset": "claude_code", "append": "extra"},
+        setting_sources=["user", "project"],
+        allowed_tools=["Read", "mcp__inkbox__inkbox_whoami"],
+        mcp_servers={},
+        can_use_tool=_can_use_tool,
+        resume=None,
+    )
+    # The client must construct from those options without connecting.
+    assert ClaudeSDKClient(options=options) is not None
+
+
+def test_permission_results_construct_like_the_bridge_uses_them():
+    from claude_agent_sdk import PermissionResultAllow, PermissionResultDeny
+
+    PermissionResultAllow()  # no-argument form
+    PermissionResultAllow(updated_input={"answers": {}})  # keyword form; a renamed kwarg raises here
+    PermissionResultDeny(message="not approved")
+
+
+def test_inkbox_mcp_server_builds_against_installed_sdk():
+    """build_inkbox_mcp_server exercises the SDK's ``tool`` decorator and
+    ``create_sdk_mcp_server`` for every tool the bridge registers."""
+    from inkbox_claude.tools import build_inkbox_mcp_server
+
+    server, tool_names = build_inkbox_mcp_server(MagicMock(), "contract-test")  # mock stands in for the first argument
+    assert server is not None
+    expected = {
+        "mcp__inkbox__inkbox_whoami",
+        "mcp__inkbox__inkbox_send_email",
+        "mcp__inkbox__inkbox_send_sms",
+        "mcp__inkbox__inkbox_send_imessage",
+        "mcp__inkbox__inkbox_place_call",
+        "mcp__inkbox__inkbox_list_calls",
+        "mcp__inkbox__inkbox_get_call_transcript",
+        "mcp__inkbox__inkbox_list_text_conversations",
+        "mcp__inkbox__inkbox_get_text_conversation",
+        "mcp__inkbox__inkbox_list_imessage_conversations",
+        "mcp__inkbox__inkbox_get_imessage_conversation",
+        "mcp__inkbox__inkbox_lookup_contact",
+        "mcp__inkbox__inkbox_list_contacts",
+        "mcp__inkbox__inkbox_get_contact",
+        "mcp__inkbox__inkbox_create_contact",
+        "mcp__inkbox__inkbox_update_contact",
+        "mcp__inkbox__inkbox_delete_contact",
+    }
+    assert expected <= set(tool_names)  # subset check: tools beyond this list are allowed
+
+
+def test_claude_cli_installed_and_answers_version():
+    """``claude`` must resolve on PATH and ``claude --version`` must exit 0 with output."""
+    cli_path = shutil.which("claude")
+    if cli_path is None:
+        pytest.fail("claude CLI not on PATH — the bridge cannot start sessions without it")
+    proc = subprocess.run([cli_path, "--version"], capture_output=True, text=True, timeout=60)
+    assert proc.returncode == 0, f"claude --version failed: {proc.stderr[:300]}"
+    assert proc.stdout.strip(), "claude --version printed nothing"
diff --git a/tests/live/mock_anthropic.py b/tests/live/mock_anthropic.py
new file mode 100644
index 0000000..56f097c
--- /dev/null
+++ b/tests/live/mock_anthropic.py
@@ -0,0 +1,94 @@
+"""Deterministic Anthropic-API mock for live agent tests.
+
+Claude Code honours ``ANTHROPIC_BASE_URL``, so pointing the bridged sessions at
+this server makes the agent "think" here instead of against the real API: no
+real key, no tokens, no flakiness, fully deterministic. We still exercise the
+entire real pipeline (bridge, tunnel, inbound routing, Claude Code session,
+Inkbox send + delivery) — only the LLM brain is faked.
+
+Every reply contains ``REPLY_OK`` plus, when present, the inbound's smoke nonce,
+so a live test can assert the canned content travelled inbound → model → reply →
+delivery end to end (and that the agent did NOT fall back to an error message).
+
+Serves the Messages API (``POST /v1/messages``, streaming and not) and the
+token-count endpoint. Run: ``python mock_anthropic.py [port]`` (default 8089).
+Stdlib only.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import sys
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+_NONCE = re.compile(r"smoke-[0-9a-f]{6,}")
+
+
+def _reply_text(req: dict) -> str:  # canned reply echoing the first smoke nonce found in the request
+    found = _NONCE.search(json.dumps(req))
+    tag = "no-nonce" if found is None else found.group(0)
+    return f"REPLY_OK {tag} — automated reachability reply from the agent."
+
+
+class Handler(BaseHTTPRequestHandler):
+    """Canned Anthropic-API endpoints: health GETs, ``count_tokens`` and Messages POSTs."""
+
+    def log_message(self, *_args):  # quiet
+        pass
+
+    def _send_json(self, code: int, obj: dict) -> None:
+        body = json.dumps(obj).encode()
+        self.send_response(code)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def do_GET(self):  # noqa: N802  (health / probes)
+        self._send_json(200, {"ok": True})
+
+    def _sse(self, event: str, data: dict) -> None:
+        self.wfile.write(f"event: {event}\ndata: {json.dumps(data)}\n\n".encode())
+
+    def do_POST(self):  # noqa: N802
+        """Fixed count for ``count_tokens`` paths; any other POST gets the canned message (SSE if ``stream``)."""
+        try:
+            n = max(int(self.headers.get("Content-Length") or 0), 0)  # negative would block read() until EOF
+            req = json.loads(self.rfile.read(n) or b"{}")
+            req = req if isinstance(req, dict) else {}  # valid JSON but not an object: treat as empty
+        except ValueError:  # unparsable Content-Length or body: treat as an empty request
+            req = {}
+
+        if self.path.rstrip("/").endswith("/count_tokens"):
+            return self._send_json(200, {"input_tokens": 1})
+
+        text = _reply_text(req)
+        model = req.get("model", "mock-model")
+        usage = {"input_tokens": 1, "output_tokens": 1}
+        if req.get("stream"):
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.end_headers()
+            self._sse("message_start", {"type": "message_start", "message": {
+                "id": "msg_mock", "type": "message", "role": "assistant", "model": model,
+                "content": [], "stop_reason": None, "stop_sequence": None, "usage": usage}})
+            self._sse("content_block_start", {"type": "content_block_start", "index": 0,
+                                              "content_block": {"type": "text", "text": ""}})
+            self._sse("content_block_delta", {"type": "content_block_delta", "index": 0,
+                                              "delta": {"type": "text_delta", "text": text}})
+            self._sse("content_block_stop", {"type": "content_block_stop", "index": 0})
+            self._sse("message_delta", {"type": "message_delta", "delta": {
+                "stop_reason": "end_turn", "stop_sequence": None}, "usage": {"output_tokens": 1}})
+            self._sse("message_stop", {"type": "message_stop"})
+            self.wfile.flush()
+        else:
+            self._send_json(200, {
+                "id": "msg_mock", "type": "message", "role": "assistant", "model": model,
+                "content": [{"type": "text", "text": text}],
+                "stop_reason": "end_turn", "stop_sequence": None, "usage": usage})
+
+
+if __name__ == "__main__":
+    port = int(sys.argv[1]) if len(sys.argv) > 1 else 8089
+    ThreadingHTTPServer(("127.0.0.1", port), Handler).serve_forever()
diff --git a/tests/live/test_cross_channel.py b/tests/live/test_cross_channel.py
new file mode 100644
index 0000000..fbb584b
--- /dev/null
+++ b/tests/live/test_cross_channel.py
@@ -0,0 +1,196 @@
+"""Live cross-channel suite — the agent answers on a DIFFERENT channel.
+
+Ask on one channel; the agent must figure out the sender's *other-channel* address
+from the contact card and respond there. Each request carries a short token, and we
+assert that token shows up on the other channel — proving the response is tied to
+the request.
+
+  * email -> SMS : email asks for a text; we poll SMS for the token.
+  * SMS  -> email: SMS asks for an email; we poll email for the token.
+
+Voice is the odd one out: an unanswered call carries no token, so instead of
+matching content we assert that a *new inbound call from the AUT's number* lands
+on the driver's number within the window — proof the request reasoned its way to
+``inkbox_place_call`` and Inkbox actually dialed the driver.
+
+  * email -> call: email asks the agent to call; we poll the driver's calls.
+  * SMS   -> call: SMS asks the agent to call; we poll the driver's calls.
+
+More channels (iMessage) get added here. Real-model only.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import time
+import uuid
+
+import pytest
+
+REMOTE_KEY = os.environ.get("REMOTE_INKBOX_API_KEY")
+AUT_KEY = os.environ.get("CLAUDE_CODE_INKBOX_API_KEY")
+BASE_URL = os.environ.get("INKBOX_BASE_URL", "https://inkbox.ai")
+REAL = os.environ.get("LIVE_REAL_MODEL") == "1"
+TIMEOUT_S = float(os.environ.get("LIVE_XCHANNEL_TIMEOUT", "200"))
+POLL_EVERY_S = 6.0
+
+pytestmark = pytest.mark.skipif(
+    not (REMOTE_KEY and AUT_KEY and REAL),
+    reason="cross-channel suite: needs both keys + LIVE_REAL_MODEL=1",
+)
+
+
+def _digits(s: str) -> str:  # strip every non-digit character; a falsy input yields ""
+    return re.sub(r"\D", "", s if s else "")
+
+
+def _client(key):
+    """Build an Inkbox client for ``key`` against ``BASE_URL`` (the SDK is imported lazily)."""
+    from inkbox import Inkbox as _Inkbox
+    return _Inkbox(base_url=BASE_URL, api_key=key)
+
+
+def _token() -> str:  # fresh 6-hex-character marker tying a reply to its request
+    return uuid.uuid4().hex[0:6]
+
+
+@pytest.fixture(scope="module")
+def xc():  # module-scoped: the lookups and contact-card setup below run once for all tests here
+    remote = _client(REMOTE_KEY)
+    aut = _client(AUT_KEY)
+    remote_email = remote.mailboxes.list()[0].email_address  # first mailbox of each identity
+    aut_email = aut.mailboxes.list()[0].email_address
+    rnums = remote.phone_numbers.list()
+    anums = aut.phone_numbers.list()
+    assert rnums and anums, "both identities need a phone number for cross-channel"
+    remote_phone, remote_pid = rnums[0].number, str(rnums[0].id)  # first number; its id kept as str
+    aut_phone = anums[0].number
+
+    # The agent can only cross channels if the sender's card has BOTH an email and a
+    # phone. Ensure it does (merge in whatever is missing; never clobber existing data).
+    from inkbox.contacts.types import ContactEmail, ContactPhone
+    matches = aut.contacts.lookup(email=remote_email)  # the sender's card, looked up with the AUT key
+    if not matches:
+        aut.contacts.create(
+            given_name="Penny", family_name="Tester",
+            emails=[ContactEmail("work", remote_email)],
+            phones=[ContactPhone("mobile", remote_phone)],
+        )
+    else:
+        c = matches[0]  # only the first match is amended
+        emails = list(getattr(c, "emails", []))
+        phones = list(getattr(c, "phones", []))
+        changed = False
+        if not any((e.value or "").lower() == remote_email.lower() for e in emails):
+            emails.append(ContactEmail("work", remote_email))
+            changed = True
+        if not any(_digits(p.value)[-10:] == _digits(remote_phone)[-10:] for p in phones):  # last 10 digits only
+            phones.append(ContactPhone("mobile", remote_phone))
+            changed = True
+        if changed:  # write back only when an entry was appended
+            aut.contacts.update(c.id, emails=emails, phones=phones)
+
+    return {
+        "remote": remote, "aut": aut,
+        "remote_email": remote_email, "remote_pid": remote_pid,
+        "aut_email": aut_email, "aut_phone": aut_phone,
+    }
+
+
+def test_email_request_gets_sms_response(xc):
+    """Email asks the agent to TEXT a code; the code must arrive over SMS."""
+    remote, remote_pid, aut_phone = xc["remote"], xc["remote_pid"], xc["aut_phone"]
+    token = _token()
+    tail = _digits(aut_phone)[-10:]  # senders are matched on their last 10 digits only
+
+    def _sms_from_aut():  # inbound texts (latest 30 requested) whose sender matches the AUT's number
+        return [m for m in remote.texts.list(remote_pid, limit=30)
+                if (getattr(m, "direction", "") or "").lower() == "inbound"
+                and _digits(getattr(m, "remote_phone_number", "") or "")[-10:] == tail]
+
+    before = {m.id for m in _sms_from_aut()}  # snapshot so an older text cannot satisfy the check
+    remote.messages.send(
+        xc["remote_email"], to=[xc["aut_email"]], subject=f"[{token}] text me please",
+        body_text=f"Please send me a text message (SMS) that says: lalala {token}",
+    )
+
+    deadline = time.monotonic() + TIMEOUT_S
+    while time.monotonic() < deadline:
+        for m in _sms_from_aut():
+            if m.id not in before and token in (getattr(m, "text", "") or "").lower():
+                return  # cross-channel confirmed: email request -> SMS response with the token
+        time.sleep(POLL_EVERY_S)
+    pytest.fail(f"agent did not send an SMS containing {token!r} within {TIMEOUT_S:.0f}s")
+
+
+def test_sms_request_gets_email_response(xc):
+    """SMS asks the agent to EMAIL a code; the code must arrive over email."""
+    from inkbox.mail.types import MessageDirection
+
+    remote, remote_email, aut_email = xc["remote"], xc["remote_email"], xc["aut_email"]
+    token = _token()
+
+    def _email_from_aut():  # inbound mail whose from-address contains the AUT's address
+        return [m for m in remote.messages.list(remote_email, direction=MessageDirection.INBOUND)
+                if aut_email.lower() in (getattr(m, "from_address", "") or "").lower()]
+
+    before = {m.id for m in _email_from_aut()}  # snapshot so an older email cannot satisfy the check
+    remote.texts.send(xc["remote_pid"], to=xc["aut_phone"], text=f"Please email me the code {token}.")
+
+    deadline = time.monotonic() + TIMEOUT_S
+    while time.monotonic() < deadline:
+        for m in _email_from_aut():
+            if m.id in before:
+                continue
+            hay = (getattr(m, "subject", "") or "").lower()  # subject first: already in the listing
+            if token not in hay:  # otherwise fetch the message and search its text body
+                body = getattr(remote.messages.get(remote_email, m.id), "body_text", "") or ""
+                hay = body.lower()
+            if token in hay:
+                return  # cross-channel confirmed: SMS request -> email response with the token
+        time.sleep(POLL_EVERY_S)
+    pytest.fail(f"agent did not send an email containing {token!r} within {TIMEOUT_S:.0f}s")
+
+
+def _inbound_calls_from_aut(remote, remote_pid: str, aut_phone: str):
+    """Inbound calls on the driver's number whose caller matches the AUT's last 10 digits."""
+    wanted = _digits(aut_phone)[-10:]
+    recent = remote.calls.list(remote_pid, limit=30)
+    inbound = (c for c in recent if (getattr(c, "direction", "") or "").lower() == "inbound")
+    return [c for c in inbound if _digits(getattr(c, "remote_phone_number", "") or "")[-10:] == wanted]
+
+
+def _wait_for_new_call(remote, remote_pid: str, aut_phone: str, before: set):
+    """Poll until the AUT places a call the driver has not seen before.
+
+    Any inbound call from the AUT whose id is missing from ``before`` (the
+    pre-request snapshot) counts; ``pytest.fail`` fires after ``TIMEOUT_S``.
+    """
+    give_up_at = time.monotonic() + TIMEOUT_S
+    while time.monotonic() < give_up_at:
+        calls = _inbound_calls_from_aut(remote, remote_pid, aut_phone)
+        if any(call.id not in before for call in calls):
+            return  # a fresh call from the AUT landed on the driver's number
+        time.sleep(POLL_EVERY_S)
+    pytest.fail(f"agent did not place a call to the driver within {TIMEOUT_S:.0f}s")
+
+
+def test_email_request_gets_call(xc):
+    """An emailed "call me" must produce a new inbound call on the driver's number."""
+    driver, driver_pid, agent_phone = (xc[k] for k in ("remote", "remote_pid", "aut_phone"))
+    # Ids of calls that already exist, taken before sending so none can pass as the reply.
+    seen = {call.id for call in _inbound_calls_from_aut(driver, driver_pid, agent_phone)}
+    driver.messages.send(
+        xc["remote_email"], to=[xc["aut_email"]], subject="please call me",
+        body_text="Please place a phone call to my number now — I'd rather talk than type.",
+    )
+    _wait_for_new_call(driver, driver_pid, agent_phone, seen)
+
+
+def test_sms_request_gets_call(xc):
+    """A texted "call me" must produce a new inbound call on the driver's number."""
+    driver, driver_pid, agent_phone = (xc[k] for k in ("remote", "remote_pid", "aut_phone"))
+    seen = {call.id for call in _inbound_calls_from_aut(driver, driver_pid, agent_phone)}
+    driver.texts.send(driver_pid, to=agent_phone, text="Call me please — give me a ring now.")
+    _wait_for_new_call(driver, driver_pid, agent_phone, seen)
diff --git a/tests/live/test_email_intelligence.py b/tests/live/test_email_intelligence.py
new file mode 100644
index 0000000..134b6b8
--- /dev/null
+++ b/tests/live/test_email_intelligence.py
@@ -0,0 +1,269 @@
+"""Live intelligence suite over email — the agent's REAL brain + tools.
+
+Runs against a real Claude model (``LIVE_REAL_MODEL=1``, real key) so it proves
+the agent actually reasons and uses its Inkbox tools/data — not a mock. A remote
+identity emails questions; we verify the replies against values looked up live
+through the API keys (NO hardcoded expectations):
+
+  * basic        — answers a simple question (sanity).
+  * own identity — reports its own handle / email / phone (looked up via the AUT key).
+  * sender       — reports who the sender is, from the contact card it can see
+                   (looked up via the AUT key).
+  * tools        — names its real Inkbox tools (scraped from the tool sources).
+  * contact CRUD — with LIVE_CONTACT_CRUD=1, creates/updates/deletes a
+                   temporary contact through the real agent loop.
+
+Skipped unless both keys + LIVE_REAL_MODEL=1 are set.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import time
+import uuid
+from pathlib import Path
+
+import pytest
+
+REMOTE_KEY = os.environ.get("REMOTE_INKBOX_API_KEY")  # key of the remote identity that sends the questions
+AUT_KEY = os.environ.get("CLAUDE_CODE_INKBOX_API_KEY")  # key of the agent under test (AUT)
+BASE_URL = os.environ.get("INKBOX_BASE_URL", "https://inkbox.ai")
+TIMEOUT_S = float(os.environ.get("LIVE_EMAIL_TIMEOUT", "150"))  # seconds _ask() waits for a reply
+POLL_EVERY_S = 5.0  # seconds slept between two polls of the remote mailbox
+# Lowercase substrings that mark a reply as an error; "i hit an error" is the bridge's canned failed-turn reply.
+ERROR_MARKERS = ("non-retryable error", "missing authentication", "http 401", "http 403", "traceback",
+                 "i hit an error")
+
+pytestmark = pytest.mark.skipif(
+    not (REMOTE_KEY and AUT_KEY and os.environ.get("LIVE_REAL_MODEL") == "1"),
+    reason="real-model intelligence suite: needs both keys + LIVE_REAL_MODEL=1",
+)
+
+
+def _digits(s: str) -> str:
+    return re.compile(r"\D").sub("", s or "")  # drop every non-digit; None counts as ""
+
+
+def _phone_present(phone: str, body: str) -> bool:
+    """Return True if ``body`` reports ``phone``, written in full or privacy-masked.
+
+    Full: the last 10 digits of ``phone`` occur within the digits of ``body``.
+    Masked (e.g. ``+192****3235``): two or more mask characters, at most two
+    non-digits, then the real last 4 digits — the digits requirement keeps
+    markdown bold (``**name:**``) from matching. A ``phone`` with fewer than
+    4 digits is never considered present.
+    """
+    want = _digits(phone)
+    if len(want) < 4:  # "" is a substring of any text, so this would match vacuously
+        return False
+    tail = re.escape(want[-4:])
+    return want[-10:] in _digits(body) or bool(re.search(r"[*xX•·]{2,}\D{0,2}" + tail, body))
+
+
+def _mailbox(client) -> str:
+    mailboxes = client.mailboxes.list()  # the first listed mailbox is the one returned
+    assert mailboxes, "identity has no mailbox"
+    return mailboxes[0].email_address
+
+
+def _first_phone(client) -> str | None:
+    numbers = client.phone_numbers.list()
+    return None if not numbers else numbers[0].number  # None when nothing is listed
+
+
+def _client(key):
+    import inkbox  # imported at call time rather than at module import
+
+    return inkbox.Inkbox(api_key=key, base_url=BASE_URL)
+
+
+def _plugin_tool_names() -> list[str]:
+    """Sorted, de-duplicated ``inkbox_*`` names found in ``@tool("...")`` calls of
+    ``inkbox_claude/tools.py`` — scraped from the source, so no hand-kept list."""
+    src = Path(__file__).resolve().parents[2] / "inkbox_claude" / "tools.py"
+    return sorted(set(re.findall(r'@tool\(\s*"(inkbox_[a-z0-9_]+)"', src.read_text(encoding="utf-8"))))
+
+
+def _ask(remote, aut_email: str, remote_email: str, question: str) -> str:
+    """Send ``question`` to the agent by email and return its reply body, lowercased."""
+    from inkbox.mail.types import MessageDirection
+
+    tag = f"smoke-{uuid.uuid4().hex[:8]}"  # unique marker placed in the subject line
+    outgoing = remote.messages.send(remote_email, to=[aut_email], subject=f"[{tag}] {question[:40]}", body_text=question)
+    sent_thread = str(getattr(outgoing, "thread_id", "") or "")
+
+    def _matches(candidate) -> bool:
+        # Same thread as the sent message, or: sent by the AUT with our tag in the subject.
+        if sent_thread and sent_thread == str(getattr(candidate, "thread_id", "") or ""):
+            return True
+        sender = (getattr(candidate, "from_address", "") or "").lower()
+        return aut_email.lower() in sender and tag in (getattr(candidate, "subject", "") or "")
+
+    stop_at = time.monotonic() + TIMEOUT_S
+    while time.monotonic() < stop_at:
+        for candidate in filter(_matches, remote.messages.list(remote_email, direction=MessageDirection.INBOUND)):
+            text = getattr(remote.messages.get(remote_email, candidate.id), "body_text", "") or ""
+            bad = [marker for marker in ERROR_MARKERS if marker in text.lower()]
+            assert not bad, f"reply is an error, not a real answer: {bad}\n{text[:300]}"
+            return text.lower()
+        time.sleep(POLL_EVERY_S)
+    pytest.fail(f"no reply within {TIMEOUT_S:.0f}s to: {question!r}")
+
+
+@pytest.fixture(scope="module")
+def ctx():
+    # Module-scoped: one client per API key plus each identity's mailbox address.
+    driver, agent = _client(REMOTE_KEY), _client(AUT_KEY)
+    return dict(
+        remote=driver,
+        aut=agent,
+        remote_email=_mailbox(driver),
+        aut_email=_mailbox(agent),
+    )
+
+
+def test_basic_reply(ctx):
+    question = "Please reply with a one-sentence acknowledgement that you received this email."
+    reply = _ask(ctx["remote"], ctx["aut_email"], ctx["remote_email"], question)
+    assert reply.strip(), "empty reply"
+
+
+def test_reports_own_identity(ctx):
+    """The agent must state its own handle, email address and phone number."""
+    aut_email = ctx["aut_email"].lower()  # _ask() lowercases the reply, so compare in lowercase
+    handle = aut_email.split("@", 1)[0]  # local part of the mailbox address
+    aut_phone = _first_phone(ctx["aut"])
+    assert aut_phone, "AUT identity has no phone number to report"
+
+    body = _ask(ctx["remote"], ctx["aut_email"], ctx["remote_email"],
+                "What is your full Inkbox identity? Reply with your handle, display "
+                "name, email address, and phone number. Write the phone number in "
+                "full — every digit, with no masking, asterisks, or abbreviation.")
+    assert handle in body, f"reply missing handle {handle!r}\n{body[:400]}"
+    assert aut_email in body, f"reply missing email {aut_email!r}\n{body[:400]}"
+    # Accept a privacy-masked phone (the model self-redacts the middle digits
+    # in formal listings) as well as full.
+    assert _phone_present(aut_phone, body), f"reply missing phone {aut_phone!r}\n{body[:400]}"
+
+
+def test_reports_sender_details(ctx):
+    """The agent must report the sender's name, email and phone from the contact card it can see."""
+    aut, remote = ctx["aut"], ctx["remote"]
+    remote_email = ctx["remote_email"]
+
+    # Look up (or seed) the sender's contact in the AUT org — the card the agent sees.
+    matches = aut.contacts.lookup(email=remote_email)
+    if not matches:
+        from inkbox.contacts.types import ContactEmail, ContactPhone
+        rphone = _first_phone(remote)
+        aut.contacts.create(
+            given_name="Penny",
+            family_name="Tester",
+            emails=[ContactEmail(label="work", value=remote_email)],
+            phones=[ContactPhone(label="mobile", value=rphone)] if rphone else None,
+        )
+        matches = aut.contacts.lookup(email=remote_email)
+    assert matches, "could not establish a contact card for the sender"
+    contact = matches[0]
+    name = (getattr(contact, "preferred_name", None) or getattr(contact, "given_name", None) or "")
+    emails = [e.value for e in (getattr(contact, "emails", None) or []) if e.value]  # None-safe; skip blanks
+    phones = [p.value for p in (getattr(contact, "phones", None) or []) if p.value]  # a blank would match any reply
+
+    body = _ask(ctx["remote"], ctx["aut_email"], remote_email,
+                "Who am I to you? Tell me everything you have on file about me. "
+                "Include my phone number in full — every digit, with no masking, "
+                "asterisks, or abbreviation.")
+    if name:
+        assert name.lower() in body, f"reply missing sender name {name!r}\n{body[:400]}"
+    assert any(e.lower() in body for e in emails), f"reply missing sender email {emails}\n{body[:400]}"
+    if phones:
+        # Accept full or privacy-masked (see _phone_present).
+        assert any(_phone_present(p, body) for p in phones), \
+            f"reply missing sender phone {phones}\n{body[:400]}"
+
+
+def test_aware_of_inkbox_tools(ctx):
+    """The agent must name at least three registered tools and every contact tool."""
+    registered = _plugin_tool_names()
+    assert registered, "no inkbox_* tool names found in inkbox_claude/tools.py"
+    contact_tools = {
+        "inkbox_lookup_contact",
+        "inkbox_list_contacts",
+        "inkbox_get_contact",
+        "inkbox_create_contact",
+        "inkbox_update_contact",
+        "inkbox_delete_contact",
+    }
+    assert contact_tools.issubset(registered)
+
+    reply = _ask(ctx["remote"], ctx["aut_email"], ctx["remote_email"],
+                 "List the exact names of all the Inkbox tools you have access to, one per line.")
+    named = list(filter(lambda tool: tool.lower() in reply, registered))
+    assert len(named) >= 3, f"agent named only {named} of its tools {registered}\n{reply[:500]}"
+    unnamed_contact_tools = sorted(tool for tool in contact_tools if tool.lower() not in reply)
+    assert not unnamed_contact_tools, f"agent did not name contact tools {unnamed_contact_tools}\n{reply[:500]}"
+
+
+def _contacts_by_email(client, email: str):
+    return [*(client.contacts.lookup(email=email) or ())]  # always a new list, even when lookup gives None
+
+
+def _delete_contacts_by_email(client, email: str) -> None:
+    found = _contacts_by_email(client, email)
+    ids = (str(getattr(entry, "id", "") or "") for entry in found)
+    for contact_id in filter(None, ids):  # entries without a usable id are skipped
+        client.contacts.delete(contact_id)
+
+
+@pytest.mark.skipif(
+    os.environ.get("LIVE_CONTACT_CRUD") != "1",
+    reason="mutating contact CRUD live test: set LIVE_CONTACT_CRUD=1 to opt in",
+)
+def test_contact_crud_tool_use(ctx):
+    """Ask the agent by email to create, update and delete a throwaway contact,
+    verifying every step against the contacts API through the AUT key."""
+    aut = ctx["aut"]
+    nonce = f"cc-live-{uuid.uuid4().hex[:8]}"  # unique per run; reused in the name, email and notes
+    contact_name = f"Claude Live {nonce}"
+    contact_email = f"{nonce}@example.com"
+    updated_notes = f"updated-notes-{nonce}"
+
+    def ask(prompt: str) -> str:
+        return _ask(ctx["remote"], ctx["aut_email"], ctx["remote_email"], prompt)
+
+    _delete_contacts_by_email(aut, contact_email)  # remove anything already stored under this email
+    try:
+        # Step 1 — create. The agent's claim is not trusted on its own:
+        # a direct lookup by email must find the new contact.
+        create_reply = ask(
+            "Use inkbox_create_contact now. Create a new contact named "
+            f"{contact_name} with email {contact_email}. Do not just describe the action. "
+            f"After the tool succeeds, reply exactly: CREATED {nonce}"
+        )
+        assert "created" in create_reply and nonce in create_reply, create_reply[:500]
+        found = _contacts_by_email(aut, contact_email)
+        assert found, f"agent said it created {contact_email}, but lookup found nothing"
+        contact_id = str(getattr(found[0], "id", "") or "")
+        assert contact_id, f"created contact has no id: {found[0]!r}"
+
+        # Step 2 — update, then read the notes back through the API.
+        update_reply = ask(
+            "Use inkbox_update_contact now. Update contactId "
+            f"{contact_id} and set notes to {updated_notes}. Do not create a second contact. "
+            f"After the tool succeeds, reply exactly: UPDATED {nonce}"
+        )
+        assert "updated" in update_reply and nonce in update_reply, update_reply[:500]
+        current = aut.contacts.get(contact_id)
+        assert updated_notes.lower() in str(getattr(current, "notes", "") or "").lower()
+
+        # Step 3 — delete, then confirm the lookup comes back empty.
+        delete_reply = ask(
+            "I confirm this is a temporary test contact. Use inkbox_delete_contact now "
+            f"to delete contactId {contact_id}. After the tool succeeds, reply exactly: DELETED {nonce}"
+        )
+        assert "deleted" in delete_reply and nonce in delete_reply, delete_reply[:500]
+        assert not _contacts_by_email(aut, contact_email)
+    finally:
+        # Runs on success and on failure, so a half-finished run leaves no contact behind.
+        _delete_contacts_by_email(aut, contact_email)
diff --git a/tests/live/test_email_reply.py b/tests/live/test_email_reply.py
new file mode 100644
index 0000000..6dda45c
--- /dev/null
+++ b/tests/live/test_email_reply.py
@@ -0,0 +1,97 @@
+"""Live test: the agent emails back — and the reply is real, not an error.
+
+A *remote* Inkbox identity emails the agent-under-test (AUT). The AUT's running
+bridge routes it into a Claude Code session that "thinks" against a deterministic
+mock model (see mock_anthropic.py — no real LLM, so this is repeatable and free),
+and emails a reply.
+
+We assert two independent things so a broken setup can't pass:
+  1. delivery  — a reply lands in the remote mailbox, tracked by thread_id;
+  2. content   — the reply body carries the mock's ``REPLY_OK <nonce>`` marker and
+                 contains NO error strings (this is what catches the agent emailing
+                 back a model-auth 401 instead of a real reply).
+
+Skipped unless both API keys are present, so it never runs in the offline suite.
+Requires the AUT bridge to already be running (the workflow starts it).
+"""
+
+from __future__ import annotations
+
+import os
+import time
+import uuid
+
+import pytest
+
+REMOTE_KEY = os.environ.get("REMOTE_INKBOX_API_KEY")  # key of the remote identity that sends the email
+AUT_KEY = os.environ.get("CLAUDE_CODE_INKBOX_API_KEY")  # key of the agent under test (AUT)
+BASE_URL = os.environ.get("INKBOX_BASE_URL", "https://inkbox.ai")
+TIMEOUT_S = float(os.environ.get("LIVE_EMAIL_TIMEOUT", "120"))  # seconds to wait for the reply
+POLL_EVERY_S = 5.0  # seconds slept between two polls of the remote mailbox
+
+# Lowercase substrings that mean the agent replied with a failure instead of a real answer.
+ERROR_MARKERS = ("non-retryable error", "missing authentication", "http 401", "http 403", "traceback",
+                 "i hit an error")
+
+pytestmark = pytest.mark.skipif(
+    not (REMOTE_KEY and AUT_KEY) or os.environ.get("LIVE_REAL_MODEL") == "1",
+    reason="mock-model reachability test (needs both keys; skipped in real-model mode)",
+)
+
+
+def _mailbox(client) -> str:
+    available = client.mailboxes.list()
+    assert available, "identity has no mailbox"
+    return available[0].email_address  # address of the first listed mailbox
+
+
+def test_email_reachability():
+    """Remote emails the AUT and waits for the reply; it must carry the mock's
+    REPLY_OK marker and the request nonce, and contain no error markers."""
+    from inkbox import Inkbox
+    from inkbox.mail.types import MessageDirection
+
+    remote = Inkbox(api_key=REMOTE_KEY, base_url=BASE_URL)
+    aut = Inkbox(api_key=AUT_KEY, base_url=BASE_URL)
+
+    remote_email, aut_email = _mailbox(remote), _mailbox(aut)
+    assert remote_email.lower() != aut_email.lower(), "remote and AUT must be different identities"
+
+    nonce = f"smoke-{uuid.uuid4().hex[:8]}"
+    sent = remote.messages.send(
+        remote_email,
+        to=[aut_email],
+        subject=f"[{nonce}] are you there?",
+        body_text="This is an automated reachability check — please reply to this email to confirm.",
+    )
+    thread_id = str(getattr(sent, "thread_id", "") or "")
+
+    def _is_reply(msg) -> bool:
+        # A message is the reply when it shares the sent thread id, or — covering a
+        # send that surfaced no thread id — comes from the AUT with the nonce in its subject.
+        if thread_id and thread_id == str(getattr(msg, "thread_id", "") or ""):
+            return True
+        sender = (getattr(msg, "from_address", "") or "").lower()
+        return aut_email.lower() in sender and nonce in (getattr(msg, "subject", "") or "")
+
+    # Poll the remote mailbox until a matching inbound message shows up or time runs out.
+    deadline = time.monotonic() + TIMEOUT_S
+    reply = None
+    while reply is None and time.monotonic() < deadline:
+        inbound = remote.messages.list(remote_email, direction=MessageDirection.INBOUND)
+        reply = next((msg for msg in inbound if _is_reply(msg)), None)
+        if reply is None:
+            time.sleep(POLL_EVERY_S)
+
+    # (1) delivery
+    assert reply is not None, f"no reply within {TIMEOUT_S:.0f}s — inbound routing or reply send is broken"
+
+    # (2) content is a real reply, not an error fallback
+    detail = remote.messages.get(remote_email, reply.id)
+    reply_text = getattr(detail, "body_text", "") or ""
+    reply_subject = getattr(reply, "subject", "") or ""
+    body = (reply_text + " " + reply_subject).lower()
+    bad = [marker for marker in ERROR_MARKERS if marker in body]
+    assert not bad, f"reply delivered but the body is an error, not a real answer: {bad}\n{body[:300]}"
+    assert "reply_ok" in body, f"reply delivered but missing the mock marker REPLY_OK:\n{body[:300]}"
+    assert nonce in body, f"reply did not echo the request nonce {nonce} — agent may not have read the inbound"
diff --git a/tests/live/test_sms.py b/tests/live/test_sms.py
new file mode 100644
index 0000000..8bc6988
--- /dev/null
+++ b/tests/live/test_sms.py
@@ -0,0 +1,159 @@
+"""Live SMS suite — the same questions as the email suite, over real SMS.
+
+SMS differs from email: agent-to-agent SMS skips the START opt-in (the server
+bypasses it for inter-agent traffic), and outbound SMS is subject to carrier +
+spam filtering — so prompts ask for SHORT replies and avoid spammy content.
+
+  * mock leg → reachability (deterministic ``REPLY_OK`` from the mock model).
+  * real leg → intelligence: basic, own identity, sender, tools.
+
+Skipped unless both keys are set. Replies are matched by *new* inbound message id
+from the AUT's number (robust to clock skew).
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import time
+from pathlib import Path
+
+import pytest
+
+REMOTE_KEY = os.environ.get("REMOTE_INKBOX_API_KEY")  # key of the remote identity that sends the texts
+AUT_KEY = os.environ.get("CLAUDE_CODE_INKBOX_API_KEY")  # key of the agent under test (AUT)
+BASE_URL = os.environ.get("INKBOX_BASE_URL", "https://inkbox.ai")
+REAL = os.environ.get("LIVE_REAL_MODEL") == "1"  # True selects the real-model leg, False the mock leg
+TIMEOUT_S = float(os.environ.get("LIVE_SMS_TIMEOUT", "180"))  # seconds _ask_sms() waits for a reply
+POLL_EVERY_S = 6.0  # seconds slept between two polls of the inbound texts
+# Lowercase substrings that mark a reply as an error; "i hit an error" is the bridge's canned failed-turn reply.
+ERROR_MARKERS = ("non-retryable error", "missing authentication", "http 401", "http 403", "traceback",
+                 "i hit an error")
+
+pytestmark = pytest.mark.skipif(
+    not (REMOTE_KEY and AUT_KEY),
+    reason="live SMS suite: needs REMOTE_INKBOX_API_KEY + CLAUDE_CODE_INKBOX_API_KEY",
+)
+real_only = pytest.mark.skipif(not REAL, reason="intelligence runs in the real-model leg")  # per-test marker
+mock_only = pytest.mark.skipif(REAL, reason="reachability runs in the mock-model leg")  # per-test marker
+
+
+def _digits(s: str) -> str:
+    return re.sub(pattern=r"\D", repl="", string=s or "")  # digits only; None -> ""
+
+
+def _client(key):
+    import inkbox  # imported at call time rather than at module import
+
+    return inkbox.Inkbox(api_key=key, base_url=BASE_URL)
+
+
+def _phone(client):
+    listed = client.phone_numbers.list()  # the first entry yields (number, id as a string)
+    assert listed, "identity has no phone number"
+    return listed[0].number, str(listed[0].id)
+
+
+def _plugin_tool_names() -> list[str]:
+    """Sorted, de-duplicated ``inkbox_*`` names found in ``@tool("...")`` calls of
+    ``inkbox_claude/tools.py`` — scraped from the source, so no hand-kept list."""
+    src = Path(__file__).resolve().parents[2] / "inkbox_claude" / "tools.py"
+    return sorted(set(re.findall(r'@tool\(\s*"(inkbox_[a-z0-9_]+)"', src.read_text(encoding="utf-8"))))
+
+
+@pytest.fixture(scope="module")
+def sms():
+    # Module-scoped: both clients, the AUT's number and the remote number's id.
+    # No opt-in/START step is performed: the server bypasses the missing-opt-in gate
+    # for inter-agent traffic (Inkbox-managed recipient); only an explicit STOP would block.
+    remote, aut = _client(REMOTE_KEY), _client(AUT_KEY)
+    aut_phone = _phone(aut)[0]
+    remote_pid = _phone(remote)[1]
+    bundle = {"remote": remote, "aut": aut, "aut_phone": aut_phone, "remote_pid": remote_pid}
+    return bundle
+
+
+def _ask_sms(sms, text: str) -> str:
+    """Send ``text`` to the agent by SMS; return the lowercased body of its reply.
+
+    A reply is any inbound message from the AUT's number whose id was not seen
+    before sending. The agent sometimes sends a late *second* SMS for the
+    PREVIOUS question (a duplicate "OK", or a masked + unmasked identity pair),
+    which would otherwise be taken for the answer to this one. So the inbound
+    id-set is first polled until it stops changing (or 2 * POLL_EVERY_S pass),
+    folding any such straggler into the ``before`` snapshot.
+    """
+    remote, aut_phone, pid = sms["remote"], sms["aut_phone"], sms["remote_pid"]
+    aut_tail = _digits(aut_phone)[-10:]
+
+    def _from_aut(message) -> bool:  # inbound, and same last 10 digits as the AUT's number
+        if (getattr(message, "direction", "") or "").lower() != "inbound":
+            return False
+        return _digits(getattr(message, "remote_phone_number", "") or "")[-10:] == aut_tail
+
+    def _inbound_from_aut():
+        return [m for m in remote.texts.list(pid, limit=30) if _from_aut(m)]
+
+    # Settle: keep re-reading the inbound ids until one poll brings nothing new, so
+    # a trailing reply to the prior question ends up in `before`, not in the match.
+    before = {m.id for m in _inbound_from_aut()}
+    quiet_deadline = time.monotonic() + 2 * POLL_EVERY_S
+    while time.monotonic() < quiet_deadline:
+        time.sleep(POLL_EVERY_S)
+        previous = before
+        before = {m.id for m in _inbound_from_aut()}
+        if before == previous:
+            break
+
+    remote.texts.send(pid, to=aut_phone, text=text)
+
+    deadline = time.monotonic() + TIMEOUT_S
+    while time.monotonic() < deadline:
+        # The first listed inbound message with an unseen id is taken as the reply.
+        for reply in (m for m in _inbound_from_aut() if m.id not in before):
+            body = getattr(reply, "text", "") or ""
+            bad = [marker for marker in ERROR_MARKERS if marker in body.lower()]
+            assert not bad, f"SMS reply is an error, not a real answer: {bad}\n{body[:200]}"
+            return body.lower()
+        time.sleep(POLL_EVERY_S)
+    pytest.fail(f"no SMS reply within {TIMEOUT_S:.0f}s to: {text!r}")
+
+
+@mock_only
+def test_sms_reachability(sms):
+    reply = _ask_sms(sms, "ping")  # the reply text comes back lowercased
+    assert "reply_ok" in reply, f"mock reachability: missing REPLY_OK marker\n{reply[:200]}"
+
+
+@real_only
+def test_sms_basic_reply(sms):
+    reply = _ask_sms(sms, "Please reply OK to confirm you got this text.")
+    assert reply.strip() != "", "empty reply"
+
+
+@real_only
+def test_sms_reports_own_identity(sms):
+    aut_email = sms["aut"].mailboxes.list()[0].email_address.lower()  # _ask_sms() lowercases the reply
+    body = _ask_sms(sms, "Reply with just your Inkbox email address and phone number — short.")
+    assert aut_email in body, f"reply missing email {aut_email!r}\n{body[:200]}"
+
+
+@real_only
+def test_sms_reports_sender_details(sms):
+    # The sender's contact card is looked up through the AUT client by the remote mailbox address.
+    sender_email = sms["remote"].mailboxes.list()[0].email_address
+    cards = sms["aut"].contacts.lookup(email=sender_email)
+    if not cards:
+        pytest.skip("no contact card for the sender to report")
+    card = cards[0]
+    name = getattr(card, "preferred_name", None) or getattr(card, "given_name", None) or ""
+    body = _ask_sms(sms, "Who am I to you? Tell me what you have on file about me.")
+    assert not name or name.lower() in body, f"reply missing sender name {name!r}\n{body[:200]}"
+
+
+@real_only
+def test_sms_aware_of_inkbox_tools(sms):
+    known_tools = _plugin_tool_names()
+    reply = _ask_sms(sms, "Name three of your Inkbox tools (exact names).")
+    hits = list(filter(lambda tool: tool.lower() in reply, known_tools))  # three are asked for, two suffice
+    assert len(hits) >= 2, f"agent named only {hits} of its tools\n{reply[:300]}"
diff --git a/tests/live/test_voice.py b/tests/live/test_voice.py
new file mode 100644
index 0000000..ccd1824
--- /dev/null
+++ b/tests/live/test_voice.py
@@ -0,0 +1,153 @@
+"""Live voice-call suite — real phone calls, real model, transcript-verified.
+
+Two scenarios, each run against a bridge booted in the matching speech mode (the
+workflow sets that up and selects the scenario via VOICE_SCENARIO):
+
+  * inbound_inkbox   — the driver calls the agent; the agent answers with Inkbox
+                       STT/TTS and holds a turn.
+  * outbound_realtime — the driver texts "call me"; the agent places a call back,
+                       powered by the realtime API, and holds a turn.
+
+A companion driver process (voice_driver.py) bridges the driver's side of the call
+over an Inkbox tunnel and speaks one line. We then read the stored call transcript
+and assert both parties spoke — proving the agent reached the caller out loud.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import time
+
+import pytest
+
+REMOTE_KEY = os.environ.get("REMOTE_INKBOX_API_KEY")  # key of the driver (remote) identity
+AUT_KEY = os.environ.get("CLAUDE_CODE_INKBOX_API_KEY")  # key of the agent under test (AUT)
+BASE_URL = os.environ.get("INKBOX_BASE_URL", "https://inkbox.ai")
+REAL = os.environ.get("LIVE_REAL_MODEL") == "1"  # the whole suite is skipped unless this is True
+SCENARIO = os.environ.get("VOICE_SCENARIO", "")  # "inbound_inkbox" or "outbound_realtime" selects the test
+STATE_FILE = os.environ.get("VOICE_DRIVER_STATE", "/tmp/voice_driver_state.json")  # JSON read by _driver_state()
+TIMEOUT_S = float(os.environ.get("LIVE_VOICE_TIMEOUT", "220"))  # seconds each polling loop waits
+POLL_EVERY_S = 6.0  # seconds slept between two polls
+
+pytestmark = pytest.mark.skipif(
+    not (REMOTE_KEY and AUT_KEY and REAL),
+    reason="voice suite: needs both keys + LIVE_REAL_MODEL=1",
+)
+
+
+def _digits(s: str) -> str:
+    return re.sub(r"\D", "", s if s else "")  # digits only; a falsy input gives ""
+
+
+def _client(key):
+    import inkbox  # imported at call time rather than at module import
+
+    return inkbox.Inkbox(api_key=key, base_url=BASE_URL)
+
+
+def _driver_state() -> dict:
+    with open(STATE_FILE) as state_file:  # JSON document at the path held in STATE_FILE
+        return json.loads(state_file.read())
+
+
+def _aut_phone(aut) -> str:
+    numbers = aut.phone_numbers.list()
+    assert numbers, "AUT identity has no phone number"
+    return numbers[0].number  # number of the first listed entry
+
+
+def _segments(remote, number_id, call_id):
+    """Fetch a call's transcript; return (all, "remote"-party, "local"-party) segments."""
+    segs = remote.transcripts.list(number_id, call_id)
+    def spoken_by(party):  # segments of that party (case-insensitive) whose text is not blank
+        return [s for s in segs if (getattr(s, "party", "") or "").lower() == party and (s.text or "").strip()]
+    return segs, spoken_by("remote"), spoken_by("local")
+
+
+def _wait_for_two_way_call(remote, number_id, call_id):
+    """Poll the transcript until both parties have spoken; return the "remote" party's
+    text joined with " | ". Calls ``pytest.fail`` once TIMEOUT_S has elapsed."""
+    give_up_at = time.monotonic() + TIMEOUT_S
+    status = ""
+    while time.monotonic() < give_up_at:
+        try:
+            _everything, theirs, ours = _segments(remote, number_id, call_id)
+        except Exception as exc:  # transcripts may 404 until the call is set up
+            status = f"transcripts not ready: {exc!r}"
+        else:
+            if theirs and ours:
+                # the agent reached the caller out loud, in a two-way call
+                return " | ".join(seg.text.strip() for seg in theirs)
+            status = f"segments so far: remote={len(theirs)} local={len(ours)}"
+        time.sleep(POLL_EVERY_S)
+    pytest.fail(f"agent never held a two-way call within {TIMEOUT_S:.0f}s ({status})")
+
+
+def _aut_speech_mode(aut, direction, driver_number):
+    """Return ``(use_inkbox_tts, use_inkbox_stt)`` from the first listed AUT call in
+    ``direction`` with the driver whose TTS flag is set. (True, True) is Inkbox
+    STT/TTS and (False, False) realtime, so each leg can prove its speech path."""
+    num_id = str(aut.phone_numbers.list()[0].id)
+    driver_tail = _digits(driver_number)[-10:]
+    def _qualifies(call) -> bool:
+        same_direction = (getattr(call, "direction", "") or "").lower() == direction
+        same_peer = _digits(getattr(call, "remote_phone_number", "") or "")[-10:] == driver_tail
+        return same_direction and same_peer and call.use_inkbox_tts is not None
+    answered = [call for call in aut.calls.list(num_id, limit=10) if _qualifies(call)]
+    assert answered, f"no answered {direction} agent call with the driver found"
+    return answered[0].use_inkbox_tts, answered[0].use_inkbox_stt  # newest first
+
+
+@pytest.mark.skipif(SCENARIO != "inbound_inkbox", reason="inbound Inkbox STT/TTS leg only")
+def test_inbound_call_inkbox_tts_stt():
+    """The driver phones the agent, which must answer and speak using Inkbox STT/TTS."""
+    driver = _driver_state()
+    remote, aut = _client(REMOTE_KEY), _client(AUT_KEY)
+    aut_phone = _aut_phone(aut)
+
+    # Dial the agent from the driver's number; the driver's own media WebSocket
+    # URL is handed to Inkbox for the driver's side of the call.
+    placed = remote.calls.place(
+        from_number=driver["number"], to_number=aut_phone, client_websocket_url=driver["ws_url"],
+    )
+    agent_said = _wait_for_two_way_call(remote, driver["number_id"], placed.id)
+    assert agent_said, "agent produced no speech on the inbound call"
+    tts, stt = _aut_speech_mode(aut, "inbound", driver["number"])
+    assert tts and stt, f"inbound call should run Inkbox STT/TTS, got tts={tts} stt={stt}"
+
+
+@pytest.mark.skipif(SCENARIO != "outbound_realtime", reason="outbound realtime leg only")
+def test_outbound_call_realtime():
+    """The driver texts a call-me request; the agent must ring back over the realtime path."""
+    driver = _driver_state()
+    remote, aut = _client(REMOTE_KEY), _client(AUT_KEY)
+    aut_phone = _aut_phone(aut)
+    aut_tail = _digits(aut_phone)[-10:]
+
+    def _calls_from_aut():
+        listing = remote.calls.list(driver["number_id"], limit=30)
+        return [c for c in listing if (getattr(c, "direction", "") or "").lower() == "inbound"
+                and _digits(getattr(c, "remote_phone_number", "") or "")[-10:] == aut_tail]
+
+    seen_ids = {c.id for c in _calls_from_aut()}
+    remote.texts.send(driver["number_id"], to=aut_phone, text="Please call me right now by phone — give me a ring.")
+
+    # Wait for a call id that was not in the snapshot, then verify that call's transcript.
+    deadline = time.monotonic() + TIMEOUT_S
+    call_id = None
+    while time.monotonic() < deadline:
+        new_call = next((c for c in _calls_from_aut() if c.id not in seen_ids), None)
+        if new_call is not None:
+            call_id = new_call.id
+            break
+        time.sleep(POLL_EVERY_S)
+    assert call_id, f"agent never placed a call back within {TIMEOUT_S:.0f}s"
+
+    agent_said = _wait_for_two_way_call(remote, driver["number_id"], call_id)
+    assert agent_said, "agent produced no speech on the outbound call"
+
+    tts, stt = _aut_speech_mode(aut, "outbound", driver["number"])
+    assert tts is False and stt is False, \
+        f"outbound call must be powered by the realtime API (Inkbox speech off), got tts={tts} stt={stt}"
diff --git a/tests/live/voice_driver.py b/tests/live/voice_driver.py
new file mode 100644
index 0000000..f1ff2a3
--- /dev/null
+++ b/tests/live/voice_driver.py
@@ -0,0 +1,172 @@
+"""Live voice-call driver: the peer on the other end of a real phone call.
+
+Opens an Inkbox tunnel for the driver identity, serves the call-media WebSocket
+behind it, and bridges audio in Inkbox STT/TTS mode (text frames only — no local
+model). It speaks one scripted line so the agent under test gets a turn, and the
+call transcript (read separately by the test) proves the agent replied.
+
+Run as a standalone process alongside the gateway. On startup it writes a small
+JSON state file (public WS URL, phone number, number id, handle) that the test reads
+to place or expect a call. Two call directions are supported by the same bridge:
+  * the test places a call to the agent and passes this driver's WS URL, or
+  * the agent calls this driver's number, which is set to auto-accept onto the
+    same WS URL.
+
+Env (timing knobs, in seconds: VOICE_DRIVER_SPEAK_AFTER, default 3; VOICE_DRIVER_LISTEN, default 12):
+  REMOTE_INKBOX_API_KEY   driver identity key (identity-scoped; required)
+  INKBOX_BASE_URL         API root (default https://inkbox.ai)
+  VOICE_DRIVER_PORT       local port the tunnel forwards to (default 8090)
+  VOICE_DRIVER_STATE      path to write the JSON state file (default /tmp/voice_driver_state.json)
+  VOICE_DRIVER_LINE       the one line the driver speaks (default below)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import threading
+import time
+from pathlib import Path
+
+import uvicorn
+from fastapi import FastAPI, WebSocket
+from starlette.websockets import WebSocketState
+
+from inkbox import Inkbox
+from inkbox.tunnels.client import connect as tunnel_connect
+
+logging.basicConfig(format="%(asctime)s %(levelname)s driver %(message)s", level=logging.INFO)
+log = logging.getLogger("voice_driver")
+
+API_KEY = os.environ["REMOTE_INKBOX_API_KEY"]  # required: a missing key raises KeyError at import
+BASE_URL = os.getenv("INKBOX_BASE_URL", "https://inkbox.ai")
+PORT = int(os.getenv("VOICE_DRIVER_PORT", "8090"))
+STATE_FILE = os.getenv("VOICE_DRIVER_STATE", "/tmp/voice_driver_state.json")
+LINE = os.getenv(
+    "VOICE_DRIVER_LINE",
+    "Hi, this is a quick test call. Please reply out loud with one short sentence, then say goodbye.",
+)
+# Seconds to wait after the call's "start" event before speaking, so the agent's greeting lands first.
+SPEAK_AFTER_S = float(os.getenv("VOICE_DRIVER_SPEAK_AFTER", "3"))
+# Seconds to listen after speaking before hanging up. A dropped WS does NOT end the call, so an
+# explicit stop is sent; otherwise the leg lingers until the server max-duration cap.
+LISTEN_S = float(os.getenv("VOICE_DRIVER_LISTEN", "12"))
+
+app = FastAPI()
+
+
+@app.get("/health")
+async def health() -> dict:
+    return dict(ok=True)  # liveness probe: constant payload, nothing else is checked
+
+
+@app.websocket("/phone/media/ws")
+async def phone_media_ws(ws: WebSocket) -> None:
+    """Serve one call-media WebSocket with Inkbox-managed speech: say LINE once, listen, hang up."""
+    import asyncio
+
+    # Opting into Inkbox TTS and STT on accept means both directions carry text, not audio.
+    speech_headers = [
+        (b"x-use-inkbox-text-to-speech", b"true"),
+        (b"x-use-inkbox-speech-to-text", b"true"),
+    ]
+    await ws.accept(headers=speech_headers)
+    log.info("call WS accepted")
+    has_spoken = asyncio.Event()
+    turn_task: asyncio.Task | None = None
+
+    async def _speak(text: str) -> None:
+        if not has_spoken.is_set():
+            has_spoken.set()  # claim the one utterance before the first await so a second caller no-ops
+            await ws.send_text(json.dumps({"event": "text", "delta": text}))
+            await ws.send_text(json.dumps({"event": "text", "done": True}))
+            log.info("spoke: %s", text)
+
+    async def _run_turn() -> None:
+        # Timed script: wait, speak (a no-op if already spoken), let the agent answer, then hang up.
+        await asyncio.sleep(SPEAK_AFTER_S)
+        await _speak(LINE)
+        await asyncio.sleep(LISTEN_S)
+        try:
+            await ws.send_text(json.dumps({"event": "stop"}))
+            log.info("sent stop (hangup)")
+        except Exception:
+            pass  # best effort: sending the hangup may fail
+
+    try:
+        while True:
+            event = json.loads(await ws.receive_text())
+            kind = event.get("event")
+            if kind == "stop":
+                log.info("call stop: %s", event.get("reason"))
+                break
+            if kind == "start":
+                log.info("call start: %s", event.get("stream_id"))
+                turn_task = asyncio.create_task(_run_turn())
+            elif kind == "transcript" and event.get("is_final"):
+                log.info("heard (final): %s", event.get("text"))
+                await _speak(LINE)  # a final transcript beat our timer, so speak now
+            # Any other event kind is ignored.
+    except Exception as exc:  # noqa: BLE001 — never let the bridge crash the process
+        log.info("WS loop ended: %r", exc)
+    finally:
+        if turn_task is not None:
+            turn_task.cancel()
+        if ws.client_state != WebSocketState.DISCONNECTED:
+            try:
+                await ws.close()
+            except Exception:
+                pass  # closing is best effort
+
+
+def _run_uvicorn() -> uvicorn.Server:  # runs `app` on a daemon thread; returns the server once started
+    server = uvicorn.Server(uvicorn.Config(app, host="127.0.0.1", port=PORT, log_level="warning"))
+    threading.Thread(target=server.run, name="uvicorn", daemon=True).start()  # daemon: won't block exit
+    deadline = time.monotonic() + 10  # startup budget, in seconds
+    while time.monotonic() < deadline:
+        if server.started:
+            return server
+        time.sleep(0.05)  # poll the started flag every 50 ms
+    raise RuntimeError("uvicorn did not start")  # reached only once the 10 s budget is used up
+
+
+def main() -> None:
+    """Start server and tunnel, arm auto-accept, publish the state file, then block until the tunnel ends."""
+    client = Inkbox(api_key=API_KEY, base_url=BASE_URL)
+    handle = client.mailboxes.list()[0].email_address.split("@", 1)[0]   # tunnel name = handle
+    num = client.phone_numbers.list()[0]
+    log.info("driver identity %s number %s", handle, num.number)
+
+    server = _run_uvicorn()
+    listener = revert_to = None  # each stays None until the matching resource has actually been touched
+    # One try covers tunnel setup, number update and state write, so a failure in any of them still cleans up.
+    try:
+        listener = tunnel_connect(
+            client, name=handle, forward_to=f"http://127.0.0.1:{PORT}",
+            state_dir=f"/tmp/inkbox-tunnel-{handle}",
+        )
+        ws_url = f"wss://{listener.tunnel.public_host}/phone/media/ws"
+        log.info("tunnel ready: %s", ws_url)
+
+        # Auto-accept inbound calls (agent → driver) straight onto this WS; record what to restore first.
+        revert_to = getattr(num, "incoming_call_action", None) or "auto_reject"
+        client.phone_numbers.update(num.id, incoming_call_action="auto_accept", client_websocket_url=ws_url)
+        Path(STATE_FILE).write_text(json.dumps({
+            "ws_url": ws_url, "number": num.number, "number_id": str(num.id), "handle": handle,
+        }))
+        log.info("state written to %s", STATE_FILE)
+        listener.wait()
+    finally:
+        if revert_to:  # leave the number as we found it so other suites aren't affected
+            try:
+                client.phone_numbers.update(num.id, incoming_call_action=revert_to)
+            except Exception as exc:  # noqa: BLE001
+                log.info("number revert failed: %r", exc)
+        if listener is not None:
+            listener.close()
+        server.should_exit = True
+
+
+if __name__ == "__main__":
+    main()