From 87114d143a80bbde940e5f22e2cf80602eab4b91 Mon Sep 17 00:00:00 2001 From: Matt Galligan Date: Sun, 7 Jun 2026 10:44:19 -0400 Subject: [PATCH] feat: harden dispatch live-use trust contracts Refresh the App Server schema baseline for codex-cli 0.137.0-alpha.4, project the useful stable request fields, and bump the package to 0.4.1. --- .agents/plans/live-use-hardening/GOAL.md | 28 +++ .agents/plans/live-use-hardening/PLAN.md | 123 ++++++++++++ .agents/plans/live-use-hardening/REFS.md | 59 ++++++ .agents/plans/live-use-hardening/RETRO.md | 64 +++++++ .claude/rules/client.md | 3 +- .claude/rules/contracts.md | 8 + .claude/rules/surfaces.md | 5 + README.md | 18 +- .../0002-single-daemon-over-one-app-server.md | 4 +- ...ommand-config-presets-and-name-prefixes.md | 10 +- docs/adrs/0020-live-use-trust-contracts.md | 68 +++++++ docs/adrs/README.md | 1 + docs/development/design.md | 17 +- docs/research/app-server-verification.md | 60 ++++-- docs/usage/README.md | 41 +++- plugins/dispatch/README.md | 7 +- pyproject.toml | 2 +- skills/dispatch/SKILL.md | 36 +++- spikes/README.md | 6 +- src/outfitter/dispatch/client/client.py | 44 ++++- src/outfitter/dispatch/client/models.py | 11 ++ src/outfitter/dispatch/contracts/context.py | 22 ++- .../dispatch/contracts/derive_cli.py | 88 ++++++--- src/outfitter/dispatch/core/handlers.py | 84 +++++++- src/outfitter/dispatch/core/models.py | 30 ++- src/outfitter/dispatch/core/ops.py | 9 +- src/outfitter/dispatch/core/reactor.py | 12 +- src/outfitter/dispatch/doctor.py | 34 +++- src/outfitter/dispatch/registry/models.py | 5 + src/outfitter/dispatch/registry/store.py | 82 +++++++- src/outfitter/dispatch/surfaces/cli.py | 158 +++++++++++++++- tests/client/test_client.py | 61 ++++++ tests/client/test_events.py | 8 + tests/client/test_models.py | 50 ++++- tests/core/test_handlers.py | 95 +++++++++- tests/core/test_sync.py | 4 +- tests/fakes.py | 52 ++++- tests/registry/test_store.py | 125 +++++++++++- tests/surfaces/test_derive_cli.py | 33 +++- tests/surfaces/test_derive_mcp.py | 2 +- tests/surfaces/test_parity.py | 51 ++++- tests/test_doctor.py | 179 ++++++++++++++++++ uv.lock | 2 +- 43 files changed, 1662 insertions(+), 139 deletions(-) create mode 100644 .agents/plans/live-use-hardening/GOAL.md create mode 100644 .agents/plans/live-use-hardening/PLAN.md create mode 100644 .agents/plans/live-use-hardening/REFS.md create mode 100644 .agents/plans/live-use-hardening/RETRO.md create mode 100644 docs/adrs/0020-live-use-trust-contracts.md diff --git a/.agents/plans/live-use-hardening/GOAL.md b/.agents/plans/live-use-hardening/GOAL.md new file mode 100644 index 0000000..0ae6cf1 --- /dev/null +++ b/.agents/plans/live-use-hardening/GOAL.md @@ -0,0 +1,28 @@ +# Live-use hardening - pasteable goal + +```text +/goal Work in the dispatch repo root. Implement the live-use hardening plan in .agents/plans/live-use-hardening/PLAN.md. + +Context: a real Trails delegation attempt exposed trust failures that unit/parity tests did not catch. Dispatch accepted work that had not proven alive, hid model/system failures behind raw watch events, let slash-command goal text look like a native goal, left daemon lifecycle commands outside the JSON/scriptability contract, and allowed CLI projection hand-wiring to grow beyond the no-drift doctrine. + +Objective: make dispatch trustworthy for live agent coordination. Document the incident, add regression tests and guardrails, tighten derived surface boundaries, fix launch/error/status semantics, make cleanup/lifecycle commands agent-safe, update docs/skills, and run local review until no P0/P1/P2 issues remain. + +Required outcomes: +- A durable plan/retro records decisions, checks, review findings, and deferred work. +- Public CLI/MCP projections are governed by explicit projection metadata or an allowlisted control-surface contract; ungoverned hand-wired per-op routes are test failures. +- `new`/`send` outputs distinguish accepted delivery from proof of execution. +- `get`/list-like status surfaces expose latest turn/error state well enough that raw `watch` is not required to discover obvious model/system failures. +- `/goal ...` as message text is either rejected/warned or replaced by a first-class `new --goal` path that calls the native goal API. +- Destroy operations have explicit non-interactive confirmation support, and `up`/`down` expose JSON output. +- Registry schema recovery is boring: doctor/up explain or expose a safe migrate/repair path without manual DB surgery. +- Docs, README, skills, plugin docs, schemas/help, tests, and ADR/rules are updated where behavior or doctrine changes. +- Checks pass, including focused tests and `just check`; run local review loops and fix P2+ findings. + +Constraints: +- Preserve contract-first/no-drift architecture; if a surface needs special ergonomics, make the override explicit and tested. +- Do not touch live user Codex state in tests. Use isolated `DISPATCH_HOME`/`CODEX_HOME` for any smoke. +- Do not merge, publish, or mutate release state unless explicitly asked. +- If model preflight cannot be made reliable from the current App Server contract, surface the first failure clearly and record the limitation. + +Done only when all required outcomes are implemented or explicitly deferred with evidence, local checks pass, review P2+ is clear, and RETRO.md contains final proof. +``` diff --git a/.agents/plans/live-use-hardening/PLAN.md b/.agents/plans/live-use-hardening/PLAN.md new file mode 100644 index 0000000..6a78dec --- /dev/null +++ b/.agents/plans/live-use-hardening/PLAN.md @@ -0,0 +1,123 @@ +# Live-use Hardening - implementation plan + +One-branch hardening packet for the real-use failures found during a Trails +delegation attempt. Goal loop: [`GOAL.md`](./GOAL.md). References: +[`REFS.md`](./REFS.md). Execution ledger: [`RETRO.md`](./RETRO.md). + +## Objective + +Make dispatch trustworthy when an agent uses it for real coordination: + +- keep surface projection honest and guarded; +- make launch results distinguish "accepted" from "alive/responded"; +- surface latest turn/model/system failures in normal status surfaces; +- make native goals first-class instead of relying on slash-command text; +- make daemon lifecycle and destructive cleanup scriptable; +- make registry migration/recovery safe and obvious; +- update docs, skills, and tests so this class of failure does not sneak + through again. + +## Incident facts + +The real Trails use case found these product failures: + +- A stale registry with schema v1 and missing tables required manual DB backup + and recreation because `dispatch up` no-oped while a daemon answered. +- `dispatch up --json` failed even though most agent-operated commands are + JSON-shaped. +- `dispatch new --model gpt-5.5-codex --text "$goal_prompt"` used a stale, + guessed explicit model id and returned + `sent: true` and `status: idle`, but no assistant work happened. +- `/goal ...` sent as initial text did not create native goal state. +- The unsupported model failure was only obvious through `dispatch watch`, not + `dispatch get`. +- `trigger rm --json` and `archive --json` still required interactive stdin. +- The existing parity/handler tests stayed green. + +## Root causes to address + +1. Projection doctrine is written down, but CLI has bespoke route functions and + control commands without an enforceable manifest/allowlist. +2. Tests prove routing and accepted calls, not live coordination trust. +3. Normal state models do not persist latest turn failures or suspicious + no-assistant completions. +4. Goal text and native App Server goals are separate, but docs/skills do not + make the boundary loud enough. +5. Integration tests are intentionally out of the default gate, so real + semantics need cheap fake-level regression tests plus release smoke guidance. + +## Implementation chunks + +### Chunk 1 - regression tests and projection guardrails + +- Add failing tests for: + - destroy commands supporting an explicit non-interactive confirmation flag; + - `up --json` / `down --json`; + - `/goal` text guard or first-class `new --goal`; + - `TurnFailed.message` being persisted and exposed by `get`; + - `new` not overclaiming that a turn produced work. +- Introduce CLI projection metadata/manifest or a strict allowlist that + classifies public commands as: + - op projection; + - composed op projection; + - surface control. +- Add tests that fail for ungoverned public commands and mismatched schema/help + routes. + +### Chunk 2 - launch, goal, and status semantics + +- Replace or supplement `NewLane.sent` with explicit launch fields such as + `message_accepted`, `goal_set`, `first_turn`, and/or a structured launch + result. Maintain honest naming in docs/schemas. +- Add `NewInput.goal` or equivalent. If text starts with `/goal` and no native + goal field is used, fail or warn clearly. +- Persist latest turn/error state in the registry and expose it in `get`, + relevant list outputs, and MCP schemas. +- Ensure model/system failures show in normal status without raw `watch`. + +### Chunk 3 - scriptable surfaces and registry recovery + +- Add `--yes`/`--no-interactive` support for destroy-intent CLI commands from + projection rules, not one-off commands. +- Add JSON output to `up` and `down`. +- Improve doctor recovery for versioned missing tables. +- Add a safe registry migrate/repair command or lifecycle helper if it can be + done without broad architecture churn. At minimum, make `up`/doctor refuse + misleading no-op recovery and provide exact safe commands. +- Add tests for older schema v1/v2 cases with existing lanes/triggers. + +### Chunk 4 - docs, skills, and release smoke + +- Update README, docs/usage, skills/dispatch, skills/dm if affected, plugin docs, + AGENTS/rules/ADRs where behavior or doctrine changed. +- Add a documented pre-release/live-dogfood smoke that uses isolated state and + proves lane liveness. +- Update examples/schema expectations. + +### Chunk 5 - local review and finalization + +- Run focused tests after each chunk. +- Run `just check`. +- Run a local review pass focused on P0/P1/P2: + - surface derivation drift; + - live-use trust; + - destructive/scripted safety; + - registry migration safety; + - docs/skill truthfulness. +- Fix P2+; fix cheap P3s; record deferred P3s in RETRO. + +## Deferral policy + +Acceptable deferrals only if recorded in RETRO with evidence: + +- account-specific model preflight if `model/list`/verification does not expose + reliable support in the current App Server; +- optional Graphite/worktree ownership, which is useful but not the root control + plane trust failure; +- long-lived streaming subscriptions beyond bounded `watch`. + +## Done + +Done only when tests and docs prove the full objective, `just check` passes, a +review loop has no unresolved P0/P1/P2, and RETRO contains exact verification +commands, final git state, remaining risks, and PR state if submitted. diff --git a/.agents/plans/live-use-hardening/REFS.md b/.agents/plans/live-use-hardening/REFS.md new file mode 100644 index 0000000..d888d1a --- /dev/null +++ b/.agents/plans/live-use-hardening/REFS.md @@ -0,0 +1,59 @@ +# Live-use Hardening - references + +## Field report + +- `/tmp/trails-dispatch-real-use-feedback-2026-06-07.md` + - Registry schema v1 missing `lane_snapshots` and `lane_sync_sources`. + - `dispatch up --json` unsupported; `dispatch up` no-oped against running + daemon. + - `dispatch new --model gpt-5.5-codex --text "$goal_prompt"` used a stale, + guessed explicit model id and returned `sent: true`, `status: idle`, but no + assistant response and no goal state. + - `watch` surfaced unsupported model error; `get` did not. + - Destroy cleanup required `printf 'y\n' | ... --json`. + +## Architecture docs + +- `docs/adrs/0000-contract-first-surface-derived.md` + - Every surface is a pure projection of one op registry. + - Parity tests must check behavior, not only names. +- `docs/adrs/0010-surface-projections-are-ergonomic-not-isomorphic.md` + - Surfaces may group/rename/compose, but may not restate schemas, examples, + safety intent, error behavior, or capability policy. +- `.claude/rules/contracts.md` + - Overrides must be visible escape hatches, not default hand wiring. +- `.claude/rules/surfaces.md` + - Surface modules contain projection wiring only. + +## Code hot spots + +- `src/outfitter/dispatch/contracts/derive_cli.py` + - CLI projection, custom route functions, schema route table, destroy prompt. +- `src/outfitter/dispatch/surfaces/cli.py` + - `doctor`, `up`, `down`, and `mcp` hand-written control commands. +- `src/outfitter/dispatch/core/handlers.py` + - `new_lane`, `show`, send/goal handlers. +- `src/outfitter/dispatch/core/reactor.py` + - `TurnFailed` currently updates status but does not persist message. +- `src/outfitter/dispatch/registry/store.py` + - Schema migrations and registry state. +- `src/outfitter/dispatch/doctor.py` + - Registry diagnostics and recovery hints. + +## Existing tests + +- `tests/surfaces/test_parity.py` +- `tests/surfaces/test_derive_cli.py` +- `tests/core/test_handlers.py` +- `tests/test_doctor.py` +- `tests/integration/test_daemon_e2e.py` +- `tests/integration/test_app_server.py` + +## Verification commands + +```bash +uv run pytest tests/surfaces/test_parity.py tests/surfaces/test_derive_cli.py tests/test_doctor.py tests/core/test_handlers.py -q +just check +``` + +Optional live smoke must use isolated runtime paths. diff --git a/.agents/plans/live-use-hardening/RETRO.md b/.agents/plans/live-use-hardening/RETRO.md new file mode 100644 index 0000000..2f19eb0 --- /dev/null +++ b/.agents/plans/live-use-hardening/RETRO.md @@ -0,0 +1,64 @@ +# Live-use Hardening - execution ledger + +Durable execution ledger for the live-use trust hardening goal. + +## Timeline + +- 2026-06-07: Created packet on `feat/live-use-hardening` after the Trails + real-use report showed green tests missing operator trust failures. +- 2026-06-07: Implemented runtime turn-state persistence, native `new --goal`, + honest `message_accepted` launch output, scriptable daemon lifecycle output, + destroy-command confirmation flags, registry migration recovery, and explicit + CLI projection/control manifests. +- 2026-06-07: Updated README, usage docs, dispatch skill, plugin README, + development design notes, agent rules, and ADR-0020. + +## Checks + +- `uv run pytest tests/test_doctor.py tests/surfaces/test_parity.py tests/surfaces/test_derive_cli.py tests/core/test_handlers.py tests/registry/test_store.py -q` + - `103 passed` +- `uv run pytest -q` + - `210 passed, 9 deselected` +- `just check` + - `ruff check`: passed + - `ruff format --check`: passed + - `mypy src tests`: passed + - `pytest`: `210 passed, 9 deselected` + - `uv build`: built `outfitter_dispatch-0.4.0` sdist/wheel + - `scripts/check_package_contents.py`: passed +- CLI smoke: + - `uv run dispatch schema new | jq -r ...` + - verified goal and `message_accepted` schema descriptions. + - `uv run dispatch schema 'list --unmanaged' | jq -r .op` + - returned `discover`. + - `uv run dispatch schema 'tail --follow'` + - exited `2` with a clean unknown-command error, matching current docs. + - `uv run dispatch registry migrate --help` + - showed JSON/text, backup, and controlled-running options. + +## Review + +- P0/P1/P2 review pass: + - Verified projection guardrails cover op-backed CLI routes, schema spellings, + and full CLI surface-control allowlist. + - Verified synchronous `turn/start` failures no longer leave registered lanes + looking idle; `new`/`send` now persist latest error state before re-raising. + - Verified `TurnFailed.message` projects through reactor -> registry -> `get`. + - Verified `/goal ...` initial text is rejected unless callers use native + `--goal`, and native goal set happens before the initial turn. + - Verified old registry recovery has doctor guidance plus `registry migrate` + tests, including daemon-running refusal. + - Verified docs/skill/plugin/rules/ADR describe the changed behavior and + current limitations. +- Unresolved P0/P1/P2: none found in local review. + +## Deferred + +- Account/model preflight remains deferred. The current App Server client + accepts model strings on thread/turn options but does not expose a cheap, + reliable account-specific model support check in dispatch's verified contract. + The implemented mitigation is to persist and expose App Server failures through + ordinary status surfaces instead of requiring raw `watch`. +- Infinite streaming remains deferred. `watch` is still a bounded live event + sample over a request/response control socket; a subscription-capable control + socket remains future work. diff --git a/.claude/rules/client.md b/.claude/rules/client.md index 42c2edd..277924a 100644 --- a/.claude/rules/client.md +++ b/.claude/rules/client.md @@ -19,6 +19,7 @@ Demux the single stream: responses by request `id`, notifications by `threadId`, - `thread/start.sandbox` is a **string** enum (`read-only`/`workspace-write`/`danger-full-access`); `turn/start.sandboxPolicy` is an **object** (`{type:"readOnly", ...}`). Different encodings — model both. - `turn/steer` requires `expectedTurnId` (from `turn/started`). - `thread/list` results are under `result.data` (not `result.threads`); `useStateDbOnly:true` reads the persisted store. +- Current `thread/list` supports native `archived`, `cwd`, `searchTerm`, `sourceKinds`, and sort filters; use them when they match dispatch semantics, then keep registry/authority filters in core. - `thread/search` is experimental; enable the experimental API capability before using it and keep the wrapper thin. - `thread/resume` of a *persisted* thread yields live event fan-out; pre-persistence it errors `no rollout found`. - Approvals are server→client requests: lane emits `thread/status/changed` `activeFlags:["waitingOnApproval"]`; reply `{id, result:{decision}}` (`accept`/`acceptForSession`/`decline`/`cancel`); server emits `serverRequest/resolved`. File-change approvals carry **no diff** — correlate by `itemId` to the `fileChange` item. @@ -26,5 +27,5 @@ Demux the single stream: responses by request `id`, notifications by `threadId`, ## Discipline -- Pin the binary; regenerate wire models from `codex app-server generate-json-schema` for that version. Do NOT depend on the `openai-codex` Python SDK (it pins an older CLI). +- Pin/record the binary; regenerate wire models from `codex app-server generate-json-schema` for that version. Do not assume the `openai-codex` Python SDK matches the installed CLI; it has lagged before. - No business logic here — this layer is transport + typed primitives only. Orchestration lives in `core/`. diff --git a/.claude/rules/contracts.md b/.claude/rules/contracts.md index 40762b1..c00349a 100644 --- a/.claude/rules/contracts.md +++ b/.claude/rules/contracts.md @@ -20,6 +20,11 @@ compose ops (for example `list --unmanaged` → `discover`, `goal status` → derived from the registry; never hand-implement the same behavior separately in a surface. +If the CLI needs custom shell grammar, declare it in the CLI projection manifest +(`CliRoute`, `cli_public_routes`, and when needed `cli_schema_routes`). Do not add +or special-case a command path without a parity test proving the path reaches the +canonical op and that `dispatch schema ` reports the canonical op schema. + ## Derivation (never hand-write a surface per op) Surfaces are pure projections of the registry, mirroring Trails' `derive* → create* → surface`: @@ -44,6 +49,9 @@ One `DispatchError` hierarchy in `errors.py` (e.g. `NotFoundError`, `LaneBusyErr - Adding capability = adding an op, registering it, and ensuring the derived projections route it intentionally. If a route is missing, the parity tests should fail. +- If a route is intentionally a surface control rather than an op (`doctor`, + `up`, `down`, `registry migrate`, `schema`, `mcp`), document why and keep it out + of per-op business logic. - Every op exposed on MCP/remote must define `output`. - Keep handlers pure-ish: input in, output out (or raise). Side effects go through injected dependencies (the App Server client, the registry) passed via `ctx`, never imported ad hoc. - A parity test must stay green — and it checks **behavior/reachability, not diff --git a/.claude/rules/surfaces.md b/.claude/rules/surfaces.md index 6dc1b4b..dc0ed8c 100644 --- a/.claude/rules/surfaces.md +++ b/.claude/rules/surfaces.md @@ -9,6 +9,9 @@ Path: `src/outfitter/dispatch/surfaces/`. Each surface is a thin, generated proj tree may group/alias ops for shell ergonomics, but each command marshals contract input → calls the daemon control socket → renders the result with Rich. The CLI is a **sync** client; it does not import `core/` or `client/`. + Process/control commands such as `doctor`, `up`, `down`, `registry migrate`, + `schema`, and `mcp` are the allowed exceptions: they manage or inspect the + surface/runtime itself and must not duplicate op behavior. - **MCP** (`mcp.py`): a stdio MCP server (via the `mcp` SDK) from `derive_mcp(registry)`; grouped tool handlers route to the daemon control socket, same as the CLI. Spawned by the MCP client (Claude/Codex), not hosted @@ -18,6 +21,8 @@ Path: `src/outfitter/dispatch/surfaces/`. Each surface is a thin, generated proj - Keep the **parity test** green: every registered op must be reachable through each surface's derived projection with matching schemas, annotations, and error projection. Surface names do not need to equal op ids. +- Destroy-intent CLI routes must preserve the derived confirmation behavior: + prompt interactively, and require `--yes` when paired with `--no-interactive`. ## Why diff --git a/README.md b/README.md index d500fe2..8ad9cc7 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ uv tool install outfitter-dispatch dispatch --help dispatchd --help dispatch doctor +dispatch up --json +dispatch down --json ``` From a source checkout: @@ -20,7 +22,7 @@ From a source checkout: uv sync uv run dispatch --help uv run dispatch doctor --no-app-server -uv run dispatch up +uv run dispatch up --json uv run dispatch daemon status ``` @@ -30,12 +32,13 @@ Create an owned managed thread, send it work, and inspect the daemon: uv run dispatch new \ --name docs \ --cwd /path/to/dispatch \ + --goal "Finish the docs review." \ --text "Please summarize the current stack state." uv run dispatch list +uv run dispatch get uv run dispatch tail --limit 20 -uv run dispatch goal set "Finish the docs review." uv run dispatch daemon log --limit 10 -uv run dispatch down +uv run dispatch down --json ``` Use owned managed threads for turn-writing work. Existing desktop Codex threads can be attached as @@ -48,12 +51,19 @@ unmanaged Codex thread ids, and `search` can span both. Attach is metadata-only default; use `dispatch sync ` when you want dispatch to refresh its local indexed view of an attached thread. +`new` reports whether the first message was accepted by the App Server, not whether +assistant work completed. Use `get` to inspect the latest turn state and persisted +App Server errors, or `watch` for a bounded live event sample. Slash commands in +`--text` are plain text; use `--goal` when creating a native App Server goal. + For the operator guide, CLI/MCP examples, triggers, and plugin setup, start at [`docs/usage/README.md`](docs/usage/README.md). Start troubleshooting with `dispatch doctor`. It checks PATH visibility, the Codex CLI and auth footprint, daemon socket/pidfile state, registry schema/integrity, packaged -skills/plugin assets, and a low-risk Codex App Server initialize smoke. +skills/plugin assets, and a low-risk Codex App Server initialize smoke. If doctor reports +an old registry schema, stop the daemon and run `dispatch registry migrate` before +starting it again. ## Agent And Plugin Support diff --git a/docs/adrs/0002-single-daemon-over-one-app-server.md b/docs/adrs/0002-single-daemon-over-one-app-server.md index 3651252..3bdca2e 100644 --- a/docs/adrs/0002-single-daemon-over-one-app-server.md +++ b/docs/adrs/0002-single-daemon-over-one-app-server.md @@ -4,7 +4,7 @@ slug: single-daemon-over-one-app-server title: Single Daemon over One App Server status: accepted created: 2026-06-02 -updated: 2026-06-05 +updated: 2026-06-09 owners: ['[galligan](https://github.com/galligan)'] --- @@ -18,7 +18,7 @@ dispatch drives many lanes (Codex threads), reacts to their live events for trig Run one long-lived **daemon** (`dispatchd`) that spawns and owns **one** `codex app-server --listen stdio://` (sharing `CODEX_HOME=~/.codex` so it sees existing desktop lanes). A message router demuxes the single connection by request id / `threadId` into per-lane event streams. The daemon hosts the core and executes all op handlers, and exposes a Unix-socket control API — the canonical projection surfaces render. The CLI is a thin **sync** client; MCP (`dispatch mcp`) is a stdio server routing to the same control API. -We drive the App Server binary directly (not the `openai-codex` SDK, which pins an older CLI than local). +We drive the App Server binary directly; the `openai-codex` SDK has lagged the installed CLI before, so adopting it would require a fresh bundled-binary check. ## Consequences diff --git a/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md b/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md index f8148b5..1e2452b 100644 --- a/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md +++ b/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md @@ -4,7 +4,7 @@ slug: new-command-config-presets-and-name-prefixes title: New Command, Config Presets, and Name Prefixes status: proposed created: 2026-06-03 -updated: 2026-06-03 +updated: 2026-06-09 owners: ['[galligan](https://github.com/galligan)'] --- @@ -43,7 +43,6 @@ Configuration has defaults plus named presets: cwd = "." sandbox = "read-only" approval_policy = "never" -model = "gpt-5-codex" effort = "medium" ephemeral = false prefix = "[${DISPATCH.CWD.REPO}]" @@ -68,6 +67,11 @@ prefix = "[${DISPATCH.CWD.REPO}]" effort = "low" ``` +Omit `model` unless you intentionally want Codex to use an explicit model. +When a preset does pin a model, choose it from the live App Server catalog +(`dispatch model list` once that surface exists) rather than from docs or stale +examples. + Merge order: 1. Built-in safe defaults. @@ -108,7 +112,7 @@ Do not expose arbitrary environment interpolation in v1; it is too easy to leak dispatch should project App Server/SDK options where they are available and verified, but not invent fake knobs. Initial candidates: - thread/session: `cwd`, `sandbox`, `approval_policy`, `approvals_reviewer`, `model`, `model_provider`, `base_instructions`, `developer_instructions`, `personality`, `ephemeral`, `service_tier`. -- initial turn: `text`, `effort`, `summary`, `sandbox_policy`, `approval_policy`, `approvals_reviewer`, `model`, `output_schema`. +- initial turn: `text`, `effort`, `summary`, `sandbox_policy`, `approval_policy`, `approvals_reviewer`, `model`, `service_tier`, `output_schema`. Options should be added through the contract layer so CLI, MCP, remote, docs, schemas, and examples derive from one source. diff --git a/docs/adrs/0020-live-use-trust-contracts.md b/docs/adrs/0020-live-use-trust-contracts.md new file mode 100644 index 0000000..0cfd7ec --- /dev/null +++ b/docs/adrs/0020-live-use-trust-contracts.md @@ -0,0 +1,68 @@ +# ADR-0020: Live-Use Trust Contracts + +## Status + +Accepted. + +## Context + +Real dispatch use exposed a trust failure: an agent could create or message a +thread, receive JSON that looked successful, and still have no useful evidence +that the Codex App Server accepted native goal state or produced assistant work. +Separately, some CLI behavior had drifted into hand-coded surface branches that +were not fully represented in the projection/parity tests. + +The project already has a contract-first design, but live users and agents need +more than a clean architecture diagram. They need explicit recovery paths, +machine-readable control output, and status fields that distinguish request +acceptance from completed work. + +## Decision + +dispatch will treat live-use trust as part of the public contract: + +- Initial launch output distinguishes `message_accepted` from assistant + completion and exposes the latest observed turn state. +- Native goals are created through first-class goal fields/ops. Slash commands + embedded in message text are not interpreted as App Server control commands. +- Turn lifecycle events are persisted into the registry so `get`/`list` can show + recent runtime status and App Server error text. +- Recovery-oriented control commands (`up --json`, `down --json`, + `registry migrate`, and `doctor`) return scriptable output and concrete next + steps. +- Ergonomic CLI routes, including composed routes such as `list --unmanaged`, + must be declared in the CLI projection manifest and covered by parity tests. +- Destroy-intent CLI operations keep derived confirmation behavior and require + explicit `--yes` for non-interactive scripts. + +## Consequences + +### Positive + +- Agents can verify whether work actually started, failed, or only had its + request accepted. +- Registry schema changes have a documented, tested recovery path. +- CLI/MCP/no-drift work has a sharper guardrail: custom shell ergonomics are + allowed, but unmanifested command paths are not. + +### Tradeoffs + +- Public output models gain a small amount of runtime status detail. +- The CLI has a few process/control commands that are intentionally not ops. + They need an explicit allowlist and tests because they manage the surface + itself rather than business behavior inside the daemon. + +## Alternatives Considered + +- **Keep `sent: true` as the launch signal** — rejected because it conflates + request acceptance with completed assistant work. +- **Allow `/goal ...` text to pass through silently** — rejected because it + makes a thread look goal-driven while bypassing native goal state. +- **Document recovery without a command** — rejected because agents need a + scriptable path that can be tested locally. + +## References + +- [ADR-0000: Contract-First, Surface-Derived Design](0000-contract-first-surface-derived.md) +- [ADR-0010: Surface Projections Are Ergonomic, Not Isomorphic](0010-surface-projections-are-ergonomic-not-isomorphic.md) +- [ADR-0019: Dispatch-Local Refs and Flat Thread CLI](0019-dispatch-local-refs-and-flat-thread-cli.md) diff --git a/docs/adrs/README.md b/docs/adrs/README.md index 380e25f..a7d4665 100644 --- a/docs/adrs/README.md +++ b/docs/adrs/README.md @@ -28,3 +28,4 @@ Files are `NNNN-slug.md`. Copy [`template.md`](template.md) to start one. Keep t | [0017](0017-progressive-thread-sync-index.md) | Progressive Thread Sync Index | Accepted | | [0018](0018-top-level-thread-actions-and-search.md) | Top-Level Thread Actions and Search | Accepted | | [0019](0019-dispatch-local-refs-and-flat-thread-cli.md) | Dispatch-Local Refs and Flat Thread CLI | Accepted | +| [0020](0020-live-use-trust-contracts.md) | Live-Use Trust Contracts | Accepted | diff --git a/docs/development/design.md b/docs/development/design.md index c7a1194..6097203 100644 --- a/docs/development/design.md +++ b/docs/development/design.md @@ -2,7 +2,7 @@ A local control plane for orchestrating Codex agent lanes (threads) over the Codex App Server: create/attach lanes, send work or context, queue delivery, stop active turns, and automate pings on time- and event-based triggers. One authored contract per operation is projected onto multiple surfaces — CLI now, MCP now, remote control later — with no drift. -Status: approved design, implemented through v0 and updated for dispatch-local refs / flat thread CLI. Companion research (verified against `codex-cli 0.136.0-alpha.2`): [`docs/research/app-server-verification.md`](../research/app-server-verification.md) and [`docs/research/orchestration-thesis.md`](../research/orchestration-thesis.md). Decisions: [`docs/adrs/`](../adrs/). Execution ledger: [`../../.agents/plans/v0/RETRO.md`](../../.agents/plans/v0/RETRO.md). +Status: approved design, implemented through v0 and updated for dispatch-local refs / flat thread CLI. Companion research (schema refreshed against `codex-cli 0.137.0-alpha.4`): [`docs/research/app-server-verification.md`](../research/app-server-verification.md) and [`docs/research/orchestration-thesis.md`](../research/orchestration-thesis.md). Decisions: [`docs/adrs/`](../adrs/). Execution ledger: [`../../.agents/plans/v0/RETRO.md`](../../.agents/plans/v0/RETRO.md). ## Naming @@ -72,8 +72,9 @@ Projections (pure functions over the registry, mirroring Trails' `derive* → cr ## Command surface (v1) -- Daemon lifecycle: `up` / `down` (process) · `daemon status` · `daemon log` -- Thread creation: `new [--preset ...] [--text ...] [--no-send]` +- Daemon lifecycle: `up` / `down` (process) · `daemon status` · `daemon log` · + `registry migrate` +- Thread creation: `new [--preset ...] [--goal ...] [--text ...] [--no-send]` - Thread reads/discovery: `get ` · `list` · `list --unmanaged` · `sync ` · `tail ` · `watch ` - Thread management/search: `attach [--sync]` · @@ -104,7 +105,7 @@ for collisions. Titles and `@handles` are mutable convenience labels. | Op | App Server call | Notes (verified) | | --- | --- | --- | | `open` | `thread/start` (then register) | `sandbox` is a STRING enum (`read-only`/`workspace-write`/`danger-full-access`); persists by default (`ephemeral:false`) → spawned lanes show in desktop app, matching the `→ @project:name` convention. | -| `new` | `thread/start` + `thread/name/set` + optional `turn/start` | Applies `.dispatch/config.toml` defaults/presets, name prefixes, verified session/turn options, and optional initial payload. | +| `new` | `thread/start` + `thread/name/set` + optional `thread/goal/set` + optional `turn/start` | Applies `.dispatch/config.toml` defaults/presets, name prefixes, verified session/turn options, optional native goal, and optional initial payload. `service_tier` is sent to both thread creation and the initial turn when configured. Output reports request acceptance, not assistant completion. | | `attach` | `thread/read(includeTurns:false)` (+ register) | Metadata-only by default: verifies the thread id, registers a turn-write locked attached lane, assigns a dispatch ref, and stores sync state without loading turn history. `--sync` runs a quick local index refresh after registration. | | `sync` | `thread/read(includeTurns:false)` + bounded local JSONL parsing | Refreshes dispatch's index/cache for a managed thread: source file identity, sync state, latest event timestamp, latest turn id, preview, and selected metadata. Does not copy transcripts wholesale or grant attached-lane write authority. | | `send` (`mode=send`) | `turn/start` | Delivers a message the lane processes + answers. The DM/`send_message_to_thread` equivalent. `sandboxPolicy` here is an OBJECT (`{type:"readOnly"}`) — different encoding than `thread/start.sandbox`. | @@ -117,9 +118,9 @@ for collisions. Titles and `@handles` are mutable convenience labels. | `archive` (`archive`) | `thread/archive` | Accepts managed refs or unmanaged raw thread ids. If App Server reports `no rollout found` for an owned no-rollout lane, dispatch archives the local registry entry so throwaway lanes can be cleaned up. | | `restore` (`restore`) | `thread/unarchive` | Restores the archived Codex thread only; does not resume or start a new turn. | | `search` (`search`) | experimental `thread/search` for broad search; `thread/read(includeTurns:true)` for one-thread search | Broad search uses App Server search plus dispatch-side managed/unmanaged, repo/directory, and date filters. Thread-focused search reads one transcript and scans locally because App Server search has no thread-id filter. | -| `roster` (`list`) | `thread/list` + registry + status | List results are under `result.data` (NOT `result.threads`); `useStateDbOnly:true` reads the persisted store. | -| `discover` (`list --unmanaged`) | `thread/list` state DB only | Lists persisted Codex sessions that could be attached; it does not resume or register them. | -| `show` (`get`) | registry + optional `thread/read(includeTurns:true)` | Compact managed-thread summary; optional transcript convenience. | +| `roster` (`list`) | `thread/list` + registry + status | List results are under `result.data` (NOT `result.threads`); `useStateDbOnly:true` reads the persisted store. Current App Server also supports native `archived`, `cwd`, `searchTerm`, `sourceKinds`, and sort filters. | +| `discover` (`list --unmanaged`) | `thread/list` state DB only | Lists persisted active Codex sessions that could be attached; asks for recently updated rows and does not resume or register them. | +| `show` (`get`) | registry + optional `thread/read(includeTurns:true)` | Compact managed-thread summary with sync state and latest observed turn runtime/error state; optional transcript convenience. | | `transcript` (`tail`) | `thread/read(includeTurns:true)` | Persisted turn/item snapshot, not a full execution log. | | `watch` (`watch`) | raw app-server event stream, bounded by limit/timeout | Request/response bounded sample; a true infinite tail needs a subscription control-socket extension. | | `goal-get/set/clear` (`goal status/set/clear`) | `thread/goal/{get,set,clear}` | Native App Server goal lifecycle for owned lanes. | @@ -195,4 +196,4 @@ The client supports the full responder loop. v1 surfaces `waiting_on_approval` a - **Cross-process contention** (dispatch vs desktop app-server on one thread) — resolved for v0 by ADR-0005/0018: attached lanes are turn-write locked, while metadata/lifecycle actions are explicit. - **MCP transport** — stdio first; SSE/streamable-HTTP later (mirrors Codex/Trails MCP status). -- **App-server version drift** — pin the binary; the Python SDK pins an older CLI (0.132) than local (0.136), so we drive the binary directly, not via the SDK. +- **App-server version drift** — pin/record the binary; current local schema was refreshed against `codex-cli 0.137.0-alpha.4`. The Python SDK has lagged the installed CLI before, so we drive the binary directly and regenerate schemas before relying on new fields. diff --git a/docs/research/app-server-verification.md b/docs/research/app-server-verification.md index 758fa1d..effec34 100644 --- a/docs/research/app-server-verification.md +++ b/docs/research/app-server-verification.md @@ -1,19 +1,28 @@ -# Codex App Server — verification against local binary (2026-06-02) +# Codex App Server — verification against local binary (refreshed 2026-06-09) Verifies the "Codex App Server Technical Report (§1–15)" against the actually installed binary. Report is accurate for its pinned era (~CLI 0.132); several conclusions are already stale on the newer local binary. ## Ground truth on this machine -- `codex-cli 0.136.0-alpha.2` at `~/.local/bin/codex` -- A **managed control daemon is already running**: `codex app-server daemon version` → - ```json - {"status":"running", - "managedCodexPath":"~/.codex/packages/standalone/current/codex", - "socketPath":"~/.codex/app-server-control/app-server-control.sock", - "cliVersion":"0.136.0-alpha.2","appServerVersion":"0.135.0-alpha.1"} - ``` -- Desktop `Codex.app` separately spawns ~10 `codex app-server --listen stdio://` processes (one per thread) — so **both topologies run side by side** today. -- Python SDK (`openai-codex`) still pins `openai-codex-cli-bin==0.132.0`, i.e. installing it gives an **older** App Server than the local CLI. Pin deliberately. +Refreshed 2026-06-09: + +- `~/.local/bin/codex` and `/Applications/Codex.app/Contents/Resources/codex` + both report `codex-cli 0.137.0-alpha.4`. +- `codex app-server daemon version` could not connect because the managed daemon + control socket was absent: + `~/.codex/app-server-control/app-server-control.sock` did not exist. +- Desktop `Codex.app` still spawns separate + `codex app-server --listen stdio://` processes per active thread. A Desktop + SSH/proxy process was also visible for a remote `mini` host, but that proxy is + not yet a supported Dispatch transport. + +Original 2026-06-02 behavioral probes ran against `codex-cli 0.136.0-alpha.2` +with a managed daemon reporting `appServerVersion:"0.135.0-alpha.1"`. Keep the +empirical findings below unless re-probed; update schema facts per binary. + +The Python SDK (`openai-codex`) pinned an older app-server binary during the +2026-06-02 review. Re-verify before adopting it; Dispatch still drives the +installed `codex` binary directly. ## Biggest delta vs the report: daemon + proxy = attach to a running server @@ -33,13 +42,29 @@ Regenerate any time (read-only, no network): codex app-server generate-json-schema --out # stable codex app-server generate-json-schema --experimental --out # + gated ``` -Counts on this binary: stable v2 = 217 types, experimental v2 = 261. Protocol is namespaced `v1/` (just Initialize) + `v2/` (everything else). Initialize is still v1 and returns `{codexHome, platformFamily, platformOs, userAgent}`. +Current 0.137.0-alpha.4 generated schema files: stable = 256, experimental = +312. Current method counts: stable client requests = 82, experimental client +requests = 112; server request/notification counts did not materially change in +the refresh. Protocol is namespaced `v1/` (just Initialize) + `v2/` (everything +else). Initialize is still v1 and returns `{codexHome, platformFamily, +platformOs, userAgent}`. ### Client → server methods (STABLE — non-experimental) -Lifecycle/threads/turns: `thread/start resume fork read list loaded/list archive unarchive unsubscribe metadata/update name/set rollback inject_items compact/start goal/{get,set,clear} approveGuardianDeniedAction shellCommand`, `turn/start steer interrupt`. Accounts/models/config: `account/{read,login/start,login/cancel,logout, rateLimits/read,sendAddCreditsNudgeEmail}`, `model/list`, `modelProvider/capabilities/read`, `config/{read,value/write,batchWrite, mcpServer/reload}`, `configRequirements/read`, `permissionProfile/list`. Tools/ecosystem: `skills/{list,config/write,extraRoots/set}`, `plugin/{install,installed,list,read,uninstall,skill/read,share/*}`, `marketplace/{add,remove,upgrade}`, `hooks/list`, `app/list`, `review/start`, `mcpServer/{tool/call,resource/read,oauth/login}`, `mcpServerStatus/list`. System: `command/exec{,/write,/resize,/terminate}`, full `fs/*` (`readFile writeFile readDirectory createDirectory copy remove getMetadata watch unwatch`), `externalAgentConfig/{detect,import}`, `feedback/upload`, `experimentalFeature/{list,enablement/set}`, `windowsSandbox/{readiness,setupStart}`. +Lifecycle/threads/turns: `thread/start resume fork read list loaded/list archive unarchive unsubscribe metadata/update name/set rollback inject_items compact/start goal/{get,set,clear} approveGuardianDeniedAction shellCommand`, `turn/start steer interrupt`. Accounts/models/config: `account/{read,login/start,login/cancel,logout, rateLimits/read,sendAddCreditsNudgeEmail}`, `model/list`, `modelProvider/capabilities/read`, `config/{read,value/write,batchWrite, mcpServer/reload}`, `configRequirements/read`, `permissionProfile/list`. Tools/ecosystem: `skills/{list,config/write,extraRoots/set}`, `plugin/{install,installed,list,read,uninstall,skill/read,share/*}`, `marketplace/{add,remove,upgrade}`, `hooks/list`, `app/list`, `review/start`, `mcpServer/{tool/call,resource/read,oauth/login}`, `mcpServerStatus/list`. System: `command/exec{,/write,/resize,/terminate}`, full `fs/*` (`readFile writeFile readDirectory createDirectory copy remove getMetadata watch unwatch`), stable `fuzzyFileSearch`, `externalAgentConfig/{detect,import}`, `feedback/upload`, `experimentalFeature/{list,enablement/set}`, `windowsSandbox/{readiness,setupStart}`. + +0.137 parameter deltas Dispatch should care about: + +- `thread/list` now accepts native `archived`, `cwd`, `searchTerm`, + `modelProviders`, `sourceKinds`, and sort filters. Dispatch should prefer + those when they match existing CLI/MCP semantics, then keep dispatch-side + filters for managed/unmanaged and date predicates. +- `turn/start` accepts `serviceTier` plus richer context/environment metadata. + Dispatch projects `service_tier` through configured `new` turns. +- `thread/resume` accepts `excludeTurns` / `initialTurnsPage`, useful for future + live observation without heavy initial history hydration. ### Client → server methods (EXPERIMENTAL-gated — diff stable↔exp) -`process/{spawn,kill,writeStdin,resizePty}` (unsandboxed; matches report's warning), `thread/{search, turns/list, turns/items/list, settings/update, memoryMode/set, increment/decrementElicitation, backgroundTerminals/clean}`, `thread/realtime/{start,stop,appendAudio,appendText,listVoices}`, `remoteControl/{enable,disable,status/read}`, `collaborationMode/list`, `environment/add`, `memory/reset`, `mockExperimentalMethod`. +`process/{spawn,kill,writeStdin,resizePty}` (unsandboxed; matches report's warning), `thread/{search, turns/list, turns/items/list, settings/update, memoryMode/set, increment/decrementElicitation, backgroundTerminals/clean}`, `thread/realtime/{start,stop,appendAudio,appendText,listVoices}`, `remoteControl/{enable,disable,status/read,pairing/start,client/list,client/revoke}`, `collaborationMode/list`, `environment/add`, experimental `fuzzyFileSearch/session{Start,Update,Stop}`, `memory/reset`, `mockExperimentalMethod`. > Note: `thread/turns/list` and `thread/search` are EXPERIMENTAL here — the report > listed turns/list as Supported. The realtime *control* verbs are gated, but the > realtime *notifications* below ship in stable. @@ -113,20 +138,21 @@ Two WS clients (A, B), one `ws://` server. **Earlier "partial fan-out" was wrong ## Status of original open questions - **Q1 transports — RESOLVED.** stdio=JSONL (build here); unix/ws=WebSocket; daemon control socket=auth-gated + undocumented handshake. -- **Q2 multi-client — RESOLVED.** Resume of a *persisted* thread = live co-presence (full content fan-out). thread/list discovery unreliable; track ids yourself. +- **Q2 multi-client — RESOLVED.** Resume of a *persisted* thread = live co-presence (full content fan-out). `thread/list(useStateDbOnly:true)` discovery works when reading `result.data`; track ids/refs yourself for authority. - **Q3 Guardian — RESOLVED (behaviorally).** auto_review is selective (escalations only); benign approvals still hit the client. Approval responder is mandatory. ## Remaining unknowns (lower priority / deliberately not chased) 1. The `proxy`/control-socket handshake — IS attaching to the live managed daemon supported for third parties at all? (Undocumented; volatile. Spawn-your-own is the pragmatic path regardless.) 2. Triggering `item/autoApprovalReview/*` — needs a real risk-category action (network/sandbox-escape/MCP) with `auto_review`; not yet exercised. 3. `thread/fork`, `thread/rollback`, `thread/compact/start` runtime semantics. -4. Python SDK (`openai-codex`) actual high-level API vs raw (it pins CLI 0.132, not local 0.136) — evaluate before adopting vs hand-rolling a stdio client. +4. Python SDK (`openai-codex`) actual high-level API vs raw — it has lagged the installed CLI before, so evaluate its bundled binary before adopting it over the direct stdio client. ## Building blocks confirmed (ready to design against) - Transport: stdio JSONL (1:1) or ws loopback (multi-viewer via resume). - Lifecycle: initialize → initialized → thread/start → turn/start → stream → resume. - Approval responder: reply `{id,result:{decision}}`; watch `waitingOnApproval` flag + `serverRequest/resolved`; correlate file diffs by `itemId`. - Sandbox: thread `sandbox:"read-only|workspace-write|danger-full-access"`; turn `sandboxPolicy:{type:"readOnly|workspaceWrite|externalSandbox|dangerFullAccess"}`. -- Schema: regenerate per binary; 217 stable / 261 experimental v2 types. +- Schema: regenerate per binary; current 0.137.0-alpha.4 generated 256 stable / + 312 experimental schema files. Scratch this session: schema at path in `/tmp/codex-appserver-scratch-path`; probe scripts `/tmp/codex_{stdio,ws,fanout,lab4}.py`. diff --git a/docs/usage/README.md b/docs/usage/README.md index 0d6679a..2303951 100644 --- a/docs/usage/README.md +++ b/docs/usage/README.md @@ -35,9 +35,9 @@ Use this clean-machine smoke after installing or upgrading: ```bash dispatch doctor dispatch schema send -dispatch up +dispatch up --json dispatch daemon status -dispatch down +dispatch down --json ``` If `dispatch doctor` fails before the app-server smoke because the Codex CLI is @@ -114,6 +114,10 @@ Common recovery paths: isolated state, confirm `DISPATCH_HOME`, `DISPATCH_SOCKET`, and `DISPATCH_PIDFILE`. - Registry schema newer than the installed binary: upgrade with `uv tool upgrade outfitter-dispatch` before starting the daemon. +- Registry schema older than the installed binary: run `dispatch down`, then + `dispatch registry migrate`, then `dispatch up`. Migration backs up the registry + by default and refuses to run while the daemon is reachable unless + `--allow-running` is explicitly set for a controlled recovery. - Registry integrity failure: stop the daemon, back up the database at the path shown by doctor, and recreate it or inspect with `sqlite3`. - App Server initialize failure: run `codex app-server --listen stdio://` directly in @@ -150,6 +154,7 @@ An owned lane is a Codex thread created by dispatch. Owned lanes are writable. U uv run dispatch new \ --name docs-review \ --cwd /path/to/dispatch \ + --goal "Review until no P2 findings remain." \ --text "Review the README for missing usage steps." ``` @@ -197,6 +202,19 @@ developer_file = ".dispatch/instructions/builder.md" ``` Preset order matters: later presets win, and CLI flags win over presets. +When `service_tier` is configured, dispatch sends it to both `thread/start` and +the optional initial `turn/start` request. + +Use `--goal` to create a native App Server goal before the initial message is sent. +Slash commands in `--text` are not interpreted by dispatch; `--text "/goal ..."` +is rejected so agents do not accidentally create a thread that looks goal-driven but +has no native goal state. Goals require non-ephemeral threads, so `new --goal` +cannot be combined with `--ephemeral`. + +The `new` response reports `message_accepted` and `goal_set`. `message_accepted` +means the App Server accepted the initial turn request; it does not prove the +assistant produced work. Use `get` to inspect `latest_turn`, `tail` for persisted +history, or `watch` for a bounded live event sample after launch. Use `send --context` for silent context injection. It adds model-visible context without starting a turn: @@ -247,6 +265,11 @@ uv run dispatch send @docs-review "Can you sanity-check this?" --intro uv run dispatch get ``` +The response includes `latest_turn` when dispatch has observed turn lifecycle events: +the latest turn id, runtime status (`started`, `completed`, or `failed`), and the +last App Server error message/time when a turn fails. This is the first place to look +when a send or initial message was accepted but no assistant output appears. + Use `tail` when you want persisted turn history. It reads `thread/read` with `includeTurns:true` and returns a compact item list; it is a history snapshot, not a full execution log. App Server does not support `includeTurns` on ephemeral threads. @@ -290,6 +313,8 @@ uv run dispatch restore ``` `restore` unarchives the thread only; it does not resume the thread or start a new turn. +Destroy-intent commands prompt on a TTY. In scripts, use `--yes --json`; if you also +set `--no-interactive`, `--yes` is required or the command exits with a usage error. Use `search` to search Codex thread history without first attaching every thread: @@ -313,8 +338,9 @@ local substring scan. Date bounds accept ISO dates or datetimes and default to f `list` shows the threads dispatch already manages. `list --unmanaged` is the other half: it lists the persisted Codex sessions on this machine — desktop threads and prior -runs — that you could attach. It uses App Server `thread/list` in state-db-only mode, so -it is fast and read-only; it never resumes, writes, or registers anything. +runs — that you could attach. It uses App Server `thread/list` in state-db-only mode, +asks for active sessions sorted by recent updates, and remains read-only; it never +resumes, writes, or registers anything. ```bash uv run dispatch list --unmanaged --limit 20 @@ -432,7 +458,7 @@ Manage triggers: uv run dispatch trigger list uv run dispatch trigger pause uv run dispatch trigger resume -uv run dispatch trigger rm +uv run dispatch trigger rm --yes --json ``` ## Schemas @@ -444,11 +470,16 @@ derived from the contract registry: ```bash uv run dispatch list --json uv run dispatch schema send +uv run dispatch schema "list --unmanaged" uv run dispatch schema sync uv run dispatch schema watch uv run dispatch schema "goal set" ``` +`schema` uses the CLI projection manifest, including composed command spellings such +as `list --unmanaged`. It is the preferred way to discover stable fields for `jq` +instead of scraping `--help` or hand-copying Pydantic schemas. + ## MCP The MCP surface is derived from the same op registry as the CLI. The local entrypoint is: diff --git a/plugins/dispatch/README.md b/plugins/dispatch/README.md index 3f09296..56eb3a4 100644 --- a/plugins/dispatch/README.md +++ b/plugins/dispatch/README.md @@ -10,14 +10,17 @@ This workspace-local plugin exposes: The MCP server and skills expose the same derived operation registry as the CLI, including managed-thread creation/messaging, dispatch refs, persisted `tail`, bounded live `watch`, native goals, triggers, schemas, and daemon status/log reads. +`new --goal` creates native App Server goal state; `/goal ...` in message text is +plain text and should not be used as a goal substitute. Run `dispatch doctor` after installing or upgrading dispatch. It verifies the CLI entrypoints, Codex CLI/auth footprint, daemon socket/pidfile state, registry schema/integrity, packaged skills/plugin assets, and a low-risk App Server initialize smoke. Use `dispatch doctor --no-app-server` when you only want local -install checks. +install checks. If doctor reports an old registry schema, run `dispatch down`, +`dispatch registry migrate`, then `dispatch up`. -Run `dispatch up` before MCP tool calls that need the daemon. `dispatch mcp` +Run `dispatch up --json` before MCP tool calls that need the daemon. `dispatch mcp` serves the derived tools over stdio; the daemon remains the executor. `skills` is a symlink to the repo-root [`../../skills`](../../skills) tree so the plugin diff --git a/pyproject.toml b/pyproject.toml index 2293046..f8aae4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "outfitter-dispatch" -version = "0.4.0" +version = "0.4.1" description = "Local control plane for orchestrating Codex agent lanes over the Codex App Server." readme = "README.md" requires-python = ">=3.13" diff --git a/skills/dispatch/SKILL.md b/skills/dispatch/SKILL.md index d9944a8..088bff0 100644 --- a/skills/dispatch/SKILL.md +++ b/skills/dispatch/SKILL.md @@ -27,6 +27,7 @@ The current canonical operator grammar is: - health: `doctor` - daemon process: `up`, `down` - daemon reads: `daemon status`, `daemon log` +- registry recovery: `registry migrate` - thread lifecycle/read/search: `new`, `attach`, `list`, `list --unmanaged`, `get`, `sync`, `tail`, `watch`, `search` - thread actions: `rename`, `archive`, `restore` @@ -43,7 +44,7 @@ machine-output contract to be explicit. ```bash uv run dispatch doctor --no-app-server -uv run dispatch up +uv run dispatch up --json uv run dispatch daemon status uv run dispatch daemon log --limit 10 ``` @@ -54,6 +55,9 @@ daemon socket/pidfile state, registry schema/integrity, packaged skills/plugin assets, and a low-risk Codex App Server initialize smoke. Use `--no-app-server` when you only need local install/runtime diagnostics. +If doctor reports an old registry schema, run `uv run dispatch down`, then +`uv run dispatch registry migrate`, then `uv run dispatch up --json`. + Stop only when it is clearly your daemon/session to stop: ```bash @@ -77,9 +81,18 @@ prefixes, and can send an initial turn: ```bash uv run dispatch new --name my-lane --cwd /path/to/project --text "Do the bounded thing." +uv run dispatch new --name my-lane --goal "Loop until green." --text "Start with tests." uv run dispatch new --name my-lane --preset reviewer --no-send ``` +Use `--goal` for a native App Server goal before the initial turn. Do not put +`/goal ...` in `--text`; dispatch treats slash commands as plain text and rejects +that shape so agents do not create a thread that only looks goal-driven. + +`new` returns `message_accepted`, not proof of assistant completion. After launch, +use `get` to check `latest_turn`, `tail` for persisted history, or `watch` for a +bounded live sample. + Attached lanes are existing desktop Codex threads registered by raw thread id: ```bash @@ -115,8 +128,8 @@ lanes. `list` shows threads dispatch already manages. `list --unmanaged` lists persisted Codex sessions that are not registered in dispatch. It uses App Server -`thread/list` in state-db-only mode. It is read-only and does not resume or -register anything: +`thread/list` in state-db-only mode, asking for active sessions sorted by recent +updates. It is read-only and does not resume or register anything: ```bash uv run dispatch list @@ -182,6 +195,13 @@ Use `stop` to cancel the active turn without replacement text: uv run dispatch stop ``` +Destroy-intent commands prompt by default. In scripts, use explicit confirmation: + +```bash +uv run dispatch archive --yes --json +uv run dispatch trigger rm --yes --json +``` + For short inter-lane chat, use the companion `$dm` skill, which is backed by `dispatch send`. @@ -193,6 +213,10 @@ Use `get` for compact managed-thread metadata: uv run dispatch get ``` +Check `latest_turn` when a message was accepted but no assistant work is visible. +It records the latest observed turn id, status, and App Server error text/time for +failed turns. + Use `tail` for persisted turn history: ```bash @@ -264,9 +288,13 @@ Use `schema` for derived input/output schemas: ```bash uv run dispatch schema send +uv run dispatch schema "list --unmanaged" uv run dispatch schema "goal set" ``` +Prefer `schema` for `jq`/automation field discovery. It is derived from the same +op registry as CLI and MCP, including composed spellings like `list --unmanaged`. + ## MCP And Plugin The MCP server is: @@ -290,6 +318,8 @@ plugin bundle under `outfitter.dispatch.assets`; use the repo copies for editing - Do not install launchd autostart unless the user explicitly asks. - Start troubleshooting with `dispatch doctor`; use its recovery hints rather than guessing about stale sockets, PATH, auth, or registry shape. +- If doctor reports an old registry, stop the daemon and run + `dispatch registry migrate` before starting it again. - Do not describe `tail --follow` as canonical or streaming forever. Use `watch` for bounded live samples until dispatch grows a subscription-capable control socket. - Do not treat `rollback` as file undo. diff --git a/spikes/README.md b/spikes/README.md index 685e783..52e9a3d 100644 --- a/spikes/README.md +++ b/spikes/README.md @@ -1,6 +1,6 @@ # spikes -Exploratory scripts that probed the Codex App Server (`codex-cli 0.136.0-alpha.2`) to verify the primitives `dispatch` is built on. They are **reference + the seed of the integration suite** — Phase 1 of `PLAN.md` promotes them into `tests/` against a real ephemeral app-server. Preserved here from a verification session (they previously lived in `/tmp`). +Exploratory scripts that probed the Codex App Server (`codex-cli 0.136.0-alpha.2`) to verify the primitives `dispatch` is built on. They are **reference + the seed of the integration suite** — Phase 1 of `PLAN.md` promotes them into `tests/` against a real ephemeral app-server. Preserved here from a verification session (they previously lived in `/tmp`). Schema counts below were refreshed against `codex-cli 0.137.0-alpha.4`. Run ad hoc with: `python3 spikes/.py` (drives a real `codex app-server`; uses your `~/.codex` unless edited — prefer an isolated `CODEX_HOME` when re-running). @@ -15,8 +15,8 @@ Run ad hoc with: `python3 spikes/.py` (drives a real `codex app-server`; u ## Regenerate the protocol schema (per binary) ```bash -codex app-server generate-json-schema --out # stable (217 v2 types) -codex app-server generate-json-schema --experimental --out # + gated (261) +codex app-server generate-json-schema --out # stable (256 files) +codex app-server generate-json-schema --experimental --out # + gated (312 files) ``` Full written findings: see the research notes referenced in `.agents/plans/v0/REFS.md`. diff --git a/src/outfitter/dispatch/client/client.py b/src/outfitter/dispatch/client/client.py index 35df116..1d59e99 100644 --- a/src/outfitter/dispatch/client/client.py +++ b/src/outfitter/dispatch/client/client.py @@ -42,6 +42,7 @@ ThreadGoalSetParams, ThreadGoalStatus, ThreadInfo, + ThreadListCwdFilter, ThreadListParams, ThreadListResult, ThreadReadParams, @@ -195,9 +196,20 @@ async def thread_start( result = await self._request("thread/start", _dump(params)) return ThreadResult.model_validate(result).thread - async def thread_resume(self, thread_id: str) -> ThreadInfo: + async def thread_resume( + self, + thread_id: str, + *, + exclude_turns: bool | None = None, + ) -> ThreadInfo: result = await self._request( - "thread/resume", _dump(ThreadResumeParams(thread_id=thread_id)) + "thread/resume", + _dump( + ThreadResumeParams( + thread_id=thread_id, + exclude_turns=exclude_turns, + ) + ), ) return ThreadResult.model_validate(result).thread @@ -233,9 +245,31 @@ async def thread_fork( return ThreadResult.model_validate(result).thread async def thread_list( - self, limit: int = 50, cursor: str | None = None, use_state_db_only: bool | None = None + self, + limit: int = 50, + cursor: str | None = None, + use_state_db_only: bool | None = None, + *, + archived: bool | None = None, + cwd: ThreadListCwdFilter | None = None, + model_providers: list[str] | None = None, + search_term: str | None = None, + sort_direction: SortDirection | None = None, + sort_key: ThreadSortKey | None = None, + source_kinds: list[ThreadSourceKind] | None = None, ) -> list[ThreadInfo]: - params = ThreadListParams(limit=limit, cursor=cursor, use_state_db_only=use_state_db_only) + params = ThreadListParams( + limit=limit, + cursor=cursor, + archived=archived, + cwd=cwd, + model_providers=model_providers, + search_term=search_term, + sort_direction=sort_direction, + sort_key=sort_key, + source_kinds=source_kinds, + use_state_db_only=use_state_db_only, + ) result = await self._request("thread/list", _dump(params)) return ThreadListResult.model_validate(result).data @@ -331,6 +365,7 @@ async def turn_start( effort: Effort | None = None, summary: ReasoningSummary | None = None, model: str | None = None, + service_tier: str | None = None, output_schema: dict[str, object] | None = None, personality: Personality | None = None, ) -> dict[str, object]: @@ -344,6 +379,7 @@ async def turn_start( effort=effort, summary=summary, model=model, + service_tier=service_tier, output_schema=output_schema, personality=personality, ) diff --git a/src/outfitter/dispatch/client/models.py b/src/outfitter/dispatch/client/models.py index ea5adc4..f67a6e4 100644 --- a/src/outfitter/dispatch/client/models.py +++ b/src/outfitter/dispatch/client/models.py @@ -29,6 +29,7 @@ Decision = Literal["accept", "acceptForSession", "decline", "cancel"] SortDirection = Literal["asc", "desc"] ThreadSortKey = Literal["created_at", "updated_at"] +ThreadListCwdFilter = str | list[str] ThreadSourceKind = Literal[ "cli", "vscode", @@ -105,6 +106,7 @@ class ThreadInfo(WireModel): id: str session_id: str | None = None forked_from_id: str | None = None + parent_thread_id: str | None = None ephemeral: bool | None = None status: ThreadStatus | None = None cwd: str | None = None @@ -138,6 +140,7 @@ class ThreadStartParams(WireModel): class ThreadResumeParams(WireModel): thread_id: str + exclude_turns: bool | None = None class ThreadForkParams(WireModel): @@ -162,6 +165,13 @@ class ThreadSetNameParams(WireModel): class ThreadListParams(WireModel): limit: int = 50 cursor: str | None = None + archived: bool | None = None + cwd: ThreadListCwdFilter | None = None + model_providers: list[str] | None = None + search_term: str | None = None + sort_direction: SortDirection | None = None + sort_key: ThreadSortKey | None = None + source_kinds: list[ThreadSourceKind] | None = None use_state_db_only: bool | None = None @@ -268,6 +278,7 @@ class TurnStartParams(WireModel): effort: Effort | None = None summary: ReasoningSummary | None = None model: str | None = None + service_tier: str | None = None output_schema: dict[str, object] | None = None personality: Personality | None = None diff --git a/src/outfitter/dispatch/contracts/context.py b/src/outfitter/dispatch/contracts/context.py index b22e7c3..270d777 100644 --- a/src/outfitter/dispatch/contracts/context.py +++ b/src/outfitter/dispatch/contracts/context.py @@ -33,6 +33,7 @@ ThreadGoal, ThreadGoalStatus, ThreadInfo, + ThreadListCwdFilter, ThreadSandbox, ThreadSearchResult, ThreadSortKey, @@ -61,7 +62,12 @@ async def thread_start( ephemeral: bool = False, ) -> ThreadInfo: ... - async def thread_resume(self, thread_id: str) -> ThreadInfo: ... + async def thread_resume( + self, + thread_id: str, + *, + exclude_turns: bool | None = None, + ) -> ThreadInfo: ... async def thread_fork( self, @@ -80,7 +86,18 @@ async def thread_fork( ) -> ThreadInfo: ... async def thread_list( - self, limit: int = 50, cursor: str | None = None, use_state_db_only: bool | None = None + self, + limit: int = 50, + cursor: str | None = None, + use_state_db_only: bool | None = None, + *, + archived: bool | None = None, + cwd: ThreadListCwdFilter | None = None, + model_providers: list[str] | None = None, + search_term: str | None = None, + sort_direction: SortDirection | None = None, + sort_key: ThreadSortKey | None = None, + source_kinds: list[ThreadSourceKind] | None = None, ) -> list[ThreadInfo]: ... async def thread_read( @@ -133,6 +150,7 @@ async def turn_start( effort: Effort | None = None, summary: ReasoningSummary | None = None, model: str | None = None, + service_tier: str | None = None, output_schema: dict[str, object] | None = None, personality: Personality | None = None, ) -> dict[str, object]: ... diff --git a/src/outfitter/dispatch/contracts/derive_cli.py b/src/outfitter/dispatch/contracts/derive_cli.py index 3d7effd..f94f192 100644 --- a/src/outfitter/dispatch/contracts/derive_cli.py +++ b/src/outfitter/dispatch/contracts/derive_cli.py @@ -35,7 +35,16 @@ class CliRoute: positionals: tuple[str, ...] = () -_ROUTES: tuple[CliRoute, ...] = ( +_CUSTOM_ROUTES: tuple[CliRoute, ...] = ( + CliRoute(("send",), "send", ("lane", "text")), + CliRoute(("stop",), "stop", ("lane",)), + CliRoute(("search",), "search", ("query",)), + CliRoute(("list",), "roster"), + CliRoute(("trigger", "list"), "trigger-list"), + CliRoute(("goal", "set"), "goal-set", ("lane", "objective")), +) + +_SIMPLE_ROUTES: tuple[CliRoute, ...] = ( CliRoute(("new",), "new"), CliRoute(("attach",), "attach", ("thread",)), CliRoute(("get",), "show", ("lane",)), @@ -55,6 +64,22 @@ class CliRoute: CliRoute(("daemon", "log"), "log"), ) +CLI_PROJECTION_CONTROL_PATHS: tuple[tuple[str, ...], ...] = (("schema",),) +_COMPOSED_SCHEMA_ROUTES: dict[str, str] = {"list --unmanaged": "discover"} + + +def cli_public_routes() -> tuple[CliRoute, ...]: + """All op-backed CLI routes. Custom route functions are still manifest entries; + the custom code is an explicit projection override, not an untracked surface.""" + return (*_CUSTOM_ROUTES, *_SIMPLE_ROUTES) + + +def cli_schema_routes() -> dict[str, str]: + """Public command spellings accepted by ``dispatch schema``.""" + routes = {" ".join(route.path): route.op_id for route in cli_public_routes()} + routes.update(_COMPOSED_SCHEMA_ROUTES) + return routes + def derive_cli( registry: OpRegistry, invoke: Invoker, render: Renderer | None = None @@ -86,7 +111,7 @@ def derive_cli( ) _register_command(app, ("schema",), _schema_command(registry)) - for route in _ROUTES: + for route in _SIMPLE_ROUTES: _register_command( app, route.path, @@ -127,8 +152,18 @@ def _op_command( def command(**kwargs: object) -> None: json_requested = bool(kwargs.pop("json", False)) + yes = bool(kwargs.pop("yes", False)) + no_interactive = bool(kwargs.pop("no_interactive", False)) if op.intent == "destroy": - typer.confirm(f"Run destroy op {op.id!r}?", abort=True, err=json_requested) + if no_interactive and not yes: + typer.secho( + f"dispatch: destroy op {op.id!r} requires --yes with --no-interactive", + fg="red", + err=True, + ) + raise typer.Exit(code=2) + if not yes: + typer.confirm(f"Run destroy op {op.id!r}?", abort=True, err=True) result = invoke(op.id, dict(kwargs)) render(op, result) _ignore_json(json_requested) @@ -161,6 +196,27 @@ def _parameters(op: Op, *, positionals: tuple[str, ...] = ()) -> list[inspect.Pa parameters.append( inspect.Parameter(name, kind, default=default, annotation=field.annotation) ) + if op.intent == "destroy": + parameters.extend( + [ + inspect.Parameter( + "yes", + inspect.Parameter.KEYWORD_ONLY, + default=typer.Option(False, "--yes", help="Confirm this destroy operation."), + annotation=bool, + ), + inspect.Parameter( + "no_interactive", + inspect.Parameter.KEYWORD_ONLY, + default=typer.Option( + False, + "--no-interactive", + help="Fail instead of prompting unless --yes is also provided.", + ), + annotation=bool, + ), + ] + ) parameters.append( inspect.Parameter( "json", @@ -439,25 +495,6 @@ def command( def _schema_op_id(command: str) -> str: - aliases = { - "stop": "stop", - "list": "roster", - "list-unmanaged": "discover", - "attach": "attach", - "get": "show", - "tail": "transcript", - "watch": "watch", - "sync": "sync", - "search": "search", - "rename": "lane-rename", - "archive": "archive", - "restore": "restore", - "goal-status": "goal-get", - "goal-set": "goal-set", - "goal-clear": "goal-clear", - "daemon-status": "status", - "daemon-log": "log", - } try: parts = shlex.split(command.strip()) except ValueError: @@ -467,9 +504,10 @@ def _schema_op_id(command: str) -> str: normalized = "-".join(words) if words else command.strip().replace(" ", "-") if normalized == "tail" and "--follow" in flags: return "__unknown_tail_follow__" - if normalized == "list" and "--unmanaged" in flags: - return "discover" - return aliases.get(normalized, normalized) + schema_routes = cli_schema_routes() + aliases = {command.replace(" ", "-"): op_id for command, op_id in schema_routes.items()} + flagged = " ".join((*words, *sorted(flags))) + return schema_routes.get(flagged) or aliases.get(normalized, normalized) def _default_render(op: Op, result: dict[str, object]) -> None: diff --git a/src/outfitter/dispatch/core/handlers.py b/src/outfitter/dispatch/core/handlers.py index 0a3be1a..2163963 100644 --- a/src/outfitter/dispatch/core/handlers.py +++ b/src/outfitter/dispatch/core/handlers.py @@ -60,6 +60,7 @@ LaneSyncResult, LaneSyncView, LaneTextInput, + LatestTurnView, LogInput, LogOutput, NewInput, @@ -185,8 +186,19 @@ def _sync_view(sync: LaneSync | None) -> LaneSyncView: ) +def _latest_turn_view(lane: Lane) -> LatestTurnView: + return LatestTurnView( + id=lane.latest_turn_id, + status=lane.latest_turn_status, + error=lane.latest_error, + error_at=lane.latest_error_at.isoformat() if lane.latest_error_at else None, + ) + + def _list_item(lane: Lane, sync: LaneSync | None) -> LaneListItem: - return LaneListItem(**_ref(lane).model_dump(), sync=_sync_view(sync)) + return LaneListItem( + **_ref(lane).model_dump(), sync=_sync_view(sync), latest_turn=_latest_turn_view(lane) + ) def _handle(name: str) -> str: @@ -260,6 +272,13 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: ), ) settings = resolved.settings + if inp.text is not None and inp.text.lstrip().startswith("/goal") and inp.goal is None: + raise ValidationError( + "`dispatch new --text` sends plain message text; slash commands are not " + "interpreted. Use `--goal` for a native dispatch/App Server goal." + ) + if inp.goal is not None and settings.ephemeral: + raise ValidationError("native goals require non-ephemeral threads") sandbox = settings.sandbox or "read-only" approval_policy = settings.approval_policy or "never" thread = await ctx.client.thread_start( @@ -284,7 +303,22 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: except ClientError as exc: ctx.log.warning("lane.name_set_failed", lane=lane.id, error=str(exc)) - sent = False + goal_set = False + if inp.goal is not None: + try: + await ctx.client.thread_goal_set(thread.id, objective=inp.goal) + except (DispatchError, ClientError) as exc: + await ctx.registry.log_action( + "goal-set", + lane=lane.id, + detail=inp.goal[:120], + outcome=project_error(exc).code, + ) + raise + await ctx.registry.log_action("goal-set", lane=lane.id, detail=inp.goal[:120]) + goal_set = True + + message_accepted = False if settings.text is not None and inp.send: try: await ctx.client.turn_start( @@ -297,10 +331,12 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: effort=settings.effort, summary=settings.summary, model=settings.model, + service_tier=settings.service_tier, output_schema=settings.output_schema, personality=settings.personality, ) except (DispatchError, ClientError) as exc: + await ctx.registry.record_turn_request_failed(lane.id, str(exc)) await ctx.registry.log_action( "send", lane=lane.id, @@ -309,10 +345,15 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: ) raise await ctx.registry.log_action("send", lane=lane.id, detail=settings.text[:120]) - sent = True - ctx.log.info("lane.new", lane=lane.id, handle=lane.handle, sent=sent) + message_accepted = True + ctx.log.info("lane.new", lane=lane.id, handle=lane.handle, message_accepted=message_accepted) ref = _ref(lane) - return NewLane(**ref.model_dump(), sent=sent) + return NewLane( + **ref.model_dump(), + message_accepted=message_accepted, + goal_set=goal_set, + latest_turn=_latest_turn_view(lane), + ) def _turn_sandbox(sandbox: ThreadSandbox) -> SandboxPolicy: @@ -464,7 +505,16 @@ def _metadata_sync( async def send(inp: LaneTextInput, ctx: Ctx) -> ActionAck: lane = await _resolve(ctx, inp.lane) _require_writable(lane) - await ctx.client.turn_start(lane.id, inp.text, cwd=lane.cwd or ".", sandbox_policy=_READ_ONLY) + try: + await ctx.client.turn_start( + lane.id, inp.text, cwd=lane.cwd or ".", sandbox_policy=_READ_ONLY + ) + except (DispatchError, ClientError) as exc: + await ctx.registry.record_turn_request_failed(lane.id, str(exc)) + await ctx.registry.log_action( + "send", lane=lane.id, detail=inp.text[:120], outcome=project_error(exc).code + ) + raise await ctx.registry.update_lane_status(lane.id, "busy") await ctx.registry.log_action("send", lane=lane.id, detail=inp.text[:120]) return ActionAck(**_managed_identity(lane), op="send") @@ -485,9 +535,16 @@ async def send_message(inp: SendInput, ctx: Ctx) -> ActionAck: turn_id = _require_active_turn(lane, "interject") await ctx.client.turn_interrupt(lane.id, turn_id) await ctx.registry.log_action("interrupt", lane=lane.id, detail="interject") - await ctx.client.turn_start( - lane.id, text, cwd=lane.cwd or ".", sandbox_policy=_READ_ONLY - ) + try: + await ctx.client.turn_start( + lane.id, text, cwd=lane.cwd or ".", sandbox_policy=_READ_ONLY + ) + except (DispatchError, ClientError) as exc: + await ctx.registry.record_turn_request_failed(lane.id, str(exc)) + await ctx.registry.log_action( + "send", lane=lane.id, detail=text[:120], outcome=project_error(exc).code + ) + raise await ctx.registry.update_lane_status(lane.id, "busy") await ctx.registry.log_action("send", lane=lane.id, detail=text[:120]) return ActionAck(**_managed_identity(lane), op="interject") @@ -564,6 +621,7 @@ async def show(inp: ShowInput, ctx: Ctx) -> LaneDetail: status=lane.status, cwd=lane.cwd, active_turn_id=lane.active_turn_id, + latest_turn=_latest_turn_view(lane), sync=_sync_view(sync), transcript=transcript, ) @@ -1069,7 +1127,13 @@ async def discover(inp: DiscoverInput, ctx: Ctx) -> Discovery: """List persisted Codex sessions (``thread/list``, state-db only) — read-only and distinct from ``roster``: these are candidates to ``attach``, not managed lanes. Discovery does not resume or register anything.""" - threads = await ctx.client.thread_list(limit=inp.limit, use_state_db_only=True) + threads = await ctx.client.thread_list( + limit=inp.limit, + archived=False, + sort_direction="desc", + sort_key="updated_at", + use_state_db_only=True, + ) return Discovery(sessions=[_session(thread) for thread in threads]) diff --git a/src/outfitter/dispatch/core/models.py b/src/outfitter/dispatch/core/models.py index e5aace6..0d52015 100644 --- a/src/outfitter/dispatch/core/models.py +++ b/src/outfitter/dispatch/core/models.py @@ -16,7 +16,12 @@ ThreadGoalStatus, ThreadSandbox, ) -from outfitter.dispatch.registry.models import LaneSource, LaneStatus, SyncState +from outfitter.dispatch.registry.models import ( + LaneSource, + LaneStatus, + SyncState, + TurnRuntimeStatus, +) # --- inputs ------------------------------------------------------------------- @@ -48,6 +53,10 @@ class NewInput(BaseModel): preset: list[str] = Field( default_factory=list, description="Preset(s) to apply, left to right." ) + goal: str | None = Field( + default=None, + description="Native App Server goal objective to create before the initial message.", + ) text: str | None = Field(default=None, description="Initial message text.") send: bool = Field(default=True, description="Send the initial message when text is present.") cwd: str | None = Field(default=None, description="Working directory for config discovery.") @@ -280,16 +289,33 @@ class LaneSyncView(BaseModel): error: str | None = None +class LatestTurnView(BaseModel): + id: str | None = Field(default=None, description="Latest observed App Server turn id.") + status: TurnRuntimeStatus | None = Field( + default=None, description="Latest observed turn runtime status." + ) + error: str | None = Field(default=None, description="Latest App Server turn error text.") + error_at: str | None = Field(default=None, description="When the latest turn error occurred.") + + class LaneListItem(LaneRef): sync: LaneSyncView = Field(default_factory=LaneSyncView) + latest_turn: LatestTurnView = Field(default_factory=LatestTurnView) class NewLane(LaneRef): - sent: bool + message_accepted: bool = Field( + description="Whether the App Server accepted the initial message request." + ) + goal_set: bool = Field( + default=False, description="Whether a native App Server goal was set before launch." + ) + latest_turn: LatestTurnView = Field(default_factory=LatestTurnView) class LaneDetail(LaneRef): active_turn_id: str | None = None + latest_turn: LatestTurnView = Field(default_factory=LatestTurnView) sync: LaneSyncView = Field(default_factory=LaneSyncView) transcript: list[TranscriptItem] = Field(default_factory=list) diff --git a/src/outfitter/dispatch/core/ops.py b/src/outfitter/dispatch/core/ops.py index 2c5df33..694bc25 100644 --- a/src/outfitter/dispatch/core/ops.py +++ b/src/outfitter/dispatch/core/ops.py @@ -98,7 +98,14 @@ "source": "own", "status": "idle", "cwd": "/work", - "sent": False, + "message_accepted": False, + "goal_set": False, + "latest_turn": { + "id": None, + "status": None, + "error": None, + "error_at": None, + }, }, ) ], diff --git a/src/outfitter/dispatch/core/reactor.py b/src/outfitter/dispatch/core/reactor.py index c7a75d6..08e3595 100644 --- a/src/outfitter/dispatch/core/reactor.py +++ b/src/outfitter/dispatch/core/reactor.py @@ -43,22 +43,18 @@ async def handle(self, event: LaneEvent) -> None: return # an event for a thread dispatch does not track if isinstance(event, TurnStarted): - await registry.set_active_turn(lane.id, event.turn_id) - await registry.update_lane_status(lane.id, "busy") + await registry.record_turn_started(lane.id, event.turn_id) await registry.touch_lane_event(lane.id) elif isinstance(event, TurnCompleted): - await registry.set_active_turn(lane.id, None) - await registry.update_lane_status(lane.id, "idle") + await registry.record_turn_completed(lane.id, event.turn_id) await registry.touch_lane_event(lane.id) await self._fire_event(lane.id, "turn_completed") await drain_next_queued_message(self._ctx, lane.id) elif isinstance(event, TurnFailed): - await registry.set_active_turn(lane.id, None) - await registry.update_lane_status(lane.id, "error") + await registry.record_turn_failed(lane.id, event.turn_id, event.message) await registry.touch_lane_event(lane.id) elif isinstance(event, LaneIdle): - await registry.set_active_turn(lane.id, None) - await registry.update_lane_status(lane.id, "idle") + await registry.mark_lane_idle(lane.id) await registry.touch_lane_event(lane.id) await drain_next_queued_message(self._ctx, lane.id) elif isinstance(event, ItemCompleted | GoalUpdated | GoalCleared | ThreadCompacted): diff --git a/src/outfitter/dispatch/doctor.py b/src/outfitter/dispatch/doctor.py index 3c4ac1a..c5b7b8a 100644 --- a/src/outfitter/dispatch/doctor.py +++ b/src/outfitter/dispatch/doctor.py @@ -281,6 +281,11 @@ def _registry_check() -> DoctorCheck: "SELECT name FROM sqlite_master WHERE type = 'table'" ).fetchall() } + row_counts = { + table: conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] + for table in ("lanes", "triggers") + if table in tables + } except sqlite3.Error as exc: return DoctorCheck( name="registry", @@ -298,7 +303,15 @@ def _registry_check() -> DoctorCheck: "lane_sync_sources", "lane_snapshots", } - data.update({"schema_version": version, "integrity": integrity, "tables": sorted(tables)}) + data.update( + { + "schema_version": version, + "supported_schema_version": SCHEMA_VERSION, + "integrity": integrity, + "tables": sorted(tables), + "row_counts": row_counts, + } + ) if integrity != "ok": return DoctorCheck( name="registry", @@ -325,8 +338,8 @@ def _registry_check() -> DoctorCheck: summary="registry schema is unversioned", detail=detail, recovery=( - "Restart dispatchd with this dispatch version (`dispatch down`, then " - "`dispatch up`) to apply compatibility migrations." + "Back up the registry, then run `dispatch down`, `dispatch registry migrate`, " + "and `dispatch up` to apply compatibility migrations." ), data=data, ) @@ -337,7 +350,20 @@ def _registry_check() -> DoctorCheck: summary="registry is missing required tables", detail=", ".join(missing), recovery=( - "Run `dispatch up` with a compatible dispatch version or recreate the registry." + "Run `dispatch down`, `dispatch registry migrate`, then `dispatch up`. " + "If migration fails, inspect the backup path from `registry migrate`." + ), + data=data, + ) + if version < SCHEMA_VERSION: + return DoctorCheck( + name="registry", + status="warn", + summary="registry schema is older than this dispatch binary supports", + detail=f"{version} < {SCHEMA_VERSION}", + recovery=( + "Run `dispatch down`, `dispatch registry migrate`, then `dispatch up` " + "to apply compatibility migrations." ), data=data, ) diff --git a/src/outfitter/dispatch/registry/models.py b/src/outfitter/dispatch/registry/models.py index 80ce5d6..ebde865 100644 --- a/src/outfitter/dispatch/registry/models.py +++ b/src/outfitter/dispatch/registry/models.py @@ -9,6 +9,7 @@ LaneSource = Literal["own", "attached"] LaneStatus = Literal["idle", "busy", "waiting_approval", "archived", "error", "unknown"] +TurnRuntimeStatus = Literal["started", "completed", "failed"] SyncState = Literal["unknown", "metadata", "partial", "complete", "error"] QueuedMessageStatus = Literal["pending", "sending", "sent", "error"] @@ -28,6 +29,10 @@ class Lane(BaseModel): status: LaneStatus = "unknown" pinned: bool = False active_turn_id: str | None = None # set by the reactor (Phase 3); needed to steer/interrupt + latest_turn_id: str | None = None + latest_turn_status: TurnRuntimeStatus | None = None + latest_error: str | None = None + latest_error_at: datetime | None = None created_at: datetime updated_at: datetime last_event_at: datetime | None = None diff --git a/src/outfitter/dispatch/registry/store.py b/src/outfitter/dispatch/registry/store.py index 4d216be..a678193 100644 --- a/src/outfitter/dispatch/registry/store.py +++ b/src/outfitter/dispatch/registry/store.py @@ -30,7 +30,7 @@ from .refs import BASE58BTC_ALPHABET, CODEX_REF_SOURCE, codex_ref_payload, make_ref Clock = Callable[[], datetime] -SCHEMA_VERSION = 3 +SCHEMA_VERSION = 4 def _utcnow() -> datetime: @@ -51,6 +51,10 @@ def _utcnow() -> datetime: status TEXT NOT NULL DEFAULT 'unknown', pinned INTEGER NOT NULL DEFAULT 0, active_turn_id TEXT, + latest_turn_id TEXT, + latest_turn_status TEXT, + latest_error TEXT, + latest_error_at TEXT, created_at TEXT NOT NULL, updated_at TEXT NOT NULL, last_event_at TEXT @@ -167,6 +171,8 @@ async def _migrate(self, user_version: int) -> None: await self._conn.execute( "CREATE UNIQUE INDEX IF NOT EXISTS idx_lanes_ref ON lanes(ref)" ) + if user_version < 4: + await self._ensure_lane_runtime_columns() async def _ensure_ref_columns(self) -> None: async with self._conn.execute("PRAGMA table_info(lanes)") as cur: @@ -176,6 +182,20 @@ async def _ensure_ref_columns(self) -> None: if name not in columns: await self._conn.execute(f"ALTER TABLE lanes ADD COLUMN {name} TEXT") + async def _ensure_lane_runtime_columns(self) -> None: + async with self._conn.execute("PRAGMA table_info(lanes)") as cur: + rows = await cur.fetchall() + columns = {str(row["name"]) for row in rows} + column_defs = { + "latest_turn_id": "TEXT", + "latest_turn_status": "TEXT", + "latest_error": "TEXT", + "latest_error_at": "TEXT", + } + for name, definition in column_defs.items(): + if name not in columns: + await self._conn.execute(f"ALTER TABLE lanes ADD COLUMN {name} {definition}") + # --- lanes ---------------------------------------------------------------- async def add_lane( @@ -264,8 +284,9 @@ async def add_lane_with_sync( async def _insert_lane(self, lane: Lane) -> None: await self._conn.execute( "INSERT INTO lanes (id, ref, ref_source, ref_payload, ref_mixer, handle, role, cwd, " - "source, status, pinned, active_turn_id, created_at, updated_at, last_event_at) " - "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + "source, status, pinned, active_turn_id, latest_turn_id, latest_turn_status, " + "latest_error, latest_error_at, created_at, updated_at, last_event_at) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", ( lane.id, lane.ref, @@ -279,6 +300,10 @@ async def _insert_lane(self, lane: Lane) -> None: lane.status, int(lane.pinned), lane.active_turn_id, + lane.latest_turn_id, + lane.latest_turn_status, + lane.latest_error, + lane.latest_error_at.isoformat() if lane.latest_error_at else None, lane.created_at.isoformat(), lane.updated_at.isoformat(), lane.last_event_at.isoformat() if lane.last_event_at else None, @@ -381,6 +406,57 @@ async def set_active_turn(self, lane_id: str, turn_id: str | None) -> None: ) await self._conn.commit() + async def record_turn_started(self, lane_id: str, turn_id: str | None) -> None: + await self._conn.execute( + "UPDATE lanes SET active_turn_id = ?, latest_turn_id = ?, " + "latest_turn_status = 'started', latest_error = NULL, latest_error_at = NULL, " + "status = 'busy', updated_at = ? WHERE id = ?", + (turn_id, turn_id, self._now().isoformat(), lane_id), + ) + await self._conn.commit() + + async def record_turn_completed(self, lane_id: str, turn_id: str | None) -> None: + await self._conn.execute( + "UPDATE lanes SET active_turn_id = NULL, latest_turn_id = COALESCE(?, latest_turn_id), " + "latest_turn_status = 'completed', latest_error = NULL, latest_error_at = NULL, " + "status = 'idle', updated_at = ? WHERE id = ?", + (turn_id, self._now().isoformat(), lane_id), + ) + await self._conn.commit() + + async def record_turn_failed( + self, lane_id: str, turn_id: str | None, message: str | None + ) -> None: + now = self._now().isoformat() + await self._conn.execute( + "UPDATE lanes SET active_turn_id = NULL, latest_turn_id = COALESCE(?, latest_turn_id), " + "latest_turn_status = 'failed', latest_error = ?, latest_error_at = ?, " + "status = 'error', updated_at = ? WHERE id = ?", + (turn_id, message, now if message is not None else None, now, lane_id), + ) + await self._conn.commit() + + async def record_turn_request_failed(self, lane_id: str, message: str | None) -> None: + now = self._now().isoformat() + await self._conn.execute( + "UPDATE lanes SET active_turn_id = NULL, latest_turn_id = NULL, " + "latest_turn_status = 'failed', latest_error = ?, latest_error_at = ?, " + "status = 'error', updated_at = ? WHERE id = ?", + (message, now if message is not None else None, now, lane_id), + ) + await self._conn.commit() + + async def mark_lane_idle(self, lane_id: str) -> None: + lane = await self.find_lane(lane_id) + status: LaneStatus = ( + "error" if lane is not None and lane.latest_turn_status == "failed" else "idle" + ) + await self._conn.execute( + "UPDATE lanes SET active_turn_id = NULL, status = ?, updated_at = ? WHERE id = ?", + (status, self._now().isoformat(), lane_id), + ) + await self._conn.commit() + async def touch_lane_event(self, lane_id: str, when: datetime | None = None) -> None: stamp = (when or self._now()).isoformat() await self._conn.execute( diff --git a/src/outfitter/dispatch/surfaces/cli.py b/src/outfitter/dispatch/surfaces/cli.py index 340c034..3115b66 100644 --- a/src/outfitter/dispatch/surfaces/cli.py +++ b/src/outfitter/dispatch/surfaces/cli.py @@ -6,6 +6,7 @@ import json import socket +import sqlite3 from functools import partial from pathlib import Path from typing import Annotated @@ -16,6 +17,14 @@ from outfitter.dispatch.contracts.derive_cli import derive_cli from outfitter.dispatch.version import package_version +CLI_SURFACE_CONTROL_PATHS: tuple[tuple[str, ...], ...] = ( + ("doctor",), + ("mcp",), + ("up",), + ("down",), + ("registry", "migrate"), +) + def _recv_line(sock: socket.socket) -> bytes: buffer = bytearray() @@ -127,7 +136,11 @@ def _doctor( # `up`/`down` manage the daemon PROCESS (not ops, which run inside it). @app.command(name="up", help="Start the daemon (detached singleton).") - def _up() -> None: + def _up( + json_output: Annotated[ + bool, typer.Option("--json/--text", help="Render machine-readable JSON output.") + ] = False, + ) -> None: from outfitter.dispatch.daemon import lifecycle config.ensure_base() @@ -136,13 +149,150 @@ def _up() -> None: except (RuntimeError, TimeoutError) as exc: typer.secho(f"dispatch: {exc}", fg="red", err=True) raise typer.Exit(code=1) from exc - typer.echo("dispatchd started" if started else "dispatchd already running") + if json_output: + typer.echo( + json.dumps( + { + "status": "started" if started else "already_running", + "started": started, + "socket": str(path), + "pidfile": str(config.pidfile_path()), + }, + indent=2, + ) + ) + else: + typer.echo("dispatchd started" if started else "dispatchd already running") @app.command(name="down", help="Stop the daemon.") - def _down() -> None: + def _down( + json_output: Annotated[ + bool, typer.Option("--json/--text", help="Render machine-readable JSON output.") + ] = False, + ) -> None: from outfitter.dispatch.daemon import lifecycle stopped = lifecycle.stop_daemon(path, config.pidfile_path()) - typer.echo("dispatchd stopped" if stopped else "dispatchd not running") + if json_output: + typer.echo( + json.dumps( + { + "status": "stopped" if stopped else "not_running", + "stopped": stopped, + "socket": str(path), + "pidfile": str(config.pidfile_path()), + }, + indent=2, + ) + ) + else: + typer.echo("dispatchd stopped" if stopped else "dispatchd not running") + + registry = typer.Typer(no_args_is_help=True, add_completion=False) + app.add_typer(registry, name="registry") + + @registry.command(name="migrate", help="Apply registry compatibility migrations safely.") + def _registry_migrate( + json_output: Annotated[ + bool, typer.Option("--json/--text", help="Render machine-readable JSON output.") + ] = True, + backup: Annotated[ + bool, + typer.Option( + "--backup/--no-backup", + help="Back up the registry before migrating.", + ), + ] = True, + allow_running: Annotated[ + bool, + typer.Option( + "--allow-running", + help=( + "Allow migration while dispatchd is reachable. " + "Use only for controlled recovery." + ), + ), + ] = False, + ) -> None: + import asyncio + import shutil + from datetime import UTC, datetime + + from outfitter.dispatch.daemon import lifecycle + from outfitter.dispatch.registry.store import SCHEMA_VERSION, Registry + + config.ensure_base() + db = config.db_path() + running = lifecycle.is_daemon_up(path) + if running and not allow_running: + recovery = "Run `dispatch down`, then `dispatch registry migrate`, then `dispatch up`." + payload = { + "status": "blocked", + "migrated": False, + "reason": "daemon_running", + "recovery": recovery, + "db": str(db), + } + if json_output: + typer.echo(json.dumps(payload, indent=2)) + else: + typer.secho(recovery, fg="red", err=True) + raise typer.Exit(code=8) + + before = _registry_version(db) + backup_path: str | None = None + if backup and db.exists(): + stamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + backup_file = db.with_name(f"{db.name}.bak-{stamp}") + shutil.copy2(db, backup_file) + backup_path = str(backup_file) + + async def _migrate() -> None: + store = await Registry.open(db) + await store.close() + + try: + asyncio.run(_migrate()) + except RuntimeError as exc: + payload = { + "status": "failed", + "migrated": False, + "reason": str(exc), + "db": str(db), + "from_schema_version": before, + "to_schema_version": SCHEMA_VERSION, + "backup": backup_path, + } + if json_output: + typer.echo(json.dumps(payload, indent=2)) + else: + typer.secho(f"dispatch: {exc}", fg="red", err=True) + raise typer.Exit(code=8) from exc + + after = _registry_version(db) + payload = { + "status": "ok", + "migrated": before != after, + "db": str(db), + "from_schema_version": before, + "to_schema_version": after, + "backup": backup_path, + "daemon_running": running, + } + if json_output: + typer.echo(json.dumps(payload, indent=2)) + else: + typer.echo( + f"registry schema {before if before is not None else 'none'} -> {after}; " + f"backup: {backup_path or 'none'}" + ) return app + + +def _registry_version(path: Path) -> int | None: + if not path.exists(): + return None + with sqlite3.connect(path) as conn: + row = conn.execute("PRAGMA user_version").fetchone() + return int(row[0]) if row is not None else None diff --git a/tests/client/test_client.py b/tests/client/test_client.py index e31986e..ccca6b3 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -45,6 +45,20 @@ async def test_bounded_request_timeout_discards_pending( assert c._router._pending == {} +async def test_thread_resume_can_avoid_initial_turn_hydration( + client: tuple[AppServerClient, FakeTransport], +) -> None: + c, fake = client + fake.auto = _result_for("thread/resume", {"thread": {"id": "L1"}}) + thread = await c.thread_resume("L1", exclude_turns=True) + assert thread.id == "L1" + assert fake.sent[-1] == { + "id": 1, + "method": "thread/resume", + "params": {"threadId": "L1", "excludeTurns": True}, + } + + async def test_thread_start_parses_thread_info( client: tuple[AppServerClient, FakeTransport], ) -> None: @@ -202,6 +216,53 @@ async def test_thread_list_reads_data_key( assert [t.id for t in threads] == ["a", "b"] +async def test_thread_list_sends_current_native_filters( + client: tuple[AppServerClient, FakeTransport], +) -> None: + c, fake = client + fake.auto = _result_for("thread/list", {"data": []}) + await c.thread_list( + limit=10, + archived=False, + cwd="/repo", + search_term="schema", + sort_direction="desc", + sort_key="updated_at", + source_kinds=["cli"], + use_state_db_only=True, + ) + assert fake.sent[-1] == { + "id": 1, + "method": "thread/list", + "params": { + "limit": 10, + "archived": False, + "cwd": "/repo", + "searchTerm": "schema", + "sortDirection": "desc", + "sortKey": "updated_at", + "sourceKinds": ["cli"], + "useStateDbOnly": True, + }, + } + + +async def test_turn_start_sends_service_tier_when_set( + client: tuple[AppServerClient, FakeTransport], +) -> None: + c, fake = client + fake.auto = _result_for("turn/start", {"turnId": "turn-1"}) + await c.turn_start("L1", "go", cwd="/work", service_tier="priority") + assert fake.sent[-1]["params"] == { + "threadId": "L1", + "input": [{"type": "text", "text": "go"}], + "cwd": "/work", + "approvalPolicy": "never", + "sandboxPolicy": {"type": "readOnly"}, + "serviceTier": "priority", + } + + async def test_request_error_raises_app_server_error( client: tuple[AppServerClient, FakeTransport], ) -> None: diff --git a/tests/client/test_events.py b/tests/client/test_events.py index 1430e3a..0b989a1 100644 --- a/tests/client/test_events.py +++ b/tests/client/test_events.py @@ -11,6 +11,7 @@ StatusChanged, ThreadCompacted, TurnCompleted, + TurnFailed, TurnStarted, project_notification, project_server_request, @@ -24,6 +25,13 @@ def test_turn_started_and_completed_carry_lane_and_turn() -> None: assert completed == [TurnCompleted("L1", "T1")] +def test_turn_failed_carries_message() -> None: + failed = project_notification( + "turn/failed", {"threadId": "L1", "turnId": "T1", "message": "unsupported model"} + ) + assert failed == [TurnFailed("L1", "T1", "unsupported model")] + + def test_item_completed_carries_item_id() -> None: assert project_notification("item/completed", {"threadId": "L1", "itemId": "I9"}) == [ ItemCompleted("L1", "I9") diff --git a/tests/client/test_models.py b/tests/client/test_models.py index 63b8ca8..ef336e7 100644 --- a/tests/client/test_models.py +++ b/tests/client/test_models.py @@ -11,8 +11,10 @@ ThreadGoal, ThreadGoalSetParams, ThreadInfo, + ThreadListParams, ThreadListResult, ThreadReadParams, + ThreadResumeParams, ThreadRollbackParams, ThreadStartParams, TurnStartParams, @@ -38,7 +40,7 @@ def test_thread_start_includes_rich_session_options() -> None: developer_instructions="dev", personality="pragmatic", service_tier="priority", - model="gpt-5-codex", + model="test-model", model_provider="openai", ) dumped = params.model_dump(by_alias=True, exclude_none=True) @@ -48,7 +50,7 @@ def test_thread_start_includes_rich_session_options() -> None: assert dumped["developerInstructions"] == "dev" assert dumped["personality"] == "pragmatic" assert dumped["serviceTier"] == "priority" - assert dumped["model"] == "gpt-5-codex" + assert dumped["model"] == "test-model" assert dumped["modelProvider"] == "openai" @@ -79,7 +81,8 @@ def test_turn_start_includes_optional_overrides_when_set() -> None: approvals_reviewer="user", effort="xhigh", summary="concise", - model="gpt-5-codex", + model="test-model", + service_tier="priority", personality="friendly", output_schema={"type": "object"}, ) @@ -87,7 +90,8 @@ def test_turn_start_includes_optional_overrides_when_set() -> None: assert dumped["approvalsReviewer"] == "user" assert dumped["effort"] == "xhigh" assert dumped["summary"] == "concise" - assert dumped["model"] == "gpt-5-codex" + assert dumped["model"] == "test-model" + assert dumped["serviceTier"] == "priority" assert dumped["personality"] == "friendly" assert dumped["outputSchema"] == {"type": "object"} @@ -106,11 +110,44 @@ def test_thread_list_result_reads_data_key() -> None: assert result.next_cursor == "c1" +def test_thread_list_params_include_current_native_filters() -> None: + params = ThreadListParams( + limit=25, + archived=False, + cwd=["/repo", "/other"], + search_term="schema drift", + sort_direction="desc", + sort_key="updated_at", + source_kinds=["cli", "appServer"], + use_state_db_only=True, + ) + dumped = params.model_dump(by_alias=True, exclude_none=True) + assert dumped == { + "limit": 25, + "archived": False, + "cwd": ["/repo", "/other"], + "searchTerm": "schema drift", + "sortDirection": "desc", + "sortKey": "updated_at", + "sourceKinds": ["cli", "appServer"], + "useStateDbOnly": True, + } + + +def test_thread_resume_can_request_low_hydration_subscription() -> None: + params = ThreadResumeParams(thread_id="t1", exclude_turns=True) + assert params.model_dump(by_alias=True, exclude_none=True) == { + "threadId": "t1", + "excludeTurns": True, + } + + def test_thread_info_keeps_sync_metadata_fields() -> None: thread = ThreadInfo.model_validate( { "id": "t1", "sessionId": "t1", + "parentThreadId": "parent", "path": "/tmp/rollout.jsonl", "modelProvider": "openai", "threadSource": "user", @@ -119,6 +156,7 @@ def test_thread_info_keeps_sync_metadata_fields() -> None: ) assert thread.session_id == "t1" + assert thread.parent_thread_id == "parent" assert thread.path == "/tmp/rollout.jsonl" assert thread.model_provider == "openai" assert thread.thread_source == "user" @@ -162,7 +200,7 @@ def test_thread_fork_rollback_and_compact_params() -> None: cwd="/w", sandbox="workspace-write", approval_policy="on-request", - model="gpt-5-codex", + model="test-model", ephemeral=True, ) assert fork.model_dump(by_alias=True, exclude_none=True) == { @@ -170,7 +208,7 @@ def test_thread_fork_rollback_and_compact_params() -> None: "cwd": "/w", "sandbox": "workspace-write", "approvalPolicy": "on-request", - "model": "gpt-5-codex", + "model": "test-model", "ephemeral": True, } assert ThreadRollbackParams(thread_id="t1", num_turns=2).model_dump( diff --git a/tests/core/test_handlers.py b/tests/core/test_handlers.py index bffc2db..f030fc2 100644 --- a/tests/core/test_handlers.py +++ b/tests/core/test_handlers.py @@ -11,6 +11,7 @@ from outfitter.dispatch.client.errors import AppServerError as ClientAppServerError from outfitter.dispatch.client.errors import TransportError +from outfitter.dispatch.client.events import LaneIdle, TurnFailed, TurnStarted from outfitter.dispatch.client.models import ( ThreadGoal, ThreadInfo, @@ -50,6 +51,8 @@ TranscriptInput, WatchInput, ) +from outfitter.dispatch.core.reactor import Reactor +from outfitter.dispatch.core.triggers import TriggerRunner from outfitter.dispatch.registry.store import Registry from tests.fakes import FakeLaneClient, make_ctx @@ -89,19 +92,22 @@ async def test_new_lane_sets_name_and_sends_initial_turn(store: Registry, tmp_pa sandbox="workspace-write", approval_policy="on-request", effort="low", - model="gpt-5-codex", + model="test-model", + service_tier="priority", developer_instructions="stay focused", ), ctx, ) assert out.handle == "@[dispatch] builder" - assert out.sent is True + assert out.message_accepted is True + assert out.goal_set is False + assert out.latest_turn.status is None assert any( name == "thread_start" and kw["sandbox"] == "workspace-write" and kw["approval_policy"] == "on-request" - and kw["model"] == "gpt-5-codex" + and kw["model"] == "test-model" and kw["developer_instructions"] == "stay focused" for name, kw in client.calls ) @@ -114,6 +120,7 @@ async def test_new_lane_sets_name_and_sends_initial_turn(store: Registry, tmp_pa and kw["text"] == "start" and kw["sandbox_policy"] == {"type": "workspaceWrite"} and kw["effort"] == "low" + and kw["service_tier"] == "priority" for name, kw in client.calls ) @@ -126,11 +133,47 @@ async def test_new_lane_no_send_registers_without_turn(store: Registry, tmp_path NewInput(name="idle", cwd=str(tmp_path), text="do not send", send=False), ctx ) - assert out.sent is False + assert out.message_accepted is False assert (await store.find_lane("lane-1")) is not None assert not any(name == "turn_start" for name, _ in client.calls) +async def test_new_lane_sets_native_goal_before_initial_turn( + store: Registry, tmp_path: Path +) -> None: + client = FakeLaneClient() + ctx = make_ctx(store, client) + + out = await handlers.new_lane( + NewInput(name="goal-worker", cwd=str(tmp_path), goal="Loop until green.", text="Begin."), + ctx, + ) + + assert out.goal_set is True + assert out.message_accepted is True + goal_index = [name for name, _ in client.calls].index("thread_goal_set") + turn_index = [name for name, _ in client.calls].index("turn_start") + assert goal_index < turn_index + assert any( + name == "thread_goal_set" and kw["objective"] == "Loop until green." + for name, kw in client.calls + ) + + +async def test_new_lane_rejects_goal_slash_command_text_without_native_goal( + store: Registry, tmp_path: Path +) -> None: + client = FakeLaneClient() + ctx = make_ctx(store, client) + + with pytest.raises(ValidationError, match="slash commands are not interpreted"): + await handlers.new_lane( + NewInput(name="bad-goal", cwd=str(tmp_path), text="/goal ship"), ctx + ) + + assert not client.calls + + class _FailingTurnClient(FakeLaneClient): async def turn_start(self, *args: object, **kwargs: object) -> dict[str, object]: self._record("turn_start", failed=True) @@ -148,6 +191,10 @@ async def test_new_lane_initial_send_failure_leaves_lane_registered( lane = await store.find_lane("lane-1") assert lane is not None + assert lane.status == "error" + assert lane.latest_turn_id is None + assert lane.latest_turn_status == "failed" + assert lane.latest_error == "boom" log = await handlers.show_log(LogInput(limit=10), ctx) send_records = [record for record in log.actions if record.op == "send"] assert send_records @@ -164,6 +211,21 @@ async def test_send_resolves_by_handle(store: Registry) -> None: assert by_ref.lane == "lane-1" +async def test_send_failure_marks_latest_turn_error(store: Registry) -> None: + client = _FailingTurnClient() + ctx = make_ctx(store, client) + await handlers.open_lane(OpenInput(name="beta"), ctx) + + with pytest.raises(TransportError): + await handlers.send(LaneTextInput(lane="@beta", text="boom"), ctx) + + detail = await handlers.show(ShowInput(lane="@beta"), ctx) + assert detail.status == "error" + assert detail.latest_turn.id is None + assert detail.latest_turn.status == "failed" + assert detail.latest_turn.error == "boom" + + async def test_send_modes_context_and_interject(store: Registry) -> None: client = FakeLaneClient() ctx = make_ctx(store, client) @@ -424,6 +486,22 @@ async def test_watch_zero_timeout_returns_immediately(store: Registry) -> None: assert out.timed_out is True +async def test_reactor_persists_turn_failure_for_get(store: Registry) -> None: + ctx = make_ctx(store) + await handlers.open_lane(OpenInput(name="alpha"), ctx) + reactor = Reactor(ctx, runner=TriggerRunner(ctx, now=lambda: store._now())) + + await reactor.handle(TurnStarted("lane-1", "turn-1")) + await reactor.handle(TurnFailed("lane-1", "turn-1", "unsupported model")) + await reactor.handle(LaneIdle("lane-1")) + + out = await handlers.show(ShowInput(lane="lane-1"), ctx) + assert out.status == "error" + assert out.latest_turn.id == "turn-1" + assert out.latest_turn.status == "failed" + assert out.latest_turn.error == "unsupported model" + + async def test_goal_get_set_and_clear_use_native_goal_api(store: Registry) -> None: client = FakeLaneClient() client.goal_result = ThreadGoal( @@ -818,7 +896,7 @@ async def test_attach_with_sync_indexes_jsonl_and_roster_reports_state( '"payload":{"id":"T9","cwd":"/work","source":"vscode",' '"thread_source":"user","model_provider":"openai"}}', '{"type":"turn_context","timestamp":"2026-06-05T10:00:01.000Z",' - '"payload":{"model":"gpt-5-codex","effort":"low"}}', + '"payload":{"model":"test-model","effort":"low"}}', '{"type":"event_msg","timestamp":"2026-06-05T10:00:02.000Z",' '"payload":{"type":"task_complete","turn_id":"turn-1"}}', ] @@ -900,7 +978,12 @@ async def test_discover_lists_persisted_sessions_from_client(store: Registry) -> # Discovery reads through to the client's thread_list with the requested limit # AND state-db only — the latter is what keeps it read-only (no live resume). assert any( - name == "thread_list" and kw["limit"] == 10 and kw["use_state_db_only"] is True + name == "thread_list" + and kw["limit"] == 10 + and kw["archived"] is False + and kw["sort_direction"] == "desc" + and kw["sort_key"] == "updated_at" + and kw["use_state_db_only"] is True for name, kw in client.calls ) # ...and registers nothing (pure read; lane authority untouched). diff --git a/tests/core/test_sync.py b/tests/core/test_sync.py index 2380f0b..bf510b5 100644 --- a/tests/core/test_sync.py +++ b/tests/core/test_sync.py @@ -39,7 +39,7 @@ def test_quick_scan_reads_bounded_top_and_tail_without_partial_line(tmp_path: Pa { "type": "turn_context", "timestamp": "2026-06-05T10:00:01.000Z", - "payload": {"cwd": "/work", "model": "gpt-5-codex", "effort": "low"}, + "payload": {"cwd": "/work", "model": "test-model", "effort": "low"}, }, { "type": "event_msg", @@ -71,7 +71,7 @@ def test_quick_scan_reads_bounded_top_and_tail_without_partial_line(tmp_path: Pa assert facts.source_kind == "vscode" assert facts.thread_source == "user" assert facts.model_provider == "openai" - assert facts.model == "gpt-5-codex" + assert facts.model == "test-model" assert facts.reasoning_effort == "low" assert facts.latest_turn_id == "turn-1" assert facts.latest_event_at == "2026-06-05T10:00:03.000Z" diff --git a/tests/fakes.py b/tests/fakes.py index 469db9b..59a46f3 100644 --- a/tests/fakes.py +++ b/tests/fakes.py @@ -25,6 +25,7 @@ ThreadGoal, ThreadGoalStatus, ThreadInfo, + ThreadListCwdFilter, ThreadSandbox, ThreadSearchMatch, ThreadSearchResult, @@ -89,8 +90,17 @@ async def thread_start( self.threads[thread.id] = thread return thread - async def thread_resume(self, thread_id: str) -> ThreadInfo: - self._record("thread_resume", thread_id=thread_id) + async def thread_resume( + self, + thread_id: str, + *, + exclude_turns: bool | None = None, + ) -> ThreadInfo: + self._record( + "thread_resume", + thread_id=thread_id, + exclude_turns=exclude_turns, + ) return self.threads.get(thread_id, ThreadInfo(id=thread_id)) async def thread_fork( @@ -127,9 +137,32 @@ async def thread_fork( return fork async def thread_list( - self, limit: int = 50, cursor: str | None = None, use_state_db_only: bool | None = None + self, + limit: int = 50, + cursor: str | None = None, + use_state_db_only: bool | None = None, + *, + archived: bool | None = None, + cwd: ThreadListCwdFilter | None = None, + model_providers: list[str] | None = None, + search_term: str | None = None, + sort_direction: SortDirection | None = None, + sort_key: ThreadSortKey | None = None, + source_kinds: list[ThreadSourceKind] | None = None, ) -> list[ThreadInfo]: - self._record("thread_list", limit=limit, use_state_db_only=use_state_db_only) + self._record( + "thread_list", + limit=limit, + cursor=cursor, + archived=archived, + cwd=cwd, + model_providers=model_providers, + search_term=search_term, + sort_direction=sort_direction, + sort_key=sort_key, + source_kinds=source_kinds, + use_state_db_only=use_state_db_only, + ) return self.list_result async def thread_read(self, thread_id: str, include_turns: bool = False) -> dict[str, object]: @@ -235,6 +268,7 @@ async def turn_start( effort: Effort | None = None, summary: ReasoningSummary | None = None, model: str | None = None, + service_tier: str | None = None, output_schema: dict[str, object] | None = None, personality: Personality | None = None, ) -> dict[str, object]: @@ -249,6 +283,7 @@ async def turn_start( effort=effort, summary=summary, model=model, + service_tier=service_tier, output_schema=output_schema, personality=personality, ) @@ -309,9 +344,14 @@ def __init__(self) -> None: self.closed = asyncio.Event() self.resumed: list[str] = [] - async def thread_resume(self, thread_id: str) -> ThreadInfo: + async def thread_resume( + self, + thread_id: str, + *, + exclude_turns: bool | None = None, + ) -> ThreadInfo: self.resumed.append(thread_id) - return await super().thread_resume(thread_id) + return await super().thread_resume(thread_id, exclude_turns=exclude_turns) async def wait_closed(self) -> None: await self.closed.wait() diff --git a/tests/registry/test_store.py b/tests/registry/test_store.py index 110d7d2..2dfaaac 100644 --- a/tests/registry/test_store.py +++ b/tests/registry/test_store.py @@ -6,6 +6,7 @@ from datetime import UTC, datetime from pathlib import Path +import aiosqlite import pytest import pytest_asyncio @@ -108,6 +109,7 @@ async def test_v2_registry_migration_backfills_unique_refs(tmp_path: Path) -> No assert [lane.id for lane in lanes] == ["A", "B"] assert len({lane.ref for lane in lanes}) == 2 assert all(lane.ref for lane in lanes) + assert all(lane.latest_turn_status is None for lane in lanes) async with migrated._conn.execute("PRAGMA user_version") as cur: row = await cur.fetchone() assert row is not None @@ -116,12 +118,133 @@ async def test_v2_registry_migration_backfills_unique_refs(tmp_path: Path) -> No await migrated.close() +async def test_migrates_v3_registry_with_runtime_columns(tmp_path: Path) -> None: + db = tmp_path / "registry-v3.db" + conn = await aiosqlite.connect(db) + await conn.executescript( + """ + CREATE TABLE lanes ( + id TEXT PRIMARY KEY, + ref TEXT NOT NULL UNIQUE, + ref_source TEXT NOT NULL, + ref_payload TEXT NOT NULL, + ref_mixer TEXT NOT NULL, + handle TEXT NOT NULL, + role TEXT, + cwd TEXT, + source TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'unknown', + pinned INTEGER NOT NULL DEFAULT 0, + active_turn_id TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + last_event_at TEXT + ); + INSERT INTO lanes ( + id, ref, ref_source, ref_payload, ref_mixer, handle, source, status, + pinned, created_at, updated_at + ) VALUES ( + 'A', '0abc1', '0', 'abc', '1', '@a', 'own', 'idle', 0, + '2026-06-03T12:00:01+00:00', '2026-06-03T12:00:01+00:00' + ); + CREATE TABLE triggers ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + lane_selector TEXT NOT NULL, + when_spec TEXT NOT NULL, + action_spec TEXT NOT NULL, + guard_spec TEXT, + enabled INTEGER NOT NULL DEFAULT 1, + created_at TEXT, + last_fired_at TEXT + ); + CREATE TABLE actions_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ts TEXT NOT NULL, + op TEXT NOT NULL, + lane TEXT, + trigger_id TEXT, + detail TEXT, + outcome TEXT NOT NULL DEFAULT 'ok' + ); + CREATE TABLE queued_messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + lane TEXT NOT NULL, + text TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + error TEXT + ); + CREATE TABLE lane_sync_sources ( + lane TEXT PRIMARY KEY, + state TEXT NOT NULL, + source_path TEXT, + source_device INTEGER, + source_inode INTEGER, + source_size INTEGER, + source_mtime_ns INTEGER, + line_count INTEGER, + first_offset INTEGER, + tail_offset INTEGER, + last_synced_at TEXT, + error TEXT + ); + CREATE TABLE lane_snapshots ( + lane TEXT PRIMARY KEY, + display_name TEXT, + preview TEXT, + cwd TEXT, + source TEXT, + thread_source TEXT, + model_provider TEXT, + model TEXT, + reasoning_effort TEXT, + session_id TEXT, + latest_event_at TEXT, + latest_turn_id TEXT, + transcript_partial INTEGER NOT NULL DEFAULT 1 + ); + PRAGMA user_version = 3; + """ + ) + await conn.commit() + await conn.close() + + migrated = await Registry.open(db, now=_clock) + try: + lane = await migrated.get_lane("A") + assert lane.latest_turn_id is None + assert lane.latest_turn_status is None + await migrated.record_turn_failed("A", "turn-1", "unsupported model") + failed = await migrated.get_lane("A") + assert failed.status == "error" + assert failed.latest_turn_id == "turn-1" + assert failed.latest_turn_status == "failed" + assert failed.latest_error == "unsupported model" + finally: + await migrated.close() + + async def test_get_missing_lane_raises_not_found(store: Registry) -> None: assert await store.find_lane("nope") is None with pytest.raises(NotFoundError): await store.get_lane("nope") +async def test_turn_request_failure_clears_stale_turn_id(store: Registry) -> None: + await store.add_lane(id="L1", handle="@a", source="own") + await store.record_turn_started("L1", "turn-1") + await store.record_turn_failed("L1", "turn-1", "old failure") + + await store.record_turn_request_failed("L1", "request rejected") + + lane = await store.get_lane("L1") + assert lane.latest_turn_id is None + assert lane.latest_turn_status == "failed" + assert lane.latest_error == "request rejected" + + async def test_list_excludes_archived_by_default(store: Registry) -> None: await store.add_lane(id="L1", handle="@a", source="own") await store.add_lane(id="L2", handle="@b", source="attached") @@ -240,7 +363,7 @@ async def test_lane_sync_roundtrip_and_many_lookup(store: Registry) -> None: source="vscode", thread_source="user", model_provider="openai", - model="gpt-5-codex", + model="test-model", reasoning_effort="low", session_id="L1", latest_event_at="2026-06-05T10:00:00.000Z", diff --git a/tests/surfaces/test_derive_cli.py b/tests/surfaces/test_derive_cli.py index 22e3253..d5fc748 100644 --- a/tests/surfaces/test_derive_cli.py +++ b/tests/surfaces/test_derive_cli.py @@ -93,7 +93,15 @@ def test_new_command_maps_repeated_presets_and_no_send() -> None: def invoke(op_id: str, params: dict[str, object]) -> dict[str, object]: captured["op"] = op_id captured["params"] = params - return {"id": "L1", "handle": "@x", "source": "own", "status": "idle", "sent": False} + return { + "id": "L1", + "handle": "@x", + "source": "own", + "status": "idle", + "message_accepted": False, + "goal_set": False, + "latest_turn": {"id": None, "status": None, "error": None, "error_at": None}, + } app = derive_cli(REGISTRY, invoke) result = runner.invoke( @@ -275,6 +283,29 @@ def invoke(op_id: str, params: dict[str, object]) -> dict[str, object]: assert confirmed.exit_code == 0 +def test_destroy_ops_support_explicit_noninteractive_confirmation() -> None: + calls: list[tuple[str, dict[str, object]]] = [] + + def invoke(op_id: str, params: dict[str, object]) -> dict[str, object]: + calls.append((op_id, params)) + return { + "id": "L1", + "handle": "@x", + "managed": True, + "source": "own", + "status": "archived", + } + + app = derive_cli(REGISTRY, invoke) + refused = runner.invoke(app, ["archive", "L1", "--no-interactive", "--json"]) + confirmed = runner.invoke(app, ["archive", "L1", "--yes", "--no-interactive", "--json"]) + + assert refused.exit_code == 2 + assert "requires --yes" in refused.stderr + assert confirmed.exit_code == 0 + assert calls == [("archive", {"target": "L1"})] + + def test_json_destroy_prompt_does_not_pollute_stdout() -> None: def invoke(op_id: str, params: dict[str, object]) -> dict[str, object]: return { diff --git a/tests/surfaces/test_derive_mcp.py b/tests/surfaces/test_derive_mcp.py index 3f6a50d..4cf0b02 100644 --- a/tests/surfaces/test_derive_mcp.py +++ b/tests/surfaces/test_derive_mcp.py @@ -39,7 +39,7 @@ def test_action_schema_and_annotations_from_op() -> None: one_of = lane_write.inputSchema["oneOf"] new_schema = next(s for s in one_of if s["properties"]["op"]["const"] == "new") send_schema = next(s for s in one_of if s["properties"]["op"]["const"] == "send") - assert set(new_schema["properties"]) >= {"op", "name", "preset", "text", "send"} + assert set(new_schema["properties"]) >= {"op", "name", "preset", "goal", "text", "send"} assert "caller_thread_id" not in send_schema["properties"] assert {s["properties"]["op"]["const"] for s in one_of} >= { "fork", diff --git a/tests/surfaces/test_parity.py b/tests/surfaces/test_parity.py index ebb4b72..8368b19 100644 --- a/tests/surfaces/test_parity.py +++ b/tests/surfaces/test_parity.py @@ -8,10 +8,17 @@ from __future__ import annotations import json +from pathlib import Path +import typer from typer.testing import CliRunner -from outfitter.dispatch.contracts.derive_cli import derive_cli +from outfitter.dispatch.contracts.derive_cli import ( + CLI_PROJECTION_CONTROL_PATHS, + cli_public_routes, + cli_schema_routes, + derive_cli, +) from outfitter.dispatch.contracts.derive_mcp import derive_mcp_projection from outfitter.dispatch.contracts.errors import ( AppServerError, @@ -24,6 +31,7 @@ ) from outfitter.dispatch.contracts.schema import is_internal_field from outfitter.dispatch.core.ops import REGISTRY +from outfitter.dispatch.surfaces.cli import CLI_SURFACE_CONTROL_PATHS, build_cli def _stub_invoke(op_id: str, params: dict[str, object]) -> dict[str, object]: @@ -91,15 +99,52 @@ def test_mcp_model_parity_per_op() -> None: def test_cli_schema_routes_cover_public_ops() -> None: app = derive_cli(REGISTRY, _stub_invoke) - routed_ops = set(_EXPECTED_CLI_SCHEMA_ROUTES.values()) + routed_ops = set(cli_schema_routes().values()) assert set(REGISTRY.ids()) - routed_ops == {"open", "fork", "rollback", "compact"} + assert cli_schema_routes() == _EXPECTED_CLI_SCHEMA_ROUTES - for command, op_id in _EXPECTED_CLI_SCHEMA_ROUTES.items(): + for command, op_id in cli_schema_routes().items(): result = runner.invoke(app, ["schema", command]) assert result.exit_code == 0, command assert json.loads(result.output)["op"] == op_id +def test_cli_registered_commands_are_declared_in_projection_manifest() -> None: + app = derive_cli(REGISTRY, _stub_invoke) + + registered = _registered_paths(app) + declared = {route.path for route in cli_public_routes()} | set(CLI_PROJECTION_CONTROL_PATHS) + + assert registered == declared + + +def test_full_cli_commands_are_declared_projection_or_control_paths() -> None: + app = build_cli(socket_path=Path("/tmp/dispatch-test.sock")) + + registered = _registered_paths(app) + declared = ( + {route.path for route in cli_public_routes()} + | set(CLI_PROJECTION_CONTROL_PATHS) + | set(CLI_SURFACE_CONTROL_PATHS) + ) + + assert registered == declared + + +def _registered_paths(app: typer.Typer) -> set[tuple[str, ...]]: + root_paths = { + (command.name,) for command in app.registered_commands if command.name is not None + } + group_paths: set[tuple[str, str]] = set() + for group in app.registered_groups: + if group.name is None or group.typer_instance is None: + continue + for command in group.typer_instance.registered_commands: + if command.name is not None: + group_paths.add((group.name, command.name)) + return root_paths | group_paths + + def test_managed_thread_outputs_include_stable_identity_fields() -> None: app = derive_cli(REGISTRY, _stub_invoke) required = {"lane", "ref", "id", "title", "handle", "managed", "source", "status", "cwd"} diff --git a/tests/test_doctor.py b/tests/test_doctor.py index 74341c2..e07927b 100644 --- a/tests/test_doctor.py +++ b/tests/test_doctor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import sqlite3 from pathlib import Path @@ -11,10 +12,102 @@ from outfitter.dispatch.cli import app as cli_app from outfitter.dispatch.doctor import DoctorOptions, run_doctor from outfitter.dispatch.registry.store import SCHEMA_VERSION, Registry +from outfitter.dispatch.surfaces.cli import build_cli runner = CliRunner() +def _create_v3_registry(path: Path) -> None: + with sqlite3.connect(path) as conn: + conn.executescript( + """ + CREATE TABLE lanes ( + id TEXT PRIMARY KEY, + ref TEXT NOT NULL UNIQUE, + ref_source TEXT NOT NULL, + ref_payload TEXT NOT NULL, + ref_mixer TEXT NOT NULL, + handle TEXT NOT NULL, + role TEXT, + cwd TEXT, + source TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'unknown', + pinned INTEGER NOT NULL DEFAULT 0, + active_turn_id TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + last_event_at TEXT + ); + INSERT INTO lanes ( + id, ref, ref_source, ref_payload, ref_mixer, handle, source, status, + pinned, created_at, updated_at + ) VALUES ( + 'A', '0abc1', '0', 'abc', '1', '@a', 'own', 'idle', 0, + '2026-06-03T12:00:01+00:00', '2026-06-03T12:00:01+00:00' + ); + CREATE TABLE triggers ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + lane_selector TEXT NOT NULL, + when_spec TEXT NOT NULL, + action_spec TEXT NOT NULL, + guard_spec TEXT, + enabled INTEGER NOT NULL DEFAULT 1, + created_at TEXT, + last_fired_at TEXT + ); + CREATE TABLE actions_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ts TEXT NOT NULL, + op TEXT NOT NULL, + lane TEXT, + trigger_id TEXT, + detail TEXT, + outcome TEXT NOT NULL DEFAULT 'ok' + ); + CREATE TABLE queued_messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + lane TEXT NOT NULL, + text TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + error TEXT + ); + CREATE TABLE lane_sync_sources ( + lane TEXT PRIMARY KEY, + state TEXT NOT NULL, + source_path TEXT, + source_device INTEGER, + source_inode INTEGER, + source_size INTEGER, + source_mtime_ns INTEGER, + line_count INTEGER, + first_offset INTEGER, + tail_offset INTEGER, + last_synced_at TEXT, + error TEXT + ); + CREATE TABLE lane_snapshots ( + lane TEXT PRIMARY KEY, + display_name TEXT, + preview TEXT, + cwd TEXT, + source TEXT, + thread_source TEXT, + model_provider TEXT, + model TEXT, + reasoning_effort TEXT, + session_id TEXT, + latest_event_at TEXT, + latest_turn_id TEXT, + transcript_partial INTEGER NOT NULL DEFAULT 1 + ); + PRAGMA user_version = 3; + """ + ) + + def test_doctor_reports_missing_console_scripts_and_skips_app_server( monkeypatch: MonkeyPatch, tmp_path: Path ) -> None: @@ -75,6 +168,25 @@ def test_doctor_warns_for_unversioned_registry_migration( assert "dispatch down" in registry.recovery +def test_doctor_warns_for_old_registry_migration(monkeypatch: MonkeyPatch, tmp_path: Path) -> None: + home = tmp_path / "dispatch-home" + home.mkdir() + _create_v3_registry(home / "registry.db") + monkeypatch.setenv("DISPATCH_HOME", str(home)) + monkeypatch.setenv("CODEX_HOME", str(tmp_path / "codex-home")) + + report = run_doctor(DoctorOptions(app_server=False)) + + registry = next(check for check in report.checks if check.name == "registry") + assert registry.status == "warn" + assert registry.summary == "registry schema is older than this dispatch binary supports" + assert registry.recovery is not None + assert "dispatch registry migrate" in registry.recovery + assert registry.data["schema_version"] == 3 + assert registry.data["supported_schema_version"] == SCHEMA_VERSION + assert registry.data["row_counts"] == {"lanes": 1, "triggers": 0} + + async def test_registry_open_marks_schema_version(tmp_path: Path) -> None: path = tmp_path / "registry.db" @@ -110,3 +222,70 @@ def test_doctor_cli_json_and_text_modes(monkeypatch: MonkeyPatch, tmp_path: Path assert '"checks"' in json_result.output assert text_result.exit_code in {0, 8} assert "dispatch doctor" in text_result.output + + +def test_up_down_support_json(monkeypatch: MonkeyPatch, tmp_path: Path) -> None: + monkeypatch.setenv("DISPATCH_HOME", str(tmp_path / "dispatch-home")) + + def start_detached(_socket: Path, _pidfile: Path) -> bool: + return True + + def stop_daemon(_socket: Path, _pidfile: Path) -> bool: + return True + + monkeypatch.setattr("outfitter.dispatch.daemon.lifecycle.start_detached", start_detached) + monkeypatch.setattr("outfitter.dispatch.daemon.lifecycle.stop_daemon", stop_daemon) + app = build_cli(socket_path=tmp_path / "dispatchd.sock") + + up = runner.invoke(app, ["up", "--json"]) + down = runner.invoke(app, ["down", "--json"]) + + assert up.exit_code == 0 + assert down.exit_code == 0 + assert '"status": "started"' in up.output + assert '"started": true' in up.output + assert '"status": "stopped"' in down.output + assert '"stopped": true' in down.output + + +def test_registry_migrate_command_updates_old_schema( + monkeypatch: MonkeyPatch, tmp_path: Path +) -> None: + home = tmp_path / "dispatch-home" + home.mkdir() + _create_v3_registry(home / "registry.db") + monkeypatch.setenv("DISPATCH_HOME", str(home)) + app = build_cli(socket_path=tmp_path / "dispatchd.sock") + + result = runner.invoke(app, ["registry", "migrate", "--json", "--no-backup"]) + + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["status"] == "ok" + assert payload["from_schema_version"] == 3 + assert payload["to_schema_version"] == SCHEMA_VERSION + assert payload["migrated"] is True + with sqlite3.connect(home / "registry.db") as conn: + version = conn.execute("PRAGMA user_version").fetchone()[0] + columns = {row[1] for row in conn.execute("PRAGMA table_info(lanes)").fetchall()} + assert version == SCHEMA_VERSION + assert {"latest_turn_id", "latest_turn_status", "latest_error", "latest_error_at"} <= columns + + +def test_registry_migrate_blocks_while_daemon_running( + monkeypatch: MonkeyPatch, tmp_path: Path +) -> None: + home = tmp_path / "dispatch-home" + home.mkdir() + _create_v3_registry(home / "registry.db") + monkeypatch.setenv("DISPATCH_HOME", str(home)) + monkeypatch.setattr("outfitter.dispatch.daemon.lifecycle.is_daemon_up", lambda _path: True) + app = build_cli(socket_path=tmp_path / "dispatchd.sock") + + result = runner.invoke(app, ["registry", "migrate", "--json", "--no-backup"]) + + assert result.exit_code == 8 + payload = json.loads(result.output) + assert payload["status"] == "blocked" + assert payload["reason"] == "daemon_running" + assert "dispatch down" in payload["recovery"] diff --git a/uv.lock b/uv.lock index 54e39dd..9ad1a02 100644 --- a/uv.lock +++ b/uv.lock @@ -466,7 +466,7 @@ wheels = [ [[package]] name = "outfitter-dispatch" -version = "0.4.0" +version = "0.4.1" source = { editable = "." } dependencies = [ { name = "aiosqlite" },