From ddaf2a12bc897af68e31e5fc625237f6ee33e806 Mon Sep 17 00:00:00 2001 From: Pengfei Hu Date: Sat, 27 Jun 2026 18:16:36 -0700 Subject: [PATCH 1/3] Make primary Shipgate flows prominent --- .agents/skills/agents-shipgate/SKILL.md | 25 +-- .../agents-shipgate/references/recipes.md | 210 ++++++------------ .claude/commands/shipgate.md | 54 +++-- .cursor/rules/agents-shipgate.mdc | 24 +- .well-known/agents-shipgate.json | 10 +- README.md | 94 ++++---- STABILITY.md | 18 +- adoption-kits/claude-code-skill/SKILL.md | 8 +- .../prompts/add-shipgate-to-repo.md | 27 ++- .../prompts/decide-shipgate-relevance.md | 8 +- .../prompts/verify-agent-diff.md | 49 ++-- adoption-kits/codex-skill/SKILL.md | 25 +-- .../codex-skill/references/recipes.md | 210 ++++++------------ docs/agent-contract-current.md | 19 +- docs/architecture.md | 2 +- docs/quickstart.md | 47 ++-- docs/target-repo-agent-snippets.md | 73 +++--- docs/triggers.json | 12 +- llms-full.txt | 19 +- .../skills/agents-shipgate/SKILL.md | 25 +-- .../agents-shipgate/references/recipes.md | 210 ++++++------------ prompts/add-shipgate-to-repo.md | 27 ++- prompts/decide-shipgate-relevance.md | 8 +- prompts/verify-agent-diff.md | 49 ++-- skills/agents-shipgate/SKILL.md | 8 +- .../prompts/add-shipgate-to-repo.md | 27 ++- .../prompts/decide-shipgate-relevance.md | 8 +- .../prompts/verify-agent-diff.md | 49 ++-- src/agents_shipgate/cli/_register_baseline.py | 2 +- src/agents_shipgate/cli/_register_contract.py | 5 +- src/agents_shipgate/cli/_register_doctor.py | 2 +- src/agents_shipgate/cli/_register_explain.py | 2 +- src/agents_shipgate/cli/_register_init.py | 2 +- .../cli/_register_list_checks.py | 2 +- src/agents_shipgate/cli/_register_scan.py | 2 +- src/agents_shipgate/cli/diagnostics.py | 12 +- .../agent_instructions/renderers/agents_md.py | 23 +- .../agent_instructions/renderers/claude_md.py | 22 +- .../agent_instructions/renderers/cursor.py | 24 +- .../cli/discovery/local_contract.py | 5 +- 
src/agents_shipgate/cli/first_look.py | 7 +- src/agents_shipgate/cli/install_hooks.py | 9 +- src/agents_shipgate/cli/main.py | 37 +-- .../cli/verify/orchestrator.py | 11 +- src/agents_shipgate/core/codex_boundary.py | 28 +-- src/agents_shipgate/schemas/contract.py | 27 ++- src/agents_shipgate/triggers.py | 16 +- .../claude-code-block-stop.json | 4 +- .../agent_protocol/codex-block-stop.json | 4 +- .../agent_protocol/codex-repair-after.json | 2 +- .../agent_protocol/codex-repair-before.json | 2 +- .../agent_protocol/cursor-block-stop.json | 4 +- .../github_action_removed.json | 4 +- .../codex_boundary_result/malformed_toml.json | 2 +- .../mcp_auto_approve_write.json | 2 +- .../network_wildcard.json | 2 +- .../unknown_permission_key.json | 2 +- tests/test_agent_handoff.py | 4 +- tests/test_agent_instructions_apply.py | 6 +- tests/test_agent_instructions_renderers.py | 27 ++- tests/test_cli.py | 50 +++-- tests/test_codex_boundary_check.py | 43 ++-- tests/test_diagnostics.py | 8 +- tests/test_first_look.py | 4 +- tests/test_local_contract.py | 12 +- tests/test_prompt_parity.py | 16 +- tests/test_public_surface_contract.py | 77 +++++++ tests/test_schema_boundaries.py | 6 +- tests/test_trigger_command.py | 6 +- tests/test_verify.py | 2 +- 70 files changed, 880 insertions(+), 992 deletions(-) diff --git a/.agents/skills/agents-shipgate/SKILL.md b/.agents/skills/agents-shipgate/SKILL.md index 372a152b..16230781 100644 --- a/.agents/skills/agents-shipgate/SKILL.md +++ b/.agents/skills/agents-shipgate/SKILL.md @@ -1,6 +1,6 @@ --- name: agents-shipgate -description: Use when the user wants to add or run Agents Shipgate — the deterministic merge gate for AI-generated agent capability changes — on an AI agent's tool surface; review or prepare a tool-using agent for release; scan MCP, OpenAPI, OpenAI Agents SDK, Anthropic, Google ADK, LangChain/LangGraph, CrewAI, OpenAI API, Codex plugin, or n8n tool artifacts; add advisory CI; or interpret, fix, triage, suppress, or explain a 
Shipgate finding. +description: Use when the user wants to run the prominent Agents Shipgate flows — `shipgate check`, `shipgate verify`, or `shipgate audit --host` — for AI agent capability changes, PR release readiness, or coding-agent host grants. --- # Agents Shipgate @@ -13,33 +13,30 @@ Do not use it for general linting, runtime monitoring, evals, model-output quali ## Workflow -1. For relevance decisions, bootstrap, verifier runs, scanning, CI setup, finding fixes, false-positive triage, strict-mode promotion, or version upgrades, read `references/recipes.md`. +1. For local checks, verifier runs, host audits, and supporting recovery commands, read `references/recipes.md`. 2. For reading `report.json`, summarizing release decisions, or deciding what may be auto-applied, read `references/report-reading.md`. -3. Before running Shipgate CLI commands, require a CLI whose `agents-shipgate contract --json` reports `contract_version: "7"` or newer: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. If it is missing or stale, tell the user to install or upgrade `agents-shipgate`. The Codex plugin supplies workflows, not the scanner binary. +3. Before running Shipgate CLI commands, require a CLI whose `agents-shipgate contract --json` reports `contract_version: "8"` or newer: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. If it is missing or stale, tell the user to install or upgrade `agents-shipgate`. The Codex plugin supplies workflows, not the scanner binary. 4. Set `AGENTS_SHIPGATE_AGENT_MODE=1` before running Shipgate commands so errors include structured `next_action` JSON. 5. Default first-time CI to advisory mode. Do not enable release-blocking CI or save a baseline until a human has reviewed current findings. 6. For local agent control, run `shipgate check --agent codex --workspace . 
--format codex-boundary-json` and read the stdout `shipgate.codex_boundary_result/v1` object. Switch on `decision`; follow `first_next_action`, `repair`, and `human_review`. -7. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run `agents-shipgate preflight --workspace . --plan - --json` with a `PreflightPlanV1` object. Legacy `--changed-files`/`--diff` shorthands remain available. If `requires_human_review` is true or `first_next_action.actor` is `human`, stop and route to a human. +7. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, plan to run `shipgate verify` before completion and route trust-root review to a human when the verifier requires it. 8. For full PR verification, read `agents-shipgate-reports/agent-handoff.json` first, then `verifier.json` for detailed controller state, then `verify-run.json` for reproducibility metadata, then `report.json` for reviewer detail; `report.json.release_decision.decision` remains the release gate. 9. Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. 10. Ensure `.gitignore` covers `agents-shipgate-reports/` before committing. ## Fast Paths -- CLI preflight: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. Continue only when the installed CLI reports `contract_version: "7"` or newer; if it is missing or stale, ask the user to install or upgrade `agents-shipgate`. -- Protected-surface preflight: run `agents-shipgate preflight --workspace . 
--plan - --json` before touching trust roots; include `changed_files[]` or `diff_text` in the plan when you have concrete planned paths. +- CLI preflight: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. Continue only when the installed CLI reports `contract_version: "8"` or newer; if it is missing or stale, ask the user to install or upgrade `agents-shipgate`. - Agent-native check: run `shipgate check --agent codex --workspace . --format codex-boundary-json`; read only the JSON result for continue/repair/stop routing. -- First adoption: run `agents-shipgate detect --workspace . --json`, then follow `references/recipes.md`. -- Agent-related PR/CI diff: run `agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. For local uncommitted work, omit `--base`/`--head` so the working tree is scanned. `verify` never fetches. -- Existing manifest / ongoing PR: run `agents-shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`. -- First GitHub CI: copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. -- Explain one finding: run `agents-shipgate explain-finding --from agents-shipgate-reports/report.json --json`. -- Triage heuristic findings: run `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`. +- Agent-related PR/CI diff: run `shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. For local uncommitted work, omit `--base`/`--head` so the working tree is scanned. `verify` never fetches. +- Existing manifest / ongoing PR: run `shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`. 
+- Unconfigured repo or uncertain relevance: run `shipgate verify --preview --json`. +- Host grants: run `shipgate audit --host --json --out agents-shipgate-reports/host-grants.json`. ## Boundaries -- Do not claim a finding is fixed without re-running `agents-shipgate scan` and reporting the new decision/counts. -- Do not continue with protected-surface edits when preflight returns `requires_human_review=true`; coding agents must not self-approve trust-root changes. +- Do not claim a finding is fixed without re-running `shipgate verify` and reporting the new merge verdict and release decision. +- Do not self-approve trust-root changes; when `shipgate verify` returns human review required, surface it to a human. - Before finishing an agent-related diff, run `shipgate check --agent codex --workspace . --format codex-boundary-json` and follow `shipgate.codex_boundary_result/v1`. - Do not bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions; verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible. - Do not silently suppress findings. Suppressions require a non-empty `reason`. diff --git a/.agents/skills/agents-shipgate/references/recipes.md b/.agents/skills/agents-shipgate/references/recipes.md index a663b4f5..9bf13480 100644 --- a/.agents/skills/agents-shipgate/references/recipes.md +++ b/.agents/skills/agents-shipgate/references/recipes.md @@ -1,12 +1,14 @@ # Agents Shipgate Recipes -Use these recipes after the `agents-shipgate` skill triggers. +Use these recipes after the `agents-shipgate` skill triggers. The prominent +flows are `shipgate check`, `shipgate verify`, and `shipgate audit --host`. +Supporting commands remain callable, but should not be the first thing an agent +recommends. ## CLI Preflight -The Codex plugin supplies the workflow instructions, not the scanner binary. 
-Before running `agents-shipgate` commands, confirm the CLI is installed and new -enough for the `verify` workflow: +The Codex plugin supplies workflow instructions, not the scanner binary. +Before running Shipgate commands, confirm the CLI is installed and new enough: ```bash command -v agents-shipgate @@ -14,75 +16,81 @@ agents-shipgate --version agents-shipgate contract --json ``` -Require `agents-shipgate contract --json` to report `contract_version: "7"` or -newer. If the command is missing or the contract is older, ask the user to -install or upgrade the CLI and rerun the task: +Require `agents-shipgate contract --json` to report `contract_version: "8"` or +newer. If it is missing or stale, ask the user to install or upgrade: ```bash pipx install agents-shipgate -pipx upgrade agents-shipgate # plain install is a no-op over a stale build +pipx upgrade agents-shipgate ``` -After installation, run `agents-shipgate --version` and -`agents-shipgate contract --json` again. Do not continue to `detect`, `init`, -`scan`, or `verify` until the CLI exists and reports contract v7 or newer. +Do not report the task complete until the CLI exists and reports contract v8 or +newer. Local boundary checks emit `shipgate.codex_boundary_result/v1`; legacy +`agent_result_v1` fixtures are retained only for older protocol integrations. -A missing or stale binary is a `decision="block"` install action in the -agent-native protocol, not a reason to proceed unverified. Until -`agents-shipgate contract --json` confirms contract v7 or newer, do not report -the task complete: surface the install/upgrade action and stop. Local boundary -checks emit `shipgate.codex_boundary_result/v1`; legacy `agent_result_v1` -fixtures are retained only for older protocol integrations. 
+## Local Agent Check -## Protected Surface Preflight +Run the boundary check before reporting an agent-related local diff complete: -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run: +```bash +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate check \ + --agent codex --workspace . --format codex-boundary-json +``` + +Read only stdout JSON. Switch on `decision`, `completion_allowed`, +`must_stop`, `first_next_action`, `human_review`, `repair`, and `policy`. + +## Verify A Diff + +Use this before finishing a PR or local change that touches an agent tool +surface, prompts, policies, permissions, Shipgate CI, or other protected +release surfaces. ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . --plan - --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify \ + --workspace . --config shipgate.yaml \ + --base origin/main --head HEAD --ci-mode advisory --format json ``` -Pass a `PreflightPlanV1` object on stdin. If you already have a path list or -local diff and need legacy shorthands, ask preflight about them before editing: +For local uncommitted work, omit `--head` and `--base` so `verify` scans the +checked-out working tree, including uncommitted edits. In committed PR or CI +contexts, make the base ref available first because `verify` never fetches. If +the repo is not configured or relevance is unclear, run: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . \ - --changed-files changed.txt --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . \ - --diff pr.diff --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify --preview --json ``` -If `requires_human_review` is true or `first_next_action.actor` is `human`, -stop and route the change to a human. Preflight is a routing surface only; -`release_decision.decision` remains the gate. 
+Read `agents-shipgate-reports/agent-handoff.json` first. Lead with +`gate.merge_verdict`, then inspect `next_action`, `controller`, +`fix_task.safe_to_attempt`, and `capability_review.top_changes[]`. Then read +`verifier.json`, `verify-run.json`, and `report.json`; the release gate remains +`report.json.release_decision.decision`. + +Do not bypass the verifier by suppressing findings, lowering severity, +expanding baselines or waivers, removing Shipgate CI, or weakening agent +instructions. Verify-mode `SHIP-VERIFY-*` findings route those trust-root +changes to human review. -## Decide Relevance +## Audit Host Grants -Run: +Run host audit when the task touches MCP servers, permission rules, hooks, +workflow scopes, or coding-agent host configuration: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate audit --host --json \ + --out agents-shipgate-reports/host-grants.json ``` -Proceed when any of these are true: - -- `is_agent_project: true` -- `suggested_sources` is non-empty -- `codex_plugin_candidates` is non-empty -- `shipgate.yaml` already exists -- the user explicitly asked for a Shipgate scan or Tool-Use Readiness gate +For drift checks against an acknowledged baseline, use the same flow with +`--drift` and optionally `--fail-on-drift`. -Stop only when all signals are absent and the user did not explicitly request Shipgate. +## Supporting Setup And Repair -## Bootstrap A Repo - -Run: +If `shipgate verify --preview --json` says the repo needs configuration, the +supporting setup commands remain available: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate contract --json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate init --workspace . 
--write --ci --json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate scan -c shipgate.yaml --suggest-patches --format json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \ @@ -90,110 +98,26 @@ AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \ --confidence high --apply ``` -If `init` reports placeholders, replace `CHANGE_ME` values from repo context before scanning. If `shipgate.yaml` already exists, edit it rather than overwriting it. - -## Verify An Agent-Related Diff - -Use this before finishing a PR or local change that touches an agent tool -surface, prompts, policies, permissions, Shipgate CI, or other protected -release surfaces. - -```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate trigger \ - --workspace . --base origin/main --head HEAD --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . --plan - --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate verify \ - --workspace . --config shipgate.yaml \ - --base origin/main --head HEAD --ci-mode advisory --format json -``` - -For local uncommitted work, omit `--head` and omit `--base` so `verify` scans -the checked-out working tree, including uncommitted edits. In committed PR or -CI contexts, make the base ref available first because `verify` never fetches. -If you pass a missing `--base`, `verify` exits 2 with an unknown merge verdict. - -Read `agents-shipgate-reports/agent-handoff.json` first. Lead with -`gate.merge_verdict`, then inspect `capability_review.top_changes[]`, -`next_action`, `controller`, and `fix_task.safe_to_attempt`. Then read -`agents-shipgate-reports/verifier.json` for detailed controller context and -`agents-shipgate-reports/report.json`; `release_decision.decision` remains the -gate. 
`capability_review.top_changes[]` and `verifier_summary` are -supporting/provisional composition summaries: their verdict-like values mirror -`release_decision.decision`, and they add counts for protected-surface touches, -policy weakening, human acknowledgement, and top reason codes. - -Do not bypass the verifier. Do not suppress findings, lower severity, expand -baselines or waivers, remove Shipgate CI, or weaken agent instructions to make -the run pass. Verify-mode `SHIP-VERIFY-*` findings route those trust-root -changes to human review. - -## First-Time CI - -Use advisory mode only. Copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. +If `init` reports placeholders, replace `CHANGE_ME` values from repo context +before verification. If `shipgate.yaml` already exists, edit it rather than +overwriting it. -Do not switch to release-blocking behavior in the same task. Strict promotion requires human review, suppressions with reasons, and optionally a saved baseline. - -## Fix Top Finding +## Fix Or Explain Findings 1. Read `agents-shipgate-reports/report.json`. 2. Pick the first blocker, then highest-severity review item. -3. If `findings[].agent_action == "auto_apply"` and a high-confidence patch exists, apply it with `apply-patches --confidence high --apply`. -4. For policy/evidence gaps, propose the exact human decision needed. Do not fabricate approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. -5. Re-run scan and report the new `release_decision.decision`, blocker count, and review item count. - -## Recommend Fixes - -Group active findings by action: +3. Auto-apply only high-confidence safe patches. +4. For policy/evidence gaps, propose the exact human decision needed. Do not + fabricate approval, confirmation, idempotency, broad-scope, + prohibited-action, or runtime-trace evidence. +5. 
Re-run `shipgate verify` and report the new merge verdict, release + decision, blocker count, and review-item count. -- `auto_apply`: safe mechanical patches. -- `propose_patch_for_review`: show patch, leave final decision to user. -- `escalate_to_human`: policy/evidence decision. -- `suppress_with_reason`: only when the user confirms the finding is intentionally accepted. -- `informational`: summarize, no gate action. - -## Explain A Finding - -Run: +For one finding: ```bash agents-shipgate explain-finding \ --from agents-shipgate-reports/report.json --json ``` -Use the returned deterministic `explanation` for PR comments or chat replies. Keep it to 3-5 sentences and include the tool name, release risk, and next action. - -## Triage False Positives - -Prefer fixing the manifest or policy evidence over suppression. Suppress only with a specific reason: - -```yaml -checks: - ignore: - - check_id: SHIP-CHECK-ID - tool: tool.name - reason: specific accepted-risk rationale -``` - -## Promote Advisory To Strict - -Only after humans review advisory output: - -```bash -agents-shipgate baseline save -c shipgate.yaml --out .agents-shipgate/baseline.json -agents-shipgate scan -c shipgate.yaml \ - --baseline .agents-shipgate/baseline.json \ - --ci-mode strict --fail-on critical,high -``` - -The promoted gate should fail only on new findings above the selected threshold. - -## Upgrade Shipgate - -Update the GitHub Action tag and `shipgate_version` together. Re-run: - -```bash -agents-shipgate contract --json -agents-shipgate scan -c shipgate.yaml --suggest-patches --format json -``` - -If schema or decision fields changed, use `docs/agent-contract-current.md` from the installed version or upstream repo. +Suppressions require a specific non-empty reason and explicit user approval. 
diff --git a/.claude/commands/shipgate.md b/.claude/commands/shipgate.md index cd563b71..2cd961c7 100644 --- a/.claude/commands/shipgate.md +++ b/.claude/commands/shipgate.md @@ -1,31 +1,37 @@ --- -description: Bootstrap or verify agents-shipgate as the deterministic merge gate for AI-generated agent capability changes +description: Run the prominent Agents Shipgate flows: check, verify, or audit --host --- Arguments: `$ARGUMENTS` -If the arguments include `verify`, run the ongoing-PR verifier flow. Otherwise -run the agents-shipgate bootstrap flow on the current repo: install the CLI, -add the deterministic merge gate for AI-generated agent capability changes (a -local-first, static Tool-Use Readiness review), generate `shipgate.yaml`, fill -in placeholders, run a scan, and surface the top findings from the JSON report. +Agents Shipgate is the deterministic merge gate for AI-generated agent +capability changes. -The canonical, self-contained instructions live in the bundled prompt files. -For bootstrap, read `prompts/add-shipgate-to-repo.md`. For verifier runs, read -`prompts/verify-agent-diff.md`. Try these paths in order; use the first that -exists: +If the arguments include `audit`, run the host-grant audit flow. If they include +`check`, run the local boundary check. Otherwise run the verifier flow. The +supporting adoption and scan commands remain available, but this slash command +should lead with only the prominent flows: `shipgate check`, `shipgate verify`, +and `shipgate audit --host`. + +The canonical, self-contained verifier instructions live in the bundled prompt +files. For verifier runs, read `prompts/verify-agent-diff.md`. Try these paths +in order; use the first that exists: 1. `.claude/skills/agents-shipgate/prompts/.md` — bundled with the `agents-shipgate` skill if installed in this project. 2. `prompts/.md` — present when this repo is a clone of `agents-shipgate` itself. 3. 
`https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/prompts/.md` — last-resort fetch. -Verifier command: +Prominent commands: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate verify \ +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate check \ + --agent claude-code --workspace . --format codex-boundary-json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify \ --workspace . --config shipgate.yaml \ --base origin/main --head HEAD \ --ci-mode advisory --format json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate audit --host \ + --json --out agents-shipgate-reports/host-grants.json ``` For local uncommitted work, omit `--base`/`--head` so uncommitted edits are @@ -36,12 +42,19 @@ Required behavior (do not skip): 1. Set `AGENTS_SHIPGATE_AGENT_MODE=1` for every CLI call so errors emit a `next_action` JSON line on stderr. 2. Run `agents-shipgate contract --json` when available and use it to verify the installed CLI's schema versions and gating signal. -3. Confirm with the user before running `agents-shipgate init --workspace . --write` (it writes `shipgate.yaml` to the workspace). -4. Parse `agents-shipgate-reports/report.json` directly — do not scrape the markdown. **For release gating, read `release_decision.decision` first** (`"blocked" | "review_required" | "insufficient_evidence" | "passed"`; baseline-aware, v0.8+; `insufficient_evidence` added v0.14) along with `release_decision.{reason, blockers, review_items, fail_policy.would_fail_ci}`. Other stable fields: `findings[].{check_id, severity, tool_name, recommendation}`. For reviewer triage by source reliability, run `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`; `findings[].provenance_kind` is not a gate input. `summary.{critical_count, high_count, medium_count, status}` is legacy and baseline-blind — kept for v0.7 callers, do not lead with it. The Release Evidence Packet is at `agents-shipgate-reports/packet.{md,json,html}`. 
Full contract: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). -5. For verifier runs, parse `agents-shipgate-reports/verifier.json` first and lead with `merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, and `capability_review.top_changes`; then read `agents-shipgate-reports/report.json.release_decision.decision` as the underlying release gate. -6. Do **not** bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions. Verify-mode `SHIP-VERIFY-*` checks route those trust-root edits to human review. +3. For verifier runs, parse `agents-shipgate-reports/agent-handoff.json` first, + then `verifier.json`, `verify-run.json`, and + `report.json.release_decision.decision` as the release gate. +4. For check runs, parse stdout as `shipgate.codex_boundary_result/v1` and + switch on `decision`, `completion_allowed`, `must_stop`, `first_next_action`, + `human_review`, and `repair`. +5. For host audits, parse `agents-shipgate-reports/host-grants.json` when + `--out` is used, or stdout when running JSON-only. +6. Do **not** bypass the verifier by suppressing findings, lowering severity, + expanding baselines or waivers, removing Shipgate CI, or weakening agent + instructions. Verify-mode `SHIP-VERIFY-*` checks route those trust-root edits + to human review. 7. Add `agents-shipgate-reports/` to `.gitignore` if it is not already. -8. Do **not** run `agents-shipgate baseline save` in this flow — baselining is a separate decision. Report back: `release_decision.decision` and `reason`, `merge_verdict`, `can_merge_without_human`, blocker / review-item counts, top 3 active findings @@ -50,12 +63,11 @@ change highlights, and one suggested next step. ## Ongoing PRs -The bootstrap flow above wires Shipgate into the repo. 
For an ongoing PR that -changes agent tools, MCP exports, OpenAPI specs, prompts, permissions, policies, -CI gates, or `shipgate.yaml`, run the verifier instead: +For an ongoing PR that changes agent tools, MCP exports, OpenAPI specs, prompts, +permissions, policies, CI gates, or `shipgate.yaml`, run the verifier: ```bash -agents-shipgate verify --base origin/main --head HEAD --json +shipgate verify --base origin/main --head HEAD --json ``` Read `agents-shipgate-reports/verifier.json` first and lead with `merge_verdict` diff --git a/.cursor/rules/agents-shipgate.mdc b/.cursor/rules/agents-shipgate.mdc index 0fb85ec3..2769bf67 100644 --- a/.cursor/rules/agents-shipgate.mdc +++ b/.cursor/rules/agents-shipgate.mdc @@ -35,10 +35,6 @@ When a change affects agent tools, MCP exports, OpenAPI specs, prompts, permissions, approval policies, or release gates, run Agents Shipgate. Default to advisory verification while adopting the gate. -Before protected edits, run preflight and read `PreflightResultV2`: - - agents-shipgate preflight --workspace . --plan - --json - For local agent control, run: shipgate check --agent cursor --workspace . --format codex-boundary-json @@ -54,16 +50,11 @@ only the listed mechanical repair and rerun the command. If `human_review.required=true` or `must_stop=true`, stop and surface the JSON result to a human. -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run -`agents-shipgate preflight --workspace . --plan - --json` with a -`PreflightPlanV1` object. Legacy shorthands such as -`agents-shipgate preflight --changed-files changed.txt --json` remain available. -If `requires_human_review` is `true` or -`first_next_action.actor` is `human`, stop and route the change to a human. +For local verification, run: + + shipgate verify --workspace . 
--config shipgate.yaml --ci-mode advisory --format json -For committed PR/CI verification, run `agents-shipgate verify --base +For committed PR/CI verification, run `shipgate verify --base origin/main --head HEAD --json` after making the base ref available; it never fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `gate.merge_verdict`, `gate.can_merge_without_human`, and `controller`; then read @@ -74,6 +65,13 @@ release gate. Legacy `agent-result.json` surfaces, where present, are supporting/provisional projections and not the CI gate. +For coding-agent host grants, run: + + shipgate audit --host --json --out agents-shipgate-reports/host-grants.json + +Read the host-grants inventory before changing MCP servers, permission rules, +hooks, or workflow scopes. + Apply only high-confidence safe patches. Do not invent approval, confirmation, or idempotency evidence. diff --git a/.well-known/agents-shipgate.json b/.well-known/agents-shipgate.json index b980a6dc..536297f6 100644 --- a/.well-known/agents-shipgate.json +++ b/.well-known/agents-shipgate.json @@ -72,6 +72,14 @@ }, "binaries": ["agents-shipgate", "shipgate"], "quickstart": "shipgate check --agent codex --workspace . --format codex-boundary-json", + "primary_commands": { + "check_codex": "shipgate check --agent codex --workspace . --format codex-boundary-json", + "check_claude_code": "shipgate check --agent claude-code --workspace . --format codex-boundary-json", + "check_cursor": "shipgate check --agent cursor --workspace . --format codex-boundary-json", + "verify_local": "shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --json", + "verify_pr": "shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --json", + "host_audit": "shipgate audit --host --json --out agents-shipgate-reports/host-grants.json" + }, "commands": { "agent_check": "shipgate check --agent codex --workspace . 
--format codex-boundary-json", "agent_check_codex": "shipgate check --agent codex --workspace . --format codex-boundary-json", @@ -118,7 +126,7 @@ "agent_handoff_schema_version": "shipgate.agent_handoff/v1", "agent_handoff_schema_path": "docs/agent-handoff-schema.v1.json", "agent_handoff_artifact": "agents-shipgate-reports/agent-handoff.json", - "contract_version": "7", + "contract_version": "8", "inputs": ["mcp", "openapi", "openai_agents_sdk", "anthropic_api", "google_adk", "langchain", "crewai", "openai_api", "codex_config", "codex_plugin", "n8n"], "outputs": ["markdown", "json", "sarif", "packet_md", "packet_json", "packet_html", "verifier_json", "verify_run_json", "agent_handoff_json", "pr_comment_md", "check_annotations_json", "capability_lock_json", "base_capability_lock_json", "capability_lock_diff_json", "capability_lock_diff_md", "feedback_json", "attestation_json", "org_evidence_bundle_json", "host_grants_json", "org_status_json", "scenario_json", "governance_benchmark_result_json"], "artifacts": { diff --git a/README.md b/README.md index d2aa7ed6..ca7890a7 100644 --- a/README.md +++ b/README.md @@ -103,8 +103,18 @@ above writes this comment verbatim to `reports/pr-comment.md`. ## Verify-first quickstart -For coding-agent local control, start with `shipgate check` and parse its -stdout `shipgate.codex_boundary_result/v1` object: +Install once: + +```bash +pipx install agents-shipgate +``` + +Then start from one of three prominent flows. + +### Local Boundary Check + +Coding agents run `shipgate check` before reporting an agent-capability change +complete. Parse the stdout `shipgate.codex_boundary_result/v1` object: ```bash shipgate check --agent codex --workspace . --format codex-boundary-json @@ -114,48 +124,43 @@ shipgate check --agent cursor --workspace . --format codex-boundary-json Switch on `decision`, `completion_allowed`, `must_stop`, `first_next_action`, `human_review`, `repair`, and `policy`; never infer a -decision from prose. 
For committed PRs, the release loop remains verify-first: -when a PR changes what your agent can do, run the deterministic verifier on the -diff and read its merge verdict before you merge. `shipgate check` is necessary -but not sufficient for capability-expanding diffs: if a change adds dynamic, -undeclared, or otherwise ambiguous tool capability, do not treat -`decision="allow"` as merge readiness; run `verify` and read -`release_decision.decision`. +decision from prose. `shipgate check` is necessary but not sufficient for +capability-expanding diffs: if a change adds dynamic, undeclared, or otherwise +ambiguous tool capability, do not treat `decision="allow"` as merge readiness; +run `shipgate verify` and read `release_decision.decision`. -First ask whether Shipgate applies to the current repo or diff: +### PR And Local Verification + +When a PR changes what your agent can do, run the deterministic verifier on the +diff and read its merge verdict before you merge. For committed PR/CI refs, +make the base ref available first because `verify` never fetches: ```bash -agents-shipgate verify --preview --json +shipgate verify --workspace . --config shipgate.yaml \ + --ci-mode advisory --format json --base origin/main --head HEAD ``` -If the repo is not configured yet, install the manifest, advisory CI, and -agent-facing instructions: +For local, uncommitted work, omit `--base`/`--head` so your working-tree edits +are scanned instead: ```bash -agents-shipgate init --workspace . --write --ci --agent-instructions=default --json +shipgate verify --workspace . --config shipgate.yaml \ + --ci-mode advisory --format json ``` -Prefer to delegate? Paste the -[coding-agent snippet](#copy-this-into-your-coding-agent) into Claude Code, -Codex, or Cursor and let the agent wire the gate itself — the repo ships -`AGENTS.md` managed blocks, `llms.txt`, and structured error output for -exactly this path. - -Then verify the committed PR/CI ref. 
Pass the base and head so the diff — the -capability delta and trust-root signals — is in scope (the verifier never -fetches; make the base ref available first, e.g. `git fetch origin main`): +If a repo is not configured yet, use the verify flow's preview entry point: ```bash -agents-shipgate verify --workspace . --config shipgate.yaml \ - --ci-mode advisory --format json --base origin/main --head HEAD +shipgate verify --preview --json ``` -For local, uncommitted work, omit `--base`/`--head` so your working-tree edits -are scanned instead: +### Host-Grant Audit + +Before changing local MCP servers, Codex/Claude/Cursor permission rules, +hooks, workflow scopes, or other host grants, capture the host inventory: ```bash -agents-shipgate verify --workspace . --config shipgate.yaml \ - --ci-mode advisory --format json +shipgate audit --host --json --out agents-shipgate-reports/host-grants.json ``` The release gate is `agents-shipgate-reports/report.json` → @@ -170,9 +175,9 @@ context. Zero-setup demos of both verdicts are in [60 seconds](#60-seconds-watch-it-block-two-prs) above; `uvx` runs them with no -persistent install. To install the CLI, use `pipx install agents-shipgate` -(then `pipx upgrade agents-shipgate` — a plain install is a no-op over a stale -build). Your agent project does **not** need Python 3.12; the CLI installs +persistent install. To upgrade the CLI, use `pipx upgrade agents-shipgate` - a +plain install is a no-op over a stale build. Your agent project does **not** +need Python 3.12; the CLI installs separately. To verify your own repo and write the standard `agents-shipgate-reports/` directory, see [Verify your repo](#verify-your-repo) below. @@ -238,18 +243,14 @@ Evidence Packet in [`packet.md`](samples/support_refund_agent/expected/packet.md ```text Add a Tool-Use Readiness release gate for this tool-using AI agent with Agents Shipgate. 
-Run the local command for your agent runtime: +Use only the prominent Shipgate flows as first-look commands: shipgate check --agent codex --workspace . --format codex-boundary-json shipgate check --agent claude-code --workspace . --format codex-boundary-json shipgate check --agent cursor --workspace . --format codex-boundary-json -agents-shipgate verify --preview --json -If Shipgate is relevant, run: -agents-shipgate init --workspace . --write --ci --agent-instructions=default --json -Before editing protected surfaces, run: -agents-shipgate preflight --workspace . --plan - --json -For PR/reviewer evidence, run: -agents-shipgate verify --workspace . --config shipgate.yaml \ +shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json +shipgate verify --workspace . --config shipgate.yaml \ --base origin/main --head HEAD --ci-mode advisory --format json +shipgate audit --host --json --out agents-shipgate-reports/host-grants.json For local control, parse the `shipgate check` stdout JSON (`shipgate.codex_boundary_result/v1`): switch on `decision`, `completion_allowed`, `must_stop`, `first_next_action`, `human_review`, @@ -298,7 +299,7 @@ agents-shipgate init --workspace . --write --agent-instructions=agents-md,codex- Then invoke `$agents-shipgate` in a fresh thread. The plugin supplies workflows, not the scanner binary — install the CLI (`pipx install agents-shipgate && pipx upgrade agents-shipgate`) where Codex runs commands and -require contract v7 or newer. Marketplace details, kit overrides, and the beta-migration +require contract v8 or newer. Marketplace details, kit overrides, and the beta-migration steps: [`docs/agents/use-with-codex.md`](docs/agents/use-with-codex.md). **Cursor** — `init --agent-instructions=cursor` writes the auto-attach rule; @@ -326,10 +327,7 @@ evidence around them: ## Verify your repo ```bash -agents-shipgate verify --preview --json -agents-shipgate init --workspace . 
--write --ci --agent-instructions=default --json -# Replace any CHANGE_ME placeholders reported by init. -agents-shipgate verify --workspace . --config shipgate.yaml \ +shipgate verify --workspace . --config shipgate.yaml \ --base origin/main --head HEAD --ci-mode advisory --format json ``` @@ -353,7 +351,7 @@ Install alternatives (your agent project does **not** need Python 3.12 — insta ```bash python -m pip install -U --pre agents-shipgate # global pip uv tool install --upgrade agents-shipgate # via uv -agents-shipgate contract --json # require contract_version >= 7 +agents-shipgate contract --json # require contract_version >= 8 ``` ## Adopt in one turn (scan helper) @@ -498,9 +496,9 @@ Agents Shipgate is designed to be agent-friendly. If you're a coding agent (Clau - **[`llms.txt`](llms.txt)** — short index of every machine-readable surface, one fetch. - **[`llms-full.txt`](llms-full.txt)** — long-form concatenation of `AGENTS.md` + recipes + checks + concepts + autofix policy, in one document. Built by `scripts/build-llms-full.py`. - **[`.well-known/agents-shipgate.json`](.well-known/agents-shipgate.json)** — discovery metadata (tagline, install commands, schema URLs, gating signal, exit codes, trigger-catalog URL). -- **[`docs/triggers.json`](docs/triggers.json)** — machine-readable mirror of the AGENTS.md trigger table. Apply the rules to a PR diff to decide whether to propose `agents-shipgate detect`. Schema is stable for `0.x`. +- **[`docs/triggers.json`](docs/triggers.json)** — machine-readable mirror of the AGENTS.md trigger table. Apply the rules to a PR diff to decide whether to run `shipgate verify --preview --json` or the full verifier. Schema is stable for `0.x`. - **[`tools/shipgate-detect.py`](tools/shipgate-detect.py)** — zero-install, stdlib-only detector. `curl … | python3 - --workspace . --json` returns the same structural verdict as `agents-shipgate detect --json`. 
Pinned to the canonical CLI by [`tests/test_zero_install_detector.py`](tests/test_zero_install_detector.py). See [`docs/zero-install.md`](docs/zero-install.md). -- **`agents-shipgate contract --json`** — verify the installed CLI's local contract before relying on hard-coded schema or gating assumptions; contract v7 names the verifier, verify-run, agent-handoff, Codex boundary, attestation, registry, org evidence bundle, host-grants inventory, and legacy local-agent schema versions plus the agent read order. +- **`agents-shipgate contract --json`** — verify the installed CLI's local contract before relying on hard-coded schema or gating assumptions; contract v8 names `primary_commands`, the verifier, verify-run, agent-handoff, Codex boundary, attestation, registry, org evidence bundle, host-grants inventory, and legacy local-agent schema versions plus the agent read order. - **[`docs/agent-contract-current.md`](docs/agent-contract-current.md)** — single source of truth for the current schema versions and which JSON fields to read. Updated whenever the contract bumps; other agent-facing surfaces link here instead of restating the contract. - **[`docs/agent-native-merge-contract.md`](docs/agent-native-merge-contract.md)** — the agent-native protocol map: the eight contracts (trigger, capability change, merge verdict, repair, forbidden action, human authority, trust root, attestation) each mapped to the artifact that implements it. - **[`docs/capability-standard.md`](docs/capability-standard.md)** — stable non-gating capability lock/diff standard for external integrations and research tooling. diff --git a/STABILITY.md b/STABILITY.md index 172d6f86..53237a0f 100644 --- a/STABILITY.md +++ b/STABILITY.md @@ -42,7 +42,10 @@ Breaking changes from the `0.x` line: New outputs include `verify_run_json`, `run_id`, `agent_controller_must_stop`, `agent_controller_stop_reason`, and `agent_controller_completion_allowed`. 
-- The runtime contract payload is now `contract_version: "7"`. +- The runtime contract payload is now `contract_version: "8"`. + It adds `primary_commands{}` so agents can discover the three prominent + flows (`shipgate check`, `shipgate verify`, and `shipgate audit --host`) + without treating supporting/adoption commands as first-look guidance. Report JSON remains `report_schema_version: "0.27"` from the current `0.13.0` line; this alpha does not redefine that frozen report schema. v0.27 includes policy-pack distribution metadata @@ -215,9 +218,12 @@ Stable JSON fields: mcp-serve`. - `manual_review_signals[]` — stable report/packet fields an agent should read when surfacing human review work. -- `commands{}` — minimal stable commands for local `shipgate check` control, - preview, default local agent workflow install, local verify, PR verify, and - contract introspection. +- `primary_commands{}` — the prominent flow map for local boundary checks, + local/PR verification, and host-grant audits. Values use the `shipgate` + alias and contain only `check`, `verify`, and `audit --host` entry points. +- `commands{}` — compatibility/supporting commands for local `shipgate check` + control, preview, default local agent workflow install, local verify, PR + verify, and contract introspection. - `default_paths{}` — default manifest, report directory, and local contract paths used by generated downstream agent instructions. - `artifacts{}` — stable report artifact paths an agent should inspect first. @@ -813,8 +819,8 @@ failure, not a docs-only or no-trigger success: verify writes `verifier.json`, `head_status: "failed"`, `head_exit_code: 2`, `merge_verdict: "unknown"`, `applicability: "unknown"`, and `can_merge_without_human: false`; it writes no `report.json` and runs no head scan. The first next action directs agents to -fix the config path or run `agents-shipgate verify --preview --json` / -`agents-shipgate detect --workspace . --json` before initializing. 
+fix the config path or run `shipgate verify --preview --json` before +initializing. The head scan writes `report.md`, `report.json`, `report.sarif`, `packet.json`, `verifier.json`, `verify-run.json`, `agent-handoff.json`, and `pr-comment.md`. diff --git a/adoption-kits/claude-code-skill/SKILL.md b/adoption-kits/claude-code-skill/SKILL.md index e7780a8f..16acee78 100644 --- a/adoption-kits/claude-code-skill/SKILL.md +++ b/adoption-kits/claude-code-skill/SKILL.md @@ -1,6 +1,6 @@ --- name: agents-shipgate -description: Run the deterministic merge gate when a change touches what an AI agent can do. Use after adding or modifying MCP servers or tools, tool/function definitions (@tool, @function_tool), OpenAPI specs that describe agent tools, agent prompts, permission scopes, approval or confirmation policies, agent CI workflows, or shipgate.yaml — and before creating a PR for any such change. Also use to verify agent-related PRs, fix or triage Shipgate findings, add Shipgate to CI, or interpret Shipgate verifier/report artifacts. Triggers on phrases like "add shipgate", "verify this agent PR", "merge verdict", "release readiness for my agent", "tool-use readiness", "scan my agent", "shipgate scan", "shipgate.yaml", "agents-shipgate-reports/verifier.json", "agents-shipgate-reports/report.json", "fix shipgate finding". +description: Run prominent Agents Shipgate flows when a change touches what an AI agent can do — `shipgate check`, `shipgate verify`, or `shipgate audit --host`. Use after adding or modifying MCP servers or tools, tool/function definitions (@tool, @function_tool), OpenAPI specs that describe agent tools, agent prompts, permission scopes, approval or confirmation policies, agent CI workflows, or shipgate.yaml — and before creating a PR for any such change. Also use to verify agent-related PRs, fix or triage Shipgate findings, add Shipgate to CI, or interpret Shipgate verifier/report artifacts.
Triggers on phrases like "add shipgate", "verify this agent PR", "merge verdict", "release readiness for my agent", "tool-use readiness", "shipgate check", "shipgate verify", "audit host grants", "shipgate.yaml", "agents-shipgate-reports/verifier.json", "agents-shipgate-reports/report.json", "fix shipgate finding". --- # agents-shipgate skill @@ -53,8 +53,8 @@ Always: `fix_task`, and `capability_review.top_changes`. Then parse `agents-shipgate-reports/report.json.release_decision.decision`; it is the release gate. -4. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run `agents-shipgate preflight --workspace . --plan - --json` with a `PreflightPlanV1` object. Legacy `--changed-files`/`--diff` shorthands remain available. If `requires_human_review` is true or `first_next_action.actor` is `human`, stop and route to a human. -5. Before finishing an agent-related diff, run `shipgate check --agent claude-code --workspace . --format codex-boundary-json`. For committed PR/CI verification, run `agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. `verify` never fetches. +4. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, plan to run `shipgate verify` before completion and route trust-root review to a human when the verifier requires it. +5. Before finishing an agent-related diff, run `shipgate check --agent claude-code --workspace . --format codex-boundary-json`. For committed PR/CI verification, run `shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. 
`verify` never fetches. For host grants, run `shipgate audit --host --json --out agents-shipgate-reports/host-grants.json`. 6. Do not bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions; verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible. 7. Confirm with the user before any command that writes files (`init --write`, `baseline save`). @@ -86,7 +86,7 @@ For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbuc ## Boundaries (do not violate) -- Do not claim a finding is fixed without re-running `agents-shipgate scan` and showing the diff in counts. +- Do not claim a finding is fixed without re-running `shipgate verify` and reporting the new merge verdict and release decision. - Do not silently suppress findings — `checks.ignore` requires a `reason` and the manifest validator rejects empty reasons. - Do not commit `agents-shipgate-reports/` — it's regenerated each run; add it to `.gitignore`. - Do not run `agents-shipgate baseline save` until the user has reviewed the initial findings; baselining ratchets in noise. diff --git a/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md b/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md index 01b03928..9d5007a6 100644 --- a/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md +++ b/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md @@ -5,23 +5,23 @@ You are working in a repo that may contain an AI agent — likely one of: an MCP Your job is to drive the first-adoption helper flow end-to-end in one tool-using turn, which adds the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. Ongoing -agent-related PRs should use `agents-shipgate verify` after this adoption step. +agent-related PRs should use `shipgate verify` after this adoption step. ## Your task -1. 
**Install the tool - pin the version so a stale build can't shadow it.** This flow uses the current verifier, agent-handoff, and Codex-boundary contracts and requires **contract v7 or newer**; an older copy lingering on `PATH` may lack the command or schema fields this prompt expects. Prefer a **pinned, zero-install** runner that fetches the exact version every time instead of trusting whatever is already on `PATH`. **Pin it into one variable and use that for every step below**, so no single command can fall through to a stale binary: +1. **Install the tool - pin the version so a stale build can't shadow it.** This flow uses the current verifier, agent-handoff, primary-command, and Codex-boundary contracts and requires **contract v8 or newer**; an older copy lingering on `PATH` may lack the command or schema fields this prompt expects. Prefer a **pinned, zero-install** runner that fetches the exact version every time instead of trusting whatever is already on `PATH`. **Pin it into one variable and use that for every step below**, so no single command can fall through to a stale binary: ```bash SG="uvx agents-shipgate@1.0.0a1" # uv: ephemeral, always the pinned build # or: SG="pipx run agents-shipgate==1.0.0a1" $SG --version # confirm the pinned runner resolves ``` - Every step below calls `$SG …`; e.g. `$SG detect …` runs `agents-shipgate detect` through the pinned runner, never a `PATH` copy. + Every step below calls `$SG …`; e.g. `$SG verify --preview --json` runs the verify preview through the pinned runner, never a `PATH` copy. 
If you would rather install onto `PATH`, pin the floor and **fail loudly when it resolves older** — a plain `pipx install agents-shipgate` is a no-op when an older build already exists — then set `SG=agents-shipgate`: ```bash python -m pip install -U --pre agents-shipgate - agents-shipgate contract --json # STOP if this reports contract_version < 5 - re-run pinned via uvx agents-shipgate@1.0.0a1 - SG=agents-shipgate # only after the line above confirms contract v7+ + agents-shipgate contract --json # STOP if this reports contract_version < 8 - re-run pinned via uvx agents-shipgate@1.0.0a1 + SG=agents-shipgate # only after the line above confirms contract v8+ ``` 2. **Sanity-check the install** before touching the user's code: @@ -40,13 +40,20 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. using [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) and upgrade before relying on local contract verification in automation. -3. **Detect:** +3. **Preview the verify flow:** ```bash - $SG detect --workspace . --json + $SG verify --preview --json ``` - Read the response: `is_agent_project`, `frameworks[]` (per-framework score + evidence + candidate files), `agent_name_candidates[]`, `suggested_sources[]` (MCP/OpenAPI files matched by glob). - - **Stop only when ALL of these hold:** `is_agent_project: false`, `suggested_sources` is empty, `codex_plugin_candidates` is empty, no `shipgate.yaml` already exists in the workspace, AND the user did not explicitly request a scan. Otherwise proceed — MCP/OpenAPI tool-surface repos and Codex plugin package repos register as `is_agent_project: false` because they have no Python framework imports, but they are valid Shipgate targets. MCP/OpenAPI hits surface as `suggested_sources`; Codex plugin hits surface as `codex_plugin_candidates`. + Read the response and next action. 
Preview is the first-look verify entry + point: it does not require a manifest, does not scan, and tells you whether + to configure Shipgate, skip, or run the full verifier. + + If preview exposes trigger/detection metadata, stop only when all relevance + signals are absent and the user did not explicitly request Shipgate. + Otherwise proceed. MCP/OpenAPI tool-surface repos and Codex plugin package + repos can be valid Shipgate targets even when Python framework detection + would classify `is_agent_project: false`; look for `suggested_sources` and + `codex_plugin_candidates` when those fields are present. 4. **Generate a starter manifest + GitHub Actions workflow:** ```bash diff --git a/adoption-kits/claude-code-skill/prompts/decide-shipgate-relevance.md b/adoption-kits/claude-code-skill/prompts/decide-shipgate-relevance.md index 63904ca1..1ba20ab9 100644 --- a/adoption-kits/claude-code-skill/prompts/decide-shipgate-relevance.md +++ b/adoption-kits/claude-code-skill/prompts/decide-shipgate-relevance.md @@ -69,12 +69,12 @@ the rules to the changed file list. ``` 5. **Then act.** - - If `run_shipgate: true`: run `detect` through a **version-pinned, + - If `run_shipgate: true`: run verify preview through a **version-pinned, zero-install** runner so a stale copy already on `PATH` can't answer — - `uvx agents-shipgate@1.0.0a1 detect --workspace . --json` (or + `uvx agents-shipgate@1.0.0a1 verify --preview --json` (or `pipx run agents-shipgate==1.0.0a1 ...`). Only fall back to a bare - `agents-shipgate detect` once `agents-shipgate --version` confirms - contract v7 or newer. Then follow + `shipgate verify --preview --json` once `agents-shipgate --version` + confirms contract v8 or newer. Then follow [`prompts/add-shipgate-to-repo.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/prompts/add-shipgate-to-repo.md) for the first-adoption helper flow, or point the user at the GitHub Action (`ThreeMoonsLab/agents-shipgate@v1.0.0a1`) if they prefer CI. 
diff --git a/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md b/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md index ec706e11..20bbcb2d 100644 --- a/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md +++ b/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md @@ -11,54 +11,31 @@ work is complete. export AGENTS_SHIPGATE_AGENT_MODE=1 ``` -2. **Decide whether the diff needs Shipgate.** - For a committed PR diff: +2. **Use verify preview only when relevance or setup is unclear.** ```bash - agents-shipgate trigger --workspace . --base origin/main --head HEAD --json - ``` - For a local pre-commit working-tree diff, or when the base ref is - unavailable locally, use the changed-files fallback: - ```bash - git diff --name-only HEAD > /tmp/shipgate-changed-files.txt - git diff HEAD > /tmp/shipgate.diff - agents-shipgate trigger --workspace . \ - --changed-files /tmp/shipgate-changed-files.txt \ - --diff /tmp/shipgate.diff --json + shipgate verify --preview --json ``` + Preview is a lightweight verify entry point: no manifest required, no scan, + exit 0. It tells you whether to configure Shipgate, skip, or run the full + verifier. If the repo already has `shipgate.yaml`, proceed to full verify. - Continue when `should_run` is `true` or `force_run` is `true`. If the - repo already has `shipgate.yaml`, CI should verify every PR; for local - pre-commit work, verify when the changed files are agent-related or when - you need a full advisory check before handing off. - -3. **Run preflight before protected-surface edits.** +3. **Treat protected-surface edits as verifier-owned review.** Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex - plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run: - ```bash - agents-shipgate preflight --workspace . --plan - --json - ``` - Pass a `PreflightPlanV1` object on stdin. 
If you need legacy shorthands, - pass changed-file or diff context directly: - ```bash - agents-shipgate preflight --workspace . \ - --changed-files /tmp/shipgate-changed-files.txt \ - --diff /tmp/shipgate.diff --json - ``` - If `requires_human_review` is true or `first_next_action.actor` is `human`, - stop and route the change to a human. Preflight is a routing surface only; - it does not replace the verifier. + plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, do not + self-approve the trust-root change. Run full verify before reporting + completion and route human review when the verifier requires it. 4. **Run the verifier.** For local uncommitted work, omit `--head` and omit `--base` so the checked-out working tree is scanned, including uncommitted edits: ```bash - agents-shipgate verify --workspace . --config shipgate.yaml \ + shipgate verify --workspace . --config shipgate.yaml \ --ci-mode advisory --format json ``` For committed PR or CI verification, pass the head ref explicitly: ```bash - agents-shipgate verify --workspace . --config shipgate.yaml \ + shipgate verify --workspace . --config shipgate.yaml \ --base origin/main --head HEAD --ci-mode advisory --format json ``` `verify` never fetches. If you pass `--base` and that ref is missing, @@ -96,8 +73,8 @@ work is complete. ## What NOT to do -- Do not claim the diff is verified until `agents-shipgate verify` has run or - `agents-shipgate trigger` has returned a clear skip verdict. +- Do not claim the diff is verified until `shipgate verify` has run or + `shipgate verify --preview --json` has returned a clear skip verdict. - Do not claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user explicitly accepts human review. 
diff --git a/adoption-kits/codex-skill/SKILL.md b/adoption-kits/codex-skill/SKILL.md index 372a152b..16230781 100644 --- a/adoption-kits/codex-skill/SKILL.md +++ b/adoption-kits/codex-skill/SKILL.md @@ -1,6 +1,6 @@ --- name: agents-shipgate -description: Use when the user wants to add or run Agents Shipgate — the deterministic merge gate for AI-generated agent capability changes — on an AI agent's tool surface; review or prepare a tool-using agent for release; scan MCP, OpenAPI, OpenAI Agents SDK, Anthropic, Google ADK, LangChain/LangGraph, CrewAI, OpenAI API, Codex plugin, or n8n tool artifacts; add advisory CI; or interpret, fix, triage, suppress, or explain a Shipgate finding. +description: Use when the user wants to run the prominent Agents Shipgate flows — `shipgate check`, `shipgate verify`, or `shipgate audit --host` — for AI agent capability changes, PR release readiness, or coding-agent host grants. --- # Agents Shipgate @@ -13,33 +13,30 @@ Do not use it for general linting, runtime monitoring, evals, model-output quali ## Workflow -1. For relevance decisions, bootstrap, verifier runs, scanning, CI setup, finding fixes, false-positive triage, strict-mode promotion, or version upgrades, read `references/recipes.md`. +1. For local checks, verifier runs, host audits, and supporting recovery commands, read `references/recipes.md`. 2. For reading `report.json`, summarizing release decisions, or deciding what may be auto-applied, read `references/report-reading.md`. -3. Before running Shipgate CLI commands, require a CLI whose `agents-shipgate contract --json` reports `contract_version: "7"` or newer: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. If it is missing or stale, tell the user to install or upgrade `agents-shipgate`. The Codex plugin supplies workflows, not the scanner binary. +3. 
Before running Shipgate CLI commands, require a CLI whose `agents-shipgate contract --json` reports `contract_version: "8"` or newer: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. If it is missing or stale, tell the user to install or upgrade `agents-shipgate`. The Codex plugin supplies workflows, not the scanner binary. 4. Set `AGENTS_SHIPGATE_AGENT_MODE=1` before running Shipgate commands so errors include structured `next_action` JSON. 5. Default first-time CI to advisory mode. Do not enable release-blocking CI or save a baseline until a human has reviewed current findings. 6. For local agent control, run `shipgate check --agent codex --workspace . --format codex-boundary-json` and read the stdout `shipgate.codex_boundary_result/v1` object. Switch on `decision`; follow `first_next_action`, `repair`, and `human_review`. -7. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run `agents-shipgate preflight --workspace . --plan - --json` with a `PreflightPlanV1` object. Legacy `--changed-files`/`--diff` shorthands remain available. If `requires_human_review` is true or `first_next_action.actor` is `human`, stop and route to a human. +7. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, plan to run `shipgate verify` before completion and route trust-root review to a human when the verifier requires it. 8. For full PR verification, read `agents-shipgate-reports/agent-handoff.json` first, then `verifier.json` for detailed controller state, then `verify-run.json` for reproducibility metadata, then `report.json` for reviewer detail; `report.json.release_decision.decision` remains the release gate. 9. 
Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. 10. Ensure `.gitignore` covers `agents-shipgate-reports/` before committing. ## Fast Paths -- CLI preflight: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. Continue only when the installed CLI reports `contract_version: "7"` or newer; if it is missing or stale, ask the user to install or upgrade `agents-shipgate`. -- Protected-surface preflight: run `agents-shipgate preflight --workspace . --plan - --json` before touching trust roots; include `changed_files[]` or `diff_text` in the plan when you have concrete planned paths. +- CLI preflight: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. Continue only when the installed CLI reports `contract_version: "8"` or newer; if it is missing or stale, ask the user to install or upgrade `agents-shipgate`. - Agent-native check: run `shipgate check --agent codex --workspace . --format codex-boundary-json`; read only the JSON result for continue/repair/stop routing. -- First adoption: run `agents-shipgate detect --workspace . --json`, then follow `references/recipes.md`. -- Agent-related PR/CI diff: run `agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. For local uncommitted work, omit `--base`/`--head` so the working tree is scanned. `verify` never fetches. -- Existing manifest / ongoing PR: run `agents-shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`. -- First GitHub CI: copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. -- Explain one finding: run `agents-shipgate explain-finding --from agents-shipgate-reports/report.json --json`. 
-- Triage heuristic findings: run `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`. +- Agent-related PR/CI diff: run `shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. For local uncommitted work, omit `--base`/`--head` so the working tree is scanned. `verify` never fetches. +- Existing manifest / ongoing PR: run `shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`. +- Unconfigured repo or uncertain relevance: run `shipgate verify --preview --json`. +- Host grants: run `shipgate audit --host --json --out agents-shipgate-reports/host-grants.json`. ## Boundaries -- Do not claim a finding is fixed without re-running `agents-shipgate scan` and reporting the new decision/counts. -- Do not continue with protected-surface edits when preflight returns `requires_human_review=true`; coding agents must not self-approve trust-root changes. +- Do not claim a finding is fixed without re-running `shipgate verify` and reporting the new merge verdict and release decision. +- Do not self-approve trust-root changes; when `shipgate verify` returns human review required, surface it to a human. - Before finishing an agent-related diff, run `shipgate check --agent codex --workspace . --format codex-boundary-json` and follow `shipgate.codex_boundary_result/v1`. - Do not bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions; verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible. - Do not silently suppress findings. Suppressions require a non-empty `reason`. 
diff --git a/adoption-kits/codex-skill/references/recipes.md b/adoption-kits/codex-skill/references/recipes.md index a663b4f5..9bf13480 100644 --- a/adoption-kits/codex-skill/references/recipes.md +++ b/adoption-kits/codex-skill/references/recipes.md @@ -1,12 +1,14 @@ # Agents Shipgate Recipes -Use these recipes after the `agents-shipgate` skill triggers. +Use these recipes after the `agents-shipgate` skill triggers. The prominent +flows are `shipgate check`, `shipgate verify`, and `shipgate audit --host`. +Supporting commands remain callable, but should not be the first thing an agent +recommends. ## CLI Preflight -The Codex plugin supplies the workflow instructions, not the scanner binary. -Before running `agents-shipgate` commands, confirm the CLI is installed and new -enough for the `verify` workflow: +The Codex plugin supplies workflow instructions, not the scanner binary. +Before running Shipgate commands, confirm the CLI is installed and new enough: ```bash command -v agents-shipgate @@ -14,75 +16,81 @@ agents-shipgate --version agents-shipgate contract --json ``` -Require `agents-shipgate contract --json` to report `contract_version: "7"` or -newer. If the command is missing or the contract is older, ask the user to -install or upgrade the CLI and rerun the task: +Require `agents-shipgate contract --json` to report `contract_version: "8"` or +newer. If it is missing or stale, ask the user to install or upgrade: ```bash pipx install agents-shipgate -pipx upgrade agents-shipgate # plain install is a no-op over a stale build +pipx upgrade agents-shipgate ``` -After installation, run `agents-shipgate --version` and -`agents-shipgate contract --json` again. Do not continue to `detect`, `init`, -`scan`, or `verify` until the CLI exists and reports contract v7 or newer. +Do not report the task complete until the CLI exists and reports contract v8 or +newer. 
Local boundary checks emit `shipgate.codex_boundary_result/v1`; legacy +`agent_result_v1` fixtures are retained only for older protocol integrations. -A missing or stale binary is a `decision="block"` install action in the -agent-native protocol, not a reason to proceed unverified. Until -`agents-shipgate contract --json` confirms contract v7 or newer, do not report -the task complete: surface the install/upgrade action and stop. Local boundary -checks emit `shipgate.codex_boundary_result/v1`; legacy `agent_result_v1` -fixtures are retained only for older protocol integrations. +## Local Agent Check -## Protected Surface Preflight +Run the boundary check before reporting an agent-related local diff complete: -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run: +```bash +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate check \ + --agent codex --workspace . --format codex-boundary-json +``` + +Read only stdout JSON. Switch on `decision`, `completion_allowed`, +`must_stop`, `first_next_action`, `human_review`, `repair`, and `policy`. + +## Verify A Diff + +Use this before finishing a PR or local change that touches an agent tool +surface, prompts, policies, permissions, Shipgate CI, or other protected +release surfaces. ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . --plan - --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify \ + --workspace . --config shipgate.yaml \ + --base origin/main --head HEAD --ci-mode advisory --format json ``` -Pass a `PreflightPlanV1` object on stdin. If you already have a path list or -local diff and need legacy shorthands, ask preflight about them before editing: +For local uncommitted work, omit `--head` and `--base` so `verify` scans the +checked-out working tree, including uncommitted edits. 
In committed PR or CI +contexts, make the base ref available first because `verify` never fetches. If +the repo is not configured or relevance is unclear, run: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . \ - --changed-files changed.txt --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . \ - --diff pr.diff --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify --preview --json ``` -If `requires_human_review` is true or `first_next_action.actor` is `human`, -stop and route the change to a human. Preflight is a routing surface only; -`release_decision.decision` remains the gate. +Read `agents-shipgate-reports/agent-handoff.json` first. Lead with +`gate.merge_verdict`, then inspect `next_action`, `controller`, +`fix_task.safe_to_attempt`, and `capability_review.top_changes[]`. Then read +`verifier.json`, `verify-run.json`, and `report.json`; the release gate remains +`report.json.release_decision.decision`. + +Do not bypass the verifier by suppressing findings, lowering severity, +expanding baselines or waivers, removing Shipgate CI, or weakening agent +instructions. Verify-mode `SHIP-VERIFY-*` findings route those trust-root +changes to human review. -## Decide Relevance +## Audit Host Grants -Run: +Run host audit when the task touches MCP servers, permission rules, hooks, +workflow scopes, or coding-agent host configuration: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate audit --host --json \ + --out agents-shipgate-reports/host-grants.json ``` -Proceed when any of these are true: - -- `is_agent_project: true` -- `suggested_sources` is non-empty -- `codex_plugin_candidates` is non-empty -- `shipgate.yaml` already exists -- the user explicitly asked for a Shipgate scan or Tool-Use Readiness gate +For drift checks against an acknowledged baseline, use the same flow with +`--drift` and optionally `--fail-on-drift`. 
-Stop only when all signals are absent and the user did not explicitly request Shipgate. +## Supporting Setup And Repair -## Bootstrap A Repo - -Run: +If `shipgate verify --preview --json` says the repo needs configuration, the +supporting setup commands remain available: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate contract --json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate init --workspace . --write --ci --json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate scan -c shipgate.yaml --suggest-patches --format json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \ @@ -90,110 +98,26 @@ AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \ --confidence high --apply ``` -If `init` reports placeholders, replace `CHANGE_ME` values from repo context before scanning. If `shipgate.yaml` already exists, edit it rather than overwriting it. - -## Verify An Agent-Related Diff - -Use this before finishing a PR or local change that touches an agent tool -surface, prompts, policies, permissions, Shipgate CI, or other protected -release surfaces. - -```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate trigger \ - --workspace . --base origin/main --head HEAD --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . --plan - --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate verify \ - --workspace . --config shipgate.yaml \ - --base origin/main --head HEAD --ci-mode advisory --format json -``` - -For local uncommitted work, omit `--head` and omit `--base` so `verify` scans -the checked-out working tree, including uncommitted edits. In committed PR or -CI contexts, make the base ref available first because `verify` never fetches. -If you pass a missing `--base`, `verify` exits 2 with an unknown merge verdict. - -Read `agents-shipgate-reports/agent-handoff.json` first. 
Lead with -`gate.merge_verdict`, then inspect `capability_review.top_changes[]`, -`next_action`, `controller`, and `fix_task.safe_to_attempt`. Then read -`agents-shipgate-reports/verifier.json` for detailed controller context and -`agents-shipgate-reports/report.json`; `release_decision.decision` remains the -gate. `capability_review.top_changes[]` and `verifier_summary` are -supporting/provisional composition summaries: their verdict-like values mirror -`release_decision.decision`, and they add counts for protected-surface touches, -policy weakening, human acknowledgement, and top reason codes. - -Do not bypass the verifier. Do not suppress findings, lower severity, expand -baselines or waivers, remove Shipgate CI, or weaken agent instructions to make -the run pass. Verify-mode `SHIP-VERIFY-*` findings route those trust-root -changes to human review. - -## First-Time CI - -Use advisory mode only. Copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. +If `init` reports placeholders, replace `CHANGE_ME` values from repo context +before verification. If `shipgate.yaml` already exists, edit it rather than +overwriting it. -Do not switch to release-blocking behavior in the same task. Strict promotion requires human review, suppressions with reasons, and optionally a saved baseline. - -## Fix Top Finding +## Fix Or Explain Findings 1. Read `agents-shipgate-reports/report.json`. 2. Pick the first blocker, then highest-severity review item. -3. If `findings[].agent_action == "auto_apply"` and a high-confidence patch exists, apply it with `apply-patches --confidence high --apply`. -4. For policy/evidence gaps, propose the exact human decision needed. Do not fabricate approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. -5. Re-run scan and report the new `release_decision.decision`, blocker count, and review item count. - -## Recommend Fixes - -Group active findings by action: +3. 
Auto-apply only high-confidence safe patches. +4. For policy/evidence gaps, propose the exact human decision needed. Do not + fabricate approval, confirmation, idempotency, broad-scope, + prohibited-action, or runtime-trace evidence. +5. Re-run `shipgate verify` and report the new merge verdict, release + decision, blocker count, and review-item count. -- `auto_apply`: safe mechanical patches. -- `propose_patch_for_review`: show patch, leave final decision to user. -- `escalate_to_human`: policy/evidence decision. -- `suppress_with_reason`: only when the user confirms the finding is intentionally accepted. -- `informational`: summarize, no gate action. - -## Explain A Finding - -Run: +For one finding: ```bash agents-shipgate explain-finding \ --from agents-shipgate-reports/report.json --json ``` -Use the returned deterministic `explanation` for PR comments or chat replies. Keep it to 3-5 sentences and include the tool name, release risk, and next action. - -## Triage False Positives - -Prefer fixing the manifest or policy evidence over suppression. Suppress only with a specific reason: - -```yaml -checks: - ignore: - - check_id: SHIP-CHECK-ID - tool: tool.name - reason: specific accepted-risk rationale -``` - -## Promote Advisory To Strict - -Only after humans review advisory output: - -```bash -agents-shipgate baseline save -c shipgate.yaml --out .agents-shipgate/baseline.json -agents-shipgate scan -c shipgate.yaml \ - --baseline .agents-shipgate/baseline.json \ - --ci-mode strict --fail-on critical,high -``` - -The promoted gate should fail only on new findings above the selected threshold. - -## Upgrade Shipgate - -Update the GitHub Action tag and `shipgate_version` together. Re-run: - -```bash -agents-shipgate contract --json -agents-shipgate scan -c shipgate.yaml --suggest-patches --format json -``` - -If schema or decision fields changed, use `docs/agent-contract-current.md` from the installed version or upstream repo. 
+Suppressions require a specific non-empty reason and explicit user approval. diff --git a/docs/agent-contract-current.md b/docs/agent-contract-current.md index 336f6e8f..77a1730c 100644 --- a/docs/agent-contract-current.md +++ b/docs/agent-contract-current.md @@ -10,23 +10,26 @@ Verify the installed CLI contract locally before relying on hard-coded docs: agents-shipgate contract --json ``` -Runtime contract v7 also exposes the local agent command spec: -`commands{}`, `default_paths{}`, `artifacts{}`, `agent_read_order[]`, -`verifier_read_order[]`, `merge_verdicts[]`, `release_decisions[]`, -`do_not_auto_assert[]`, `verifier_schema_version`, +Runtime contract v8 also exposes the local agent command spec: +`primary_commands{}`, `commands{}`, `default_paths{}`, `artifacts{}`, +`agent_read_order[]`, `verifier_read_order[]`, `merge_verdicts[]`, +`release_decisions[]`, `do_not_auto_assert[]`, `verifier_schema_version`, `verify_run_schema_version`, `agent_handoff_schema_version`, `agent_handoff_schema_path`, `agent_handoff_artifact`, `codex_boundary_result_schema_version`, `attestation_schema_version`, `registry_schema_version`, `org_evidence_bundle_schema_version`, `host_grants_inventory_schema_version`, `agent_interface_operations[]`, `exit_code_policy`, `mcp_tools[]`, and the legacy `agent_result_*` fields -retained for older protocol consumers. +retained for older protocol consumers. `primary_commands{}` is the prominent +entry surface and contains only `shipgate check`, `shipgate verify`, and +`shipgate audit --host` flows; `commands{}` is compatibility/supporting +metadata. Downstream repos generated with `init --agent-instructions=default` get the minimal local copy at `.shipgate/agent-contract.json`. 
- Latest release: `v1.0.0a1` (see [pyproject.toml](../pyproject.toml) for the in-tree version) -- Runtime contract: `7` +- Runtime contract: `8` - Current report schema: `0.27` — [`docs/report-schema.v0.27.json`](report-schema.v0.27.json) - Current packet schema: `0.7` — [`docs/packet-schema.v0.7.json`](packet-schema.v0.7.json) - Current verifier schema: `0.1` — [`docs/verifier-schema.v0.1.json`](verifier-schema.v0.1.json) @@ -88,8 +91,8 @@ projections, and `agents-shipgate skill ...` review output as supporting/provisional surfaces. They may be useful for routing and review, but they do not replace the gate above and must not introduce a second verdict. -`agents-shipgate preflight --workspace . --plan - --json` is a proactive -routing surface for coding agents before edits. It accepts a single +`agents-shipgate preflight --workspace . --plan - --json` remains a supporting +proactive routing surface for coding agents before edits. It accepts a single `PreflightPlanV1` object with `changed_files[]`, optional `diff_text`, `capability_requests[]`, `host_permission_requests[]`, and `context.{agent,task}`. The emitted `PreflightResultV2` reports protected diff --git a/docs/architecture.md b/docs/architecture.md index 770e950d..80a0c2e2 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -3,7 +3,7 @@ A single-page summary of the `agents-shipgate` codebase for new contributors and AI coding agents extending the project. Current as of 2026-06-08; auto-checked against `agents-shipgate contract --json`: -runtime contract `7`, report schema `v0.27`, packet schema `v0.7`. +runtime contract `8`, report schema `v0.27`, packet schema `v0.7`. For the per-field stability contract, see [`../STABILITY.md`](../STABILITY.md). 
For the agent-facing field index, diff --git a/docs/quickstart.md b/docs/quickstart.md index 267a7341..0447480b 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -4,26 +4,30 @@ A 60-second introduction to agents-shipgate for developers and AI coding agents. ## Verify-first quickstart -The main path is to verify a PR or local diff before merge. After installing -the CLI (see [Install](#install)), start with a preview so Shipgate can tell a -coding agent whether the repo or diff is relevant: +After installing the CLI (see [Install](#install)), start from one of three +prominent flows. -```bash -agents-shipgate verify --preview --json -``` +### Local Boundary Check -If the repo needs Shipgate and is not configured yet, install the manifest, -advisory CI, and agent-facing instructions: +Coding agents run `shipgate check` before reporting an agent-capability change +complete. Parse the stdout `shipgate.codex_boundary_result/v1` object: ```bash -agents-shipgate init --workspace . --write --ci --agent-instructions=default --json +shipgate check --agent codex --workspace . --format codex-boundary-json +shipgate check --agent claude-code --workspace . --format codex-boundary-json +shipgate check --agent cursor --workspace . --format codex-boundary-json ``` -Then run the verifier. For local pre-commit work, omit `--base` and `--head` so -uncommitted edits are scanned: +Switch on `decision`, `completion_allowed`, `must_stop`, `first_next_action`, +`human_review`, `repair`, and `policy`; do not infer a decision from prose. + +### PR And Local Verification + +For local pre-commit work, omit `--base` and `--head` so uncommitted edits are +scanned: ```bash -agents-shipgate verify --workspace . --config shipgate.yaml \ +shipgate verify --workspace . --config shipgate.yaml \ --ci-mode advisory --format json ``` @@ -31,17 +35,32 @@ For committed PR/CI refs, make the base ref available first, then pass base and head: ```bash -agents-shipgate verify --workspace . 
--config shipgate.yaml \ +shipgate verify --workspace . --config shipgate.yaml \ --ci-mode advisory --format json --base origin/main --head HEAD ``` +If a repo is not configured yet, use the verify flow's preview entry point: + +```bash +shipgate verify --preview --json +``` + Read `agents-shipgate-reports/agent-handoff.json` first and lead with `gate.merge_verdict`, `gate.can_merge_without_human`, `controller`, `next_action`, `fix_task`, and `capability_review.top_changes`. Then read `agents-shipgate-reports/report.json`; the release gate remains `release_decision.decision`. -## Zero-install: is this even relevant? +### Host-Grant Audit + +Before changing local MCP servers, Codex/Claude/Cursor permission rules, +hooks, workflow scopes, or other host grants, capture the host inventory: + +```bash +shipgate audit --host --json --out agents-shipgate-reports/host-grants.json +``` + +## Supporting zero-install relevance check Coding agents reading a fresh repo can answer "is this an agent project?" in one fetch, no install needed: diff --git a/docs/target-repo-agent-snippets.md b/docs/target-repo-agent-snippets.md index 0d68e851..8f6de559 100644 --- a/docs/target-repo-agent-snippets.md +++ b/docs/target-repo-agent-snippets.md @@ -50,11 +50,11 @@ Commands: shipgate check --agent codex --workspace . --format codex-boundary-json shipgate check --agent claude-code --workspace . --format codex-boundary-json shipgate check --agent cursor --workspace . --format codex-boundary-json -agents-shipgate verify --preview --json -agents-shipgate preflight --workspace . --plan - --json -agents-shipgate init --workspace . --write --ci --agent-instructions=default --json -agents-shipgate verify --workspace . --config shipgate.yaml \ +shipgate verify --workspace . --config shipgate.yaml \ --ci-mode advisory --format json +shipgate verify --workspace . 
--config shipgate.yaml \ + --base origin/main --head HEAD --ci-mode advisory --format json +shipgate audit --host --json --out agents-shipgate-reports/host-grants.json ``` For local agent control, read the `shipgate check` stdout JSON only. It is @@ -62,22 +62,13 @@ For local agent control, read the `shipgate check` stdout JSON only. It is `completion_allowed`, and `must_stop`, then follow `first_next_action`, `human_review`, `repair`, and `policy`. Do not infer a decision from prose. -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run -`agents-shipgate preflight --workspace . --plan - --json` with a -`PreflightPlanV1` object. Legacy shorthands such as -`agents-shipgate preflight --changed-files changed.txt --json` remain available. -If `requires_human_review` is `true` or -`first_next_action.actor` is `human`, stop and route the change to a human. - Before finishing an agent-related diff, run `shipgate check`. If `decision=allow` or `warn`, continue and summarize. If `first_next_action.kind` is `repair` and `repair.safe_to_attempt=true`, make only the listed mechanical repair and rerun the command. If `human_review.required=true` or `must_stop=true`, stop and surface the JSON result to a human. -For committed PR/CI verification, run `agents-shipgate verify --base +For committed PR/CI verification, run `shipgate verify --base origin/main --head HEAD --json` after making the base ref available; it never fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `gate.merge_verdict`, `gate.can_merge_without_human`, and `controller`; then read @@ -88,6 +79,10 @@ release gate. Legacy `agent-result.json` surfaces, where present, are supporting/provisional projections and not the CI gate. 
+For coding-agent host grants, run `shipgate audit --host` and read the emitted +host-grants inventory before changing MCP servers, permission rules, hooks, or +workflow scopes. + Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, or prohibited-action policy decisions; surface those as human review items. @@ -150,19 +145,20 @@ advisory GitHub Action template under `ci-recipes/`. ## `CLAUDE.md` ````md -## Agents Shipgate — merge gate +## Agents Shipgate Agents Shipgate is the deterministic merge gate for AI-generated agent -capability changes. This repo uses it to gate what its AI agent can do. +capability changes — a local-first, static Tool-Use Readiness review. For agent tool-surface or release-policy changes, run: ```bash shipgate check --agent claude-code --workspace . --format codex-boundary-json -agents-shipgate verify --preview --json -agents-shipgate preflight --workspace . --plan - --json -agents-shipgate verify --workspace . --config shipgate.yaml \ +shipgate verify --workspace . --config shipgate.yaml \ --ci-mode advisory --format json +shipgate verify --workspace . --config shipgate.yaml \ + --base origin/main --head HEAD --ci-mode advisory --format json +shipgate audit --host --json --out agents-shipgate-reports/host-grants.json ``` For local agent control, read the `shipgate check` stdout JSON only. It is @@ -176,16 +172,7 @@ is `repair` and `repair.safe_to_attempt=true`, make only the listed mechanical repair and rerun the command. If `human_review.required=true` or `must_stop=true`, stop and surface the JSON result to a human. -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run -`agents-shipgate preflight --workspace . --plan - --json` with a -`PreflightPlanV1` object. 
Legacy shorthands such as -`agents-shipgate preflight --changed-files changed.txt --json` remain available. -If `requires_human_review` is `true` or -`first_next_action.actor` is `human`, stop and route the change to a human. - -For committed PR/CI verification, run `agents-shipgate verify --base +For committed PR/CI verification, run `shipgate verify --base origin/main --head HEAD --json` after making the base ref available; it never fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `gate.merge_verdict`, `gate.can_merge_without_human`, and `controller`; then read @@ -196,6 +183,10 @@ release gate. Legacy `agent-result.json` surfaces, where present, are supporting/provisional projections and not the CI gate. +For coding-agent host grants, run `shipgate audit --host` and read the emitted +host-grants inventory before changing MCP servers, permission rules, hooks, or +workflow scopes. + Use `apply-patches --confidence high --apply` only for high-confidence safe patches. Approval, confirmation, idempotency, broad-scope, and prohibited-action changes require human review. @@ -245,10 +236,6 @@ When a change affects agent tools, MCP exports, OpenAPI specs, prompts, permissions, approval policies, or release gates, run Agents Shipgate. Default to advisory verification while adopting the gate. -Before protected edits, run preflight and read `PreflightResultV2`: - - agents-shipgate preflight --workspace . --plan - --json - For local agent control, run: shipgate check --agent cursor --workspace . --format codex-boundary-json @@ -264,16 +251,11 @@ only the listed mechanical repair and rerun the command. If `human_review.required=true` or `must_stop=true`, stop and surface the JSON result to a human. -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run -`agents-shipgate preflight --workspace . 
--plan - --json` with a -`PreflightPlanV1` object. Legacy shorthands such as -`agents-shipgate preflight --changed-files changed.txt --json` remain available. -If `requires_human_review` is `true` or -`first_next_action.actor` is `human`, stop and route the change to a human. +For local verification, run: + + shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json -For committed PR/CI verification, run `agents-shipgate verify --base +For committed PR/CI verification, run `shipgate verify --base origin/main --head HEAD --json` after making the base ref available; it never fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `gate.merge_verdict`, `gate.can_merge_without_human`, and `controller`; then read @@ -284,6 +266,13 @@ release gate. Legacy `agent-result.json` surfaces, where present, are supporting/provisional projections and not the CI gate. +For coding-agent host grants, run: + + shipgate audit --host --json --out agents-shipgate-reports/host-grants.json + +Read the host-grants inventory before changing MCP servers, permission rules, +hooks, or workflow scopes. + Apply only high-confidence safe patches. Do not invent approval, confirmation, or idempotency evidence. diff --git a/docs/triggers.json b/docs/triggers.json index 055d1f0f..2a707661 100644 --- a/docs/triggers.json +++ b/docs/triggers.json @@ -2,10 +2,10 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "schema_version": "0.1", "name": "agents-shipgate-triggers", - "description": "Machine-readable trigger catalog for Agents Shipgate. Mirrors the AGENTS.md trigger table — a coding agent that has not yet adopted Shipgate can fetch this and apply the rules to a PR diff or repo state to decide whether to run `agents-shipgate detect`. Stable for 0.x: rule IDs, predicate vocabulary, and action enum will not change in minor versions.", + "description": "Machine-readable trigger catalog for Agents Shipgate. 
Mirrors the AGENTS.md trigger table — a coding agent can fetch this and apply the rules to a PR diff or repo state to decide whether to run `shipgate verify --preview --json` or the full verifier. Stable for 0.x: rule IDs, predicate vocabulary, and action enum will not change in minor versions.", "source_of_truth": "AGENTS.md#should-i-run-shipgate-on-this-pr", "documentation_url": "https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/AGENTS.md", - "default_command": "agents-shipgate detect --workspace . --json", + "default_command": "shipgate verify --preview --json", "predicate_vocabulary": { "glob": "Any changed file in the PR matches the glob (relative to repo root). Globstar `**` matches zero or more path segments; `*` matches within a segment; brace expansion is not supported.", "diff_contains": "The unified diff (added or modified lines) of this PR contains the literal string. Use only with stable token forms — decorator names, package names, function calls.", @@ -30,7 +30,7 @@ }, "action": "run_shipgate", "rationale": "MCP exports declare a tool surface; changes need a tool-use readiness check.", - "command": "agents-shipgate detect --workspace . --json" + "command": "shipgate verify --preview --json" }, { "id": "TRIGGER-OPENAPI-SPEC-CHANGED", @@ -47,7 +47,7 @@ }, "action": "run_shipgate", "rationale": "OpenAPI specs define HTTP-tool surfaces; scope coverage and side-effect classification depend on them.", - "command": "agents-shipgate detect --workspace . --json" + "command": "shipgate verify --preview --json" }, { "id": "TRIGGER-STATIC-TOOL-INVENTORY-CHANGED", @@ -93,7 +93,7 @@ }, "action": "run_shipgate", "rationale": "Codex plugin packages and marketplaces are static release surfaces; plugin metadata, skills, apps, MCP server declarations, and hooks need a readiness check.", - "command": "agents-shipgate detect --workspace . 
--json" + "command": "shipgate verify --preview --json" }, { "id": "TRIGGER-N8N-WORKFLOW-CHANGED", @@ -230,7 +230,7 @@ }, "actions": { "force_run": "Propose Shipgate unconditionally. Overrides `skip_shipgate` rules — used when the repo has explicitly opted in (e.g. existing `shipgate.yaml`) and every PR should be checked.", - "run_shipgate": "Propose `agents-shipgate detect --json` as the next step; if `is_agent_project` or `suggested_sources` is non-trivial, follow with `init --write --ci` and `scan`. Does NOT win over `skip_shipgate` — a docs-only PR that incidentally mentions a tool decorator stays skipped.", + "run_shipgate": "Propose `shipgate verify --preview --json` as the next step; if preview says the repo needs configuration, use supporting setup commands before the full verifier. Does NOT win over `skip_shipgate` — a docs-only PR that incidentally mentions a tool decorator stays skipped.", "skip_shipgate": "Do not propose Shipgate. Beats `run_shipgate` (but loses to `force_run`). Recommend other appropriate checks for the PR.", "dry_run": "Advisory: `run_shipgate` stays false but `dry_run_recommended` is true in the evaluator output. Callers may propose a non-mutating scan; never propose `init --write` based on `dry_run` alone." 
}, diff --git a/llms-full.txt b/llms-full.txt index ff0c1921..89e7c2dc 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -982,23 +982,26 @@ Verify the installed CLI contract locally before relying on hard-coded docs: agents-shipgate contract --json ``` -Runtime contract v7 also exposes the local agent command spec: -`commands{}`, `default_paths{}`, `artifacts{}`, `agent_read_order[]`, -`verifier_read_order[]`, `merge_verdicts[]`, `release_decisions[]`, -`do_not_auto_assert[]`, `verifier_schema_version`, +Runtime contract v8 also exposes the local agent command spec: +`primary_commands{}`, `commands{}`, `default_paths{}`, `artifacts{}`, +`agent_read_order[]`, `verifier_read_order[]`, `merge_verdicts[]`, +`release_decisions[]`, `do_not_auto_assert[]`, `verifier_schema_version`, `verify_run_schema_version`, `agent_handoff_schema_version`, `agent_handoff_schema_path`, `agent_handoff_artifact`, `codex_boundary_result_schema_version`, `attestation_schema_version`, `registry_schema_version`, `org_evidence_bundle_schema_version`, `host_grants_inventory_schema_version`, `agent_interface_operations[]`, `exit_code_policy`, `mcp_tools[]`, and the legacy `agent_result_*` fields -retained for older protocol consumers. +retained for older protocol consumers. `primary_commands{}` is the prominent +entry surface and contains only `shipgate check`, `shipgate verify`, and +`shipgate audit --host` flows; `commands{}` is compatibility/supporting +metadata. Downstream repos generated with `init --agent-instructions=default` get the minimal local copy at `.shipgate/agent-contract.json`. 
- Latest release: `v1.0.0a1` (see [pyproject.toml](../pyproject.toml) for the in-tree version) -- Runtime contract: `7` +- Runtime contract: `8` - Current report schema: `0.27` — [`docs/report-schema.v0.27.json`](report-schema.v0.27.json) - Current packet schema: `0.7` — [`docs/packet-schema.v0.7.json`](packet-schema.v0.7.json) - Current verifier schema: `0.1` — [`docs/verifier-schema.v0.1.json`](verifier-schema.v0.1.json) @@ -1060,8 +1063,8 @@ projections, and `agents-shipgate skill ...` review output as supporting/provisional surfaces. They may be useful for routing and review, but they do not replace the gate above and must not introduce a second verdict. -`agents-shipgate preflight --workspace . --plan - --json` is a proactive -routing surface for coding agents before edits. It accepts a single +`agents-shipgate preflight --workspace . --plan - --json` remains a supporting +proactive routing surface for coding agents before edits. It accepts a single `PreflightPlanV1` object with `changed_files[]`, optional `diff_text`, `capability_requests[]`, `host_permission_requests[]`, and `context.{agent,task}`. The emitted `PreflightResultV2` reports protected diff --git a/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md b/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md index 372a152b..16230781 100644 --- a/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md +++ b/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md @@ -1,6 +1,6 @@ --- name: agents-shipgate -description: Use when the user wants to add or run Agents Shipgate — the deterministic merge gate for AI-generated agent capability changes — on an AI agent's tool surface; review or prepare a tool-using agent for release; scan MCP, OpenAPI, OpenAI Agents SDK, Anthropic, Google ADK, LangChain/LangGraph, CrewAI, OpenAI API, Codex plugin, or n8n tool artifacts; add advisory CI; or interpret, fix, triage, suppress, or explain a Shipgate finding. 
+description: Use when the user wants to run the prominent Agents Shipgate flows — `shipgate check`, `shipgate verify`, or `shipgate audit --host` — for AI agent capability changes, PR release readiness, or coding-agent host grants. --- # Agents Shipgate @@ -13,33 +13,30 @@ Do not use it for general linting, runtime monitoring, evals, model-output quali ## Workflow -1. For relevance decisions, bootstrap, verifier runs, scanning, CI setup, finding fixes, false-positive triage, strict-mode promotion, or version upgrades, read `references/recipes.md`. +1. For local checks, verifier runs, host audits, and supporting recovery commands, read `references/recipes.md`. 2. For reading `report.json`, summarizing release decisions, or deciding what may be auto-applied, read `references/report-reading.md`. -3. Before running Shipgate CLI commands, require a CLI whose `agents-shipgate contract --json` reports `contract_version: "7"` or newer: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. If it is missing or stale, tell the user to install or upgrade `agents-shipgate`. The Codex plugin supplies workflows, not the scanner binary. +3. Before running Shipgate CLI commands, require a CLI whose `agents-shipgate contract --json` reports `contract_version: "8"` or newer: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. If it is missing or stale, tell the user to install or upgrade `agents-shipgate`. The Codex plugin supplies workflows, not the scanner binary. 4. Set `AGENTS_SHIPGATE_AGENT_MODE=1` before running Shipgate commands so errors include structured `next_action` JSON. 5. Default first-time CI to advisory mode. Do not enable release-blocking CI or save a baseline until a human has reviewed current findings. 6. For local agent control, run `shipgate check --agent codex --workspace . 
--format codex-boundary-json` and read the stdout `shipgate.codex_boundary_result/v1` object. Switch on `decision`; follow `first_next_action`, `repair`, and `human_review`. -7. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run `agents-shipgate preflight --workspace . --plan - --json` with a `PreflightPlanV1` object. Legacy `--changed-files`/`--diff` shorthands remain available. If `requires_human_review` is true or `first_next_action.actor` is `human`, stop and route to a human. +7. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, plan to run `shipgate verify` before completion and route trust-root review to a human when the verifier requires it. 8. For full PR verification, read `agents-shipgate-reports/agent-handoff.json` first, then `verifier.json` for detailed controller state, then `verify-run.json` for reproducibility metadata, then `report.json` for reviewer detail; `report.json.release_decision.decision` remains the release gate. 9. Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. 10. Ensure `.gitignore` covers `agents-shipgate-reports/` before committing. ## Fast Paths -- CLI preflight: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. Continue only when the installed CLI reports `contract_version: "7"` or newer; if it is missing or stale, ask the user to install or upgrade `agents-shipgate`. -- Protected-surface preflight: run `agents-shipgate preflight --workspace . 
--plan - --json` before touching trust roots; include `changed_files[]` or `diff_text` in the plan when you have concrete planned paths. +- CLI preflight: run `command -v agents-shipgate`, `agents-shipgate --version`, and `agents-shipgate contract --json`. Continue only when the installed CLI reports `contract_version: "8"` or newer; if it is missing or stale, ask the user to install or upgrade `agents-shipgate`. - Agent-native check: run `shipgate check --agent codex --workspace . --format codex-boundary-json`; read only the JSON result for continue/repair/stop routing. -- First adoption: run `agents-shipgate detect --workspace . --json`, then follow `references/recipes.md`. -- Agent-related PR/CI diff: run `agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. For local uncommitted work, omit `--base`/`--head` so the working tree is scanned. `verify` never fetches. -- Existing manifest / ongoing PR: run `agents-shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`. -- First GitHub CI: copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. -- Explain one finding: run `agents-shipgate explain-finding --from agents-shipgate-reports/report.json --json`. -- Triage heuristic findings: run `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`. +- Agent-related PR/CI diff: run `shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. For local uncommitted work, omit `--base`/`--head` so the working tree is scanned. `verify` never fetches. +- Existing manifest / ongoing PR: run `shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`. 
+- Unconfigured repo or uncertain relevance: run `shipgate verify --preview --json`. +- Host grants: run `shipgate audit --host --json --out agents-shipgate-reports/host-grants.json`. ## Boundaries -- Do not claim a finding is fixed without re-running `agents-shipgate scan` and reporting the new decision/counts. -- Do not continue with protected-surface edits when preflight returns `requires_human_review=true`; coding agents must not self-approve trust-root changes. +- Do not claim a finding is fixed without re-running `shipgate verify` and reporting the new merge verdict and release decision. +- Do not self-approve trust-root changes; when `shipgate verify` returns human review required, surface it to a human. - Before finishing an agent-related diff, run `shipgate check --agent codex --workspace . --format codex-boundary-json` and follow `shipgate.codex_boundary_result/v1`. - Do not bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions; verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible. - Do not silently suppress findings. Suppressions require a non-empty `reason`. diff --git a/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md b/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md index a663b4f5..9bf13480 100644 --- a/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md +++ b/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md @@ -1,12 +1,14 @@ # Agents Shipgate Recipes -Use these recipes after the `agents-shipgate` skill triggers. +Use these recipes after the `agents-shipgate` skill triggers. The prominent +flows are `shipgate check`, `shipgate verify`, and `shipgate audit --host`. +Supporting commands remain callable, but should not be the first thing an agent +recommends. ## CLI Preflight -The Codex plugin supplies the workflow instructions, not the scanner binary. 
-Before running `agents-shipgate` commands, confirm the CLI is installed and new -enough for the `verify` workflow: +The Codex plugin supplies workflow instructions, not the scanner binary. +Before running Shipgate commands, confirm the CLI is installed and new enough: ```bash command -v agents-shipgate @@ -14,75 +16,81 @@ agents-shipgate --version agents-shipgate contract --json ``` -Require `agents-shipgate contract --json` to report `contract_version: "7"` or -newer. If the command is missing or the contract is older, ask the user to -install or upgrade the CLI and rerun the task: +Require `agents-shipgate contract --json` to report `contract_version: "8"` or +newer. If it is missing or stale, ask the user to install or upgrade: ```bash pipx install agents-shipgate -pipx upgrade agents-shipgate # plain install is a no-op over a stale build +pipx upgrade agents-shipgate ``` -After installation, run `agents-shipgate --version` and -`agents-shipgate contract --json` again. Do not continue to `detect`, `init`, -`scan`, or `verify` until the CLI exists and reports contract v7 or newer. +Do not report the task complete until the CLI exists and reports contract v8 or +newer. Local boundary checks emit `shipgate.codex_boundary_result/v1`; legacy +`agent_result_v1` fixtures are retained only for older protocol integrations. -A missing or stale binary is a `decision="block"` install action in the -agent-native protocol, not a reason to proceed unverified. Until -`agents-shipgate contract --json` confirms contract v7 or newer, do not report -the task complete: surface the install/upgrade action and stop. Local boundary -checks emit `shipgate.codex_boundary_result/v1`; legacy `agent_result_v1` -fixtures are retained only for older protocol integrations. 
+## Local Agent Check -## Protected Surface Preflight +Run the boundary check before reporting an agent-related local diff complete: -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run: +```bash +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate check \ + --agent codex --workspace . --format codex-boundary-json +``` + +Read only stdout JSON. Switch on `decision`, `completion_allowed`, +`must_stop`, `first_next_action`, `human_review`, `repair`, and `policy`. + +## Verify A Diff + +Use this before finishing a PR or local change that touches an agent tool +surface, prompts, policies, permissions, Shipgate CI, or other protected +release surfaces. ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . --plan - --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify \ + --workspace . --config shipgate.yaml \ + --base origin/main --head HEAD --ci-mode advisory --format json ``` -Pass a `PreflightPlanV1` object on stdin. If you already have a path list or -local diff and need legacy shorthands, ask preflight about them before editing: +For local uncommitted work, omit `--head` and `--base` so `verify` scans the +checked-out working tree, including uncommitted edits. In committed PR or CI +contexts, make the base ref available first because `verify` never fetches. If +the repo is not configured or relevance is unclear, run: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . \ - --changed-files changed.txt --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . \ - --diff pr.diff --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify --preview --json ``` -If `requires_human_review` is true or `first_next_action.actor` is `human`, -stop and route the change to a human. Preflight is a routing surface only; -`release_decision.decision` remains the gate. 
+Read `agents-shipgate-reports/agent-handoff.json` first. Lead with +`gate.merge_verdict`, then inspect `next_action`, `controller`, +`fix_task.safe_to_attempt`, and `capability_review.top_changes[]`. Then read +`verifier.json`, `verify-run.json`, and `report.json`; the release gate remains +`report.json.release_decision.decision`. + +Do not bypass the verifier by suppressing findings, lowering severity, +expanding baselines or waivers, removing Shipgate CI, or weakening agent +instructions. Verify-mode `SHIP-VERIFY-*` findings route those trust-root +changes to human review. -## Decide Relevance +## Audit Host Grants -Run: +Run host audit when the task touches MCP servers, permission rules, hooks, +workflow scopes, or coding-agent host configuration: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json +AGENTS_SHIPGATE_AGENT_MODE=1 shipgate audit --host --json \ + --out agents-shipgate-reports/host-grants.json ``` -Proceed when any of these are true: - -- `is_agent_project: true` -- `suggested_sources` is non-empty -- `codex_plugin_candidates` is non-empty -- `shipgate.yaml` already exists -- the user explicitly asked for a Shipgate scan or Tool-Use Readiness gate +For drift checks against an acknowledged baseline, use the same flow with +`--drift` and optionally `--fail-on-drift`. -Stop only when all signals are absent and the user did not explicitly request Shipgate. +## Supporting Setup And Repair -## Bootstrap A Repo - -Run: +If `shipgate verify --preview --json` says the repo needs configuration, the +supporting setup commands remain available: ```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate contract --json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate init --workspace . 
--write --ci --json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate scan -c shipgate.yaml --suggest-patches --format json AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \ @@ -90,110 +98,26 @@ AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \ --confidence high --apply ``` -If `init` reports placeholders, replace `CHANGE_ME` values from repo context before scanning. If `shipgate.yaml` already exists, edit it rather than overwriting it. - -## Verify An Agent-Related Diff - -Use this before finishing a PR or local change that touches an agent tool -surface, prompts, policies, permissions, Shipgate CI, or other protected -release surfaces. - -```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate trigger \ - --workspace . --base origin/main --head HEAD --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate preflight --workspace . --plan - --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate verify \ - --workspace . --config shipgate.yaml \ - --base origin/main --head HEAD --ci-mode advisory --format json -``` - -For local uncommitted work, omit `--head` and omit `--base` so `verify` scans -the checked-out working tree, including uncommitted edits. In committed PR or -CI contexts, make the base ref available first because `verify` never fetches. -If you pass a missing `--base`, `verify` exits 2 with an unknown merge verdict. - -Read `agents-shipgate-reports/agent-handoff.json` first. Lead with -`gate.merge_verdict`, then inspect `capability_review.top_changes[]`, -`next_action`, `controller`, and `fix_task.safe_to_attempt`. Then read -`agents-shipgate-reports/verifier.json` for detailed controller context and -`agents-shipgate-reports/report.json`; `release_decision.decision` remains the -gate. 
`capability_review.top_changes[]` and `verifier_summary` are -supporting/provisional composition summaries: their verdict-like values mirror -`release_decision.decision`, and they add counts for protected-surface touches, -policy weakening, human acknowledgement, and top reason codes. - -Do not bypass the verifier. Do not suppress findings, lower severity, expand -baselines or waivers, remove Shipgate CI, or weaken agent instructions to make -the run pass. Verify-mode `SHIP-VERIFY-*` findings route those trust-root -changes to human review. - -## First-Time CI - -Use advisory mode only. Copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. +If `init` reports placeholders, replace `CHANGE_ME` values from repo context +before verification. If `shipgate.yaml` already exists, edit it rather than +overwriting it. -Do not switch to release-blocking behavior in the same task. Strict promotion requires human review, suppressions with reasons, and optionally a saved baseline. - -## Fix Top Finding +## Fix Or Explain Findings 1. Read `agents-shipgate-reports/report.json`. 2. Pick the first blocker, then highest-severity review item. -3. If `findings[].agent_action == "auto_apply"` and a high-confidence patch exists, apply it with `apply-patches --confidence high --apply`. -4. For policy/evidence gaps, propose the exact human decision needed. Do not fabricate approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. -5. Re-run scan and report the new `release_decision.decision`, blocker count, and review item count. - -## Recommend Fixes - -Group active findings by action: +3. Auto-apply only high-confidence safe patches. +4. For policy/evidence gaps, propose the exact human decision needed. Do not + fabricate approval, confirmation, idempotency, broad-scope, + prohibited-action, or runtime-trace evidence. +5. 
Re-run `shipgate verify` and report the new merge verdict, release + decision, blocker count, and review-item count. -- `auto_apply`: safe mechanical patches. -- `propose_patch_for_review`: show patch, leave final decision to user. -- `escalate_to_human`: policy/evidence decision. -- `suppress_with_reason`: only when the user confirms the finding is intentionally accepted. -- `informational`: summarize, no gate action. - -## Explain A Finding - -Run: +For one finding: ```bash agents-shipgate explain-finding \ --from agents-shipgate-reports/report.json --json ``` -Use the returned deterministic `explanation` for PR comments or chat replies. Keep it to 3-5 sentences and include the tool name, release risk, and next action. - -## Triage False Positives - -Prefer fixing the manifest or policy evidence over suppression. Suppress only with a specific reason: - -```yaml -checks: - ignore: - - check_id: SHIP-CHECK-ID - tool: tool.name - reason: specific accepted-risk rationale -``` - -## Promote Advisory To Strict - -Only after humans review advisory output: - -```bash -agents-shipgate baseline save -c shipgate.yaml --out .agents-shipgate/baseline.json -agents-shipgate scan -c shipgate.yaml \ - --baseline .agents-shipgate/baseline.json \ - --ci-mode strict --fail-on critical,high -``` - -The promoted gate should fail only on new findings above the selected threshold. - -## Upgrade Shipgate - -Update the GitHub Action tag and `shipgate_version` together. Re-run: - -```bash -agents-shipgate contract --json -agents-shipgate scan -c shipgate.yaml --suggest-patches --format json -``` - -If schema or decision fields changed, use `docs/agent-contract-current.md` from the installed version or upstream repo. +Suppressions require a specific non-empty reason and explicit user approval. 
diff --git a/prompts/add-shipgate-to-repo.md b/prompts/add-shipgate-to-repo.md index 01b03928..9d5007a6 100644 --- a/prompts/add-shipgate-to-repo.md +++ b/prompts/add-shipgate-to-repo.md @@ -5,23 +5,23 @@ You are working in a repo that may contain an AI agent — likely one of: an MCP Your job is to drive the first-adoption helper flow end-to-end in one tool-using turn, which adds the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. Ongoing -agent-related PRs should use `agents-shipgate verify` after this adoption step. +agent-related PRs should use `shipgate verify` after this adoption step. ## Your task -1. **Install the tool - pin the version so a stale build can't shadow it.** This flow uses the current verifier, agent-handoff, and Codex-boundary contracts and requires **contract v7 or newer**; an older copy lingering on `PATH` may lack the command or schema fields this prompt expects. Prefer a **pinned, zero-install** runner that fetches the exact version every time instead of trusting whatever is already on `PATH`. **Pin it into one variable and use that for every step below**, so no single command can fall through to a stale binary: +1. **Install the tool - pin the version so a stale build can't shadow it.** This flow uses the current verifier, agent-handoff, primary-command, and Codex-boundary contracts and requires **contract v8 or newer**; an older copy lingering on `PATH` may lack the command or schema fields this prompt expects. Prefer a **pinned, zero-install** runner that fetches the exact version every time instead of trusting whatever is already on `PATH`. 
**Pin it into one variable and use that for every step below**, so no single command can fall through to a stale binary: ```bash SG="uvx agents-shipgate@1.0.0a1" # uv: ephemeral, always the pinned build # or: SG="pipx run agents-shipgate==1.0.0a1" $SG --version # confirm the pinned runner resolves ``` - Every step below calls `$SG …`; e.g. `$SG detect …` runs `agents-shipgate detect` through the pinned runner, never a `PATH` copy. + Every step below calls `$SG …`; e.g. `$SG verify --preview --json` runs the verify preview through the pinned runner, never a `PATH` copy. If you would rather install onto `PATH`, pin the floor and **fail loudly when it resolves older** — a plain `pipx install agents-shipgate` is a no-op when an older build already exists — then set `SG=agents-shipgate`: ```bash python -m pip install -U --pre agents-shipgate - agents-shipgate contract --json # STOP if this reports contract_version < 5 - re-run pinned via uvx agents-shipgate@1.0.0a1 - SG=agents-shipgate # only after the line above confirms contract v7+ + agents-shipgate contract --json # STOP if this reports contract_version < 8 - re-run pinned via uvx agents-shipgate@1.0.0a1 + SG=agents-shipgate # only after the line above confirms contract v8+ ``` 2. **Sanity-check the install** before touching the user's code: @@ -40,13 +40,20 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. using [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) and upgrade before relying on local contract verification in automation. -3. **Detect:** +3. **Preview the verify flow:** ```bash - $SG detect --workspace . --json + $SG verify --preview --json ``` - Read the response: `is_agent_project`, `frameworks[]` (per-framework score + evidence + candidate files), `agent_name_candidates[]`, `suggested_sources[]` (MCP/OpenAPI files matched by glob). 
- - **Stop only when ALL of these hold:** `is_agent_project: false`, `suggested_sources` is empty, `codex_plugin_candidates` is empty, no `shipgate.yaml` already exists in the workspace, AND the user did not explicitly request a scan. Otherwise proceed — MCP/OpenAPI tool-surface repos and Codex plugin package repos register as `is_agent_project: false` because they have no Python framework imports, but they are valid Shipgate targets. MCP/OpenAPI hits surface as `suggested_sources`; Codex plugin hits surface as `codex_plugin_candidates`. + Read the response and next action. Preview is the first-look verify entry + point: it does not require a manifest, does not scan, and tells you whether + to configure Shipgate, skip, or run the full verifier. + + If preview exposes trigger/detection metadata, stop only when all relevance + signals are absent and the user did not explicitly request Shipgate. + Otherwise proceed. MCP/OpenAPI tool-surface repos and Codex plugin package + repos can be valid Shipgate targets even when Python framework detection + would classify `is_agent_project: false`; look for `suggested_sources` and + `codex_plugin_candidates` when those fields are present. 4. **Generate a starter manifest + GitHub Actions workflow:** ```bash diff --git a/prompts/decide-shipgate-relevance.md b/prompts/decide-shipgate-relevance.md index 63904ca1..1ba20ab9 100644 --- a/prompts/decide-shipgate-relevance.md +++ b/prompts/decide-shipgate-relevance.md @@ -69,12 +69,12 @@ the rules to the changed file list. ``` 5. **Then act.** - - If `run_shipgate: true`: run `detect` through a **version-pinned, + - If `run_shipgate: true`: run verify preview through a **version-pinned, zero-install** runner so a stale copy already on `PATH` can't answer — - `uvx agents-shipgate@1.0.0a1 detect --workspace . --json` (or + `uvx agents-shipgate@1.0.0a1 verify --preview --json` (or `pipx run agents-shipgate==1.0.0a1 ...`). 
Only fall back to a bare - `agents-shipgate detect` once `agents-shipgate --version` confirms - contract v7 or newer. Then follow + `shipgate verify --preview --json` once `agents-shipgate contract --json` + confirms contract v8 or newer. Then follow [`prompts/add-shipgate-to-repo.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/prompts/add-shipgate-to-repo.md) for the first-adoption helper flow, or point the user at the GitHub Action (`ThreeMoonsLab/agents-shipgate@v1.0.0a1`) if they prefer CI. diff --git a/prompts/verify-agent-diff.md b/prompts/verify-agent-diff.md index ec706e11..20bbcb2d 100644 --- a/prompts/verify-agent-diff.md +++ b/prompts/verify-agent-diff.md @@ -11,54 +11,31 @@ work is complete. export AGENTS_SHIPGATE_AGENT_MODE=1 ``` -2. **Decide whether the diff needs Shipgate.** - For a committed PR diff: +2. **Use verify preview only when relevance or setup is unclear.** ```bash - agents-shipgate trigger --workspace . --base origin/main --head HEAD --json - ``` - For a local pre-commit working-tree diff, or when the base ref is - unavailable locally, use the changed-files fallback: - ```bash - git diff --name-only HEAD > /tmp/shipgate-changed-files.txt - git diff HEAD > /tmp/shipgate.diff - agents-shipgate trigger --workspace . \ - --changed-files /tmp/shipgate-changed-files.txt \ - --diff /tmp/shipgate.diff --json + shipgate verify --preview --json ``` + Preview is a lightweight verify entry point: no manifest required, no scan, + exit 0. It tells you whether to configure Shipgate, skip, or run the full + verifier. If the repo already has `shipgate.yaml`, proceed to full verify. - Continue when `should_run` is `true` or `force_run` is `true`. If the - repo already has `shipgate.yaml`, CI should verify every PR; for local - pre-commit work, verify when the changed files are agent-related or when - you need a full advisory check before handing off. - -3.
**Treat protected-surface edits as verifier-owned review.** Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex - plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run: - ```bash - agents-shipgate preflight --workspace . --plan - --json - ``` - Pass a `PreflightPlanV1` object on stdin. If you need legacy shorthands, - pass changed-file or diff context directly: - ```bash - agents-shipgate preflight --workspace . \ - --changed-files /tmp/shipgate-changed-files.txt \ - --diff /tmp/shipgate.diff --json - ``` - If `requires_human_review` is true or `first_next_action.actor` is `human`, - stop and route the change to a human. Preflight is a routing surface only; - it does not replace the verifier. + plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, do not + self-approve the trust-root change. Run full verify before reporting + completion and route human review when the verifier requires it. 4. **Run the verifier.** For local uncommitted work, omit `--head` and omit `--base` so the checked-out working tree is scanned, including uncommitted edits: ```bash - agents-shipgate verify --workspace . --config shipgate.yaml \ + shipgate verify --workspace . --config shipgate.yaml \ --ci-mode advisory --format json ``` For committed PR or CI verification, pass the head ref explicitly: ```bash - agents-shipgate verify --workspace . --config shipgate.yaml \ + shipgate verify --workspace . --config shipgate.yaml \ --base origin/main --head HEAD --ci-mode advisory --format json ``` `verify` never fetches. If you pass `--base` and that ref is missing, @@ -96,8 +73,8 @@ work is complete. ## What NOT to do -- Do not claim the diff is verified until `agents-shipgate verify` has run or - `agents-shipgate trigger` has returned a clear skip verdict. 
+- Do not claim the diff is verified until `shipgate verify` has run or + `shipgate verify --preview --json` has returned a clear skip verdict. - Do not claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user explicitly accepts human review. diff --git a/skills/agents-shipgate/SKILL.md b/skills/agents-shipgate/SKILL.md index e7780a8f..16acee78 100644 --- a/skills/agents-shipgate/SKILL.md +++ b/skills/agents-shipgate/SKILL.md @@ -1,6 +1,6 @@ --- name: agents-shipgate -description: Run the deterministic merge gate when a change touches what an AI agent can do. Use after adding or modifying MCP servers or tools, tool/function definitions (@tool, @function_tool), OpenAPI specs that describe agent tools, agent prompts, permission scopes, approval or confirmation policies, agent CI workflows, or shipgate.yaml — and before creating a PR for any such change. Also use to verify agent-related PRs, fix or triage Shipgate findings, add Shipgate to CI, or interpret Shipgate verifier/report artifacts. Triggers on phrases like "add shipgate", "verify this agent PR", "merge verdict", "release readiness for my agent", "tool-use readiness", "scan my agent", "shipgate scan", "shipgate.yaml", "agents-shipgate-reports/verifier.json", "agents-shipgate-reports/report.json", "fix shipgate finding". +description: Run prominent Agents Shipgate flows when a change touches what an AI agent can do — `shipgate check`, `shipgate verify`, or `shipgate audit --host`. Use after adding or modifying MCP servers or tools, tool/function definitions (@tool, @function_tool), OpenAPI specs that describe agent tools, agent prompts, permission scopes, approval or confirmation policies, agent CI workflows, or shipgate.yaml — and before creating a PR for any such change. Also use to verify agent-related PRs, fix or triage Shipgate findings, add Shipgate to CI, or interpret Shipgate verifier/report artifacts.
Triggers on phrases like "add shipgate", "verify this agent PR", "merge verdict", "release readiness for my agent", "tool-use readiness", "shipgate check", "shipgate verify", "audit host grants", "shipgate.yaml", "agents-shipgate-reports/verifier.json", "agents-shipgate-reports/report.json", "fix shipgate finding". --- # agents-shipgate skill @@ -53,8 +53,8 @@ Always: `fix_task`, and `capability_review.top_changes`. Then parse `agents-shipgate-reports/report.json.release_decision.decision`; it is the release gate. -4. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run `agents-shipgate preflight --workspace . --plan - --json` with a `PreflightPlanV1` object. Legacy `--changed-files`/`--diff` shorthands remain available. If `requires_human_review` is true or `first_next_action.actor` is `human`, stop and route to a human. -5. Before finishing an agent-related diff, run `shipgate check --agent claude-code --workspace . --format codex-boundary-json`. For committed PR/CI verification, run `agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. `verify` never fetches. +4. Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, plan to run `shipgate verify` before completion and route trust-root review to a human when the verifier requires it. +5. Before finishing an agent-related diff, run `shipgate check --agent claude-code --workspace . --format codex-boundary-json`. For committed PR/CI verification, run `shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json` after making the base ref available. 
`verify` never fetches. For host grants, run `shipgate audit --host --json --out agents-shipgate-reports/host-grants.json`. 6. Do not bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions; verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible. 7. Confirm with the user before any command that writes files (`init --write`, `baseline save`). @@ -86,7 +86,7 @@ For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbuc ## Boundaries (do not violate) -- Do not claim a finding is fixed without re-running `agents-shipgate scan` and showing the diff in counts. +- Do not claim a finding is fixed without re-running `shipgate verify` and reporting the new merge verdict and release decision. - Do not silently suppress findings — `checks.ignore` requires a `reason` and the manifest validator rejects empty reasons. - Do not commit `agents-shipgate-reports/` — it's regenerated each run; add it to `.gitignore`. - Do not run `agents-shipgate baseline save` until the user has reviewed the initial findings; baselining ratchets in noise. diff --git a/skills/agents-shipgate/prompts/add-shipgate-to-repo.md b/skills/agents-shipgate/prompts/add-shipgate-to-repo.md index 01b03928..9d5007a6 100644 --- a/skills/agents-shipgate/prompts/add-shipgate-to-repo.md +++ b/skills/agents-shipgate/prompts/add-shipgate-to-repo.md @@ -5,23 +5,23 @@ You are working in a repo that may contain an AI agent — likely one of: an MCP Your job is to drive the first-adoption helper flow end-to-end in one tool-using turn, which adds the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. Ongoing -agent-related PRs should use `agents-shipgate verify` after this adoption step. +agent-related PRs should use `shipgate verify` after this adoption step. ## Your task -1. 
**Install the tool - pin the version so a stale build can't shadow it.** This flow uses the current verifier, agent-handoff, and Codex-boundary contracts and requires **contract v7 or newer**; an older copy lingering on `PATH` may lack the command or schema fields this prompt expects. Prefer a **pinned, zero-install** runner that fetches the exact version every time instead of trusting whatever is already on `PATH`. **Pin it into one variable and use that for every step below**, so no single command can fall through to a stale binary: +1. **Install the tool - pin the version so a stale build can't shadow it.** This flow uses the current verifier, agent-handoff, primary-command, and Codex-boundary contracts and requires **contract v8 or newer**; an older copy lingering on `PATH` may lack the command or schema fields this prompt expects. Prefer a **pinned, zero-install** runner that fetches the exact version every time instead of trusting whatever is already on `PATH`. **Pin it into one variable and use that for every step below**, so no single command can fall through to a stale binary: ```bash SG="uvx agents-shipgate@1.0.0a1" # uv: ephemeral, always the pinned build # or: SG="pipx run agents-shipgate==1.0.0a1" $SG --version # confirm the pinned runner resolves ``` - Every step below calls `$SG …`; e.g. `$SG detect …` runs `agents-shipgate detect` through the pinned runner, never a `PATH` copy. + Every step below calls `$SG …`; e.g. `$SG verify --preview --json` runs the verify preview through the pinned runner, never a `PATH` copy. 
If you would rather install onto `PATH`, pin the floor and **fail loudly when it resolves older** — a plain `pipx install agents-shipgate` is a no-op when an older build already exists — then set `SG=agents-shipgate`: ```bash python -m pip install -U --pre agents-shipgate - agents-shipgate contract --json # STOP if this reports contract_version < 5 - re-run pinned via uvx agents-shipgate@1.0.0a1 - SG=agents-shipgate # only after the line above confirms contract v7+ + agents-shipgate contract --json # STOP if this reports contract_version < 8 - re-run pinned via uvx agents-shipgate@1.0.0a1 + SG=agents-shipgate # only after the line above confirms contract v8+ ``` 2. **Sanity-check the install** before touching the user's code: @@ -40,13 +40,20 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. using [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) and upgrade before relying on local contract verification in automation. -3. **Detect:** +3. **Preview the verify flow:** ```bash - $SG detect --workspace . --json + $SG verify --preview --json ``` - Read the response: `is_agent_project`, `frameworks[]` (per-framework score + evidence + candidate files), `agent_name_candidates[]`, `suggested_sources[]` (MCP/OpenAPI files matched by glob). - - **Stop only when ALL of these hold:** `is_agent_project: false`, `suggested_sources` is empty, `codex_plugin_candidates` is empty, no `shipgate.yaml` already exists in the workspace, AND the user did not explicitly request a scan. Otherwise proceed — MCP/OpenAPI tool-surface repos and Codex plugin package repos register as `is_agent_project: false` because they have no Python framework imports, but they are valid Shipgate targets. MCP/OpenAPI hits surface as `suggested_sources`; Codex plugin hits surface as `codex_plugin_candidates`. + Read the response and next action. 
Preview is the first-look verify entry + point: it does not require a manifest, does not scan, and tells you whether + to configure Shipgate, skip, or run the full verifier. + + If preview exposes trigger/detection metadata, stop only when all relevance + signals are absent and the user did not explicitly request Shipgate. + Otherwise proceed. MCP/OpenAPI tool-surface repos and Codex plugin package + repos can be valid Shipgate targets even when Python framework detection + would classify `is_agent_project: false`; look for `suggested_sources` and + `codex_plugin_candidates` when those fields are present. 4. **Generate a starter manifest + GitHub Actions workflow:** ```bash diff --git a/skills/agents-shipgate/prompts/decide-shipgate-relevance.md b/skills/agents-shipgate/prompts/decide-shipgate-relevance.md index 63904ca1..1ba20ab9 100644 --- a/skills/agents-shipgate/prompts/decide-shipgate-relevance.md +++ b/skills/agents-shipgate/prompts/decide-shipgate-relevance.md @@ -69,12 +69,12 @@ the rules to the changed file list. ``` 5. **Then act.** - - If `run_shipgate: true`: run `detect` through a **version-pinned, + - If `run_shipgate: true`: run verify preview through a **version-pinned, zero-install** runner so a stale copy already on `PATH` can't answer — - `uvx agents-shipgate@1.0.0a1 detect --workspace . --json` (or + `uvx agents-shipgate@1.0.0a1 verify --preview --json` (or `pipx run agents-shipgate==1.0.0a1 ...`). Only fall back to a bare - `agents-shipgate detect` once `agents-shipgate --version` confirms - contract v7 or newer. Then follow + `shipgate verify --preview --json` once `agents-shipgate --version` + confirms contract v8 or newer. Then follow [`prompts/add-shipgate-to-repo.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/prompts/add-shipgate-to-repo.md) for the first-adoption helper flow, or point the user at the GitHub Action (`ThreeMoonsLab/agents-shipgate@v1.0.0a1`) if they prefer CI. 
diff --git a/skills/agents-shipgate/prompts/verify-agent-diff.md b/skills/agents-shipgate/prompts/verify-agent-diff.md index ec706e11..20bbcb2d 100644 --- a/skills/agents-shipgate/prompts/verify-agent-diff.md +++ b/skills/agents-shipgate/prompts/verify-agent-diff.md @@ -11,54 +11,31 @@ work is complete. export AGENTS_SHIPGATE_AGENT_MODE=1 ``` -2. **Decide whether the diff needs Shipgate.** - For a committed PR diff: +2. **Use verify preview only when relevance or setup is unclear.** ```bash - agents-shipgate trigger --workspace . --base origin/main --head HEAD --json - ``` - For a local pre-commit working-tree diff, or when the base ref is - unavailable locally, use the changed-files fallback: - ```bash - git diff --name-only HEAD > /tmp/shipgate-changed-files.txt - git diff HEAD > /tmp/shipgate.diff - agents-shipgate trigger --workspace . \ - --changed-files /tmp/shipgate-changed-files.txt \ - --diff /tmp/shipgate.diff --json + shipgate verify --preview --json ``` + Preview is a lightweight verify entry point: no manifest required, no scan, + exit 0. It tells you whether to configure Shipgate, skip, or run the full + verifier. If the repo already has `shipgate.yaml`, proceed to full verify. - Continue when `should_run` is `true` or `force_run` is `true`. If the - repo already has `shipgate.yaml`, CI should verify every PR; for local - pre-commit work, verify when the changed files are agent-related or when - you need a full advisory check before handing off. - -3. **Run preflight before protected-surface edits.** +3. **Treat protected-surface edits as verifier-owned review.** Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex - plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run: - ```bash - agents-shipgate preflight --workspace . --plan - --json - ``` - Pass a `PreflightPlanV1` object on stdin. 
If you need legacy shorthands, - pass changed-file or diff context directly: - ```bash - agents-shipgate preflight --workspace . \ - --changed-files /tmp/shipgate-changed-files.txt \ - --diff /tmp/shipgate.diff --json - ``` - If `requires_human_review` is true or `first_next_action.actor` is `human`, - stop and route the change to a human. Preflight is a routing surface only; - it does not replace the verifier. + plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, do not + self-approve the trust-root change. Run full verify before reporting + completion and route human review when the verifier requires it. 4. **Run the verifier.** For local uncommitted work, omit `--head` and omit `--base` so the checked-out working tree is scanned, including uncommitted edits: ```bash - agents-shipgate verify --workspace . --config shipgate.yaml \ + shipgate verify --workspace . --config shipgate.yaml \ --ci-mode advisory --format json ``` For committed PR or CI verification, pass the head ref explicitly: ```bash - agents-shipgate verify --workspace . --config shipgate.yaml \ + shipgate verify --workspace . --config shipgate.yaml \ --base origin/main --head HEAD --ci-mode advisory --format json ``` `verify` never fetches. If you pass `--base` and that ref is missing, @@ -96,8 +73,8 @@ work is complete. ## What NOT to do -- Do not claim the diff is verified until `agents-shipgate verify` has run or - `agents-shipgate trigger` has returned a clear skip verdict. +- Do not claim the diff is verified until `shipgate verify` has run or + `shipgate verify --preview --json` has returned a clear skip verdict. - Do not claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user explicitly accepts human review. 
diff --git a/src/agents_shipgate/cli/_register_baseline.py b/src/agents_shipgate/cli/_register_baseline.py index 81e9cd8f..2cb6c680 100644 --- a/src/agents_shipgate/cli/_register_baseline.py +++ b/src/agents_shipgate/cli/_register_baseline.py @@ -333,7 +333,7 @@ def baseline_status( if gating and violations: raise typer.Exit(20) - app.add_typer(baseline_app, name="baseline") + app.add_typer(baseline_app, name="baseline", hidden=True) def _coerce_evidence(evidence: dict[str, object]) -> dict[str, object]: diff --git a/src/agents_shipgate/cli/_register_contract.py b/src/agents_shipgate/cli/_register_contract.py index dc759495..3152fbe0 100644 --- a/src/agents_shipgate/cli/_register_contract.py +++ b/src/agents_shipgate/cli/_register_contract.py @@ -8,7 +8,7 @@ def register(app: typer.Typer) -> None: - @app.command() + @app.command(hidden=True) def contract( json_output: bool = typer.Option(False, "--json", help="Emit JSON instead of text."), ) -> None: @@ -78,6 +78,9 @@ def contract( typer.echo("MCP tools:") for tool in payload.mcp_tools: typer.echo(f" {tool}") + typer.echo("Primary commands:") + for name, command in payload.primary_commands.items(): + typer.echo(f" {name}: {command}") typer.echo("Commands:") for name, command in payload.commands.items(): typer.echo(f" {name}: {command}") diff --git a/src/agents_shipgate/cli/_register_doctor.py b/src/agents_shipgate/cli/_register_doctor.py index f0d9e7f5..2f9738d6 100644 --- a/src/agents_shipgate/cli/_register_doctor.py +++ b/src/agents_shipgate/cli/_register_doctor.py @@ -23,7 +23,7 @@ def register(app: typer.Typer) -> None: - @app.command() + @app.command(hidden=True) def doctor( config: str = typer.Option("shipgate.yaml", "--config", "-c", help="Path or quoted glob."), workspace: Path | None = typer.Option(None, "--workspace", help="Inspect every manifest below workspace."), diff --git a/src/agents_shipgate/cli/_register_explain.py b/src/agents_shipgate/cli/_register_explain.py index b893727a..9fe0920b 100644 --- 
a/src/agents_shipgate/cli/_register_explain.py +++ b/src/agents_shipgate/cli/_register_explain.py @@ -11,7 +11,7 @@ def register(app: typer.Typer) -> None: - @app.command() + @app.command(hidden=True) def explain( check_id: str, no_plugins: bool = typer.Option( diff --git a/src/agents_shipgate/cli/_register_init.py b/src/agents_shipgate/cli/_register_init.py index cf06b849..72a64b82 100644 --- a/src/agents_shipgate/cli/_register_init.py +++ b/src/agents_shipgate/cli/_register_init.py @@ -166,7 +166,7 @@ def _claude_code_outcome_lines(outcome: dict[str, object]) -> list[str]: def register(app: typer.Typer) -> None: - @app.command() + @app.command(hidden=True) def init( workspace: Path = typer.Option(Path("."), "--workspace", help="Workspace to inspect."), write: bool = typer.Option( diff --git a/src/agents_shipgate/cli/_register_list_checks.py b/src/agents_shipgate/cli/_register_list_checks.py index 26907b0f..4313d84c 100644 --- a/src/agents_shipgate/cli/_register_list_checks.py +++ b/src/agents_shipgate/cli/_register_list_checks.py @@ -8,7 +8,7 @@ def register(app: typer.Typer) -> None: - @app.command("list-checks") + @app.command("list-checks", hidden=True) def list_checks( json_output: bool = typer.Option(False, "--json", help="Emit JSON instead of text."), no_plugins: bool = typer.Option( diff --git a/src/agents_shipgate/cli/_register_scan.py b/src/agents_shipgate/cli/_register_scan.py index a1cbca06..59ab3c98 100644 --- a/src/agents_shipgate/cli/_register_scan.py +++ b/src/agents_shipgate/cli/_register_scan.py @@ -49,7 +49,7 @@ def _build_verification_context( def register(app: typer.Typer) -> None: - @app.command() + @app.command(hidden=True) def scan( config: str = typer.Option( "shipgate.yaml", diff --git a/src/agents_shipgate/cli/diagnostics.py b/src/agents_shipgate/cli/diagnostics.py index 86a351ac..c0476151 100644 --- a/src/agents_shipgate/cli/diagnostics.py +++ b/src/agents_shipgate/cli/diagnostics.py @@ -61,7 +61,7 @@ def _quote_path(value: str | Path) 
-> str: def diagnose_missing_manifest(workspace: Path) -> list[Diagnostic]: - """``shipgate.yaml`` is absent. The agent should detect, then init.""" + """``shipgate.yaml`` is absent. The agent should start with verify preview.""" workspace_q = _quote_path(workspace) return [ Diagnostic( @@ -71,14 +71,14 @@ def diagnose_missing_manifest(workspace: Path) -> list[Diagnostic]: next_actions=[ NextAction( kind="command", - command=f"agents-shipgate detect --workspace {workspace_q} --json", + command=f"shipgate verify --workspace {workspace_q} --preview --json", why=( - "Confirm this is an agent project before writing a " - "manifest. detect is read-only." + "Ask the verify flow whether this workspace needs " + "Shipgate configuration before writing a manifest." ), expects=( - "JSON with is_agent_project, suggested_sources, and " - "diagnostics." + "JSON preview result with the next setup or skip " + "action." ), ), NextAction( diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py index a995cd5c..bd6ddf1e 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py @@ -31,11 +31,11 @@ def render_block() -> str: shipgate check --agent codex --workspace . --format codex-boundary-json shipgate check --agent claude-code --workspace . --format codex-boundary-json shipgate check --agent cursor --workspace . --format codex-boundary-json -agents-shipgate verify --preview --json -agents-shipgate preflight --workspace . --plan - --json -agents-shipgate init --workspace . --write --ci --agent-instructions=default --json -agents-shipgate verify --workspace . --config shipgate.yaml \\ +shipgate verify --workspace . --config shipgate.yaml \\ --ci-mode advisory --format json +shipgate verify --workspace . 
--config shipgate.yaml \\ + --base origin/main --head HEAD --ci-mode advisory --format json +shipgate audit --host --json --out agents-shipgate-reports/host-grants.json ``` For local agent control, read the `shipgate check` stdout JSON only. It is @@ -43,22 +43,13 @@ def render_block() -> str: `completion_allowed`, and `must_stop`, then follow `first_next_action`, `human_review`, `repair`, and `policy`. Do not infer a decision from prose. -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run -`agents-shipgate preflight --workspace . --plan - --json` with a -`PreflightPlanV1` object. Legacy shorthands such as -`agents-shipgate preflight --changed-files changed.txt --json` remain available. -If `requires_human_review` is `true` or -`first_next_action.actor` is `human`, stop and route the change to a human. - Before finishing an agent-related diff, run `shipgate check`. If `decision=allow` or `warn`, continue and summarize. If `first_next_action.kind` is `repair` and `repair.safe_to_attempt=true`, make only the listed mechanical repair and rerun the command. If `human_review.required=true` or `must_stop=true`, stop and surface the JSON result to a human. -For committed PR/CI verification, run `agents-shipgate verify --base +For committed PR/CI verification, run `shipgate verify --base origin/main --head HEAD --json` after making the base ref available; it never fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `gate.merge_verdict`, `gate.can_merge_without_human`, and `controller`; then read @@ -69,6 +60,10 @@ def render_block() -> str: Legacy `agent-result.json` surfaces, where present, are supporting/provisional projections and not the CI gate. 
+For coding-agent host grants, run `shipgate audit --host` and read the emitted +host-grants inventory before changing MCP servers, permission rules, hooks, or +workflow scopes. + Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, or prohibited-action policy decisions; surface those as human review items. diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py index 7627f71c..7c26b4b7 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py @@ -23,10 +23,11 @@ def render_block() -> str: ```bash shipgate check --agent claude-code --workspace . --format codex-boundary-json -agents-shipgate verify --preview --json -agents-shipgate preflight --workspace . --plan - --json -agents-shipgate verify --workspace . --config shipgate.yaml \\ +shipgate verify --workspace . --config shipgate.yaml \\ --ci-mode advisory --format json +shipgate verify --workspace . --config shipgate.yaml \\ + --base origin/main --head HEAD --ci-mode advisory --format json +shipgate audit --host --json --out agents-shipgate-reports/host-grants.json ``` For local agent control, read the `shipgate check` stdout JSON only. It is @@ -40,16 +41,7 @@ def render_block() -> str: repair and rerun the command. If `human_review.required=true` or `must_stop=true`, stop and surface the JSON result to a human. -Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run -`agents-shipgate preflight --workspace . --plan - --json` with a -`PreflightPlanV1` object. Legacy shorthands such as -`agents-shipgate preflight --changed-files changed.txt --json` remain available. 
-If `requires_human_review` is `true` or -`first_next_action.actor` is `human`, stop and route the change to a human. - -For committed PR/CI verification, run `agents-shipgate verify --base +For committed PR/CI verification, run `shipgate verify --base origin/main --head HEAD --json` after making the base ref available; it never fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `gate.merge_verdict`, `gate.can_merge_without_human`, and `controller`; then read @@ -60,6 +52,10 @@ def render_block() -> str: Legacy `agent-result.json` surfaces, where present, are supporting/provisional projections and not the CI gate. +For coding-agent host grants, run `shipgate audit --host` and read the emitted +host-grants inventory before changing MCP servers, permission rules, hooks, or +workflow scopes. + Use `apply-patches --confidence high --apply` only for high-confidence safe patches. Approval, confirmation, idempotency, broad-scope, and prohibited-action changes require human review. diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py index 74efee6c..fc3f55bd 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py @@ -51,10 +51,6 @@ def render_file() -> str: permissions, approval policies, or release gates, run Agents Shipgate. Default to advisory verification while adopting the gate. -Before protected edits, run preflight and read `PreflightResultV2`: - - agents-shipgate preflight --workspace . --plan - --json - For local agent control, run: shipgate check --agent cursor --workspace . --format codex-boundary-json @@ -70,16 +66,11 @@ def render_file() -> str: `human_review.required=true` or `must_stop=true`, stop and surface the JSON result to a human. 
-Before editing `shipgate.yaml`, Shipgate CI, AGENTS/CLAUDE/Cursor rules, -policy packs, baselines, waivers, suppressions, Codex hooks/config, Codex -plugin manifests, `.mcp.json`, `.app.json`, or `SKILL.md`, run -`agents-shipgate preflight --workspace . --plan - --json` with a -`PreflightPlanV1` object. Legacy shorthands such as -`agents-shipgate preflight --changed-files changed.txt --json` remain available. -If `requires_human_review` is `true` or -`first_next_action.actor` is `human`, stop and route the change to a human. +For local verification, run: + + shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json -For committed PR/CI verification, run `agents-shipgate verify --base +For committed PR/CI verification, run `shipgate verify --base origin/main --head HEAD --json` after making the base ref available; it never fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `gate.merge_verdict`, `gate.can_merge_without_human`, and `controller`; then read @@ -90,6 +81,13 @@ def render_file() -> str: Legacy `agent-result.json` surfaces, where present, are supporting/provisional projections and not the CI gate. +For coding-agent host grants, run: + + shipgate audit --host --json --out agents-shipgate-reports/host-grants.json + +Read the host-grants inventory before changing MCP servers, permission rules, +hooks, or workflow scopes. + Apply only high-confidence safe patches. Do not invent approval, confirmation, or idempotency evidence. 
diff --git a/src/agents_shipgate/cli/discovery/local_contract.py b/src/agents_shipgate/cli/discovery/local_contract.py index 1ac5b3b5..ba1edaa4 100644 --- a/src/agents_shipgate/cli/discovery/local_contract.py +++ b/src/agents_shipgate/cli/discovery/local_contract.py @@ -28,6 +28,7 @@ MCP_TOOLS, MERGE_VERDICTS, ORG_EVIDENCE_BUNDLE_SCHEMA_VERSION, + PRIMARY_COMMANDS, REGISTRY_SCHEMA_VERSION, RELEASE_DECISIONS, VERIFIER_READ_ORDER, @@ -35,7 +36,7 @@ ) from agents_shipgate.schemas.verifier import VerifierArtifact -LOCAL_CONTRACT_SCHEMA_VERSION = "1" +LOCAL_CONTRACT_SCHEMA_VERSION = "2" LOCAL_CONTRACT_RELATIVE_PATH = ".shipgate/agent-contract.json" @@ -48,6 +49,7 @@ class LocalAgentContract(BaseModel): agents_shipgate_version: str contract_version: str default_paths: dict[str, str] + primary_commands: dict[str, str] commands: dict[str, str] artifacts: dict[str, str] agent_read_order: list[str] @@ -82,6 +84,7 @@ def build_local_agent_contract() -> LocalAgentContract: agents_shipgate_version=__version__, contract_version=CONTRACT_VERSION, default_paths=dict(DEFAULT_PATHS), + primary_commands=dict(PRIMARY_COMMANDS), commands=dict(COMMANDS), artifacts=dict(ARTIFACTS), agent_read_order=list(AGENT_READ_ORDER), diff --git a/src/agents_shipgate/cli/first_look.py b/src/agents_shipgate/cli/first_look.py index c2b4e4a2..99a53cc7 100644 --- a/src/agents_shipgate/cli/first_look.py +++ b/src/agents_shipgate/cli/first_look.py @@ -146,8 +146,8 @@ def _next_command(has_manifest: bool, has_agent_surface: bool, has_host: bool) - return "Next: `shipgate verify --base origin/main --head HEAD` to gate your PR." if has_agent_surface: return ( - "Next: `shipgate init` to set up the gate, or `shipgate check` to " - "check your working tree now." + "Next: `shipgate verify --preview --json` to confirm the setup path, " + "or `shipgate check` to check your working tree now." 
) if has_host: return ( @@ -156,7 +156,8 @@ def _next_command(has_manifest: bool, has_agent_surface: bool, has_host: bool) - ) return ( "Next: no agent tool surface or host config detected — Shipgate may not " - "apply here. Run `shipgate detect` for the full classification." + "apply here. Run `shipgate audit --host` if you need a host-grant " + "inventory." ) diff --git a/src/agents_shipgate/cli/install_hooks.py b/src/agents_shipgate/cli/install_hooks.py index 6e8e313c..e195cf28 100644 --- a/src/agents_shipgate/cli/install_hooks.py +++ b/src/agents_shipgate/cli/install_hooks.py @@ -600,10 +600,7 @@ def _trigger(payload: dict[str, Any], root: Path, args: argparse.Namespace) -> i if (root / args.config).is_file(): command = _manual_verify_command(args, root=root) else: - command = ( - "AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json " - "&& agents-shipgate init --workspace . --write --ci --json" - ) + command = "AGENTS_SHIPGATE_AGENT_MODE=1 shipgate verify --preview --json" return _emit_context( "PostToolUse", ( @@ -632,8 +629,8 @@ def _verify(payload: dict[str, Any], root: Path, args: argparse.Namespace) -> in if trigger and trigger.get("should_run") and not stop_hook_active: return _emit_stop_block( "Agents Shipgate trigger matched, but no shipgate.yaml exists. " - "Before finishing, run `agents-shipgate detect --workspace . --json`, " - "initialize the manifest if relevant, then scan or verify." + "Before finishing, run `shipgate verify --preview --json`, " + "initialize the manifest if relevant, then verify." ) return 0 diff --git a/src/agents_shipgate/cli/main.py b/src/agents_shipgate/cli/main.py index 890b5aef..63687e36 100644 --- a/src/agents_shipgate/cli/main.py +++ b/src/agents_shipgate/cli/main.py @@ -50,10 +50,12 @@ ) app.command( "self-check", + hidden=True, help="Verify install and bundled fixtures. 
Run this first in a fresh environment.", )(self_check) app.command( "detect", + hidden=True, help="Classify a workspace: which agent framework(s), if any. Read-only.", )(_detect_command) app.command( @@ -62,6 +64,7 @@ )(_check_command) app.command( "preflight", + hidden=True, help=( "Run proactive static preflight: protected surfaces, forbidden edits, " "and high-risk capability evidence requirements." @@ -69,6 +72,7 @@ )(_preflight_command) app.command( "apply-patches", + hidden=True, help=( "Apply patches from a scan JSON report. Dry-run by default; pass " "--apply to mutate. Containment-checked against the report's " @@ -87,6 +91,7 @@ )(_evidence_packet_command) app.command( "bootstrap", + hidden=True, help=( "Run the canonical 4-call adoption flow in one command: " "detect → init --write --ci → scan --suggest-patches → " @@ -95,6 +100,7 @@ )(_bootstrap_command) app.command( "explain-finding", + hidden=True, help=( "Explain a specific finding from a `report.json`, with evidence " "and a 3–5 sentence prose summary. Companion to `explain " @@ -103,6 +109,7 @@ )(_explain_finding_command) app.command( "findings", + hidden=True, help=( "Filter findings from a `report.json` by provenance kind for " "reviewer triage." @@ -110,6 +117,7 @@ )(_findings_command) app.command( "trigger", + hidden=True, help=( "Evaluate the trigger catalog against a diff and emit a run/skip " "verdict. Reads --changed-files / --diff, or --base/--head (git)." @@ -124,6 +132,7 @@ )(_verify_command) app.command( "attest", + hidden=True, help=( "Derive a deterministic local release attestation from verifier.json " "(verdict, capability delta, human-ack state, policy + artifact hashes)." @@ -131,6 +140,7 @@ )(_attest_command) app.command( "install-hooks", + hidden=True, help=( "Install advisory local coding-agent hooks. Currently supports " "--target claude-code." @@ -140,7 +150,8 @@ app.command( "audit", help=( - "Zero-config host-grant audits. 
`audit --host` inventories " + "Run `shipgate audit --host` for zero-config host-grant audits. " + "`audit --host` inventories " "coding-agent host grants (MCP servers, permission rules, hooks, " "workflow scopes) without requiring shipgate.yaml; `--save-baseline` " "records the acknowledged state (writes one JSON file under " @@ -150,7 +161,7 @@ )(_audit_command) -@app.command("mcp-serve") +@app.command("mcp-serve", hidden=True) def _mcp_serve_command() -> None: """Serve the optional read-only MCP server over stdio. @@ -173,21 +184,19 @@ def _mcp_serve_command() -> None: _register_init.register(app) _register_doctor.register(app) _register_baseline.register(app) -# Visibility policy (WS-D): `--help` shows the core loop — detect / check / -# verify / init / scan / audit and their direct companions. Niche or -# maintainer-facing surfaces (`hidden=True` here and on `evidence-packet` -# above) stay fully invokable and documented — hiding is presentation only, -# not deprecation, so STABILITY.md is unaffected. `fixture` stays visible -# because the README's 60-second demo leads with `fixture run`. -app.add_typer(fixture_app, name="fixture") +# Visibility policy: root --help shows only the prominent flows: +# `shipgate check`, `shipgate verify`, and `shipgate audit --host`. +# Supporting/compatibility commands stay fully invokable and documented +# through their direct --help; hiding is presentation, not deprecation. 
+app.add_typer(fixture_app, name="fixture", hidden=True) app.add_typer(feedback_app, name="feedback", hidden=True) app.add_typer(scenario_app, name="scenario", hidden=True) app.add_typer(skill_app, name="skill", hidden=True) -app.add_typer(capability_app, name="capability") -app.add_typer(agent_app, name="agent") -app.add_typer(mcp_app, name="mcp") -app.add_typer(org_app, name="org") -app.add_typer(registry_app, name="registry") +app.add_typer(capability_app, name="capability", hidden=True) +app.add_typer(agent_app, name="agent", hidden=True) +app.add_typer(mcp_app, name="mcp", hidden=True) +app.add_typer(org_app, name="org", hidden=True) +app.add_typer(registry_app, name="registry", hidden=True) logger = logging.getLogger(__name__) diff --git a/src/agents_shipgate/cli/verify/orchestrator.py b/src/agents_shipgate/cli/verify/orchestrator.py index bae76ffc..0860f868 100644 --- a/src/agents_shipgate/cli/verify/orchestrator.py +++ b/src/agents_shipgate/cli/verify/orchestrator.py @@ -120,8 +120,8 @@ def run_verify( ) message = ( f"Shipgate config not found at {_display_path(config_path, git_root)}. " - "Correct --config, or run `agents-shipgate verify --preview --json` " - "and `agents-shipgate detect --workspace . --json` before initializing." + "Correct --config, or run `shipgate verify --preview --json` before " + "initializing." ) verifier = _build_verifier( git_root=git_root, @@ -145,11 +145,10 @@ def run_verify( first_next_action_override=VerifierNextAction( actor="coding_agent", kind="command", - command="agents-shipgate verify --preview --json", + command="shipgate verify --preview --json", why=( - "Shipgate could not find the configured manifest; preview or " - "detect the workspace, then correct --config or initialize " - "shipgate.yaml." + "Shipgate could not find the configured manifest; run verify " + "preview, then correct --config or initialize shipgate.yaml." 
), ), ) diff --git a/src/agents_shipgate/core/codex_boundary.py b/src/agents_shipgate/core/codex_boundary.py index 62fec0ea..d12b995e 100644 --- a/src/agents_shipgate/core/codex_boundary.py +++ b/src/agents_shipgate/core/codex_boundary.py @@ -391,9 +391,9 @@ def evaluate_codex_boundary_result( tool sources. ``verify`` will gate these, so route to ``verify``. - ``undeclared_capability_surfaces`` — changed files that ARE tool surfaces but the manifest does not declare (or there is no manifest). ``verify`` - cannot gate an undeclared surface, so route to declare-then-verify - (``detect``). Takes precedence when a diff changes both, since ``verify`` - alone would miss the undeclared one. + cannot gate an undeclared surface yet, so route through verify preview + before full verify. Takes precedence when a diff changes both, since full + ``verify`` alone would miss the undeclared one. """ # Keep this local diff projector aligned with @@ -499,7 +499,7 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None: summary = _undeclared_summary(undeclared_surfaces) diagnostics = [*diagnostics, _undeclared_diagnostic(undeclared_surfaces)] trace = [*_trace_for(policy, decision, violations), _undeclared_trace(undeclared_surfaces)] - suggested_fixes = [_DETECT_COMMAND, _VERIFY_COMMAND] + suggested_fixes = [_VERIFY_PREVIEW_COMMAND, _VERIFY_COMMAND] elif coverage_gap: first_next_action = _coverage_next_action() summary = _coverage_summary(coverage_surfaces) @@ -1356,22 +1356,22 @@ def _risk_for(violations: list[AgentResultViolatedRule]) -> AgentResultRiskLevel # Canonical capability gate. check is boundary-only; verify computes the -# capability delta and owns release_decision.decision. Bare ``verify --json`` -# auto-detects the base (v0.13) and emits the boundary-result surface, so it -# works for both the local working tree and committed refs. 
-_VERIFY_COMMAND = "agents-shipgate verify --json" -_DETECT_COMMAND = "agents-shipgate detect --json" +# capability delta and owns release_decision.decision. Preview stays inside the +# verify flow for unconfigured or undeclared surfaces. +_VERIFY_COMMAND = "shipgate verify --json" +_VERIFY_PREVIEW_COMMAND = "shipgate verify --preview --json" def _undeclared_next_action() -> AgentResultNextAction: return AgentResultNextAction( actor="coding_agent", kind="warn", - command=_DETECT_COMMAND, + command=_VERIFY_PREVIEW_COMMAND, why=( "This diff changes a tool/capability surface that shipgate.yaml does not " "declare, so neither check nor verify gates it yet. Declare the surface " - "(run detect or add it to tool_sources), then run verify before completing." + "from verify preview guidance or add it to tool_sources, then run verify " + "before completing." ), ) @@ -1380,8 +1380,8 @@ def _undeclared_summary(surfaces: list[str]) -> str: return ( "No Codex boundary rule fired, but the diff changes a tool/capability surface " f"({', '.join(surfaces[:5])}) that shipgate.yaml does not declare, so verify " - "cannot gate it yet. Declare it (detect or tool_sources) and run verify before " - "reporting completion." + "cannot gate it yet. Use verify preview guidance or add it to tool_sources, " + "then run verify before reporting completion." ) @@ -1402,7 +1402,7 @@ def _undeclared_trace(surfaces: list[str]) -> AgentResultTraceEvent: step="coverage", summary=( f"boundary_only: {len(surfaces)} undeclared tool surface(s) changed; " - "routed to detect + verify." + "routed to verify preview + verify." 
), ) diff --git a/src/agents_shipgate/schemas/contract.py b/src/agents_shipgate/schemas/contract.py index ca883036..9460ab69 100644 --- a/src/agents_shipgate/schemas/contract.py +++ b/src/agents_shipgate/schemas/contract.py @@ -33,7 +33,7 @@ from agents_shipgate.schemas.verifier import VerifierArtifact from agents_shipgate.schemas.verify_run import VERIFY_RUN_SCHEMA_VERSION -CONTRACT_VERSION: Literal["7"] = "7" +CONTRACT_VERSION: Literal["8"] = "8" GATING_SIGNAL: Literal["release_decision.decision"] = "release_decision.decision" AGENT_RESULT_SCHEMA_VERSION: Literal["agent_result_v1"] = "agent_result_v1" AGENT_RESULT_SCHEMA_PATH: Literal["docs/agent-result-schema.v1.json"] = ( @@ -178,6 +178,28 @@ ), "contract": "agents-shipgate contract --json", } +PRIMARY_COMMANDS: dict[str, str] = { + "check_codex": ( + "shipgate check --agent codex --workspace . --format codex-boundary-json" + ), + "check_claude_code": ( + "shipgate check --agent claude-code --workspace . --format codex-boundary-json" + ), + "check_cursor": ( + "shipgate check --agent cursor --workspace . --format codex-boundary-json" + ), + "verify_local": ( + "shipgate verify --workspace . --config shipgate.yaml " + "--ci-mode advisory --json" + ), + "verify_pr": ( + "shipgate verify --workspace . 
--config shipgate.yaml " + "--base origin/main --head HEAD --ci-mode advisory --json" + ), + "host_audit": ( + "shipgate audit --host --json --out agents-shipgate-reports/host-grants.json" + ), +} ARTIFACTS: dict[str, str] = { "verifier": "agents-shipgate-reports/verifier.json", "verify_run": "agents-shipgate-reports/verify-run.json", @@ -271,6 +293,7 @@ class ContractPayload(BaseModel): agent_interface_operations: list[str] exit_code_policy: dict[str, str] mcp_tools: list[str] + primary_commands: dict[str, str] commands: dict[str, str] default_paths: dict[str, str] artifacts: dict[str, str] @@ -317,6 +340,7 @@ def build_contract_payload() -> ContractPayload: agent_interface_operations=list(AGENT_INTERFACE_OPERATIONS), exit_code_policy=dict(EXIT_CODE_POLICY), mcp_tools=list(MCP_TOOLS), + primary_commands=dict(PRIMARY_COMMANDS), commands=dict(COMMANDS), default_paths=dict(DEFAULT_PATHS), artifacts=dict(ARTIFACTS), @@ -351,6 +375,7 @@ def build_contract_payload() -> ContractPayload: "MERGE_VERDICTS", "MCP_TOOLS", "ORG_EVIDENCE_BUNDLE_SCHEMA_VERSION", + "PRIMARY_COMMANDS", "REGISTRY_SCHEMA_VERSION", "RELEASE_DECISIONS", "SUPPORTED_INPUTS", diff --git a/src/agents_shipgate/triggers.py b/src/agents_shipgate/triggers.py index 638c4d74..a1cbe20b 100644 --- a/src/agents_shipgate/triggers.py +++ b/src/agents_shipgate/triggers.py @@ -3,7 +3,7 @@ The catalog (``docs/triggers.json``) is the machine-readable mirror of the AGENTS.md trigger table. A coding agent that has not yet adopted Shipgate can fetch ``triggers.json`` and apply the rules against a PR diff or repo -state to decide whether to propose ``agents-shipgate detect`` as the next +state to decide whether to propose ``shipgate verify --preview --json`` as the next step, without parsing prose. This module is the canonical evaluator. It exists primarily so: @@ -144,15 +144,15 @@ def _next_action( Deterministic projection of the run/skip decision into an actor- agnostic ``{kind, command, why}``. 
Adopted repos (a manifest is present) are pointed at ``verify`` — the canonical ongoing-PR gate; - un-adopted repos are pointed at the catalog ``detect`` command so a - coding agent can bootstrap. ``command`` is ``None`` when no action + un-adopted repos are pointed at the catalog verify-preview command so a + coding agent can route setup. ``command`` is ``None`` when no action is warranted. """ if run: if manifest_present: return { "kind": "command", - "command": "agents-shipgate verify --base origin/main --head HEAD --json", + "command": "shipgate verify --base origin/main --head HEAD --json", "why": ( "This change affects an agent tool or release-policy " "surface; verify whether the PR can merge." @@ -165,13 +165,13 @@ def _next_action( "kind": "command", "command": command, "why": ( - "This change looks agent-related; detect tool surfaces " - "and adopt Shipgate." + "This change looks agent-related; start with verify preview " + "and adopt Shipgate if the preview routes setup." ), } if dry_run_recommended: command = ( - "agents-shipgate verify --base origin/main --head HEAD " + "shipgate verify --base origin/main --head HEAD " "--ci-mode advisory --json" if manifest_present else default_command @@ -420,7 +420,7 @@ def evaluate( manifest_present=manifest_present, matched=matched, default_command=triggers.get( - "default_command", "agents-shipgate detect --workspace . --json" + "default_command", "shipgate verify --preview --json" ), rationale=rationale, ) diff --git a/tests/golden/agent_protocol/claude-code-block-stop.json b/tests/golden/agent_protocol/claude-code-block-stop.json index 31366efc..ea0c8e9f 100644 --- a/tests/golden/agent_protocol/claude-code-block-stop.json +++ b/tests/golden/agent_protocol/claude-code-block-stop.json @@ -124,8 +124,8 @@ "diff_tokens": [], "next_action": { "kind": "command", - "command": "agents-shipgate detect --workspace . --json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." 
+ "command": "shipgate verify --preview --json", + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/golden/agent_protocol/codex-block-stop.json b/tests/golden/agent_protocol/codex-block-stop.json index 16527f7f..1f12609b 100644 --- a/tests/golden/agent_protocol/codex-block-stop.json +++ b/tests/golden/agent_protocol/codex-block-stop.json @@ -124,8 +124,8 @@ "diff_tokens": [], "next_action": { "kind": "command", - "command": "agents-shipgate detect --workspace . --json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "command": "shipgate verify --preview --json", + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/golden/agent_protocol/codex-repair-after.json b/tests/golden/agent_protocol/codex-repair-after.json index 1c79dc07..7a87e9e6 100644 --- a/tests/golden/agent_protocol/codex-repair-after.json +++ b/tests/golden/agent_protocol/codex-repair-after.json @@ -103,7 +103,7 @@ "next_action": { "kind": "command", "command": "shipgate check --agent codex --workspace . --format codex-boundary-json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [], diff --git a/tests/golden/agent_protocol/codex-repair-before.json b/tests/golden/agent_protocol/codex-repair-before.json index f74f6dc1..ff785481 100644 --- a/tests/golden/agent_protocol/codex-repair-before.json +++ b/tests/golden/agent_protocol/codex-repair-before.json @@ -132,7 +132,7 @@ "next_action": { "kind": "command", "command": "shipgate check --agent codex --workspace . 
--format codex-boundary-json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/golden/agent_protocol/cursor-block-stop.json b/tests/golden/agent_protocol/cursor-block-stop.json index 00ba095e..aebda392 100644 --- a/tests/golden/agent_protocol/cursor-block-stop.json +++ b/tests/golden/agent_protocol/cursor-block-stop.json @@ -124,8 +124,8 @@ "diff_tokens": [], "next_action": { "kind": "command", - "command": "agents-shipgate detect --workspace . --json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "command": "shipgate verify --preview --json", + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/golden/codex_boundary_result/github_action_removed.json b/tests/golden/codex_boundary_result/github_action_removed.json index b9ff51da..0d38f639 100644 --- a/tests/golden/codex_boundary_result/github_action_removed.json +++ b/tests/golden/codex_boundary_result/github_action_removed.json @@ -131,8 +131,8 @@ "diff_tokens": [], "next_action": { "kind": "command", - "command": "agents-shipgate detect --workspace . --json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "command": "shipgate verify --preview --json", + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." 
} }, "finding_fingerprints": [ diff --git a/tests/golden/codex_boundary_result/malformed_toml.json b/tests/golden/codex_boundary_result/malformed_toml.json index 6f7aa40e..6a0da209 100644 --- a/tests/golden/codex_boundary_result/malformed_toml.json +++ b/tests/golden/codex_boundary_result/malformed_toml.json @@ -128,7 +128,7 @@ "next_action": { "kind": "command", "command": "shipgate check --agent codex --workspace . --format codex-boundary-json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/golden/codex_boundary_result/mcp_auto_approve_write.json b/tests/golden/codex_boundary_result/mcp_auto_approve_write.json index 595c7bb9..ead55e2c 100644 --- a/tests/golden/codex_boundary_result/mcp_auto_approve_write.json +++ b/tests/golden/codex_boundary_result/mcp_auto_approve_write.json @@ -132,7 +132,7 @@ "next_action": { "kind": "command", "command": "shipgate check --agent codex --workspace . --format codex-boundary-json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/golden/codex_boundary_result/network_wildcard.json b/tests/golden/codex_boundary_result/network_wildcard.json index f0a39f35..a8095a52 100644 --- a/tests/golden/codex_boundary_result/network_wildcard.json +++ b/tests/golden/codex_boundary_result/network_wildcard.json @@ -130,7 +130,7 @@ "next_action": { "kind": "command", "command": "shipgate check --agent codex --workspace . --format codex-boundary-json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." 
+ "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/golden/codex_boundary_result/unknown_permission_key.json b/tests/golden/codex_boundary_result/unknown_permission_key.json index afc51875..3af8d09b 100644 --- a/tests/golden/codex_boundary_result/unknown_permission_key.json +++ b/tests/golden/codex_boundary_result/unknown_permission_key.json @@ -129,7 +129,7 @@ "next_action": { "kind": "command", "command": "shipgate check --agent codex --workspace . --format codex-boundary-json", - "why": "This change looks agent-related; detect tool surfaces and adopt Shipgate." + "why": "This change looks agent-related; start with verify preview and adopt Shipgate if the preview routes setup." } }, "finding_fingerprints": [ diff --git a/tests/test_agent_handoff.py b/tests/test_agent_handoff.py index 1eb06aab..5fc05101 100644 --- a/tests/test_agent_handoff.py +++ b/tests/test_agent_handoff.py @@ -228,7 +228,7 @@ def test_agent_handoff_cli_missing_input_exits_three(tmp_path: Path) -> None: def test_agent_handoff_rejects_mismatched_decision_and_merge_verdict() -> None: with pytest.raises(ValidationError): AgentHandoffArtifact( - contract_version="7", + contract_version="8", operation="verify_pr", subject=AgentHandoffSubject(workspace="/tmp/repo", config="shipgate.yaml"), gate=AgentHandoffGate( @@ -243,7 +243,7 @@ def test_agent_handoff_rejects_mismatched_decision_and_merge_verdict() -> None: def test_agent_handoff_rejects_controller_completion_mismatch() -> None: with pytest.raises(ValidationError): AgentHandoffArtifact( - contract_version="7", + contract_version="8", operation="verify_pr", subject=AgentHandoffSubject(workspace="/tmp/repo", config="shipgate.yaml"), gate=AgentHandoffGate( diff --git a/tests/test_agent_instructions_apply.py b/tests/test_agent_instructions_apply.py index 609c0c9a..e1e0bd72 100644 --- a/tests/test_agent_instructions_apply.py +++ 
b/tests/test_agent_instructions_apply.py @@ -189,8 +189,10 @@ def test_claude_command_current_file_matches_renderer() -> None: def test_local_contract_renderer_has_required_fields() -> None: payload = json.loads(render_local_contract_file()) - assert payload["schema_version"] == "1" - assert payload["contract_version"] == "7" + assert payload["schema_version"] == "2" + assert payload["contract_version"] == "8" + assert payload["primary_commands"]["verify_local"].startswith("shipgate verify") + assert payload["primary_commands"]["host_audit"].startswith("shipgate audit --host") assert payload["agent_handoff_schema_version"] == "shipgate.agent_handoff/v1" assert payload["agent_handoff_artifact"] == "agents-shipgate-reports/agent-handoff.json" assert payload["attestation_schema_version"] == "0.4" diff --git a/tests/test_agent_instructions_renderers.py b/tests/test_agent_instructions_renderers.py index 3954e853..ae191874 100644 --- a/tests/test_agent_instructions_renderers.py +++ b/tests/test_agent_instructions_renderers.py @@ -45,16 +45,16 @@ REPO_ROOT = Path(__file__).resolve().parent.parent EXPECTED_CLAUDE_CODE_SKILL_RENDER_SHA256 = { ".claude/skills/agents-shipgate/SKILL.md": ( - "c2b0882af212c091d1b94c6c838ab312e25455057cf57e994a49c93d84646273" + "5062b30ee84b3871c6532ce0e6c7ad41ac0203a1bd29be51ac3342e01fcd09cb" ), ".claude/skills/agents-shipgate/ci-recipes/advisory-pr-comment.yml": ( "99b2acfbd9dfc6653a6bbee268b83f1e2d4297829636eba662d9f4ad6fa35423" ), ".claude/skills/agents-shipgate/prompts/add-shipgate-to-repo.md": ( - "b8403d6e873fbc343eb3677fca1e117faef1ec3743befae1a1fe0bf1e5ea003d" + "47f370db7820b665de6fcc61968c735e0dfb88715b9f666795687b73f0034dce" ), ".claude/skills/agents-shipgate/prompts/decide-shipgate-relevance.md": ( - "03df378c4dae05b0d7da558b3a7e868de4d1bcba5f55744615b1c2290a13879e" + "8d1540095101cd7ff3aec4ba998ced5c135cdbdb71637ad0c4e5d42fc6ec9ab7" ), ".claude/skills/agents-shipgate/prompts/explain-finding-to-user.md": ( 
"18031ed870b3c937a2996173820639ef441afe0a45e8171f16468826cd389829" @@ -75,12 +75,12 @@ "992122338eba26ae5d8056b9658117d718a6b477b9928c2a438dd449b5effb68" ), ".claude/skills/agents-shipgate/prompts/verify-agent-diff.md": ( - "919059f86649c7098a75922123c988b819da3094d5bb42ac1737af25e81604de" + "96a7eeeaf96df428575ad4758b48cdd6458c491bb067a99f73b06e6fd268c36d" ), } EXPECTED_CODEX_SKILL_RENDER_SHA256 = { ".agents/skills/agents-shipgate/SKILL.md": ( - "bf711ad6209b4a7ea5030bb97b3c0d1ce848dc4255f868bea2329bd06f8a9999" + "49c04766323dd3bec0b94f39ab236b84d0a6adc4e47bbaded70bc0f8f166f779" ), ".agents/skills/agents-shipgate/agents/openai.yaml": ( "aa511e933ff663dcd1e0d2af3da2a7101206ce2bb1bb98c4dae801bb3f4e42ef" @@ -89,7 +89,7 @@ "16894ce679eb55c69213070775cb265f0775ad7ff1cd08091a5c57627950871b" ), ".agents/skills/agents-shipgate/references/recipes.md": ( - "d1676a96e803a9526d715a58f458174bcb661d5c54156ecb823b0bd77bb35775" + "64cfd980d399f24995008eeca4d196a6efd224edd01e108543ec11aeb291d085" ), ".agents/skills/agents-shipgate/references/report-reading.md": ( "6d2848f3436f6e246bf553e6cf061c990888d6ff39eb82fec9a41f291b2e94fe" @@ -162,9 +162,11 @@ def test_committed_claude_command_matches_renderer() -> None: def test_local_contract_renderer_exposes_agent_operational_fields() -> None: payload = json.loads(render_local_contract_file()) - assert payload["schema_version"] == "1" + assert payload["schema_version"] == "2" assert payload["agents_shipgate_version"] - assert payload["contract_version"] == "7" + assert payload["contract_version"] == "8" + assert payload["primary_commands"]["verify_pr"].startswith("shipgate verify") + assert payload["primary_commands"]["host_audit"].startswith("shipgate audit --host") assert payload["verifier_schema_version"] == "0.1" assert payload["verify_run_schema_version"] == "shipgate.verify_run/v1" assert payload["agent_handoff_schema_version"] == "shipgate.agent_handoff/v1" @@ -324,7 +326,7 @@ def test_claude_code_skill_has_required_surfaces() -> 
None: assert "release_decision.decision" in skill assert "AGENTS_SHIPGATE_AGENT_MODE=1" in skill assert "Do not claim a finding is fixed" in skill - assert "agents-shipgate verify" in skill + assert "shipgate verify" in skill def test_codex_skill_has_required_surfaces() -> None: @@ -338,12 +340,12 @@ def test_codex_skill_has_required_surfaces() -> None: assert "release_decision.decision" in skill assert "AGENTS_SHIPGATE_AGENT_MODE=1" in skill assert "Do not auto-assert approval" in skill - assert "agents-shipgate verify" in skill + assert "shipgate verify" in skill assert "agents-shipgate --version" in skill assert "agents-shipgate contract --json" in skill assert "install or upgrade `agents-shipgate`" in skill recipes = files[".agents/skills/agents-shipgate/references/recipes.md"] - assert 'contract_version: "7"' in recipes + assert 'contract_version: "8"' in recipes assert "shipgate.codex_boundary_result/v1" in recipes @@ -366,7 +368,8 @@ def test_claude_md_is_self_contained_no_dangling_link() -> None: dangling reference to AGENTS.md.""" out = render_claude_md() # Self-contained means it lists its own commands and report.json contract. - assert "agents-shipgate verify --preview" in out + assert "shipgate verify --workspace . --config shipgate.yaml" in out + assert "shipgate audit --host" in out assert "merge_verdict" in out assert "release_decision.decision" in out # Cross-link to AGENTS.md is intentionally omitted. diff --git a/tests/test_cli.py b/tests/test_cli.py index f76e2683..39c0c0b4 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -41,6 +41,7 @@ MCP_TOOLS, MERGE_VERDICTS, ORG_EVIDENCE_BUNDLE_SCHEMA_VERSION, + PRIMARY_COMMANDS, REGISTRY_SCHEMA_VERSION, RELEASE_DECISIONS, VERIFIER_READ_ORDER, @@ -169,7 +170,7 @@ def test_cli_missing_config_prints_next_action_hint(tmp_path, monkeypatch): assert "Config error:" in result.output # The human reader gets the same rank-1 recovery step that agent # mode emits as JSON — a cold user must not hit a dead end. 
- assert "next: agents-shipgate detect" in result.output + assert "next: shipgate verify" in result.output def test_cli_change_me_placeholder_error_routes_to_manifest_edit(tmp_path, monkeypatch): @@ -265,6 +266,7 @@ def test_cli_contract_json_outputs_runtime_contract(): "agent_interface_operations", "exit_code_policy", "mcp_tools", + "primary_commands", "commands", "default_paths", "artifacts", @@ -308,6 +310,7 @@ def test_cli_contract_json_outputs_runtime_contract(): "agent_interface_operations": list(AGENT_INTERFACE_OPERATIONS), "exit_code_policy": dict(EXIT_CODE_POLICY), "mcp_tools": list(MCP_TOOLS), + "primary_commands": dict(PRIMARY_COMMANDS), "commands": dict(COMMANDS), "default_paths": dict(DEFAULT_PATHS), "artifacts": dict(ARTIFACTS), @@ -422,27 +425,42 @@ def test_cli_scan_help_hides_deferred_flags(): assert "--policy-pack" in public_options -# WS-D visibility policy: --help shows the core loop; niche/maintainer -# surfaces are hidden but stay fully invokable (presentation, not -# deprecation — STABILITY.md is unaffected). +# Visibility policy: root --help shows only the three prominent flows; every +# supporting command remains directly invokable for compatibility. 
HIDDEN_TOP_LEVEL_COMMANDS = { + "agent", + "apply-patches", + "attest", + "baseline", + "bootstrap", + "capability", + "contract", + "detect", + "doctor", "evidence-packet", + "explain", + "explain-finding", "feedback", + "findings", + "fixture", + "init", + "install-hooks", + "list-checks", + "mcp", + "mcp-serve", + "org", + "preflight", + "registry", + "scan", "scenario", "skill", + "self-check", + "trigger", } VISIBLE_CORE_COMMANDS = { - "detect", "check", "verify", - "init", - "scan", "audit", - "org", - "registry", - "doctor", - "self-check", - "fixture", # the README 60-second demo leads with `fixture run` } @@ -453,13 +471,14 @@ def test_cli_help_hides_niche_commands_but_keeps_them_invokable(): for name in HIDDEN_TOP_LEVEL_COMMANDS: assert root.commands[name].hidden, f"{name} should be hidden from --help" - assert f" {name} " not in result.output + assert f"│ {name}" not in result.output # Hidden, not removed: the command still resolves and answers --help. invoked = runner.invoke(app, [name, "--help"]) assert invoked.exit_code == 0, f"{name} must remain invokable: {invoked.output}" for name in VISIBLE_CORE_COMMANDS: assert not root.commands[name].hidden, f"{name} must stay visible" + assert f" {name} " in result.output def test_cli_tool_surface_summary_detects_no_changes(): @@ -1132,7 +1151,7 @@ def test_missing_manifest_recovery_uses_config_workspace(tmp_path, monkeypatch): payloads = _stderr_json_lines(result.output) assert payloads, result.output rank_one = payloads[-1]["next_actions"][0] - assert "agents-shipgate detect --workspace" in rank_one["command"] + assert "shipgate verify --workspace" in rank_one["command"] # Routes recovery to the config's parent directory, not the foreign cwd. 
assert str(tmp_path / "repo") in rank_one["command"] assert str(foreign_cwd) not in rank_one["command"] @@ -1242,8 +1261,7 @@ def test_missing_manifest_command_quotes_workspace_with_spaces(tmp_path, monkeyp import shlex parts = shlex.split(command) - assert parts[0] == "agents-shipgate" - assert parts[1] == "detect" + assert parts[:2] == ["shipgate", "verify"] assert "--workspace" in parts workspace_arg = parts[parts.index("--workspace") + 1] assert workspace_arg == str(spaced) diff --git a/tests/test_codex_boundary_check.py b/tests/test_codex_boundary_check.py index f6e0786a..bf56e1b6 100644 --- a/tests/test_codex_boundary_check.py +++ b/tests/test_codex_boundary_check.py @@ -105,7 +105,7 @@ def test_declared_tool_surface_change_warns_and_routes_to_verify(tmp_path: Path) assert payload["completion_allowed"] is True assert payload["must_stop"] is False assert payload["first_next_action"]["kind"] == "warn" - assert payload["first_next_action"]["command"].startswith("agents-shipgate verify") + assert payload["first_next_action"]["command"].startswith("shipgate verify") assert any(d["code"] == "capability_change_requires_verify" for d in payload["diagnostics"]) assert any(t["step"] == "coverage" for t in payload["trace"]) @@ -152,7 +152,7 @@ def test_check_warns_when_manifest_declares_changed_tool_source(tmp_path: Path) policy=None, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("agents-shipgate verify") + assert result.first_next_action.command.startswith("shipgate verify") def _write_manifest(tmp_path: Path, tool_sources: str) -> None: @@ -189,7 +189,7 @@ def test_check_warns_on_change_under_declared_directory_source(tmp_path: Path) - policy=None, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("agents-shipgate verify") + assert result.first_next_action.command.startswith("shipgate verify") def test_check_does_not_warn_on_broad_root_source(tmp_path: Path) -> None: @@ -248,8 +248,8 @@ 
def test_check_does_not_warn_on_docs_change_in_opted_in_repo(tmp_path: Path) -> # --- Undeclared coverage gap: a changed file IS a tool surface but the -------- # manifest does not declare it (or there is no manifest). verify only gates -# declared surfaces, so route to declare-then-verify (detect) rather than a -# clean allow or a verify that never scans it. +# declared surfaces, so route through verify preview before full verify rather +# than a clean allow or a full verify that never scans it. # A second changed file that is an *undeclared* tool surface (an OpenAPI spec), # used to exercise mixed declared+undeclared diffs (review finding P1). @@ -263,7 +263,7 @@ def test_check_does_not_warn_on_docs_change_in_opted_in_repo(tmp_path: Path) -> ) -def test_undeclared_surface_warns_and_routes_to_detect(tmp_path: Path) -> None: +def test_undeclared_surface_warns_and_routes_to_verify_preview(tmp_path: Path) -> None: result = evaluate_codex_boundary_result( workspace=tmp_path, diff_text=_TOOL_SOURCE_DIFF, @@ -272,23 +272,23 @@ def test_undeclared_surface_warns_and_routes_to_detect(tmp_path: Path) -> None: ) payload = result.model_dump(mode="json", exclude_none=True) _validate(payload) - # Was a bare allow before the fix; now a warn that routes to detect/declare. + # Was a bare allow before the fix; now a warn that routes to verify preview. 
assert payload["decision"] == "warn" assert payload["completion_allowed"] is True assert payload["must_stop"] is False assert payload["first_next_action"]["kind"] == "warn" - assert payload["first_next_action"]["command"].startswith("agents-shipgate detect") + assert payload["first_next_action"]["command"].startswith("shipgate verify --preview") assert any(d["code"] == "undeclared_capability_surface" for d in payload["diagnostics"]) assert any(t["step"] == "coverage" for t in payload["trace"]) - assert payload["suggested_fixes"][0].startswith("agents-shipgate detect") - assert any(fix.startswith("agents-shipgate verify") for fix in payload["suggested_fixes"]) + assert payload["suggested_fixes"][0].startswith("shipgate verify --preview") + assert any(fix.startswith("shipgate verify") for fix in payload["suggested_fixes"]) -def test_mixed_declared_and_undeclared_routes_to_detect(tmp_path: Path) -> None: +def test_mixed_declared_and_undeclared_routes_to_verify_preview(tmp_path: Path) -> None: # Review finding P1: a diff that changes BOTH a declared surface (verify - # gates it) and an undeclared one (verify does not) must route to detect — - # declare-then-verify — not a verify that silently misses the undeclared - # surface. Undeclared takes precedence over the declared coverage gap. + # gates it) and an undeclared one (verify does not) must route to verify + # preview before full verify. Undeclared takes precedence over the declared + # coverage gap. 
result = evaluate_codex_boundary_result( workspace=tmp_path, diff_text=_MIXED_TOOL_SOURCE_DIFF, @@ -297,13 +297,13 @@ def test_mixed_declared_and_undeclared_routes_to_detect(tmp_path: Path) -> None: undeclared_capability_surfaces=["api/openapi.yaml"], ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("agents-shipgate detect") + assert result.first_next_action.command.startswith("shipgate verify --preview") payload = result.model_dump(mode="json", exclude_none=True) diag = next(d for d in payload["diagnostics"] if d["code"] == "undeclared_capability_surface") assert "api/openapi.yaml" in diag["message"] -def test_no_manifest_capability_add_via_check_warns_and_routes_to_detect( +def test_no_manifest_capability_add_via_check_warns_and_routes_to_verify_preview( tmp_path: Path, ) -> None: # End-to-end: empty workspace (no shipgate.yaml). build_codex_agent_result @@ -316,7 +316,7 @@ def test_no_manifest_capability_add_via_check_warns_and_routes_to_detect( policy=None, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("agents-shipgate detect") + assert result.first_next_action.command.startswith("shipgate verify --preview") def test_capability_add_to_undeclared_surface_warns_when_manifest_declares_other( @@ -324,7 +324,8 @@ def test_capability_add_to_undeclared_surface_warns_when_manifest_declares_other ) -> None: # Manifest exists but declares a *different* tool source than the changed # file. The declared-coverage path does not match, so the undeclared path - # must catch it (and route to detect, not a verify that never scans it). + # must catch it and route through verify preview, not a full verify that + # never scans it. 
_write_manifest( tmp_path, " - id: other\n type: mcp\n path: other-tools.json\n trust: internal\n", @@ -337,12 +338,12 @@ def test_capability_add_to_undeclared_surface_warns_when_manifest_declares_other policy=None, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("agents-shipgate detect") + assert result.first_next_action.command.startswith("shipgate verify --preview") payload = result.model_dump(mode="json", exclude_none=True) assert any(d["code"] == "undeclared_capability_surface" for d in payload["diagnostics"]) -def test_mixed_declared_and_undeclared_via_check_routes_to_detect( +def test_mixed_declared_and_undeclared_via_check_routes_to_verify_preview( tmp_path: Path, ) -> None: # Review finding P1, end-to-end through build_codex_agent_result: manifest @@ -359,7 +360,7 @@ def test_mixed_declared_and_undeclared_via_check_routes_to_detect( policy=None, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("agents-shipgate detect") + assert result.first_next_action.command.startswith("shipgate verify --preview") payload = result.model_dump(mode="json", exclude_none=True) diag = next(d for d in payload["diagnostics"] if d["code"] == "undeclared_capability_surface") assert "api/openapi.yaml" in diag["message"] diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py index dfbb504f..f365e211 100644 --- a/tests/test_diagnostics.py +++ b/tests/test_diagnostics.py @@ -142,7 +142,7 @@ def test_emits_missing_manifest_diagnostic(self, tmp_path: Path) -> None: assert [d.id for d in diags] == [DIAG_MISSING_MANIFEST] assert diags[0].severity == "block" assert diags[0].next_actions[0].kind == "command" - assert "agents-shipgate detect" in diags[0].next_actions[0].command + assert "shipgate verify" in diags[0].next_actions[0].command def test_command_quotes_workspace_with_spaces( self, tmp_path: Path @@ -154,7 +154,7 @@ def test_command_quotes_workspace_with_spaces( diags = 
diagnose_missing_manifest(spaced) for action in diags[0].next_actions: parts = shlex.split(action.command) - assert parts[0] == "agents-shipgate" + assert parts[0] in {"agents-shipgate", "shipgate"} ws_idx = parts.index("--workspace") assert parts[ws_idx + 1] == str(spaced) @@ -584,10 +584,10 @@ def test_command_actions_use_known_subcommands( ) assert commands_to_check, "expected at least one command action" - pattern = re.compile(r"^agents-shipgate\s+([\w-]+)") + pattern = re.compile(r"^(?:agents-shipgate|shipgate)\s+([\w-]+)") for command in commands_to_check: match = pattern.match(command) - assert match, f"command does not start with agents-shipgate: {command!r}" + assert match, f"command does not start with a Shipgate binary: {command!r}" subcommand = match.group(1) assert subcommand in registered, ( f"command {command!r} references unknown subcommand " diff --git a/tests/test_first_look.py b/tests/test_first_look.py index f8d9f3d0..fae417c6 100644 --- a/tests/test_first_look.py +++ b/tests/test_first_look.py @@ -137,8 +137,8 @@ def test_next_command_routing_priority() -> None: # Manifest present wins regardless of the other signals. assert "verify" in _next_command(True, True, True) assert "verify" in _next_command(True, False, False) - # No manifest, but an agent tool surface: set up the gate / check now. - assert "init" in _next_command(False, True, False) + # No manifest, but an agent tool surface: stay inside the verify/check flows. + assert "verify --preview" in _next_command(False, True, False) assert "check" in _next_command(False, True, False) # No manifest, no agent surface, but host grants: route to the host audit. 
assert "audit --host" in _next_command(False, False, True) diff --git a/tests/test_local_contract.py b/tests/test_local_contract.py index 82edf062..5a9d2d27 100644 --- a/tests/test_local_contract.py +++ b/tests/test_local_contract.py @@ -9,7 +9,7 @@ build_local_agent_contract, render_local_agent_contract, ) -from agents_shipgate.schemas.contract import CONTRACT_VERSION, GATING_SIGNAL +from agents_shipgate.schemas.contract import CONTRACT_VERSION, GATING_SIGNAL, PRIMARY_COMMANDS def test_local_agent_contract_is_minimal_agent_operational_payload() -> None: @@ -20,6 +20,7 @@ def test_local_agent_contract_is_minimal_agent_operational_payload() -> None: "agents_shipgate_version", "contract_version", "default_paths", + "primary_commands", "commands", "artifacts", "agent_read_order", @@ -49,6 +50,15 @@ def test_local_agent_contract_is_minimal_agent_operational_payload() -> None: assert payload["agents_shipgate_version"] == __version__ assert payload["contract_version"] == CONTRACT_VERSION assert payload["default_paths"]["local_contract"] == LOCAL_CONTRACT_RELATIVE_PATH + assert payload["primary_commands"] == dict(PRIMARY_COMMANDS) + assert set(payload["primary_commands"]) == { + "check_codex", + "check_claude_code", + "check_cursor", + "verify_local", + "verify_pr", + "host_audit", + } assert payload["commands"]["install_agent_workflow"] == ( "agents-shipgate init --workspace . --write --ci --agent-instructions=default --json" ) diff --git a/tests/test_prompt_parity.py b/tests/test_prompt_parity.py index 34159db9..7cdadebd 100644 --- a/tests/test_prompt_parity.py +++ b/tests/test_prompt_parity.py @@ -100,20 +100,18 @@ def test_prompt_byte_identical_across_locations(name: str): ) -def test_add_shipgate_prompt_starts_with_detect_first_flow(): - """The canonical onboarding prompt must lead with the v0.7 - `detect → init --write --ci → scan --suggest-patches → apply-patches` - flow, not the pre-v0.6 install→init→scan path. 
Pin this so a - future edit doesn't accidentally regress to the older flow. +def test_add_shipgate_prompt_starts_with_verify_preview_flow(): + """The canonical onboarding prompt must lead with the v8 verify-preview + flow, keeping init/scan/apply-patches as supporting setup steps only. """ text = (TOP_LEVEL_PROMPTS / "add-shipgate-to-repo.md").read_text(encoding="utf-8") - assert "agents-shipgate detect" in text, ( - "add-shipgate-to-repo.md must reference `agents-shipgate detect` " - "(canonical 4-call flow leads with detection)." + assert "$SG verify --preview --json" in text, ( + "add-shipgate-to-repo.md must lead with `verify --preview --json` " + "as the first-look flow." ) assert "--suggest-patches" in text, ( "add-shipgate-to-repo.md must reference `--suggest-patches` " - "(scan step in the canonical flow)." + "(supporting scan step after preview routes setup)." ) assert "apply-patches" in text, ( "add-shipgate-to-repo.md must reference `apply-patches` " diff --git a/tests/test_public_surface_contract.py b/tests/test_public_surface_contract.py index d1e7c104..42bd913b 100644 --- a/tests/test_public_surface_contract.py +++ b/tests/test_public_surface_contract.py @@ -1440,6 +1440,18 @@ def test_well_known_seo_geo_positioning_fields_are_pinned(): assert keyword in primary_keywords commands = data.get("commands", {}) + primary_commands = data.get("primary_commands", {}) + contract = build_contract_payload().model_dump(mode="json") + assert primary_commands == contract["primary_commands"] + assert set(primary_commands) == { + "check_codex", + "check_claude_code", + "check_cursor", + "verify_local", + "verify_pr", + "host_audit", + } + assert all(command.startswith("shipgate ") for command in primary_commands.values()) assert commands.get("preview") == "agents-shipgate verify --preview --json" assert commands.get("install_ai_coding_workflow") == ( "agents-shipgate init --workspace . 
--write --ci --agent-instructions=default --json" @@ -1500,6 +1512,71 @@ def test_well_known_seo_geo_positioning_fields_are_pinned(): ) +def test_prominent_surfaces_only_promote_check_verify_and_host_audit(): + """First-look surfaces must not promote supporting setup commands.""" + + forbidden = ( + "agents-shipgate detect", + "agents-shipgate init", + "agents-shipgate scan", + "agents-shipgate preflight", + "agents-shipgate bootstrap", + "agents-shipgate apply-patches", + ) + readme = _read("README.md") + readme_top = readme.split("## Verify-first quickstart", 1)[1].split( + "## How to read your first result", 1 + )[0] + quickstart = _read("docs/quickstart.md") + quickstart_top = quickstart.split("## Verify-first quickstart", 1)[1].split( + "## Supporting zero-install relevance check", 1 + )[0] + slash = _read(".claude/commands/shipgate.md") + slash_commands = slash.split("Prominent commands:", 1)[1].split( + "Required behavior", 1 + )[0] + target_snippets = _read("docs/target-repo-agent-snippets.md") + agents_block = target_snippets.split("## `AGENTS.md`", 1)[1].split( + "## Codex Skill", 1 + )[0] + claude_block = target_snippets.split("## `CLAUDE.md`", 1)[1].split( + "## `.cursor/rules/agents-shipgate.mdc`", 1 + )[0] + cursor_block = target_snippets.split("## `.cursor/rules/agents-shipgate.mdc`", 1)[ + 1 + ].split("## `.github/pull_request_template.md`", 1)[0] + + surfaces = { + "README quickstart": readme_top, + "docs/quickstart": quickstart_top, + "slash prominent commands": slash_commands, + "target AGENTS snippet": agents_block, + "target CLAUDE snippet": claude_block, + "target Cursor snippet": cursor_block, + } + for name, text in surfaces.items(): + for command in forbidden: + assert command not in text, f"{name} promotes supporting command {command!r}" + + well_known = json.loads(_read(".well-known/agents-shipgate.json")) + contract = build_contract_payload().model_dump(mode="json") + for name, command in { + **well_known["primary_commands"], + 
**contract["primary_commands"], + }.items(): + assert command.startswith("shipgate "), f"{name} should use the shipgate alias" + assert any( + command.startswith(prefix) + for prefix in ( + "shipgate check ", + "shipgate verify ", + "shipgate audit --host", + ) + ), f"{name} is not one of the prominent flows: {command}" + for forbidden_command in forbidden: + assert forbidden_command not in command + + def test_llms_txt_advertises_triggers_and_llms_full(): """llms.txt is the short fan-out for AI search; it must list the trigger catalog and llms-full URLs so they are discoverable from diff --git a/tests/test_schema_boundaries.py b/tests/test_schema_boundaries.py index aa0b5a94..2977ad8e 100644 --- a/tests/test_schema_boundaries.py +++ b/tests/test_schema_boundaries.py @@ -275,7 +275,7 @@ def test_representative_schema_payloads_keep_wire_fields() -> None: } assert ContractPayload( - contract_version="7", + contract_version="8", cli_version="0.0.0", report_schema_version="0.27", packet_schema_version="0.7", @@ -304,6 +304,7 @@ def test_representative_schema_payloads_keep_wire_fields() -> None: agent_interface_operations=["verify_pr"], exit_code_policy={"3": "input parse or missing artifact error"}, mcp_tools=["shipgate.handoff"], + primary_commands={"verify_local": "shipgate verify --json"}, commands={"preview": "agents-shipgate verify --preview --json"}, default_paths={"manifest": "shipgate.yaml"}, artifacts={ @@ -323,7 +324,7 @@ def test_representative_schema_payloads_keep_wire_fields() -> None: release_decisions=["passed", "blocked"], do_not_auto_assert=["approval"], ).model_dump(mode="json") == { - "contract_version": "7", + "contract_version": "8", "cli_version": "0.0.0", "report_schema_version": "0.27", "packet_schema_version": "0.7", @@ -352,6 +353,7 @@ def test_representative_schema_payloads_keep_wire_fields() -> None: "agent_interface_operations": ["verify_pr"], "exit_code_policy": {"3": "input parse or missing artifact error"}, "mcp_tools": 
["shipgate.handoff"], + "primary_commands": {"verify_local": "shipgate verify --json"}, "commands": {"preview": "agents-shipgate verify --preview --json"}, "default_paths": {"manifest": "shipgate.yaml"}, "artifacts": { diff --git a/tests/test_trigger_command.py b/tests/test_trigger_command.py index 47b257a3..3ba1c71c 100644 --- a/tests/test_trigger_command.py +++ b/tests/test_trigger_command.py @@ -21,7 +21,7 @@ def _catalog(when: dict) -> dict: """A minimal one-rule catalog for predicate-isolation tests.""" return { "schema_version": "test", - "default_command": "agents-shipgate detect --workspace . --json", + "default_command": "shipgate verify --preview --json", "rules": [ {"id": "R", "action": "run_shipgate", "when": when, "rationale": ""} ], @@ -209,11 +209,11 @@ def test_evaluate_emits_skip_and_next_action_fields(): assert skipped["next_action"]["command"] is None -def test_next_action_points_at_detect_when_not_adopted(): +def test_next_action_points_at_verify_preview_when_not_adopted(): res = evaluate(paths=["tools/my_mcp.json"]) # run rule fires, no manifest assert res["should_run"] is True assert res["next_action"]["kind"] == "command" - assert "detect" in res["next_action"]["command"] + assert res["next_action"]["command"] == "shipgate verify --preview --json" def test_stop_conditions_not_evaluated_without_detect_result(): diff --git a/tests/test_verify.py b/tests/test_verify.py index a7fb57c7..194baa22 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -137,7 +137,7 @@ def test_verify_missing_config_docs_only_diff_fails_closed(tmp_path: Path) -> No assert payload["human_review"]["required"] is True assert "verify --preview --json" in payload["human_review"]["why"] assert payload["first_next_action"]["command"] == ( - "agents-shipgate verify --preview --json" + "shipgate verify --preview --json" ) assert (out_dir / "verifier.json").is_file() assert (out_dir / "verify-run.json").is_file() From 59801919d1905022cd45f268ca953a181b3c3178 Mon Sep 
17 00:00:00 2001 From: Pengfei Hu Date: Sat, 27 Jun 2026 20:55:31 -0700 Subject: [PATCH 2/3] Address primary flow review findings --- docs/diagnostics.md | 2 +- docs/errors.json | 2 +- src/agents_shipgate/cli/agent_result.py | 4 +- src/agents_shipgate/cli/main.py | 1 + .../cli/verify/orchestrator.py | 4 +- src/agents_shipgate/core/codex_boundary.py | 73 ++++++++++++++----- tests/test_codex_boundary_check.py | 41 +++++++---- tests/test_verify.py | 6 +- 8 files changed, 93 insertions(+), 40 deletions(-) diff --git a/docs/diagnostics.md b/docs/diagnostics.md index 18e3b727..33b6a175 100644 --- a/docs/diagnostics.md +++ b/docs/diagnostics.md @@ -67,7 +67,7 @@ diagnostics where no command should run. | ID | Severity | Fires when | | ----------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -| `SHIP-DIAG-MISSING-MANIFEST` | block | The manifest file does not exist on disk. Rank-1 action: `agents-shipgate detect --workspace --json`. | +| `SHIP-DIAG-MISSING-MANIFEST` | block | The manifest file does not exist on disk. Rank-1 action: `shipgate verify --workspace --preview --json`. | | `SHIP-DIAG-INVALID-MANIFEST` | block | The manifest file exists but the loader rejected it (invalid YAML, schema validation failure, unsupported version). Rank-1 action: `edit `. | | `SHIP-DIAG-NO-AGENT-SURFACE` | info | `is_agent_project=false` AND `suggested_sources=[]` AND `codex_plugin_candidates=[]` AND no manifest. Catch-all negative control. | | `SHIP-DIAG-NON-AGENT-LIBRARY` | info | Python project (≥1 .py file + pyproject/requirements) with no agent framework, prompts, or tool surface. 
| diff --git a/docs/errors.json b/docs/errors.json index e673caad..bf52740e 100644 --- a/docs/errors.json +++ b/docs/errors.json @@ -21,7 +21,7 @@ "exit_code": 2, "typical_cause": "shipgate.yaml does not exist OR exists but the loader rejected it (extra key, missing required field, wrong nesting level).", "additional_fields": ["message", "next_action", "next_actions"], - "recovery_hint": "Distinguish missing vs. invalid via the diagnostic id: `SHIP-DIAG-MISSING-MANIFEST` → run `agents-shipgate detect --workspace --json` then `init --write`. `SHIP-DIAG-INVALID-MANIFEST` → edit the manifest at the path in the error; do NOT re-run `init` (it refuses to overwrite an existing file)." + "recovery_hint": "Distinguish missing vs. invalid via the diagnostic id: `SHIP-DIAG-MISSING-MANIFEST` → run `shipgate verify --workspace --preview --json`; if preview routes setup, run the emitted init command. `SHIP-DIAG-INVALID-MANIFEST` → edit the manifest at the path in the error; do NOT re-run `init` (it refuses to overwrite an existing file)." 
}, { "id": "config_already_exists", diff --git a/src/agents_shipgate/cli/agent_result.py b/src/agents_shipgate/cli/agent_result.py index d109f5eb..75c4c927 100644 --- a/src/agents_shipgate/cli/agent_result.py +++ b/src/agents_shipgate/cli/agent_result.py @@ -46,10 +46,11 @@ def build_codex_agent_result( diff_files = parse_unified_diff(diff_text) changed_files = sorted({item.path for item in diff_files if item.path}) config_path = config if config.is_absolute() else workspace / config + manifest_present = config_path.is_file() trigger = evaluate_trigger( paths=changed_files, diff_text=diff_text, - manifest_present=config_path.is_file(), + manifest_present=manifest_present, user_requested=True, ) declared = _declared_tool_surfaces_changed( @@ -63,6 +64,7 @@ def build_codex_agent_result( agent=agent, policy_path=policy, trigger=trigger, + manifest_present=manifest_present, capability_surfaces_changed=declared, undeclared_capability_surfaces=_undeclared_tool_surfaces_changed( diff_files=diff_files, diff --git a/src/agents_shipgate/cli/main.py b/src/agents_shipgate/cli/main.py index 63687e36..73ca437e 100644 --- a/src/agents_shipgate/cli/main.py +++ b/src/agents_shipgate/cli/main.py @@ -188,6 +188,7 @@ def _mcp_serve_command() -> None: # `shipgate check`, `shipgate verify`, and `shipgate audit --host`. # Supporting/compatibility commands stay fully invokable and documented # through their direct --help; hiding is presentation, not deprecation. +# README fixture demos remain runnable, but are not a root-help flow. 
app.add_typer(fixture_app, name="fixture", hidden=True) app.add_typer(feedback_app, name="feedback", hidden=True) app.add_typer(scenario_app, name="scenario", hidden=True) diff --git a/src/agents_shipgate/cli/verify/orchestrator.py b/src/agents_shipgate/cli/verify/orchestrator.py index 0860f868..104e34f4 100644 --- a/src/agents_shipgate/cli/verify/orchestrator.py +++ b/src/agents_shipgate/cli/verify/orchestrator.py @@ -1388,7 +1388,7 @@ def _shell_join(parts: list[str]) -> str: def _preview_init_command(workspace: Path) -> str: return _shell_join( [ - "agents-shipgate", + "shipgate", "init", "--workspace", str(workspace), @@ -1409,7 +1409,7 @@ def _preview_verify_command( out: Path | None, ) -> str: parts = [ - "agents-shipgate", + "shipgate", "verify", "--workspace", str(workspace), diff --git a/src/agents_shipgate/core/codex_boundary.py b/src/agents_shipgate/core/codex_boundary.py index d12b995e..5a2a99a6 100644 --- a/src/agents_shipgate/core/codex_boundary.py +++ b/src/agents_shipgate/core/codex_boundary.py @@ -377,6 +377,7 @@ def evaluate_codex_boundary_result( release_decision: dict[str, Any] | None = None, capability_surfaces_changed: list[str] | None = None, undeclared_capability_surfaces: list[str] | None = None, + manifest_present: bool | None = None, ) -> AgentResultV1: """Return the local Codex boundary-result projection for a unified diff. @@ -390,10 +391,12 @@ def evaluate_codex_boundary_result( - ``capability_surfaces_changed`` — changed files the manifest DECLARES as tool sources. ``verify`` will gate these, so route to ``verify``. - ``undeclared_capability_surfaces`` — changed files that ARE tool surfaces - but the manifest does not declare (or there is no manifest). ``verify`` - cannot gate an undeclared surface yet, so route through verify preview - before full verify. Takes precedence when a diff changes both, since full - ``verify`` alone would miss the undeclared one. + but the manifest does not declare (or there is no manifest). 
In an + adopted repo, route to ``detect`` so the agent gets ``suggested_sources`` + for the missing ``tool_sources`` entry. In an unconfigured repo, route + through verify preview so the preview can decide whether to initialize. + Takes precedence when a diff changes both, since full ``verify`` alone + would miss the undeclared one. """ # Keep this local diff projector aligned with @@ -480,6 +483,7 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None: undeclared_surfaces = sorted(dict.fromkeys(undeclared_capability_surfaces or [])) coverage_gap = boundary_clean_allow and bool(coverage_surfaces) undeclared_gap = boundary_clean_allow and bool(undeclared_surfaces) + is_adopted_repo = bool(manifest_present) if coverage_gap or undeclared_gap: decision = "warn" @@ -495,11 +499,24 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None: evaluated_files=evaluated_files, ) if undeclared_gap: - first_next_action = _undeclared_next_action() - summary = _undeclared_summary(undeclared_surfaces) + first_next_action = _undeclared_next_action(manifest_present=is_adopted_repo) + summary = _undeclared_summary( + undeclared_surfaces, manifest_present=is_adopted_repo + ) diagnostics = [*diagnostics, _undeclared_diagnostic(undeclared_surfaces)] - trace = [*_trace_for(policy, decision, violations), _undeclared_trace(undeclared_surfaces)] - suggested_fixes = [_VERIFY_PREVIEW_COMMAND, _VERIFY_COMMAND] + trace = [ + *_trace_for(policy, decision, violations), + _undeclared_trace(undeclared_surfaces), + ] + suggested_fixes = ( + [ + _DETECT_COMMAND, + "Add the surface to shipgate.yaml tool_sources", + _VERIFY_COMMAND, + ] + if is_adopted_repo + else [_VERIFY_PREVIEW_COMMAND, _VERIFY_COMMAND] + ) elif coverage_gap: first_next_action = _coverage_next_action() summary = _coverage_summary(coverage_surfaces) @@ -1357,31 +1374,53 @@ def _risk_for(violations: list[AgentResultViolatedRule]) -> AgentResultRiskLevel # Canonical capability gate. 
check is boundary-only; verify computes the # capability delta and owns release_decision.decision. Preview stays inside the -# verify flow for unconfigured or undeclared surfaces. +# verify flow for unconfigured workspaces; adopted repos with undeclared +# surfaces still use detect for suggested_sources. _VERIFY_COMMAND = "shipgate verify --json" _VERIFY_PREVIEW_COMMAND = "shipgate verify --preview --json" +_DETECT_COMMAND = "shipgate detect --workspace . --json" -def _undeclared_next_action() -> AgentResultNextAction: +def _undeclared_next_action(*, manifest_present: bool) -> AgentResultNextAction: + if manifest_present: + return AgentResultNextAction( + actor="coding_agent", + kind="warn", + command=_DETECT_COMMAND, + why=( + "This diff changes a tool/capability surface that shipgate.yaml does " + "not declare, so verify cannot gate it yet. Run detect for " + "suggested_sources, add the missing surface to tool_sources, then " + "run verify before completing." + ), + ) return AgentResultNextAction( actor="coding_agent", kind="warn", command=_VERIFY_PREVIEW_COMMAND, why=( "This diff changes a tool/capability surface that shipgate.yaml does not " - "declare, so neither check nor verify gates it yet. Declare the surface " - "from verify preview guidance or add it to tool_sources, then run verify " - "before completing." + "declare because no manifest is configured yet. Run verify preview so it " + "can route setup when Shipgate is relevant, then run verify before " + "completing." ), ) -def _undeclared_summary(surfaces: list[str]) -> str: +def _undeclared_summary(surfaces: list[str], *, manifest_present: bool) -> str: + if manifest_present: + return ( + "No Codex boundary rule fired, but the diff changes a tool/capability " + f"surface ({', '.join(surfaces[:5])}) that shipgate.yaml does not " + "declare, so verify cannot gate it yet. Run detect for suggested_sources, " + "add the missing surface to tool_sources, then run verify before reporting " + "completion." 
+ ) return ( "No Codex boundary rule fired, but the diff changes a tool/capability surface " f"({', '.join(surfaces[:5])}) that shipgate.yaml does not declare, so verify " - "cannot gate it yet. Use verify preview guidance or add it to tool_sources, " - "then run verify before reporting completion." + "cannot gate it yet. Run verify preview to route setup when Shipgate is " + "relevant, then run verify before reporting completion." ) @@ -1402,7 +1441,7 @@ def _undeclared_trace(surfaces: list[str]) -> AgentResultTraceEvent: step="coverage", summary=( f"boundary_only: {len(surfaces)} undeclared tool surface(s) changed; " - "routed to verify preview + verify." + "routed to declaration guidance before verify." ), ) diff --git a/tests/test_codex_boundary_check.py b/tests/test_codex_boundary_check.py index bf56e1b6..7beb1050 100644 --- a/tests/test_codex_boundary_check.py +++ b/tests/test_codex_boundary_check.py @@ -248,8 +248,10 @@ def test_check_does_not_warn_on_docs_change_in_opted_in_repo(tmp_path: Path) -> # --- Undeclared coverage gap: a changed file IS a tool surface but the -------- # manifest does not declare it (or there is no manifest). verify only gates -# declared surfaces, so route through verify preview before full verify rather -# than a clean allow or a full verify that never scans it. +# declared surfaces, so adopted repos route to detect for suggested_sources +# before full verify; unconfigured repos route through verify preview for setup +# guidance. Either way, do not return a clean allow or a full verify that never +# scans the surface. # A second changed file that is an *undeclared* tool surface (an OpenAPI spec), # used to exercise mixed declared+undeclared diffs (review finding P1). 
@@ -263,31 +265,38 @@ def test_check_does_not_warn_on_docs_change_in_opted_in_repo(tmp_path: Path) -> ) -def test_undeclared_surface_warns_and_routes_to_verify_preview(tmp_path: Path) -> None: +def test_undeclared_surface_warns_and_routes_to_detect_when_manifest_present( + tmp_path: Path, +) -> None: result = evaluate_codex_boundary_result( workspace=tmp_path, diff_text=_TOOL_SOURCE_DIFF, agent="claude-code", undeclared_capability_surfaces=["mcp-tools.json"], + manifest_present=True, ) payload = result.model_dump(mode="json", exclude_none=True) _validate(payload) - # Was a bare allow before the fix; now a warn that routes to verify preview. + # Was a bare allow before the fix; now a warn that routes to detect so the + # agent gets suggested_sources before editing shipgate.yaml. assert payload["decision"] == "warn" assert payload["completion_allowed"] is True assert payload["must_stop"] is False assert payload["first_next_action"]["kind"] == "warn" - assert payload["first_next_action"]["command"].startswith("shipgate verify --preview") + assert payload["first_next_action"]["command"] == "shipgate detect --workspace . --json" + assert "suggested_sources" in payload["first_next_action"]["why"] assert any(d["code"] == "undeclared_capability_surface" for d in payload["diagnostics"]) assert any(t["step"] == "coverage" for t in payload["trace"]) - assert payload["suggested_fixes"][0].startswith("shipgate verify --preview") + assert payload["suggested_fixes"][0] == "shipgate detect --workspace . --json" assert any(fix.startswith("shipgate verify") for fix in payload["suggested_fixes"]) -def test_mixed_declared_and_undeclared_routes_to_verify_preview(tmp_path: Path) -> None: +def test_mixed_declared_and_undeclared_routes_to_detect_when_manifest_present( + tmp_path: Path, +) -> None: # Review finding P1: a diff that changes BOTH a declared surface (verify - # gates it) and an undeclared one (verify does not) must route to verify - # preview before full verify. 
Undeclared takes precedence over the declared + # gates it) and an undeclared one (verify does not) must route to detect + # before full verify. Undeclared takes precedence over the declared # coverage gap. result = evaluate_codex_boundary_result( workspace=tmp_path, @@ -295,9 +304,10 @@ def test_mixed_declared_and_undeclared_routes_to_verify_preview(tmp_path: Path) agent="claude-code", capability_surfaces_changed=["mcp-tools.json"], undeclared_capability_surfaces=["api/openapi.yaml"], + manifest_present=True, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("shipgate verify --preview") + assert result.first_next_action.command == "shipgate detect --workspace . --json" payload = result.model_dump(mode="json", exclude_none=True) diag = next(d for d in payload["diagnostics"] if d["code"] == "undeclared_capability_surface") assert "api/openapi.yaml" in diag["message"] @@ -324,8 +334,8 @@ def test_capability_add_to_undeclared_surface_warns_when_manifest_declares_other ) -> None: # Manifest exists but declares a *different* tool source than the changed # file. The declared-coverage path does not match, so the undeclared path - # must catch it and route through verify preview, not a full verify that - # never scans it. + # must catch it and route to detect for suggested_sources, not a full verify + # that never scans it. _write_manifest( tmp_path, " - id: other\n type: mcp\n path: other-tools.json\n trust: internal\n", @@ -338,12 +348,13 @@ def test_capability_add_to_undeclared_surface_warns_when_manifest_declares_other policy=None, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("shipgate verify --preview") + assert result.first_next_action.command == "shipgate detect --workspace . 
--json" payload = result.model_dump(mode="json", exclude_none=True) assert any(d["code"] == "undeclared_capability_surface" for d in payload["diagnostics"]) + assert "suggested_sources" in payload["first_next_action"]["why"] -def test_mixed_declared_and_undeclared_via_check_routes_to_verify_preview( +def test_mixed_declared_and_undeclared_via_check_routes_to_detect( tmp_path: Path, ) -> None: # Review finding P1, end-to-end through build_codex_agent_result: manifest @@ -360,7 +371,7 @@ def test_mixed_declared_and_undeclared_via_check_routes_to_verify_preview( policy=None, ) assert result.decision == "warn" - assert result.first_next_action.command.startswith("shipgate verify --preview") + assert result.first_next_action.command == "shipgate detect --workspace . --json" payload = result.model_dump(mode="json", exclude_none=True) diag = next(d for d in payload["diagnostics"] if d["code"] == "undeclared_capability_surface") assert "api/openapi.yaml" in diag["message"] diff --git a/tests/test_verify.py b/tests/test_verify.py index 194baa22..7a249252 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -1282,7 +1282,7 @@ def test_verify_preview_docs_only_diff_does_not_recommend_init(tmp_path: Path) - assert payload["trigger"]["should_run"] is False assert payload["first_next_action"]["kind"] == "command" assert payload["first_next_action"]["command"] == ( - f"agents-shipgate init --workspace {repo} --write --ci --agent-instructions=default --json" + f"shipgate init --workspace {repo} --write --ci --agent-instructions=default --json" ) @@ -1314,7 +1314,7 @@ def test_verify_preview_missing_base_without_manifest_recommends_init(tmp_path: assert payload["mode"] == "preview" assert payload["first_next_action"]["kind"] == "command" assert payload["first_next_action"]["command"] == ( - f"agents-shipgate init --workspace {repo} --write --ci --agent-instructions=default --json" + f"shipgate init --workspace {repo} --write --ci --agent-instructions=default --json" ) 
assert payload["base_notes"] @@ -1352,7 +1352,7 @@ def test_verify_preview_configured_repo_preserves_exact_verify_args(tmp_path: Pa payload = json.loads(result.output) assert payload["mode"] == "preview" assert payload["first_next_action"]["command"] == ( - f"agents-shipgate verify --workspace {repo} --config shipgate.yaml " + f"shipgate verify --workspace {repo} --config shipgate.yaml " f"--base origin/main --head HEAD --out {out} --ci-mode advisory --json" ) From 4a2c1f894789c20bbaa2e24619c39fece829d1fc Mon Sep 17 00:00:00 2001 From: Pengfei Hu Date: Sat, 27 Jun 2026 21:08:43 -0700 Subject: [PATCH 3/3] Harden root help visibility test --- tests/test_cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 39c0c0b4..3a5900e9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,5 @@ import json +import re from pathlib import Path from types import UnionType from typing import Union, get_args, get_origin @@ -58,6 +59,11 @@ from agents_shipgate.schemas.verifier import VerifierArtifact runner = CliRunner() +ANSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]") + + +def _plain_cli_output(text: str) -> str: + return ANSI_RE.sub("", text) def test_cli_advisory_exits_zero(tmp_path): @@ -468,17 +474,18 @@ def test_cli_help_hides_niche_commands_but_keeps_them_invokable(): root = get_command(app) result = runner.invoke(app, ["--help"]) assert result.exit_code == 0 + output = _plain_cli_output(result.output) for name in HIDDEN_TOP_LEVEL_COMMANDS: assert root.commands[name].hidden, f"{name} should be hidden from --help" - assert f"│ {name}" not in result.output + assert f"│ {name}" not in output # Hidden, not removed: the command still resolves and answers --help. 
invoked = runner.invoke(app, [name, "--help"]) assert invoked.exit_code == 0, f"{name} must remain invokable: {invoked.output}" for name in VISIBLE_CORE_COMMANDS: assert not root.commands[name].hidden, f"{name} must stay visible" - assert f" {name} " in result.output + assert re.search(rf"\b{re.escape(name)}\b", output), output def test_cli_tool_surface_summary_detects_no_changes():