From 5d8fa5d2d09ad06652be7607c04a494a27579b4a Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 13:52:16 -0700 Subject: [PATCH 1/9] fix: harden truth and coverage gates --- README.md | 9 + apps/dashboard/vitest.config.mts | 4 +- apps/desktop/scripts/playwright-tempdir.mjs | 2 +- .../src/components/chain/ChainPanel.test.tsx | 8 +- .../copilot/DesktopCopilotPanel.test.tsx | 98 ++++++ .../desktop/src/hooks/useDesktopData.test.tsx | 22 +- apps/desktop/src/lib/desktopUi.test.ts | 17 +- apps/desktop/src/lib/uiError.test.ts | 10 +- .../src/pages/AgentsRoleConfigPanel.test.tsx | 174 +++++++++ apps/desktop/src/pages/EventsPage.test.tsx | 4 +- apps/desktop/src/pages/ReviewsPage.test.tsx | 6 +- apps/desktop/src/pages/TestsPage.test.tsx | 7 +- .../pages/coverage_sprint_f_pages.test.tsx | 8 +- .../pages/workflow_queue_controls.test.tsx | 2 +- .../src/cortexpilot_orch/gates/tests_gate.py | 53 ++- .../services/control_plane_read_service.py | 17 +- .../tests/test_bench_e2e_speed_gate.py | 100 ++++++ .../tests/test_control_plane_read_service.py | 330 ++++++++++++++++++ .../test_mcp_queue_pilot_server_branches.py | 132 +++++++ .../tests/test_repo_coverage_gate.py | 40 +++ .../tests/test_tests_gate_extended.py | 57 +++ configs/env.registry.json | 36 ++ .../storefront/benchmark-methodology.md | 18 + package.json | 7 +- scripts/check_bench_e2e_speed_gate.py | 135 +++++++ scripts/repo_coverage_gate.py | 44 ++- 26 files changed, 1272 insertions(+), 68 deletions(-) create mode 100644 apps/desktop/src/components/copilot/DesktopCopilotPanel.test.tsx create mode 100644 apps/desktop/src/pages/AgentsRoleConfigPanel.test.tsx create mode 100644 apps/orchestrator/tests/test_bench_e2e_speed_gate.py create mode 100644 apps/orchestrator/tests/test_control_plane_read_service.py create mode 100644 apps/orchestrator/tests/test_mcp_queue_pilot_server_branches.py create mode 100644 apps/orchestrator/tests/test_repo_coverage_gate.py create mode 100644 scripts/check_bench_e2e_speed_gate.py diff --git a/README.md b/README.md index 8ba5279..9126d8c 100644 --- a/README.md +++ b/README.md @@ -526,6 +526,8 @@ Default local verification path: npm run ci npm run test:quick npm run test +npm run mutation:gate +npm run bench:e2e:speed:gate ``` `npm run ci` is now the hosted-aligned local fast gate. Use @@ -533,6 +535,13 @@ npm run test `npm run scan:workflow-security`, `npm run scan:trivy`, and `npm run security:scan:closeout` only when you intentionally want the stricter closeout/manual layers. +`npm run mutation:gate` is the root mutation entrypoint for the existing +Orchestrator mutation profiles, `npm run bench:e2e:speed:gate` is the +fail-closed benchmark gate that evaluates a real benchmark summary once a run +has produced one, and `npm run coverage:repo` now points to the active +coverage runner that prepares subproject dependencies before generating fresh +repo-level coverage receipts. Use `npm run coverage:repo:aggregate` only when +you intentionally want to re-aggregate already-existing coverage artifacts. Current CI contract has five layers only: diff --git a/apps/dashboard/vitest.config.mts b/apps/dashboard/vitest.config.mts index bc44f9e..adaab2e 100644 --- a/apps/dashboard/vitest.config.mts +++ b/apps/dashboard/vitest.config.mts @@ -27,7 +27,9 @@ if (pool !== requestedPool) { } const shouldEmitHtmlCoverage = !process.env.CI || process.env.CORTEXPILOT_COVERAGE_HTML === "1"; const coverageReporter = shouldEmitHtmlCoverage ? 
["text", "html", "json-summary"] : ["text", "json-summary"]; -const coverageReportsDirectory = path.resolve(process.cwd(), "coverage"); +const coverageReportsDirectory = process.env.CORTEXPILOT_DASHBOARD_COVERAGE_DIR + ? path.resolve(process.env.CORTEXPILOT_DASHBOARD_COVERAGE_DIR) + : path.resolve(process.cwd(), "coverage"); const coverageClean = !serialCoverageMode; const coverageProcessingConcurrency = serialCoverageMode ? 1 : undefined; const testTimeout = process.env.CI ? 45000 : 15000; diff --git a/apps/desktop/scripts/playwright-tempdir.mjs b/apps/desktop/scripts/playwright-tempdir.mjs index 3d5be06..7fa4c67 100644 --- a/apps/desktop/scripts/playwright-tempdir.mjs +++ b/apps/desktop/scripts/playwright-tempdir.mjs @@ -14,7 +14,7 @@ function sanitizeScope(scope) { function resolveTempRoot(scriptDir) { const runnerTemp = normalizeValue(process.env.RUNNER_TEMP); if (runnerTemp) return resolve(runnerTemp); - return resolve(scriptDir, "..", "..", "..", ".runtime-cache", "temp"); + return resolve(scriptDir, "..", "..", "..", ".runtime-cache", "cache", "tmp"); } export function configurePlaywrightTempDir(scope) { diff --git a/apps/desktop/src/components/chain/ChainPanel.test.tsx b/apps/desktop/src/components/chain/ChainPanel.test.tsx index 73e5dd4..25191f0 100644 --- a/apps/desktop/src/components/chain/ChainPanel.test.tsx +++ b/apps/desktop/src/components/chain/ChainPanel.test.tsx @@ -51,9 +51,9 @@ describe("ChainPanel", () => { /> ); - fireEvent.click(screen.getByRole("button", { name: "简洁视图" })); - fireEvent.click(screen.getByRole("button", { name: "详细视图" })); - fireEvent.click(screen.getByRole("button", { name: "Chain 优先" })); + fireEvent.click(screen.getByRole("button", { name: "Compact view" })); + fireEvent.click(screen.getByRole("button", { name: "Detailed view" })); + fireEvent.click(screen.getByRole("button", { name: "Chain first" })); expect(setChainDisplayMode).toHaveBeenCalledWith("compact"); expect(setChainDisplayMode).toHaveBeenCalledWith("detail"); @@ -79,7 +79,7 @@ describe("ChainPanel", () => { /> ); - const legend = screen.getByLabelText("节点状态说明"); + const legend = screen.getByLabelText("Node status legend"); const items = legend.querySelectorAll("li"); expect(items).toHaveLength(2); expect(items[0]).toHaveClass("is-active"); diff --git a/apps/desktop/src/components/copilot/DesktopCopilotPanel.test.tsx b/apps/desktop/src/components/copilot/DesktopCopilotPanel.test.tsx new file mode 100644 index 0000000..50e9059 --- /dev/null +++ b/apps/desktop/src/components/copilot/DesktopCopilotPanel.test.tsx @@ -0,0 +1,98 @@ +import { fireEvent, render, screen, waitFor } from "@testing-library/react"; +import { describe, expect, it, vi } from "vitest"; + +import { DesktopCopilotPanel } from "./DesktopCopilotPanel"; + +describe("DesktopCopilotPanel", () => { + it("renders operator-brief truth surfaces and grounded takeaways after generation", async () => { + const loadBrief = vi.fn().mockResolvedValue({ + report_type: "operator_copilot_brief", + status: "AVAILABLE", + scope: "run_detail", + subject_id: "run-123", + summary: "The operator should compare the staged diff before accepting the run.", + likely_cause: "The last proof pack is stale.", + compare_takeaway: "Compare the staged diff against the last approved run.", + proof_takeaway: "Refresh the proof pack before asking for review.", + incident_takeaway: "Treat stale proof as an incident until it is re-generated.", + queue_takeaway: "Keep the queue paused until proof is current.", + approval_takeaway: "Approval should wait for a 
fresh proof receipt.", + used_truth_surfaces: ["run_detail", "", "proof_pack"], + limitations: ["review not started", " "], + recommended_actions: ["Refresh proof", "Request review", " "], + top_risks: ["stale-proof", "", "queue drift"], + }); + + render( + , + ); + + expect(screen.getByText("Only grounded control-plane truth belongs here.")).toBeInTheDocument(); + expect(screen.getByText("What is blocked?")).toBeInTheDocument(); + expect(screen.getByText("What should the operator do next?")).toBeInTheDocument(); + expect(screen.getByText("On demand")).toBeInTheDocument(); + + fireEvent.click(screen.getByRole("button", { name: "Generate operator brief" })); + + expect(await screen.findByText("Grounded brief")).toBeInTheDocument(); + expect(await screen.findByText("The operator should compare the staged diff before accepting the run.")).toBeInTheDocument(); + expect(screen.getByText("The last proof pack is stale.")).toBeInTheDocument(); + expect(screen.getByText("Scope: run_detail")).toBeInTheDocument(); + expect(screen.getByText("Subject: run-123")).toBeInTheDocument(); + expect(screen.getByText("Truth surfaces: run_detail | proof_pack")).toBeInTheDocument(); + expect(screen.getByText("Limitations: review not started")).toBeInTheDocument(); + expect(screen.getByText("Compare the staged diff against the last approved run.")).toBeInTheDocument(); + expect(screen.getByText("Keep the queue paused until proof is current.")).toBeInTheDocument(); + expect(screen.getByText("Refresh proof")).toBeInTheDocument(); + expect(screen.getByText("queue drift")).toBeInTheDocument(); + expect(screen.getByRole("button", { name: "Regenerate brief" })).toBeInTheDocument(); + + expect(loadBrief).toHaveBeenCalledTimes(1); + }); + + it("covers flight-plan fallback labels and empty action/risk lists", async () => { + const loadBrief = vi.fn().mockResolvedValue({ + report_type: "flight_plan_copilot_brief", + status: "UNAVAILABLE", + summary: "The plan is still advisory because execution has not started yet.", + risk_takeaway: "Approval is still blocked on a missing operator confirmation.", + capability_takeaway: "Runtime capability is unresolved until the runner binds.", + approval_takeaway: "An operator must confirm the start gate before execution.", + used_truth_surfaces: ["execution_plan_preview"], + recommended_actions: ["", " "], + top_risks: [], + limitations: undefined, + }); + + render(); + + fireEvent.click(screen.getByRole("button", { name: "Generate operator brief" })); + + expect(await screen.findByText("Unavailable")).toBeInTheDocument(); + expect(screen.getByText("Scope: flight_plan")).toBeInTheDocument(); + expect(screen.getByText("Subject: execution_plan_report")).toBeInTheDocument(); + expect(screen.getByText("Truth surfaces: execution_plan_preview")).toBeInTheDocument(); + expect(screen.getByText("Limitations: -")).toBeInTheDocument(); + expect(screen.getAllByText("Approval is still blocked on a missing operator confirmation.").length).toBeGreaterThan(0); + expect(screen.getByText("This brief stays advisory until a run actually starts.")).toBeInTheDocument(); + expect(screen.getByText("No recommended actions were returned.")).toBeInTheDocument(); + expect(screen.getByText("No explicit risks were returned.")).toBeInTheDocument(); + }); + + it("surfaces load failures without leaving the panel in generating state", async () => { + const loadBrief = vi.fn().mockRejectedValue("brief backend unavailable"); + + render(); + + fireEvent.click(screen.getByRole("button", { name: "Generate operator brief" })); 
+ + expect(await screen.findByText("brief backend unavailable")).toBeInTheDocument(); + await waitFor(() => { + expect(screen.getByRole("button", { name: "Generate operator brief" })).toBeEnabled(); + }); + }); +}); diff --git a/apps/desktop/src/hooks/useDesktopData.test.tsx b/apps/desktop/src/hooks/useDesktopData.test.tsx index 6d5ea46..df8e113 100644 --- a/apps/desktop/src/hooks/useDesktopData.test.tsx +++ b/apps/desktop/src/hooks/useDesktopData.test.tsx @@ -104,7 +104,9 @@ describe("useDesktopData", () => { const user = userEvent.setup(); render(); await waitFor(() => { - expect(screen.getByTestId("live-error")).toHaveTextContent("总览数据拉取失败"); + expect(screen.getByTestId("live-error")).toHaveTextContent( + "Failed to refresh overview data: the service is temporarily unavailable. Try again in a moment.", + ); }); overviewFail = false; @@ -131,7 +133,9 @@ describe("useDesktopData", () => { ); render(); await waitFor(() => { - expect(screen.getByTestId("live-error")).toHaveTextContent("会话列表拉取失败"); + expect(screen.getByTestId("live-error")).toHaveTextContent( + "Failed to refresh the session list: the service is temporarily unavailable. Try again in a moment.", + ); }); }); @@ -154,7 +158,9 @@ describe("useDesktopData", () => { ); render(); await waitFor(() => { - expect(screen.getByTestId("live-error")).toHaveTextContent("后端暂不可达,已进入退避重试"); + expect(screen.getByTestId("live-error")).toHaveTextContent( + "The backend is currently unreachable. Backoff retry is active and local actions can continue.", + ); }); }); @@ -233,7 +239,9 @@ describe("useDesktopData", () => { try { render(); await waitFor(() => { - expect(screen.getByTestId("live-error")).toHaveTextContent("当前网络离线,已暂停实时拉取。恢复联网后将自动重试。"); + expect(screen.getByTestId("live-error")).toHaveTextContent( + "The network is offline. Live polling is paused and will retry automatically when connectivity returns.", + ); }); } finally { Object.defineProperty(window.navigator, "onLine", { configurable: true, value: originalOnLine }); @@ -260,7 +268,9 @@ describe("useDesktopData", () => { render(); await waitFor(() => { - expect(screen.getByTestId("live-error")).toHaveTextContent("会话列表拉取失败:权限或认证异常,请确认登录状态。"); + expect(screen.getByTestId("live-error")).toHaveTextContent( + "Failed to refresh the session list: authentication or permission check failed. 
Confirm your sign-in state.", + ); }); }); @@ -380,7 +390,7 @@ describe("useDesktopData", () => { try { render(); await waitFor(() => { - expect(screen.getByTestId("live-error")).toHaveTextContent("策略告警拉取失败"); + expect(screen.getByTestId("live-error")).toHaveTextContent("Failed to refresh policy alerts"); }); expect(consoleSpy).toHaveBeenCalled(); } finally { diff --git a/apps/desktop/src/lib/desktopUi.test.ts b/apps/desktop/src/lib/desktopUi.test.ts index c3700f9..375fea2 100644 --- a/apps/desktop/src/lib/desktopUi.test.ts +++ b/apps/desktop/src/lib/desktopUi.test.ts @@ -49,7 +49,7 @@ describe("desktopUi seed timeline", () => { ), ); - fireEvent.click(screen.getByRole("button", { name: "查看完整 Diff" })); + fireEvent.click(screen.getByRole("button", { name: "View full diff" })); expect(onViewDiff).toHaveBeenCalledWith("report-1"); }); @@ -108,8 +108,8 @@ describe("desktopUi seed timeline", () => { render(createElement("div", null, renderChatEmbed(message as any, embed as any, chooseDecision))); - expect(screen.getByText("推荐")).toBeInTheDocument(); - fireEvent.click(screen.getByRole("button", { name: "选择" })); + expect(screen.getByText("Recommended")).toBeInTheDocument(); + fireEvent.click(screen.getByRole("button", { name: "Choose" })); expect(chooseDecision).toHaveBeenCalledWith("msg-decision", "decision-1", "fast"); }); @@ -157,10 +157,11 @@ describe("desktopUi seed timeline", () => { ) ); - expect(screen.getByText("任务:")).toBeInTheDocument(); - expect(screen.getAllByText("进行中")).toHaveLength(2); - expect(screen.getByText("等待")).toBeInTheDocument(); - expect(screen.getByText("完成")).toBeInTheDocument(); - expect(screen.getByLabelText("警报卡片")).toHaveClass("is-critical"); + expect(screen.getByText("Task:")).toBeInTheDocument(); + expect(screen.getByText("进行中")).toBeInTheDocument(); + expect(screen.getAllByText("In progress")).toHaveLength(1); + expect(screen.getByText("Waiting")).toBeInTheDocument(); + expect(screen.getByText("Done")).toBeInTheDocument(); + expect(screen.getByLabelText("Alert card")).toHaveClass("is-critical"); }); }); diff --git a/apps/desktop/src/lib/uiError.test.ts b/apps/desktop/src/lib/uiError.test.ts index 1232157..9e7bd56 100644 --- a/apps/desktop/src/lib/uiError.test.ts +++ b/apps/desktop/src/lib/uiError.test.ts @@ -7,13 +7,13 @@ describe("uiError", () => { }); it("maps network-style messages", () => { - expect(sanitizeUiError(new Error("Network timeout"), "加载失败")).toContain("未连接到本地服务"); - expect(sanitizeUiError(new Error("fetch failed"), "加载失败")).toContain("未连接到本地服务"); + expect(sanitizeUiError(new Error("Network timeout"), "Load failed")).toContain("unable to reach the local service"); + expect(sanitizeUiError(new Error("fetch failed"), "Load failed")).toContain("unable to reach the local service"); }); it("maps auth-style messages", () => { - expect(sanitizeUiError(new Error("401 unauthorized"), "加载失败")).toContain("权限或认证异常"); - expect(sanitizeUiError(new Error("token invalid"), "加载失败")).toContain("权限或认证异常"); + expect(sanitizeUiError(new Error("401 unauthorized"), "Load failed")).toContain("authentication or permission check failed"); + expect(sanitizeUiError(new Error("token invalid"), "Load failed")).toContain("authentication or permission check failed"); }); it("keeps generic fallback for unknown errors", () => { @@ -21,7 +21,7 @@ describe("uiError", () => { }); it("maps backend 5xx-style messages", () => { - expect(sanitizeUiError(new Error("API /path failed: 503"), "加载失败")).toContain("服务暂时不可用"); + expect(sanitizeUiError(new Error("API /path failed: 503"), "Load 
failed")).toContain("service is temporarily unavailable"); }); it("extracts detail from unknown payload", () => { diff --git a/apps/desktop/src/pages/AgentsRoleConfigPanel.test.tsx b/apps/desktop/src/pages/AgentsRoleConfigPanel.test.tsx new file mode 100644 index 0000000..e41e723 --- /dev/null +++ b/apps/desktop/src/pages/AgentsRoleConfigPanel.test.tsx @@ -0,0 +1,174 @@ +import { fireEvent, render, screen, waitFor } from "@testing-library/react"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import { AgentsRoleConfigPanel } from "./AgentsRoleConfigPanel"; + +vi.mock("../lib/api", () => ({ + applyRoleConfig: vi.fn(), + fetchRoleConfig: vi.fn(), + mutationExecutionCapability: vi.fn(() => ({ executable: false, operatorRole: null })), + previewRoleConfig: vi.fn(), +})); + +import { applyRoleConfig, fetchRoleConfig, mutationExecutionCapability, previewRoleConfig } from "../lib/api"; + +function makeSurface(overrides: Record = {}) { + return { + persisted_source: "policies/role_config_registry.json", + execution_authority: "task_contract", + editable_now: { + system_prompt_ref: "policies/agents/codex/roles/20_planner_core.md", + skills_bundle_ref: "policies/skills_bundle_registry.json#bundles.planner", + mcp_bundle_ref: "policies/agent_registry.json#agents(role=PLANNER).capabilities.mcp_tools", + runtime_binding: { + runner: "agents", + provider: "cliproxyapi", + model: "gpt-5.4", + }, + }, + ...overrides, + } as any; +} + +describe("AgentsRoleConfigPanel", () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(mutationExecutionCapability).mockReturnValue({ executable: false, operatorRole: null } as any); + }); + + it("shows the empty-state desk when no roles are available", () => { + render(); + + expect(screen.getByRole("heading", { name: "Role configuration desk" })).toBeInTheDocument(); + expect(screen.getByText("No registered roles are available for configuration yet.")).toBeInTheDocument(); + }); + + it("supports preview mode and reports role-load failures when switching roles", async () => { + let resolveFirstFetch: (value: any) => void = () => {}; + vi.mocked(fetchRoleConfig) + .mockImplementationOnce(() => new Promise((resolve) => { + resolveFirstFetch = resolve; + }) as any) + .mockRejectedValueOnce("role config fetch failed"); + vi.mocked(previewRoleConfig).mockResolvedValue({ + changes: [ + { field: "runtime_binding.runner", current: "agents", next: "codex" }, + ], + preview_surface: { + runtime_capability: { + lane: "tool-capable-provider", + tool_execution: "available", + }, + }, + } as any); + + render( + , + ); + + expect(screen.getByText("Loading role configuration…")).toBeInTheDocument(); + + resolveFirstFetch(makeSurface()); + expect(await screen.findByText("Drive wave planning")).toBeInTheDocument(); + expect(screen.getByText("Preview only")).toBeInTheDocument(); + expect(screen.getByText("Preview is available, but saving defaults requires an operator role.")).toBeInTheDocument(); + + fireEvent.change(screen.getByLabelText("Runtime runner"), { target: { value: "codex" } }); + fireEvent.click(screen.getByRole("button", { name: "Preview defaults" })); + + await waitFor(() => { + expect(previewRoleConfig).toHaveBeenCalledWith("PLANNER", { + system_prompt_ref: "policies/agents/codex/roles/20_planner_core.md", + skills_bundle_ref: "policies/skills_bundle_registry.json#bundles.planner", + mcp_bundle_ref: "policies/agent_registry.json#agents(role=PLANNER).capabilities.mcp_tools", + runtime_binding: { + runner: "codex", + provider: "cliproxyapi", + model: 
"gpt-5.4", + }, + }); + }); + await waitFor(() => { + expect(screen.getAllByText("Runtime runner").length).toBeGreaterThan(0); + }); + expect(screen.getByText("agents → codex")).toBeInTheDocument(); + expect(screen.getByText("tool-capable-provider")).toBeInTheDocument(); + + fireEvent.change(screen.getByLabelText("Select role for role configuration"), { target: { value: "REVIEWER" } }); + + expect(await screen.findByText("role config fetch failed")).toBeInTheDocument(); + expect(screen.getByText("No role purpose published yet.")).toBeInTheDocument(); + }); + + it("applies repo defaults when mutation execution is enabled", async () => { + const onApplied = vi.fn().mockResolvedValue(undefined); + + vi.mocked(fetchRoleConfig).mockResolvedValue(makeSurface()); + vi.mocked(mutationExecutionCapability).mockReturnValue({ executable: true, operatorRole: "OPS" } as any); + vi.mocked(previewRoleConfig).mockResolvedValue({ + changes: [], + preview_surface: { + runtime_capability: { + lane: "standard-provider-path", + tool_execution: "provider-path-required", + }, + }, + } as any); + vi.mocked(applyRoleConfig).mockResolvedValue({ + role: "PLANNER", + surface: makeSurface({ + editable_now: { + system_prompt_ref: "policies/agents/codex/roles/30_ops.md", + skills_bundle_ref: "policies/skills_bundle_registry.json#bundles.planner", + mcp_bundle_ref: "policies/agent_registry.json#agents(role=PLANNER).capabilities.mcp_tools", + runtime_binding: { + runner: "codex", + provider: null, + model: null, + }, + }, + }), + } as any); + + render( + , + ); + + expect(await screen.findByText("Apply enabled for OPS")).toBeInTheDocument(); + + fireEvent.change(screen.getByLabelText("System prompt ref"), { + target: { value: " policies/agents/codex/roles/30_ops.md " }, + }); + fireEvent.change(screen.getByLabelText("Runtime runner"), { target: { value: "codex" } }); + fireEvent.change(screen.getByLabelText("Runtime provider"), { target: { value: " " } }); + fireEvent.change(screen.getByLabelText("Runtime model"), { target: { value: "" } }); + + fireEvent.click(screen.getByRole("button", { name: "Save repo defaults" })); + + await waitFor(() => { + expect(applyRoleConfig).toHaveBeenCalledWith("PLANNER", { + system_prompt_ref: "policies/agents/codex/roles/30_ops.md", + skills_bundle_ref: "policies/skills_bundle_registry.json#bundles.planner", + mcp_bundle_ref: "policies/agent_registry.json#agents(role=PLANNER).capabilities.mcp_tools", + runtime_binding: { + runner: "codex", + provider: null, + model: null, + }, + }); + }); + + expect(await screen.findByText("Saved repo-owned defaults for PLANNER.")).toBeInTheDocument(); + expect(onApplied).toHaveBeenCalledTimes(1); + expect(screen.getByText("codex / Not set / Not set")).toBeInTheDocument(); + }); +}); diff --git a/apps/desktop/src/pages/EventsPage.test.tsx b/apps/desktop/src/pages/EventsPage.test.tsx index b4b21bb..f640fdc 100644 --- a/apps/desktop/src/pages/EventsPage.test.tsx +++ b/apps/desktop/src/pages/EventsPage.test.tsx @@ -30,7 +30,7 @@ describe("EventsPage", () => { const user = userEvent.setup(); render(); - const rowToggle = await screen.findByRole("button", { name: "查看事件详情 TEST_EVENT" }); + const rowToggle = await screen.findByRole("button", { name: "View event details TEST_EVENT" }); expect(rowToggle).toHaveAttribute("aria-expanded", "false"); await user.click(rowToggle); @@ -57,7 +57,7 @@ describe("EventsPage", () => { const user = userEvent.setup(); render(); - const rowToggle = await screen.findByRole("button", { name: "查看事件详情 KEYBOARD_EVENT" }); + const 
rowToggle = await screen.findByRole("button", { name: "View event details KEYBOARD_EVENT" }); rowToggle.focus(); await user.keyboard("{Enter}"); expect(rowToggle).toHaveAttribute("aria-expanded", "true"); diff --git a/apps/desktop/src/pages/ReviewsPage.test.tsx b/apps/desktop/src/pages/ReviewsPage.test.tsx index ec4eb65..31ea985 100644 --- a/apps/desktop/src/pages/ReviewsPage.test.tsx +++ b/apps/desktop/src/pages/ReviewsPage.test.tsx @@ -33,10 +33,10 @@ describe("ReviewsPage", () => { ] as any); const user = userEvent.setup(); render(); - expect(screen.getByRole("button", { name: "刷新中..." })).toBeDisabled(); + expect(screen.getByRole("button", { name: "Refreshing..." })).toBeDisabled(); resolveFirstFetch([]); - expect(await screen.findByText("暂无评审记录")).toBeInTheDocument(); - await user.click(screen.getByRole("button", { name: "刷新" })); + expect(await screen.findByText("No review records yet")).toBeInTheDocument(); + await user.click(screen.getByRole("button", { name: "Refresh" })); expect(await screen.findByText("run-1")).toBeInTheDocument(); expect(screen.getByText("looks good")).toBeInTheDocument(); expect(screen.getByText("Scope: ok")).toBeInTheDocument(); diff --git a/apps/desktop/src/pages/TestsPage.test.tsx b/apps/desktop/src/pages/TestsPage.test.tsx index dcb0c48..e440dc1 100644 --- a/apps/desktop/src/pages/TestsPage.test.tsx +++ b/apps/desktop/src/pages/TestsPage.test.tsx @@ -13,6 +13,7 @@ import { fetchTests } from "../lib/api"; describe("TestsPage", () => { beforeEach(() => { vi.clearAllMocks(); + vi.mocked(fetchTests).mockReset(); }); it("renders empty state and then status cards after refresh", async () => { @@ -25,11 +26,11 @@ describe("TestsPage", () => { command: "pnpm test", failure_info: "snapshot mismatch", }, - ] as any); + ] as any); const user = userEvent.setup(); render(); - expect(await screen.findByText("暂无测试记录")).toBeInTheDocument(); - await user.click(screen.getByRole("button", { name: "刷新" })); + expect(await screen.findByText("No test records yet")).toBeInTheDocument(); + await user.click(screen.getByRole("button", { name: "Refresh" })); expect(await screen.findByText("回归检查")).toBeInTheDocument(); expect(screen.getByText("pnpm test")).toBeInTheDocument(); expect(screen.getByText("snapshot mismatch")).toBeInTheDocument(); diff --git a/apps/desktop/src/pages/coverage_sprint_f_pages.test.tsx b/apps/desktop/src/pages/coverage_sprint_f_pages.test.tsx index ed15b13..54de14c 100644 --- a/apps/desktop/src/pages/coverage_sprint_f_pages.test.tsx +++ b/apps/desktop/src/pages/coverage_sprint_f_pages.test.tsx @@ -125,13 +125,13 @@ describe("coverage sprint F: low-branch pages", () => { locks: [], role_catalog: [], } as FirstAgentsPayload); - expect(await screen.findByText(/活跃状态机|Active State Machines/)).toBeInTheDocument(); - expect(screen.getByText(/注册代理 \(1\)|Registered Agents \(1\)/)).toBeInTheDocument(); + expect(await screen.findByText("Execution lane triage")).toBeInTheDocument(); + expect(screen.getByText("Registered execution seats (expandable, 1 items)")).toBeInTheDocument(); expect(screen.getByText("run-12345678")).toBeInTheDocument(); fireEvent.click(screen.getByRole("button", { name: /刷新|Refresh/ })); - expect(await screen.findByText(/暂无注册代理|No agents are registered yet/)).toBeInTheDocument(); - expect(screen.queryByText(/活跃状态机|Active state machines/)).not.toBeInTheDocument(); + expect(await screen.findByText("No registered agents")).toBeInTheDocument(); + expect(screen.queryByText("Execution lane triage")).not.toBeInTheDocument(); 
fireEvent.click(screen.getByRole("button", { name: /刷新|Refresh/ })); const errorBanner = await screen.findByRole("alert"); diff --git a/apps/desktop/src/pages/workflow_queue_controls.test.tsx b/apps/desktop/src/pages/workflow_queue_controls.test.tsx index f124b83..0453813 100644 --- a/apps/desktop/src/pages/workflow_queue_controls.test.tsx +++ b/apps/desktop/src/pages/workflow_queue_controls.test.tsx @@ -141,7 +141,7 @@ describe("workflow queue controls", () => { }), ); }); - expect(await screen.findByText("Queued task-queue.")).toBeInTheDocument(); + expect(await screen.findByText("Queued task-queue. Refreshing the workflow view...")).toBeInTheDocument(); }); it("renders locale-aware workflow detail labels when zh-CN is requested", async () => { diff --git a/apps/orchestrator/src/cortexpilot_orch/gates/tests_gate.py b/apps/orchestrator/src/cortexpilot_orch/gates/tests_gate.py index 7753920..0fccf98 100644 --- a/apps/orchestrator/src/cortexpilot_orch/gates/tests_gate.py +++ b/apps/orchestrator/src/cortexpilot_orch/gates/tests_gate.py @@ -25,6 +25,7 @@ "true", ":", } +_TRIVIAL_ECHO_PAYLOADS = {"", "ok", "hello", "pass", "success", "done", "1"} def _coerce_timeout_sec(raw: object) -> float: @@ -38,6 +39,16 @@ def _coerce_timeout_sec(raw: object) -> float: return timeout_sec +def _coerce_gate_result(gate: object) -> dict[str, object]: + if isinstance(gate, dict): + return gate + return { + "ok": False, + "reason": "invalid validate_command result", + "raw": repr(gate), + } + + def _now_ts() -> str: return datetime.now(timezone.utc).isoformat() @@ -145,9 +156,13 @@ def _is_trivial_acceptance_command(command: str) -> bool: return True if normalized in _TRIVIAL_ACCEPTANCE_COMMANDS: return True - if normalized.startswith("echo "): - payload = normalized[5:].strip().strip('"').strip("'") - if payload in {"", "ok", "hello", "pass", "success", "done", "1"}: + try: + tokens = shlex.split(command) + except ValueError: + return False + if tokens and tokens[0].lower() == "echo": + payload = " ".join(tokens[1:]).strip().lower() + if payload in _TRIVIAL_ECHO_PAYLOADS: return True return False @@ -167,7 +182,7 @@ def _normalize_tests(test_items: Iterable[object]) -> list[dict[str, object]]: continue if isinstance(item, dict): cmd = item.get("cmd") or item.get("command") - if isinstance(cmd, str) and cmd.strip(): + if isinstance(cmd, str): timeout_sec = _coerce_timeout_sec(item.get("timeout_sec", _DEFAULT_TIMEOUT_SEC)) normalized.append( { @@ -233,7 +248,7 @@ def run_acceptance_tests( strict_nontrivial_enabled = ( bool(strict_nontrivial) if strict_nontrivial is not None else _is_strict_nontrivial_enabled() ) - has_must_pass = any(bool(test.get("must_pass", True)) for test in normalized) + has_must_pass = any(_coerce_must_pass(test.get("must_pass", True)) for test in normalized) if not has_must_pass: finished_at = _now_ts() report = _build_report( @@ -277,12 +292,14 @@ def run_acceptance_tests( "reason": "trivial acceptance command blocked", } - gate = validate_command( - command, - forbidden, - network_policy=network_policy, - policy_pack=policy_pack, - repo_root=worktree_root, + gate = _coerce_gate_result( + validate_command( + command, + forbidden, + network_policy=network_policy, + policy_pack=policy_pack, + repo_root=worktree_root, + ) ) if not gate.get("ok", False): finished_at = _now_ts() @@ -416,12 +433,14 @@ def run_evals_gate( relative = script_path.relative_to(repo_root) command = f"bash {relative}" - gate = validate_command( - command, - forbidden_actions or [], - network_policy=network_policy, - 
policy_pack=policy_pack, - repo_root=repo_root, + gate = _coerce_gate_result( + validate_command( + command, + forbidden_actions or [], + network_policy=network_policy, + policy_pack=policy_pack, + repo_root=repo_root, + ) ) if not gate.get("ok", False): return {"ok": False, "reason": "tool gate violation", "gate": gate, "command": command} diff --git a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py index 7d20138..2a30a79 100644 --- a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py +++ b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from datetime import datetime, timezone +import importlib import json from pathlib import Path from typing import Any, Callable @@ -40,9 +41,9 @@ class ControlPlaneReadService: @classmethod def from_api_main(cls) -> "ControlPlaneReadService": - from cortexpilot_orch.api import main as api_main - from cortexpilot_orch.api import main_state_store_helpers - from cortexpilot_orch.queue import QueueStore + api_main = importlib.import_module("cortexpilot_orch.api.main") + main_state_store_helpers = importlib.import_module("cortexpilot_orch.api.main_state_store_helpers") + QueueStore = importlib.import_module("cortexpilot_orch.queue").QueueStore def _list_workflows_readonly() -> list[dict[str, Any]]: workflows = list( @@ -135,11 +136,11 @@ def _list_queue_readonly(*, workflow_id: str | None = None, status: str | None = @classmethod def from_runtime(cls) -> "ControlPlaneReadService": - from cortexpilot_orch.api import main_run_views_helpers - from cortexpilot_orch.api import main_state_store_helpers - from cortexpilot_orch.config import load_config - from cortexpilot_orch.contract.compiler import build_role_binding_summary - from cortexpilot_orch.queue import QueueStore + main_run_views_helpers = importlib.import_module("cortexpilot_orch.api.main_run_views_helpers") + main_state_store_helpers = importlib.import_module("cortexpilot_orch.api.main_state_store_helpers") + load_config = importlib.import_module("cortexpilot_orch.config").load_config + build_role_binding_summary = importlib.import_module("cortexpilot_orch.contract.compiler").build_role_binding_summary + QueueStore = importlib.import_module("cortexpilot_orch.queue").QueueStore cfg = load_config() runs_root = cfg.runs_root diff --git a/apps/orchestrator/tests/test_bench_e2e_speed_gate.py b/apps/orchestrator/tests/test_bench_e2e_speed_gate.py new file mode 100644 index 0000000..ddc246a --- /dev/null +++ b/apps/orchestrator/tests/test_bench_e2e_speed_gate.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[3] + + +def _gate_script() -> Path: + return _repo_root() / "scripts" / "check_bench_e2e_speed_gate.py" + + +def _write_summary(path: Path, *, overall_fail_rate: float, ui_p95: float, dash_p95: float) -> None: + path.write_text( + json.dumps( + { + "run_id": "bench_test", + "overall": {"fail_rate": overall_fail_rate}, + "suites": { + "ui_full_gemini_strict": {"duration_sec": {"p95": ui_p95}}, + "dashboard_high_risk_e2e": {"duration_sec": {"p95": dash_p95}}, + }, + } + ) + + "\n", + encoding="utf-8", + ) + + +def test_bench_gate_passes_for_summary_within_thresholds(tmp_path: Path) -> None: + summary = tmp_path / "summary.json" + 
_write_summary(summary, overall_fail_rate=0.0, ui_p95=90.0, dash_p95=45.0) + + result = subprocess.run( + [sys.executable, str(_gate_script()), "--summary", str(summary), "--ui-max-p95-sec", "120", "--dash-max-p95-sec", "60"], + check=False, + capture_output=True, + text=True, + ) + + assert result.returncode == 0 + assert "benchmark gate passed" in result.stdout + + +def test_bench_gate_fails_when_overall_fail_rate_exceeds_threshold(tmp_path: Path) -> None: + summary = tmp_path / "summary.json" + _write_summary(summary, overall_fail_rate=0.2, ui_p95=90.0, dash_p95=45.0) + + result = subprocess.run( + [sys.executable, str(_gate_script()), "--summary", str(summary), "--max-overall-fail-rate", "0.1"], + check=False, + capture_output=True, + text=True, + ) + + assert result.returncode == 1 + assert "overall.fail_rate=0.2000 > max_overall_fail_rate=0.1000" in result.stderr + + +def test_bench_gate_fails_when_suite_p95_exceeds_threshold(tmp_path: Path) -> None: + summary = tmp_path / "summary.json" + _write_summary(summary, overall_fail_rate=0.0, ui_p95=181.0, dash_p95=91.0) + + result = subprocess.run( + [ + sys.executable, + str(_gate_script()), + "--summary", + str(summary), + "--ui-max-p95-sec", + "180", + "--dash-max-p95-sec", + "90", + ], + check=False, + capture_output=True, + text=True, + ) + + assert result.returncode == 1 + assert "ui_full_gemini_strict.p95=181.000s > max_p95=180.000s" in result.stderr + assert "dashboard_high_risk_e2e.p95=91.000s > max_p95=90.000s" in result.stderr + + +def test_bench_gate_fails_closed_when_summary_is_missing(tmp_path: Path) -> None: + summary = tmp_path / "missing-summary.json" + + result = subprocess.run( + [sys.executable, str(_gate_script()), "--summary", str(summary)], + check=False, + capture_output=True, + text=True, + ) + + assert result.returncode == 2 + assert "benchmark summary not found" in result.stderr diff --git a/apps/orchestrator/tests/test_control_plane_read_service.py b/apps/orchestrator/tests/test_control_plane_read_service.py new file mode 100644 index 0000000..c075ebf --- /dev/null +++ b/apps/orchestrator/tests/test_control_plane_read_service.py @@ -0,0 +1,330 @@ +from __future__ import annotations + +import json +import sys +from datetime import datetime, timezone +from pathlib import Path +from types import SimpleNamespace +from types import ModuleType + +import pytest + +from cortexpilot_orch.services.control_plane_read_service import ( + ControlPlaneReadService, + _as_array, + _as_record, + _as_text, + _find_report, +) + + +def _write_json(path: Path, payload: object) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload), encoding="utf-8") + + +def test_control_plane_read_service_wrapper_filters_and_summary_helpers() -> None: + service = ControlPlaneReadService( + list_runs_fn=lambda: [{"run_id": "run-1"}], + get_run_fn=lambda run_id: {"run_id": run_id}, + get_events_fn=lambda run_id: [{"run_id": run_id, "event": "RUN_UPDATED"}], + get_reports_fn=lambda run_id: [ + {"name": "run_compare_report.json", "data": {"compare_summary": {"mismatched_count": 2}}}, + {"name": "proof_pack.json", "data": {"summary": "proof-ready"}}, + {"name": "incident_pack.json", "data": "not-a-record"}, + ] + if run_id == "run-1" + else "not-a-list", + list_workflows_fn=lambda: [{"workflow_id": "wf-1"}], + get_workflow_fn=lambda workflow_id: {"workflow": {"workflow_id": workflow_id}, "runs": [], "events": []}, + list_queue_fn=lambda **_: [{"queue_id": "queue-1"}], + list_pending_approvals_fn=lambda: [ + 
{"run_id": "run-1", "status": "pending"}, + {"run_id": "run-2", "status": "pending"}, + ], + list_diff_gate_fn=lambda: [ + {"run_id": "run-1", "status": "FAILED"}, + {"run_id": "run-2", "status": "PASS"}, + ], + ) + + assert _as_record({"ok": True}) == {"ok": True} + assert _as_record("bad") == {} + assert _as_array([1, 2]) == [1, 2] + assert _as_array("bad") == [] + assert _as_text(" run-1 ") == "run-1" + assert _find_report([{"name": "proof_pack.json", "data": {"summary": "ready"}}], "proof_pack.json") == { + "summary": "ready" + } + assert _find_report([{"name": "proof_pack.json", "data": "bad"}], "proof_pack.json") == {} + + assert service.list_runs() == [{"run_id": "run-1"}] + assert service.get_run("run-9") == {"run_id": "run-9"} + assert service.get_run_events("run-9") == [{"run_id": "run-9", "event": "RUN_UPDATED"}] + assert service.get_run_reports("run-2") == [] + assert service.list_workflows() == [{"workflow_id": "wf-1"}] + assert service.get_workflow("wf-1") == {"workflow": {"workflow_id": "wf-1"}, "runs": [], "events": []} + assert service.list_queue(workflow_id="wf-1", status="pending") == [{"queue_id": "queue-1"}] + assert service.get_pending_approvals() == [ + {"run_id": "run-1", "status": "pending"}, + {"run_id": "run-2", "status": "pending"}, + ] + assert service.get_pending_approvals(run_id="run-1") == [{"run_id": "run-1", "status": "pending"}] + assert service.get_diff_gate_state() == [ + {"run_id": "run-1", "status": "FAILED"}, + {"run_id": "run-2", "status": "PASS"}, + ] + assert service.get_diff_gate_state(run_id="run-2") == [{"run_id": "run-2", "status": "PASS"}] + assert service.get_compare_summary("run-1") == {"mismatched_count": 2} + assert service.get_proof_summary("run-1") == {"summary": "proof-ready"} + assert service.get_incident_summary("run-1") == {} + + +def test_control_plane_read_service_from_api_main_builds_workflows_and_queue_filters(monkeypatch) -> None: + event_map = { + "run-b": [ + {"event": "WORKFLOW_STATUS", "ts": "2026-04-12T10:00:00Z", "context": {"workflow_id": "wf-1"}}, + {"event": "IGNORED", "context": {"workflow_id": "wf-2"}}, + ], + "run-a": [ + {"event": "WORKFLOW_BOUND", "ts": "2026-04-11T10:00:00Z"}, + {"event": "CUSTOM", "_ts": "2026-04-11T09:00:00Z", "context": {"workflow_id": "wf-1"}}, + ], + } + + workflows = { + "wf-1": { + "workflow_id": "wf-1", + "runs": [ + {"run_id": "run-a", "created_at": "2026-04-11T08:00:00Z"}, + {"run_id": "run-b", "created_at": "2026-04-12T08:00:00Z"}, + ], + }, + "wf-2": {"workflow_id": "wf-2", "runs": [{"run_id": "run-z", "created_at": "broken-ts"}]}, + } + + class _FakeQueueStore: + def __init__(self, *, ensure_storage: bool = False) -> None: + self.ensure_storage = ensure_storage + + def list_items(self) -> list[dict[str, str]]: + return [ + {"queue_id": "queue-1", "workflow_id": "wf-1", "status": "PENDING"}, + {"queue_id": "queue-2", "workflow_id": "wf-2", "status": "DONE"}, + ] + + api_main = ModuleType("cortexpilot_orch.api.main") + api_main.load_config = lambda: SimpleNamespace(runs_root=Path("/tmp/runs"), runtime_root=Path("/tmp/runtime")) + api_main._read_events = lambda run_id: event_map.get(run_id, []) + api_main._parse_iso_ts = lambda value: datetime.fromisoformat(value.replace("Z", "+00:00")) + api_main.list_runs = lambda: [{"run_id": "api-run"}] + api_main.get_run = lambda run_id: {"run_id": run_id, "source": "api"} + api_main.get_events = lambda run_id: event_map.get(run_id, []) + api_main.get_reports = lambda run_id: [{"name": "proof_pack.json", "data": {"run_id": run_id}}] + 
api_main.list_pending_approvals = lambda: [{"run_id": "run-a"}] + api_main.list_diff_gate = lambda: [{"run_id": "run-b", "status": "FAILED"}] + + main_state_store_helpers = ModuleType("cortexpilot_orch.api.main_state_store_helpers") + main_state_store_helpers.collect_workflows = lambda **_: workflows + + queue_module = ModuleType("cortexpilot_orch.queue") + queue_module.QueueStore = _FakeQueueStore + + monkeypatch.setitem(sys.modules, "cortexpilot_orch.api.main", api_main) + monkeypatch.setitem(sys.modules, "cortexpilot_orch.api.main_state_store_helpers", main_state_store_helpers) + monkeypatch.setitem(sys.modules, "cortexpilot_orch.queue", queue_module) + + service = ControlPlaneReadService.from_api_main() + + assert service.list_runs() == [{"run_id": "api-run"}] + assert service.get_run("run-a") == {"run_id": "run-a", "source": "api"} + assert [item["workflow_id"] for item in service.list_workflows()] == ["wf-1", "wf-2"] + + workflow_payload = service.get_workflow("wf-1") + assert [event["_run_id"] for event in workflow_payload["events"]] == ["run-b", "run-a", "run-a"] + assert workflow_payload["runs"][0]["run_id"] == "run-a" + with pytest.raises(KeyError, match="workflow `missing` not found"): + service.get_workflow("missing") + + assert service.list_queue(workflow_id="wf-1") == [{"queue_id": "queue-1", "workflow_id": "wf-1", "status": "PENDING"}] + assert service.list_queue(status="done") == [{"queue_id": "queue-2", "workflow_id": "wf-2", "status": "DONE"}] + assert service.get_pending_approvals() == [{"run_id": "run-a"}] + assert service.get_diff_gate_state(run_id="run-b") == [{"run_id": "run-b", "status": "FAILED"}] + + +def test_control_plane_read_service_from_runtime_builds_runtime_views_and_pending_approvals( + monkeypatch, tmp_path: Path +) -> None: + runtime_root = tmp_path / "runtime" + runs_root = runtime_root / "runs" + run_a = runs_root / "run-a" + run_b = runs_root / "run-b" + run_skip = runs_root / "run-skip" + + _write_json( + run_a / "manifest.json", + { + "run_id": "run-a", + "task_id": "task-a", + "status": "", + "role_binding_summary": {"source": "persisted"}, + }, + ) + _write_json( + run_a / "contract.json", + { + "task_id": "contract-task-a", + "allowed_paths": ["apps/orchestrator"], + }, + ) + _write_json(run_a / "reports" / "proof_pack.json", {"summary": "proof-a"}) + _write_json(run_a / "reports" / "run_compare_report.json", {"compare_summary": {"mismatched_count": 3}}) + + _write_json( + run_b / "manifest.json", + { + "status": "SUCCESS", + }, + ) + _write_json( + run_b / "contract.json", + { + "task_id": "contract-task-b", + "allowed_paths": "not-a-list", + }, + ) + _write_json(run_b / "reports" / "incident_pack.json", {"summary": "incident-b"}) + + run_skip.mkdir(parents=True, exist_ok=True) + (run_skip / "manifest.json").write_text("{bad json", encoding="utf-8") + + run_a.touch() + run_b.touch() + run_skip.touch() + (run_a / "manifest.json").touch() + (run_b / "manifest.json").touch() + (run_skip / "manifest.json").touch() + + event_map = { + "run-a": [ + {"event": "WORKFLOW_BOUND", "ts": "2026-04-12T10:00:00Z"}, + { + "event": "HUMAN_APPROVAL_REQUIRED", + "ts": "2026-04-12T10:01:00Z", + "context": { + "reason": ["owner review"], + "actions": ["approve"], + "verify_steps": ["pytest"], + "resume_step": "resume-from-review", + "workflow_id": "wf-1", + }, + }, + {"event": "CUSTOM", "_ts": "2026-04-12T10:02:00Z", "context": {"workflow_id": "wf-1"}}, + ], + "run-b": [ + {"event": "HUMAN_APPROVAL_REQUIRED", "ts": "2026-04-11T09:00:00Z", "meta": {"workflow_id": 
"wf-2"}}, + {"event": "HUMAN_APPROVAL_COMPLETED", "ts": "2026-04-11T09:05:00Z"}, + {"event": "TEMPORAL_NOTIFY_DONE", "ts": "2026-04-11T09:10:00Z"}, + ], + "run-skip": [], + } + + workflows = { + "wf-1": { + "workflow_id": "wf-1", + "runs": [ + {"run_id": "run-b", "created_at": "2026-04-11T08:00:00Z"}, + {"run_id": "run-a", "created_at": "2026-04-12T08:00:00Z"}, + ], + }, + "wf-2": {"workflow_id": "wf-2", "runs": [{"run_id": "run-b", "created_at": "invalid"}]}, + } + + class _FakeQueueStore: + def __init__(self, *, ensure_storage: bool = False) -> None: + self.ensure_storage = ensure_storage + + def list_items(self) -> list[dict[str, str]]: + return [ + {"queue_id": "queue-1", "workflow_id": "wf-1", "status": "PENDING"}, + {"queue_id": "queue-2", "workflow_id": "wf-2", "status": "DONE"}, + ] + + config_module = ModuleType("cortexpilot_orch.config") + config_module.load_config = lambda: SimpleNamespace(runs_root=runs_root, runtime_root=runtime_root) + + main_state_store_helpers = ModuleType("cortexpilot_orch.api.main_state_store_helpers") + main_state_store_helpers.read_events = lambda *, run_id, runs_root: event_map.get(run_id, []) + main_state_store_helpers.collect_workflows = lambda **_: workflows + + main_run_views_helpers = ModuleType("cortexpilot_orch.api.main_run_views_helpers") + main_run_views_helpers.list_diff_gate = lambda **_: [ + {"run_id": "run-a", "status": "FAILED"}, + {"run_id": "run-b", "status": "PASS"}, + ] + + compiler_module = ModuleType("cortexpilot_orch.contract.compiler") + compiler_module.build_role_binding_summary = lambda contract: { + "source": "generated", + "task_id": contract.get("task_id"), + } + + queue_module = ModuleType("cortexpilot_orch.queue") + queue_module.QueueStore = _FakeQueueStore + + monkeypatch.setitem(sys.modules, "cortexpilot_orch.config", config_module) + monkeypatch.setitem(sys.modules, "cortexpilot_orch.api.main_state_store_helpers", main_state_store_helpers) + monkeypatch.setitem(sys.modules, "cortexpilot_orch.api.main_run_views_helpers", main_run_views_helpers) + monkeypatch.setitem(sys.modules, "cortexpilot_orch.contract.compiler", compiler_module) + monkeypatch.setitem(sys.modules, "cortexpilot_orch.queue", queue_module) + + service = ControlPlaneReadService.from_runtime() + + listed_runs = service.list_runs() + assert [item["run_id"] for item in listed_runs] == ["run-b", "run-a"] + assert listed_runs[1]["status"] == "UNKNOWN" + assert listed_runs[1]["last_event_ts"] == "2026-04-12T10:02:00Z" + + runtime_run = service.get_run("run-a") + assert runtime_run["task_id"] == "task-a" + assert runtime_run["allowed_paths"] == ["apps/orchestrator"] + assert runtime_run["role_binding_read_model"] == {"source": "persisted"} + + generated_run = service.get_run("run-b") + assert generated_run["task_id"] == "contract-task-b" + assert generated_run["allowed_paths"] == [] + assert generated_run["role_binding_read_model"] == {"source": "generated", "task_id": "contract-task-b"} + with pytest.raises(KeyError, match="run `missing` not found"): + service.get_run("missing") + + assert service.get_run_reports("run-a") == [ + {"name": "proof_pack.json", "data": {"summary": "proof-a"}}, + {"name": "run_compare_report.json", "data": {"compare_summary": {"mismatched_count": 3}}}, + ] + assert [item["workflow_id"] for item in service.list_workflows()] == ["wf-1", "wf-2"] + + workflow_payload = service.get_workflow("wf-1") + assert [event["_run_id"] for event in workflow_payload["events"]] == ["run-a", "run-a", "run-a", "run-b"] + with pytest.raises(KeyError, 
match="workflow `missing` not found"): + service.get_workflow("missing") + + assert service.list_queue(workflow_id="wf-1", status="pending") == [ + {"queue_id": "queue-1", "workflow_id": "wf-1", "status": "PENDING"} + ] + assert service.get_pending_approvals() == [ + { + "run_id": "run-a", + "status": "pending", + "task_id": "task-a", + "failure_reason": "", + "reason": ["owner review"], + "actions": ["approve"], + "verify_steps": ["pytest"], + "resume_step": "resume-from-review", + } + ] + assert service.get_pending_approvals(run_id="run-a")[0]["run_id"] == "run-a" + assert service.get_diff_gate_state(run_id="run-a") == [{"run_id": "run-a", "status": "FAILED"}] + assert service.get_compare_summary("run-a") == {"mismatched_count": 3} + assert service.get_proof_summary("run-a") == {"summary": "proof-a"} + assert service.get_incident_summary("run-b") == {"summary": "incident-b"} diff --git a/apps/orchestrator/tests/test_mcp_queue_pilot_server_branches.py b/apps/orchestrator/tests/test_mcp_queue_pilot_server_branches.py new file mode 100644 index 0000000..d210814 --- /dev/null +++ b/apps/orchestrator/tests/test_mcp_queue_pilot_server_branches.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +from dataclasses import replace +import io +import sys +from types import ModuleType + +from cortexpilot_orch import mcp_queue_pilot_server as queue_pilot_module + + +def test_mcp_queue_pilot_helpers_and_protocol_edges(monkeypatch) -> None: + monkeypatch.setenv("CORTEXPILOT_APPROVAL_ALLOWED_ROLES", " owner , ops ") + + assert queue_pilot_module._mutation_roles() == {"OWNER", "OPS"} + assert queue_pilot_module._required_role_arg({"actor_role": "owner"}) == "OWNER" + assert queue_pilot_module._queue_payload( + {"priority": 3, "scheduled_at": " 2026-04-12T09:00:00Z ", "deadline_at": " "} + ) == { + "priority": 3, + "scheduled_at": "2026-04-12T09:00:00Z", + } + assert queue_pilot_module._error_response(7, -32601, "boom") == { + "jsonrpc": "2.0", + "id": 7, + "error": {"code": -32601, "message": "boom"}, + } + + +def test_mcp_queue_pilot_server_covers_default_constructor_unknown_methods_and_stream(monkeypatch) -> None: + captured: list[tuple[str, dict[str, object]]] = [] + + def _preview(run_id: str, payload: dict[str, object]) -> dict[str, object]: + captured.append((run_id, payload)) + return { + "run_id": run_id, + "validation": "ok", + "can_apply": True, + "preview_item": {"queue_id": "preview-1"}, + } + + def _apply(run_id: str, payload: dict[str, object]) -> dict[str, object]: + return {"queue_id": f"{run_id}-queue", "task_id": "task-1", "status": "PENDING"} + + api_main = ModuleType("cortexpilot_orch.api.main") + api_main.preview_enqueue_run_queue = _preview + api_main.enqueue_run_queue = _apply + monkeypatch.setitem(sys.modules, "cortexpilot_orch.api.main", api_main) + + server = queue_pilot_module.CortexPilotQueuePilotMcpServer() + + assert server.handle_message({"jsonrpc": "2.0", "method": "initialized"}) is None + assert server.handle_message({"jsonrpc": "2.0", "id": 1, "method": "ping"}) == { + "jsonrpc": "2.0", + "id": 1, + "result": {}, + } + init_response = server.handle_message({"jsonrpc": "2.0", "id": 2, "method": "initialize"}) + assert init_response is not None + assert init_response["result"]["serverInfo"]["name"] == "cortexpilot-queue-pilot" + + alias_list = server.handle_message({"jsonrpc": "2.0", "id": 3, "method": "tooling/list"}) + assert alias_list is not None + assert {tool["name"] for tool in alias_list["result"]["tools"]} == { + "preview_enqueue_from_run", + 
"enqueue_from_run", + } + + unknown_tool = server.handle_message( + { + "jsonrpc": "2.0", + "id": 4, + "method": "tools/call", + "params": {"name": "missing_tool", "arguments": {}}, + } + ) + assert unknown_tool == { + "jsonrpc": "2.0", + "id": 4, + "error": {"code": -32601, "message": "unknown tool `missing_tool`"}, + } + + missing_run_id = server.handle_message( + { + "jsonrpc": "2.0", + "id": 5, + "method": "tools/call", + "params": {"name": "preview_enqueue_from_run", "arguments": {}}, + } + ) + assert missing_run_id is not None + assert missing_run_id["result"]["isError"] is True + assert "`run_id` is required" in missing_run_id["result"]["structuredContent"]["error"] + + broken_tool = replace( + server._tool_map["preview_enqueue_from_run"], + handler=lambda arguments: (_ for _ in ()).throw(RuntimeError("preview exploded")), + ) + server._tool_map["preview_enqueue_from_run"] = broken_tool + runtime_error = server.handle_message( + { + "jsonrpc": "2.0", + "id": 6, + "method": "tools/call", + "params": {"name": "preview_enqueue_from_run", "arguments": {"run_id": "run-9"}}, + } + ) + assert runtime_error is not None + assert runtime_error["result"]["isError"] is True + assert runtime_error["result"]["structuredContent"]["error"] == "preview exploded" + + assert server.handle_message({"jsonrpc": "2.0", "method": "unsupported"}) is None + unsupported = server.handle_message({"jsonrpc": "2.0", "id": 7, "method": "unsupported"}) + assert unsupported == { + "jsonrpc": "2.0", + "id": 7, + "error": {"code": -32601, "message": "method `unsupported` is not supported"}, + } + + source = io.StringIO('\nnot-json\n[]\n{"jsonrpc":"2.0","id":8,"method":"ping"}\n') + target = io.StringIO() + server.serve_forever(instream=source, outstream=target) + assert target.getvalue().strip() == '{"jsonrpc": "2.0", "id": 8, "result": {}}' + + called = {"serve_forever": False} + + class _FakeServer: + def serve_forever(self) -> None: + called["serve_forever"] = True + + monkeypatch.setattr(queue_pilot_module, "CortexPilotQueuePilotMcpServer", _FakeServer) + queue_pilot_module.serve_queue_pilot_mcp() + assert called["serve_forever"] is True diff --git a/apps/orchestrator/tests/test_repo_coverage_gate.py b/apps/orchestrator/tests/test_repo_coverage_gate.py new file mode 100644 index 0000000..a08dd6c --- /dev/null +++ b/apps/orchestrator/tests/test_repo_coverage_gate.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from pathlib import Path + + +def _read_script() -> str: + script_path = Path(__file__).resolve().parents[3] / "scripts" / "repo_coverage_gate.py" + return script_path.read_text(encoding="utf-8") + + +def test_dashboard_coverage_installs_deps_before_vitest() -> None: + text = _read_script() + install_idx = text.index('run_command(["bash", "scripts/install_dashboard_deps.sh"])') + vitest_idx = text.index('"pnpm",\n "--dir",\n "apps/dashboard",\n "exec",\n "vitest"') + assert install_idx < vitest_idx + + +def test_desktop_coverage_installs_deps_before_vitest() -> None: + text = _read_script() + install_idx = text.index('run_command(["bash", "scripts/install_desktop_deps.sh"])') + vitest_idx = text.index('"pnpm",\n "--dir",\n "apps/desktop",\n "exec",\n "vitest"') + assert install_idx < vitest_idx + + +def test_orchestrator_coverage_uses_managed_coverage_file() -> None: + text = _read_script() + assert 'DEFAULT_COVERAGE_DATA_DIR = ROOT_DIR / ".runtime-cache" / "cache" / "test" / "coverage" / "repo_coverage_gate"' in text + assert '"COVERAGE_FILE": str(coverage_file)' in text + + +def 
test_dashboard_and_desktop_coverage_use_managed_report_dirs() -> None: + text = _read_script() + assert '"CORTEXPILOT_DASHBOARD_COVERAGE_DIR": str(report_path.parent)' in text + assert '"CORTEXPILOT_DESKTOP_COVERAGE_DIR": str(report_path.parent)' in text + + +def test_orchestrator_coverage_uses_managed_hypothesis_storage() -> None: + text = _read_script() + assert 'DEFAULT_HYPOTHESIS_DATA_DIR = ROOT_DIR / ".runtime-cache" / "cache" / "hypothesis" / "repo_coverage_gate"' in text + assert '"HYPOTHESIS_STORAGE_DIRECTORY": str(DEFAULT_HYPOTHESIS_DATA_DIR)' in text diff --git a/apps/orchestrator/tests/test_tests_gate_extended.py b/apps/orchestrator/tests/test_tests_gate_extended.py index e916320..535335a 100644 --- a/apps/orchestrator/tests/test_tests_gate_extended.py +++ b/apps/orchestrator/tests/test_tests_gate_extended.py @@ -42,6 +42,19 @@ def test_tests_gate_tool_gate_violation(tmp_path: Path, monkeypatch) -> None: assert result["reason"] == "tool gate violation" +def test_tests_gate_tool_gate_non_dict_result_fails_closed(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(tests_gate, "validate_command", lambda *args, **kwargs: False) + + result = tests_gate.run_acceptance_tests( + tmp_path, + [{"name": "hygiene", "cmd": "bash scripts/check_repo_hygiene.sh", "must_pass": True}], + ) + + assert result["ok"] is False + assert result["reason"] == "tool gate violation" + assert result["gate"]["reason"] == "invalid validate_command result" + + def test_tests_gate_invalid_shlex(tmp_path: Path, monkeypatch) -> None: monkeypatch.setattr(tests_gate, "validate_command", lambda *args, **kwargs: {"ok": True}) result = tests_gate.run_acceptance_tests(tmp_path, ['echo "unterminated']) @@ -131,6 +144,16 @@ def test_tests_gate_strict_nontrivial_blocks_echo_numeric_payload(tmp_path: Path assert result["reason"] == "trivial acceptance command blocked" +def test_tests_gate_strict_nontrivial_blocks_echo_whitespace_payload(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("CORTEXPILOT_ACCEPTANCE_STRICT_NONTRIVIAL", "1") + monkeypatch.setattr(tests_gate, "validate_command", lambda *args, **kwargs: {"ok": True}) + + result = tests_gate.run_acceptance_tests(tmp_path, ['echo " "']) + + assert result["ok"] is False + assert result["reason"] == "trivial acceptance command blocked" + + def test_is_trivial_acceptance_command_treats_whitespace_only_as_trivial() -> None: assert tests_gate._is_trivial_acceptance_command(" \t \n ") is True @@ -284,6 +307,16 @@ def test_tests_gate_rejects_when_all_acceptance_tests_are_not_must_pass(tmp_path assert result["reason"] == "missing must_pass acceptance test" +def test_tests_gate_blank_dict_command_reports_empty_command(tmp_path: Path) -> None: + result = tests_gate.run_acceptance_tests( + tmp_path, + [{"name": "blank", "cmd": " ", "must_pass": True}], + ) + + assert result["ok"] is False + assert result["reason"] == "empty command" + + def test_run_evals_gate_blocks_tool_gate_violation(tmp_path: Path, monkeypatch) -> None: repo_root = tmp_path / "repo" worktree = repo_root / "worktree" @@ -307,6 +340,30 @@ def _fake_run(*args, **kwargs): assert called["run"] is False +def test_run_evals_gate_non_dict_tool_gate_result_fails_closed(tmp_path: Path, monkeypatch) -> None: + repo_root = tmp_path / "repo" + worktree = repo_root / "worktree" + (repo_root / "scripts").mkdir(parents=True) + worktree.mkdir(parents=True) + (repo_root / "scripts" / "run_evals.sh").write_text("#!/usr/bin/env bash\necho evals\n", encoding="utf-8") + + called: dict[str, bool] = {"run": False} + 
+ def _fake_run(*args, **kwargs): + called["run"] = True + return subprocess.CompletedProcess(args, 0, "ok", "") + + monkeypatch.setattr(tests_gate, "validate_command", lambda *args, **kwargs: False) + _patch_tests_gate_subprocess(monkeypatch, _fake_run) + + result = tests_gate.run_evals_gate(repo_root, worktree) + + assert result["ok"] is False + assert result["reason"] == "tool gate violation" + assert result["gate"]["reason"] == "invalid validate_command result" + assert called["run"] is False + + def test_tests_gate_coerces_string_must_pass_false(tmp_path: Path, monkeypatch) -> None: monkeypatch.setattr(tests_gate, "validate_command", lambda *args, **kwargs: {"ok": True}) diff --git a/configs/env.registry.json b/configs/env.registry.json index 492bd40..293cf2e 100644 --- a/configs/env.registry.json +++ b/configs/env.registry.json @@ -596,6 +596,42 @@ "scripts/lib/toolchain_env.sh" ] }, + { + "name": "CORTEXPILOT_BENCH_DASHBOARD_HIGH_RISK_E2E_MAX_P95_SEC", + "scope": "platform", + "secret": false, + "required": false, + "default": "90", + "owner": "platform", + "description": "Maximum allowed p95 latency, in seconds, for the dashboard_high_risk_e2e suite when benchmark summaries are evaluated by the fail-closed benchmark gate.", + "consumers": [ + "scripts/check_bench_e2e_speed_gate.py" + ] + }, + { + "name": "CORTEXPILOT_BENCH_MAX_FAIL_RATE", + "scope": "platform", + "secret": false, + "required": false, + "default": "0.05", + "owner": "platform", + "description": "Maximum allowed overall benchmark fail rate enforced by the fail-closed benchmark gate.", + "consumers": [ + "scripts/check_bench_e2e_speed_gate.py" + ] + }, + { + "name": "CORTEXPILOT_BENCH_UI_FULL_GEMINI_STRICT_MAX_P95_SEC", + "scope": "platform", + "secret": false, + "required": false, + "default": "180", + "owner": "platform", + "description": "Maximum allowed p95 latency, in seconds, for the ui_full_gemini_strict suite when benchmark summaries are evaluated by the fail-closed benchmark gate.", + "consumers": [ + "scripts/check_bench_e2e_speed_gate.py" + ] + }, { "name": "CORTEXPILOT_BROWSER_ALLOWLIST", "scope": "platform", diff --git a/docs/assets/storefront/benchmark-methodology.md b/docs/assets/storefront/benchmark-methodology.md index 3b13f2d..b7f1f8f 100644 --- a/docs/assets/storefront/benchmark-methodology.md +++ b/docs/assets/storefront/benchmark-methodology.md @@ -8,6 +8,7 @@ inventing numbers. - Benchmark execution tooling exists: - `scripts/bench_e2e_speed.py` - `scripts/bench_e2e_speed.sh` + - `scripts/check_bench_e2e_speed_gate.py` - A first tracked public single-run baseline now exists at `docs/releases/assets/news-digest-benchmark-summary-2026-03-27.md`. - Broader multi-round public benchmark figures do **not** exist yet. @@ -62,6 +63,23 @@ A tracked public benchmark artifact should include: `.runtime-cache/` - enough metadata to show which happy path was exercised +## Gate Contract + +Once a real benchmark summary exists, the repo-owned fail-closed gate is: + +```bash +npm run bench:e2e:speed:gate +``` + +Default thresholds are driven by: + +- `CORTEXPILOT_BENCH_MAX_FAIL_RATE` +- `CORTEXPILOT_BENCH_UI_FULL_GEMINI_STRICT_MAX_P95_SEC` +- `CORTEXPILOT_BENCH_DASHBOARD_HIGH_RISK_E2E_MAX_P95_SEC` + +The gate is intentionally strict about artifact presence: if no benchmark +summary exists yet, it fails instead of inventing a baseline. 
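+
+For a one-off local check against an explicit summary, the gate also accepts
+direct threshold flags (a sketch; the run-id path segment is illustrative):
+
+```bash
+python3 scripts/check_bench_e2e_speed_gate.py \
+  --summary .runtime-cache/test_output/benchmarks/<run-id>/summary.json \
+  --max-overall-fail-rate 0.05 \
+  --ui-max-p95-sec 180 \
+  --dash-max-p95-sec 90
+```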
+ ## Anti-Fraud Rule Do not copy raw numbers into README, release notes, or social posts unless they diff --git a/package.json b/package.json index 8770f1e..718d8b2 100644 --- a/package.json +++ b/package.json @@ -41,8 +41,10 @@ "test:smell": "bash scripts/test_smell_gate.sh", "quality:full": "npm run lint && npm run test:smell && npm run test", "quality:full:host": "npm run lint && npm run test:smell && npm run test:host", - "coverage:repo": "bash scripts/run_governance_py.sh scripts/repo_coverage_aggregate.py --threshold ${CORTEXPILOT_REPO_COVERAGE_GATE_THRESHOLD:-95}", - "coverage:repo:gate": "bash scripts/run_governance_py.sh scripts/repo_coverage_aggregate.py --threshold ${CORTEXPILOT_REPO_COVERAGE_GATE_THRESHOLD:-95} --enforce-gate", + "coverage:repo": "bash scripts/run_governance_py.sh scripts/repo_coverage_gate.py --threshold ${CORTEXPILOT_REPO_COVERAGE_GATE_THRESHOLD:-95}", + "coverage:repo:gate": "bash scripts/run_governance_py.sh scripts/repo_coverage_gate.py --threshold ${CORTEXPILOT_REPO_COVERAGE_GATE_THRESHOLD:-95} --enforce-gate", + "coverage:repo:aggregate": "bash scripts/run_governance_py.sh scripts/repo_coverage_aggregate.py --threshold ${CORTEXPILOT_REPO_COVERAGE_GATE_THRESHOLD:-95}", + "mutation:gate": "bash scripts/mutation_gate.sh", "test:quick": "bash scripts/docker_ci.sh test-quick", "test:quick:host": "bash scripts/test_quick.sh", "test:live:preflight": "${CORTEXPILOT_PYTHON:-python3} scripts/e2e_external_web_probe.py --url ${CORTEXPILOT_EXTERNAL_WEB_PROBE_URL:-https://example.com} --provider-api-mode ${CORTEXPILOT_EXTERNAL_WEB_PROBE_PROVIDER_API_MODE:-require} --hard-timeout-sec ${CORTEXPILOT_EXTERNAL_WEB_PROBE_HARD_TIMEOUT_SEC:-120}", @@ -99,6 +101,7 @@ "bench:e2e:speed": "bash scripts/bench_e2e_speed.sh", "bench:e2e:speed:dry-run": "bash scripts/bench_e2e_speed.sh --rounds 3 --ui-full-gemini-strict --dashboard-high-risk --dry-run", "bench:e2e:speed:report-only": "bash scripts/bench_e2e_speed.sh --report-only", + "bench:e2e:speed:gate": "python3 scripts/check_bench_e2e_speed_gate.py", "e2e:pm-chat": "bash scripts/e2e_pm_chat_command_tower_success.sh", "e2e:pm-chat:real": "CORTEXPILOT_E2E_RUN_MODE=real CORTEXPILOT_E2E_RUNNER=agents CORTEXPILOT_E2E_REEXEC_STRICT=true bash scripts/e2e_pm_chat_command_tower_success.sh", "ci": "bash scripts/ci_local_fast.sh", diff --git a/scripts/check_bench_e2e_speed_gate.py b/scripts/check_bench_e2e_speed_gate.py new file mode 100644 index 0000000..e82ad0b --- /dev/null +++ b/scripts/check_bench_e2e_speed_gate.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +"""Fail-closed gate for benchmark summaries produced by scripts/bench_e2e_speed.py.""" + +from __future__ import annotations + +import argparse +import json +import math +import os +import sys +from pathlib import Path +from typing import Any + + +ROOT = Path(__file__).resolve().parents[1] +BENCH_ROOT = ROOT / ".runtime-cache" / "test_output" / "benchmarks" +DEFAULT_MAX_FAIL_RATE = float(os.environ.get("CORTEXPILOT_BENCH_MAX_FAIL_RATE", "0.05")) +DEFAULT_UI_MAX_P95_SEC = float(os.environ.get("CORTEXPILOT_BENCH_UI_FULL_GEMINI_STRICT_MAX_P95_SEC", "180")) +DEFAULT_DASH_MAX_P95_SEC = float(os.environ.get("CORTEXPILOT_BENCH_DASHBOARD_HIGH_RISK_E2E_MAX_P95_SEC", "90")) + + +def _find_latest_summary() -> Path | None: + candidates = sorted(BENCH_ROOT.glob("*/summary.json"), key=lambda path: path.stat().st_mtime, reverse=True) + return candidates[0] if candidates else None + + +def _load_json(path: Path) -> dict[str, Any]: + try: + return json.loads(path.read_text(encoding="utf-8")) + 
except FileNotFoundError as exc: + raise FileNotFoundError(f"benchmark summary not found: {path}") from exc + except json.JSONDecodeError as exc: + raise ValueError(f"invalid JSON in benchmark summary {path}: {exc}") from exc + + +def _to_float(value: Any, *, field: str) -> float: + try: + parsed = float(value) + except (TypeError, ValueError) as exc: + raise ValueError(f"invalid numeric field {field!r}: {value!r}") from exc + if not math.isfinite(parsed): + raise ValueError(f"non-finite numeric field {field!r}: {value!r}") + return parsed + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Fail-closed gate for benchmark summaries emitted by scripts/bench_e2e_speed.py." + ) + parser.add_argument("--summary", default="", help="Explicit benchmark summary path. Defaults to the latest summary.json.") + parser.add_argument( + "--max-overall-fail-rate", + type=float, + default=DEFAULT_MAX_FAIL_RATE, + help="Maximum allowed overall fail_rate (default from CORTEXPILOT_BENCH_MAX_FAIL_RATE or 0.05).", + ) + parser.add_argument( + "--ui-max-p95-sec", + type=float, + default=DEFAULT_UI_MAX_P95_SEC, + help="Maximum allowed p95 for ui_full_gemini_strict (default env or 180).", + ) + parser.add_argument( + "--dash-max-p95-sec", + type=float, + default=DEFAULT_DASH_MAX_P95_SEC, + help="Maximum allowed p95 for dashboard_high_risk_e2e (default env or 90).", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + summary_path = Path(args.summary).expanduser().resolve() if args.summary else _find_latest_summary() + if summary_path is None: + print("❌ benchmark gate requires a benchmark summary; run `npm run bench:e2e:speed` first", file=sys.stderr) + return 2 + + try: + payload = _load_json(summary_path) + except (FileNotFoundError, ValueError) as exc: + print(f"❌ [bench-gate] {exc}", file=sys.stderr) + return 2 + overall = payload.get("overall") + suites = payload.get("suites") + if not isinstance(overall, dict) or not isinstance(suites, dict): + print(f"❌ benchmark summary missing overall/suites maps: {summary_path}", file=sys.stderr) + return 2 + + failures: list[str] = [] + overall_fail_rate = _to_float(overall.get("fail_rate"), field="overall.fail_rate") + if overall_fail_rate > args.max_overall_fail_rate: + failures.append( + f"overall.fail_rate={overall_fail_rate:.4f} > max_overall_fail_rate={args.max_overall_fail_rate:.4f}" + ) + + suite_thresholds = { + "ui_full_gemini_strict": args.ui_max_p95_sec, + "dashboard_high_risk_e2e": args.dash_max_p95_sec, + } + for suite_name, max_p95 in suite_thresholds.items(): + if suite_name not in suites: + failures.append(f"missing suite in benchmark summary: {suite_name}") + continue + suite = suites[suite_name] + if not isinstance(suite, dict): + failures.append(f"invalid suite payload: {suite_name}") + continue + duration = suite.get("duration_sec") + if not isinstance(duration, dict): + failures.append(f"missing duration metrics for suite: {suite_name}") + continue + p95 = _to_float(duration.get("p95"), field=f"{suite_name}.duration_sec.p95") + if p95 > max_p95: + failures.append(f"{suite_name}.p95={p95:.3f}s > max_p95={max_p95:.3f}s") + + print(f"📄 [bench-gate] summary={summary_path}") + print( + "ℹ️ [bench-gate] thresholds: " + f"overall_fail_rate<={args.max_overall_fail_rate:.4f} " + f"ui_p95<={args.ui_max_p95_sec:.3f}s " + f"dashboard_p95<={args.dash_max_p95_sec:.3f}s" + ) + if failures: + print("❌ [bench-gate] benchmark gate failed:", file=sys.stderr) + for failure in failures: + print(f" - 
{failure}", file=sys.stderr) + return 1 + + print("✅ [bench-gate] benchmark gate passed") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/repo_coverage_gate.py b/scripts/repo_coverage_gate.py index e2bd0ed..2c058c4 100644 --- a/scripts/repo_coverage_gate.py +++ b/scripts/repo_coverage_gate.py @@ -25,6 +25,8 @@ DEFAULT_DESKTOP_REPORT = ( ROOT_DIR / ".runtime-cache" / "test_output" / "repo_coverage" / "desktop" / "coverage-summary.json" ) +DEFAULT_COVERAGE_DATA_DIR = ROOT_DIR / ".runtime-cache" / "cache" / "test" / "coverage" / "repo_coverage_gate" +DEFAULT_HYPOTHESIS_DATA_DIR = ROOT_DIR / ".runtime-cache" / "cache" / "hypothesis" / "repo_coverage_gate" DEFAULT_THRESHOLD = float(os.environ.get("CORTEXPILOT_REPO_COVERAGE_GATE_THRESHOLD", "95")) @@ -178,8 +180,17 @@ def run_command(command: list[str], env_overrides: dict[str, str] | None = None) raise RuntimeError(f"command failed (exit={result.returncode}): {' '.join(command)}") +def _prepare_coverage_file(path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.unlink(missing_ok=True) + for stale_path in path.parent.glob(f"{path.name}.*"): + stale_path.unlink(missing_ok=True) + + def run_orchestrator_coverage(report_path: Path, pytest_target: str, pytest_mark: str) -> None: report_path.parent.mkdir(parents=True, exist_ok=True) + coverage_file = DEFAULT_COVERAGE_DATA_DIR / ".coverage" + _prepare_coverage_file(coverage_file) override = os.getenv("CORTEXPILOT_PYTHON", "").strip() toolchain_python = ROOT_DIR / ".runtime-cache" / "cache" / "toolchains" / "python" / "current" / "bin" / "python" python_bin = Path(override) if override else toolchain_python @@ -201,11 +212,22 @@ def run_orchestrator_coverage(report_path: Path, pytest_target: str, pytest_mark f"--cov-report=json:{report_path}", "--cov-fail-under=0", ] - run_command(command, env_overrides={"PYTHONPATH": "apps/orchestrator/src"}) + try: + run_command( + command, + env_overrides={ + "PYTHONPATH": "apps/orchestrator/src", + "COVERAGE_FILE": str(coverage_file), + "HYPOTHESIS_STORAGE_DIRECTORY": str(DEFAULT_HYPOTHESIS_DATA_DIR), + }, + ) + finally: + _prepare_coverage_file(coverage_file) def run_dashboard_coverage(report_path: Path, test_targets: list[str]) -> None: report_path.parent.mkdir(parents=True, exist_ok=True) + run_command(["bash", "scripts/install_dashboard_deps.sh"]) command = [ "pnpm", "--dir", @@ -225,11 +247,19 @@ def run_dashboard_coverage(report_path: Path, test_targets: list[str]) -> None: f"--coverage.reportsDirectory={report_path.parent}", ] command.extend(test_targets) - run_command(command, env_overrides={"CI": "1", "CORTEXPILOT_COVERAGE_HTML": "0"}) + run_command( + command, + env_overrides={ + "CI": "1", + "CORTEXPILOT_COVERAGE_HTML": "0", + "CORTEXPILOT_DASHBOARD_COVERAGE_DIR": str(report_path.parent), + }, + ) def run_desktop_coverage(report_path: Path, test_targets: list[str]) -> None: report_path.parent.mkdir(parents=True, exist_ok=True) + run_command(["bash", "scripts/install_desktop_deps.sh"]) command = [ "pnpm", "--dir", @@ -247,7 +277,15 @@ def run_desktop_coverage(report_path: Path, test_targets: list[str]) -> None: f"--coverage.reportsDirectory={report_path.parent}", ] command.extend(test_targets) - run_command(command) + run_command( + command, + env_overrides={ + "CI": "1", + "CORTEXPILOT_COVERAGE_HTML": "0", + "CORTEXPILOT_DESKTOP_COVERAGE_DIR": str(report_path.parent), + "CORTEXPILOT_DESKTOP_COVERAGE_RUN_ID": "repo-coverage-gate", + }, + ) def aggregate_repo_totals(project_totals: dict[str, 
CoverageTotals]) -> CoverageTotals: From a668fbbbbaa9c17c5044dfc47e55693601384792 Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:14:14 -0700 Subject: [PATCH 2/9] fix: stabilize runtime run ordering --- .../services/control_plane_read_service.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py index 2a30a79..74c00db 100644 --- a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py +++ b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py @@ -178,13 +178,31 @@ def _last_event_ts(run_id: str) -> str: return value return "" + def _run_sort_ts(run_dir: Path, manifest_record: dict[str, Any]) -> float: + manifest_path = run_dir / "manifest.json" + if manifest_path.exists(): + return manifest_path.stat().st_mtime + created_at = _as_text(manifest_record.get("created_at")) + if created_at: + try: + return _parse_iso_ts(created_at).timestamp() + except Exception: + pass + return run_dir.stat().st_mtime + def _list_runs_runtime() -> list[dict[str, Any]]: results: list[dict[str, Any]] = [] - for run_dir in sorted(runs_root.glob("*"), key=lambda item: item.stat().st_mtime, reverse=True): + run_dirs = [] + for run_dir in runs_root.glob("*"): + if not run_dir.is_dir(): + continue manifest = _read_json(run_dir / "manifest.json", {}) manifest_record = _as_record(manifest) if not manifest_record: continue + run_dirs.append((run_dir, manifest_record, _run_sort_ts(run_dir, manifest_record))) + + for run_dir, manifest_record, _sort_ts in sorted(run_dirs, key=lambda item: item[2], reverse=True): run_id = _as_text(manifest_record.get("run_id")) or run_dir.name payload = dict(manifest_record) payload["run_id"] = run_id From 75d1cf1671aeed2e3d727695ac5bd9ce6de51afa Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:25:35 -0700 Subject: [PATCH 3/9] feat: persist prompt artifacts per run --- .../src/cortexpilot_orch/contract/compiler.py | 47 +++++++++++++++++++ .../scheduler/scheduler_bridge_contract.py | 19 +++++++- .../tests/test_scheduler_bridge_runtime.py | 8 ++++ docs/architecture/runtime-topology.md | 3 ++ 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/apps/orchestrator/src/cortexpilot_orch/contract/compiler.py b/apps/orchestrator/src/cortexpilot_orch/contract/compiler.py index b992dbf..3c410de 100644 --- a/apps/orchestrator/src/cortexpilot_orch/contract/compiler.py +++ b/apps/orchestrator/src/cortexpilot_orch/contract/compiler.py @@ -421,6 +421,53 @@ def build_role_binding_summary(contract: dict[str, Any]) -> dict[str, Any]: } +def build_prompt_artifact( + contract: dict[str, Any], + *, + run_id: str = "", + task_id: str = "", +) -> dict[str, Any]: + role_contract = contract.get("role_contract") if isinstance(contract.get("role_contract"), dict) else {} + if not role_contract: + role_contract = _build_role_contract(contract, _load_agent_registry()) + assigned_agent = contract.get("assigned_agent") if isinstance(contract.get("assigned_agent"), dict) else {} + role = str( + assigned_agent.get("role") + or (role_contract.get("identity", {}) if isinstance(role_contract.get("identity"), dict) else {}).get("role") + or "WORKER" + ).strip().upper() or "WORKER" + role_contract = _merge_role_config_defaults( + 
role_contract, + _find_role_config_defaults(_load_role_config_registry(), role), + ) + identity = role_contract.get("identity") if isinstance(role_contract.get("identity"), dict) else {} + runtime_binding_raw = role_contract.get("runtime_binding") if isinstance(role_contract.get("runtime_binding"), dict) else {} + runtime_binding = { + "runner": _normalize_optional_ref(runtime_binding_raw.get("runner")), + "provider": _normalize_optional_ref(runtime_binding_raw.get("provider")), + "model": _normalize_optional_ref(runtime_binding_raw.get("model")), + } + resolved_task_id = str(task_id or contract.get("task_id") or "").strip() + return { + "artifact_type": "prompt_artifact", + "version": "v1", + "source": "contract-derived", + "execution_authority": "task_contract", + "run_id": str(run_id or "").strip(), + "task_id": resolved_task_id, + "assigned_agent": { + "role": role, + "agent_id": str(identity.get("agent_id") or assigned_agent.get("agent_id") or "").strip(), + }, + "purpose": str(role_contract.get("purpose") or "").strip(), + "system_prompt_ref": _normalize_optional_ref(role_contract.get("system_prompt_ref")), + "skills_bundle_ref": _normalize_optional_ref(role_contract.get("skills_bundle_ref")), + "mcp_bundle_ref": _normalize_optional_ref(role_contract.get("mcp_bundle_ref")), + "runtime_binding": runtime_binding, + "role_binding_summary": build_role_binding_summary(contract), + } + + def _build_role_contract(contract: dict[str, Any], registry: dict[str, Any] | None) -> dict[str, Any]: assigned_agent = contract.get("assigned_agent") if isinstance(contract.get("assigned_agent"), dict) else {} role = str(assigned_agent.get("role") or "WORKER").strip().upper() or "WORKER" diff --git a/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py b/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py index 9888504..dbf16ea 100644 --- a/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py +++ b/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py @@ -1,10 +1,11 @@ from __future__ import annotations from collections.abc import Callable +import json from pathlib import Path from typing import Any -from cortexpilot_orch.contract.compiler import build_role_binding_summary +from cortexpilot_orch.contract.compiler import build_prompt_artifact, build_role_binding_summary from cortexpilot_orch.store.run_store import RunStore @@ -208,5 +209,21 @@ def persist_contract_state( ) store.write_task_contract(run_id, task_id, contract) store.write_active_contract(run_id, contract) + prompt_artifact = build_prompt_artifact(contract, run_id=run_id, task_id=task_id) + prompt_artifact_path = store.write_artifact( + run_id, + "prompt_artifact.json", + json.dumps(prompt_artifact, ensure_ascii=False, indent=2), + ) + store.append_event( + run_id, + { + "level": "INFO", + "event": "PROMPT_ARTIFACT_WRITTEN", + "run_id": run_id, + "task_id": task_id, + "meta": {"path": str(prompt_artifact_path.relative_to(store.run_dir(run_id)))}, + }, + ) if ensure_evidence_bundle_fn is not None and failure_reason: ensure_evidence_bundle_fn(store, run_id, contract, failure_reason) diff --git a/apps/orchestrator/tests/test_scheduler_bridge_runtime.py b/apps/orchestrator/tests/test_scheduler_bridge_runtime.py index 27a92d8..faac59d 100644 --- a/apps/orchestrator/tests/test_scheduler_bridge_runtime.py +++ b/apps/orchestrator/tests/test_scheduler_bridge_runtime.py @@ -155,3 +155,11 @@ def 
test_persist_contract_state_writes_role_binding_summary_to_manifest(tmp_path written = json.loads((store._runs_root / run_id / "manifest.json").read_text(encoding="utf-8")) assert written["role_binding_summary"] == build_role_binding_summary(contract) + prompt_artifact = json.loads( + (store._runs_root / run_id / "artifacts" / "prompt_artifact.json").read_text(encoding="utf-8") + ) + assert prompt_artifact["artifact_type"] == "prompt_artifact" + assert prompt_artifact["execution_authority"] == "task_contract" + assert prompt_artifact["run_id"] == run_id + assert prompt_artifact["task_id"] == "task-role-binding-summary" + assert prompt_artifact["role_binding_summary"] == build_role_binding_summary(contract) diff --git a/docs/architecture/runtime-topology.md b/docs/architecture/runtime-topology.md index 8dba6fc..c1c719e 100644 --- a/docs/architecture/runtime-topology.md +++ b/docs/architecture/runtime-topology.md @@ -72,6 +72,9 @@ flowchart LR `workflow_case_read_model` directly for operator inspection, but those UI cards remain read-only mirrors below `task_contract`. - Runtime artifacts (`manifest`, `events.jsonl`, reports) are generated per run. +- Runs may now also persist `artifacts/prompt_artifact.json`, a contract-derived + snapshot of prompt/bundle/runtime-binding refs for that run. It is a + read-only audit artifact, not a second execution authority source. - Run detail views may now include derived decision packs such as `incident_pack.json`, while approval queues synthesize `approval_pack` summaries from run events plus manifest metadata. These are derived operator From 721f9081de16b9c880c35d7d01e018b4a1b69513 Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:35:23 -0700 Subject: [PATCH 4/9] fix: use nanos for runtime run ordering --- .../services/control_plane_read_service.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py index 74c00db..f89e4bd 100644 --- a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py +++ b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py @@ -178,17 +178,17 @@ def _last_event_ts(run_id: str) -> str: return value return "" - def _run_sort_ts(run_dir: Path, manifest_record: dict[str, Any]) -> float: + def _run_sort_ts(run_dir: Path, manifest_record: dict[str, Any]) -> int: manifest_path = run_dir / "manifest.json" if manifest_path.exists(): - return manifest_path.stat().st_mtime + return manifest_path.stat().st_mtime_ns created_at = _as_text(manifest_record.get("created_at")) if created_at: try: - return _parse_iso_ts(created_at).timestamp() + return int(_parse_iso_ts(created_at).timestamp() * 1_000_000_000) except Exception: pass - return run_dir.stat().st_mtime + return run_dir.stat().st_mtime_ns def _list_runs_runtime() -> list[dict[str, Any]]: results: list[dict[str, Any]] = [] From 05d0f3cbc561a9258ef61aea817ff1351635f459 Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:41:01 -0700 Subject: [PATCH 5/9] feat: persist planning artifacts for runs --- .../api/main_pm_intake_helpers.py | 58 ++++++++- .../test_main_pm_intake_helpers_branches.py | 110 ++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) diff --git 
a/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py b/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py index 2d8c4ad..a670d4b 100644 --- a/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py +++ b/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py @@ -17,8 +17,9 @@ from cortexpilot_orch.config import load_config from cortexpilot_orch.contract.compiler import build_role_binding_summary, sync_role_contract from cortexpilot_orch.observability.logger import log_event -from cortexpilot_orch.planning.intake import IntakeService +from cortexpilot_orch.planning.intake import IntakeService, _build_wave_plan, _build_worker_prompt_contracts from cortexpilot_orch.store.intake_store import IntakeStore +from cortexpilot_orch.store.run_store import RunStore _TRUTHY_VALUES = {"1", "true", "yes", "y", "on"} @@ -118,6 +119,55 @@ def _strip_intake_only_contract_fields(contract: dict[str, Any]) -> dict[str, An return sanitized +def _safe_read_intake_store_payload(store: object, method_name: str, intake_id: str) -> dict[str, Any]: + reader = getattr(store, method_name, None) + if not callable(reader): + return {} + try: + payload = reader(intake_id) + except Exception: + return {} + return payload if isinstance(payload, dict) else {} + + +def _persist_planning_artifacts_for_run( + *, + intake_id: str, + run_id: str, + runs_root: Path, +) -> list[str]: + intake_store = IntakeStore() + intake_payload = _safe_read_intake_store_payload(intake_store, "read_intake", intake_id) + response_payload = _safe_read_intake_store_payload(intake_store, "read_response", intake_id) + plan_bundle = response_payload.get("plan_bundle") if isinstance(response_payload.get("plan_bundle"), dict) else None + if not intake_payload or not isinstance(plan_bundle, dict): + return [] + + run_store = RunStore(runs_root=runs_root) + artifacts_to_write: list[tuple[str, Any]] = [ + ("planning_wave_plan.json", _build_wave_plan(plan_bundle)), + ("planning_worker_prompt_contracts.json", _build_worker_prompt_contracts(plan_bundle, intake_payload)), + ] + written: list[str] = [] + for filename, payload in artifacts_to_write: + if payload in ({}, [], None): + continue + run_store.write_artifact(run_id, filename, json.dumps(payload, ensure_ascii=False, indent=2)) + written.append(filename) + + if written: + run_store.append_event( + run_id, + { + "level": "INFO", + "event": "PLANNING_ARTIFACTS_WRITTEN", + "run_id": run_id, + "meta": {"intake_id": intake_id, "artifacts": written}, + }, + ) + return written + + def configure_pm_session_aggregation( *, runs_root_fn: Callable[[], Path], @@ -597,10 +647,16 @@ def _execute_in_background() -> None: ) IntakeStore().append_event(intake_id, {"event": "INTAKE_RUN", "run_id": run_id}) + planning_artifacts = _persist_planning_artifacts_for_run( + intake_id=intake_id, + run_id=run_id, + runs_root=runs_root, + ) return { "ok": True, "run_id": run_id, "contract_path": str(contract_path), "strict_acceptance": bool(runtime_options.get("strict_acceptance", False)), "role_binding_summary": build_role_binding_summary(contract), + "planning_artifacts": planning_artifacts, } diff --git a/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py b/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py index 3f9ee3f..2d3999a 100644 --- a/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py +++ b/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py @@ -463,6 +463,116 @@ def execute_task(contract_path: Path, mock_mode: 
bool = False) -> str: assert observed_contract["runtime_options"]["strict_acceptance"] is True +def test_run_intake_persists_planning_artifacts_into_run_bundle(monkeypatch, tmp_path: Path) -> None: + runs_root = tmp_path / "runs" + runtime_contract_root = tmp_path / ".runtime-cache" / "cortexpilot" / "contracts" + intake_payload = { + "objective": "Ship one planning artifact bridge", + "constraints": ["truthful-public-surface"], + "search_queries": ["command tower planning artifact"], + } + response_payload = { + "plan_bundle": { + "bundle_id": "bundle-1", + "objective": "Ship one planning artifact bridge", + "owner_agent": {"role": "PM", "agent_id": "pm-1"}, + "plans": [ + { + "plan_id": "worker-1", + "assigned_agent": {"role": "WORKER", "agent_id": "worker-1"}, + "spec": "Persist the planning artifact into the run bundle.", + "allowed_paths": ["apps/orchestrator"], + "acceptance_tests": [{"name": "pytest", "cmd": "python3 -m pytest -q", "must_pass": True}], + "mcp_tool_set": ["codex"], + "required_outputs": [{"name": "task_result.json", "type": "report"}], + } + ], + } + } + intake_events: list[tuple[str, dict[str, object]]] = [] + + class _Store: + def append_event(self, intake_id: str, payload: dict[str, object]) -> None: + intake_events.append((intake_id, payload)) + + def read_intake(self, intake_id: str) -> dict[str, object]: + assert intake_id == "persist" + return intake_payload + + def read_response(self, intake_id: str) -> dict[str, object]: + assert intake_id == "persist" + return response_payload + + monkeypatch.setattr(helpers, "IntakeStore", lambda: _Store()) + monkeypatch.setattr( + helpers, + "load_config", + lambda: types.SimpleNamespace( + repo_root=tmp_path, + runs_root=runs_root, + contract_root=tmp_path / "contracts", + runtime_contract_root=runtime_contract_root, + ), + ) + + class _BuildOK: + def build_contract(self, intake_id: str) -> dict[str, object]: + assert intake_id == "persist" + return { + "task_id": "task-persist", + "owner_agent": {"role": "PM", "agent_id": "pm-1"}, + "assigned_agent": {"role": "WORKER", "agent_id": "worker-1"}, + "inputs": {"spec": "repro", "artifacts": []}, + "required_outputs": [{"name": "task_result.json", "type": "json", "acceptance": "ok"}], + "allowed_paths": ["apps/orchestrator"], + "forbidden_actions": [], + "acceptance_tests": [{"name": "pytest", "cmd": "python3 -m pytest -q", "must_pass": True}], + "tool_permissions": { + "filesystem": "workspace-write", + "shell": "on-request", + "network": "deny", + "mcp_tools": ["codex"], + }, + "mcp_tool_set": ["codex"], + "timeout_retry": {"timeout_sec": 60, "max_retries": 0, "retry_backoff_sec": 0}, + "rollback": {"strategy": "git_reset_hard", "baseline_ref": "HEAD"}, + "evidence_links": [], + "log_refs": {"run_id": "", "paths": {}}, + } + + class _Orchestrator: + @staticmethod + def execute_task(contract_path: Path, mock_mode: bool = False) -> str: + del mock_mode + payload = json.loads(contract_path.read_text(encoding="utf-8")) + store = RunStore(runs_root=runs_root) + run_id = store.create_run(str(payload.get("task_id") or "task")) + store.write_manifest(run_id, {"run_id": run_id, "task_id": payload.get("task_id"), "status": "RUNNING", "repo": {}}) + return run_id + + result = helpers.run_intake( + "persist", + {"mock": True}, + intake_service_cls=_BuildOK, + orchestration_service=_Orchestrator(), + error_detail_fn=lambda code: {"code": code}, + current_request_id_fn=lambda: "req-persist", + ) + + run_id = result["run_id"] + wave_plan = json.loads((runs_root / run_id / "artifacts" / 
"planning_wave_plan.json").read_text(encoding="utf-8")) + worker_contracts = json.loads( + (runs_root / run_id / "artifacts" / "planning_worker_prompt_contracts.json").read_text(encoding="utf-8") + ) + + assert result["planning_artifacts"] == ["planning_wave_plan.json", "planning_worker_prompt_contracts.json"] + assert wave_plan["wave_id"] == "bundle-1" + assert wave_plan["objective"] == "Ship one planning artifact bridge" + assert worker_contracts[0]["prompt_contract_id"] == "worker-1" + assert worker_contracts[0]["continuation_policy"]["on_blocked"] == "spawn_independent_temporary_unblock_task" + assert intake_events[-1] == ("persist", {"event": "INTAKE_RUN", "run_id": run_id}) + + def test_build_role_binding_summary_marks_skills_and_mcp_registry_refs_as_registry_backed() -> None: summary = build_role_binding_summary( { From cdfc07f880cdc1c9618bb068da127e6fc0ee596b Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:43:08 -0700 Subject: [PATCH 6/9] feat: persist planning artifacts for runs From b2cb869d65ad989d8c55de9128f2077823c62f5b Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:50:47 -0700 Subject: [PATCH 7/9] feat: register planning artifacts in run manifests --- .../api/main_pm_intake_helpers.py | 41 ++++++++++++++++++- .../scheduler/scheduler_bridge_contract.py | 34 +++++++++++++++ .../test_main_pm_intake_helpers_branches.py | 4 ++ .../tests/test_scheduler_bridge_runtime.py | 2 + 4 files changed, 80 insertions(+), 1 deletion(-) diff --git a/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py b/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py index a670d4b..53a01c1 100644 --- a/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py +++ b/apps/orchestrator/src/cortexpilot_orch/api/main_pm_intake_helpers.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib import json import threading from datetime import datetime, timezone @@ -119,6 +120,29 @@ def _strip_intake_only_contract_fields(contract: dict[str, Any]) -> dict[str, An return sanitized +def _artifact_ref_for_path(path: Path, *, rel_path: str, name: str, media_type: str = "application/json") -> dict[str, Any]: + payload = path.read_bytes() + return { + "name": name, + "path": rel_path, + "sha256": hashlib.sha256(payload).hexdigest(), + "media_type": media_type, + "size_bytes": len(payload), + } + + +def _append_manifest_artifact(manifest: dict[str, Any], ref: dict[str, Any]) -> None: + artifacts = manifest.get("artifacts") if isinstance(manifest.get("artifacts"), list) else [] + key = (str(ref.get("name") or ""), str(ref.get("path") or "")) + for item in artifacts: + if not isinstance(item, dict): + continue + if (str(item.get("name") or ""), str(item.get("path") or "")) == key: + return + artifacts.append(ref) + manifest["artifacts"] = artifacts + + def _safe_read_intake_store_payload(store: object, method_name: str, intake_id: str) -> dict[str, Any]: reader = getattr(store, method_name, None) if not callable(reader): @@ -144,18 +168,33 @@ def _persist_planning_artifacts_for_run( return [] run_store = RunStore(runs_root=runs_root) + run_dir = run_store.run_dir(run_id) artifacts_to_write: list[tuple[str, Any]] = [ ("planning_wave_plan.json", _build_wave_plan(plan_bundle)), ("planning_worker_prompt_contracts.json", _build_worker_prompt_contracts(plan_bundle, intake_payload)), ] written: list[str] = [] + 
artifact_refs: list[dict[str, Any]] = [] for filename, payload in artifacts_to_write: if payload in ({}, [], None): continue - run_store.write_artifact(run_id, filename, json.dumps(payload, ensure_ascii=False, indent=2)) + artifact_path = run_store.write_artifact(run_id, filename, json.dumps(payload, ensure_ascii=False, indent=2)) written.append(filename) + artifact_refs.append( + _artifact_ref_for_path( + artifact_path, + rel_path=f"artifacts/{filename}", + name=filename.removesuffix(".json"), + ) + ) if written: + manifest_path = run_dir / "manifest.json" + manifest = _read_json_file(manifest_path) + if manifest: + for ref in artifact_refs: + _append_manifest_artifact(manifest, ref) + manifest_path.write_text(json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8") run_store.append_event( run_id, { diff --git a/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py b/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py index dbf16ea..6d16354 100644 --- a/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py +++ b/apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_contract.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable +import hashlib import json from pathlib import Path from typing import Any @@ -9,6 +10,29 @@ from cortexpilot_orch.store.run_store import RunStore +def _artifact_ref_for_path(path: Path, *, rel_path: str, name: str, media_type: str = "application/json") -> dict[str, Any]: + payload = path.read_bytes() + return { + "name": name, + "path": rel_path, + "sha256": hashlib.sha256(payload).hexdigest(), + "media_type": media_type, + "size_bytes": len(payload), + } + + +def _append_manifest_artifact(manifest: dict[str, Any], ref: dict[str, Any]) -> None: + artifacts = manifest.get("artifacts") if isinstance(manifest.get("artifacts"), list) else [] + key = (str(ref.get("name") or ""), str(ref.get("path") or "")) + for item in artifacts: + if not isinstance(item, dict): + continue + if (str(item.get("name") or ""), str(item.get("path") or "")) == key: + return + artifacts.append(ref) + manifest["artifacts"] = artifacts + + class ContractStateWriter: def __init__( self, @@ -215,6 +239,16 @@ def persist_contract_state( "prompt_artifact.json", json.dumps(prompt_artifact, ensure_ascii=False, indent=2), ) + if manifest is not None: + _append_manifest_artifact( + manifest, + _artifact_ref_for_path( + prompt_artifact_path, + rel_path="artifacts/prompt_artifact.json", + name="prompt_artifact", + ), + ) + write_manifest_fn(store, run_id, manifest) store.append_event( run_id, { diff --git a/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py b/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py index 2d3999a..6499250 100644 --- a/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py +++ b/apps/orchestrator/tests/test_main_pm_intake_helpers_branches.py @@ -564,12 +564,16 @@ def execute_task(contract_path: Path, mock_mode: bool = False) -> str: worker_contracts = json.loads( (runs_root / run_id / "artifacts" / "planning_worker_prompt_contracts.json").read_text(encoding="utf-8") ) + manifest = json.loads((runs_root / run_id / "manifest.json").read_text(encoding="utf-8")) assert result["planning_artifacts"] == ["planning_wave_plan.json", "planning_worker_prompt_contracts.json"] assert wave_plan["wave_id"] == "bundle-1" assert wave_plan["objective"] == "Ship one planning artifact bridge" assert 
worker_contracts[0]["prompt_contract_id"] == "worker-1" assert worker_contracts[0]["continuation_policy"]["on_blocked"] == "spawn_independent_temporary_unblock_task" + artifact_names = [item["name"] for item in manifest["artifacts"]] + assert "planning_wave_plan" in artifact_names + assert "planning_worker_prompt_contracts" in artifact_names assert intake_events[-1] == ("persist", {"event": "INTAKE_RUN", "run_id": run_id}) diff --git a/apps/orchestrator/tests/test_scheduler_bridge_runtime.py b/apps/orchestrator/tests/test_scheduler_bridge_runtime.py index faac59d..0fe1c0f 100644 --- a/apps/orchestrator/tests/test_scheduler_bridge_runtime.py +++ b/apps/orchestrator/tests/test_scheduler_bridge_runtime.py @@ -163,3 +163,5 @@ def test_persist_contract_state_writes_role_binding_summary_to_manifest(tmp_path assert prompt_artifact["run_id"] == run_id assert prompt_artifact["task_id"] == "task-role-binding-summary" assert prompt_artifact["role_binding_summary"] == build_role_binding_summary(contract) + artifact_names = [item["name"] for item in written["artifacts"]] + assert "prompt_artifact" in artifact_names From 700b6bf83c96c2463c8b6497d30ba1d3048316d4 Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:59:42 -0700 Subject: [PATCH 8/9] fix: stabilize hosted run ordering --- .../cortexpilot_orch/services/control_plane_read_service.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py index f89e4bd..16ef84b 100644 --- a/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py +++ b/apps/orchestrator/src/cortexpilot_orch/services/control_plane_read_service.py @@ -202,7 +202,11 @@ def _list_runs_runtime() -> list[dict[str, Any]]: continue run_dirs.append((run_dir, manifest_record, _run_sort_ts(run_dir, manifest_record))) - for run_dir, manifest_record, _sort_ts in sorted(run_dirs, key=lambda item: item[2], reverse=True): + for run_dir, manifest_record, _sort_ts in sorted( + run_dirs, + key=lambda item: (item[2], item[0].name), + reverse=True, + ): run_id = _as_text(manifest_record.get("run_id")) or run_dir.name payload = dict(manifest_record) payload["run_id"] = run_id From 5de7f4d6a03d3796b8a7119127b4e20caa7b22ae Mon Sep 17 00:00:00 2001 From: "Yifeng[Terry] Yu" <125581657+xiaojiou176@users.noreply.github.com> Date: Sun, 12 Apr 2026 15:18:16 -0700 Subject: [PATCH 9/9] fix: drop unused control-plane test import --- apps/orchestrator/tests/test_control_plane_read_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/orchestrator/tests/test_control_plane_read_service.py b/apps/orchestrator/tests/test_control_plane_read_service.py index c075ebf..0ccc00f 100644 --- a/apps/orchestrator/tests/test_control_plane_read_service.py +++ b/apps/orchestrator/tests/test_control_plane_read_service.py @@ -2,7 +2,7 @@ import json import sys -from datetime import datetime, timezone +from datetime import datetime from pathlib import Path from types import SimpleNamespace from types import ModuleType