Skip to content

Commit 5d8fa5d

Browse files
committed
fix: harden truth and coverage gates
1 parent 6246841 commit 5d8fa5d

26 files changed

Lines changed: 1272 additions & 68 deletions

README.md

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -526,13 +526,22 @@ Default local verification path:
526526
npm run ci
527527
npm run test:quick
528528
npm run test
529+
npm run mutation:gate
530+
npm run bench:e2e:speed:gate
529531
```
530532

531533
`npm run ci` is now the hosted-aligned local fast gate. Use
532534
`npm run ci:strict`, `npm run docs:check`, `bash scripts/check_repo_hygiene.sh`,
533535
`npm run scan:workflow-security`, `npm run scan:trivy`, and
534536
`npm run security:scan:closeout` only when you intentionally want the stricter
535537
closeout/manual layers.
538+
`npm run mutation:gate` is the root mutation entrypoint for the existing
539+
Orchestrator mutation profiles, `npm run bench:e2e:speed:gate` is the
540+
fail-closed benchmark gate that evaluates a real benchmark summary once a run
541+
has produced one, and `npm run coverage:repo` now points to the active
542+
coverage runner that prepares subproject dependencies before generating fresh
543+
repo-level coverage receipts. Use `npm run coverage:repo:aggregate` only when
544+
you intentionally want to re-aggregate already-existing coverage artifacts.
536545

537546
Current CI contract has five layers only:
538547

apps/dashboard/vitest.config.mts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,9 @@ if (pool !== requestedPool) {
2727
}
2828
const shouldEmitHtmlCoverage = !process.env.CI || process.env.CORTEXPILOT_COVERAGE_HTML === "1";
2929
const coverageReporter = shouldEmitHtmlCoverage ? ["text", "html", "json-summary"] : ["text", "json-summary"];
30-
const coverageReportsDirectory = path.resolve(process.cwd(), "coverage");
30+
const coverageReportsDirectory = process.env.CORTEXPILOT_DASHBOARD_COVERAGE_DIR
31+
? path.resolve(process.env.CORTEXPILOT_DASHBOARD_COVERAGE_DIR)
32+
: path.resolve(process.cwd(), "coverage");
3133
const coverageClean = !serialCoverageMode;
3234
const coverageProcessingConcurrency = serialCoverageMode ? 1 : undefined;
3335
const testTimeout = process.env.CI ? 45000 : 15000;

apps/desktop/scripts/playwright-tempdir.mjs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ function sanitizeScope(scope) {
1414
function resolveTempRoot(scriptDir) {
1515
const runnerTemp = normalizeValue(process.env.RUNNER_TEMP);
1616
if (runnerTemp) return resolve(runnerTemp);
17-
return resolve(scriptDir, "..", "..", "..", ".runtime-cache", "temp");
17+
return resolve(scriptDir, "..", "..", "..", ".runtime-cache", "cache", "tmp");
1818
}
1919

2020
export function configurePlaywrightTempDir(scope) {

apps/desktop/src/components/chain/ChainPanel.test.tsx

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,9 @@ describe("ChainPanel", () => {
5151
/>
5252
);
5353

54-
fireEvent.click(screen.getByRole("button", { name: "简洁视图" }));
55-
fireEvent.click(screen.getByRole("button", { name: "详细视图" }));
56-
fireEvent.click(screen.getByRole("button", { name: "Chain 优先" }));
54+
fireEvent.click(screen.getByRole("button", { name: "Compact view" }));
55+
fireEvent.click(screen.getByRole("button", { name: "Detailed view" }));
56+
fireEvent.click(screen.getByRole("button", { name: "Chain first" }));
5757

5858
expect(setChainDisplayMode).toHaveBeenCalledWith("compact");
5959
expect(setChainDisplayMode).toHaveBeenCalledWith("detail");
@@ -79,7 +79,7 @@ describe("ChainPanel", () => {
7979
/>
8080
);
8181

82-
const legend = screen.getByLabelText("节点状态说明");
82+
const legend = screen.getByLabelText("Node status legend");
8383
const items = legend.querySelectorAll("li");
8484
expect(items).toHaveLength(2);
8585
expect(items[0]).toHaveClass("is-active");
Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
import { fireEvent, render, screen, waitFor } from "@testing-library/react";
2+
import { describe, expect, it, vi } from "vitest";
3+
4+
import { DesktopCopilotPanel } from "./DesktopCopilotPanel";
5+
6+
describe("DesktopCopilotPanel", () => {
7+
it("renders operator-brief truth surfaces and grounded takeaways after generation", async () => {
8+
const loadBrief = vi.fn().mockResolvedValue({
9+
report_type: "operator_copilot_brief",
10+
status: "AVAILABLE",
11+
scope: "run_detail",
12+
subject_id: "run-123",
13+
summary: "The operator should compare the staged diff before accepting the run.",
14+
likely_cause: "The last proof pack is stale.",
15+
compare_takeaway: "Compare the staged diff against the last approved run.",
16+
proof_takeaway: "Refresh the proof pack before asking for review.",
17+
incident_takeaway: "Treat stale proof as an incident until it is re-generated.",
18+
queue_takeaway: "Keep the queue paused until proof is current.",
19+
approval_takeaway: "Approval should wait for a fresh proof receipt.",
20+
used_truth_surfaces: ["run_detail", "", "proof_pack"],
21+
limitations: ["review not started", " "],
22+
recommended_actions: ["Refresh proof", "Request review", " "],
23+
top_risks: ["stale-proof", "", "queue drift"],
24+
});
25+
26+
render(
27+
<DesktopCopilotPanel
28+
intro="Only grounded control-plane truth belongs here."
29+
questionSet={["What is blocked?", "What should the operator do next?"]}
30+
loadBrief={loadBrief}
31+
/>,
32+
);
33+
34+
expect(screen.getByText("Only grounded control-plane truth belongs here.")).toBeInTheDocument();
35+
expect(screen.getByText("What is blocked?")).toBeInTheDocument();
36+
expect(screen.getByText("What should the operator do next?")).toBeInTheDocument();
37+
expect(screen.getByText("On demand")).toBeInTheDocument();
38+
39+
fireEvent.click(screen.getByRole("button", { name: "Generate operator brief" }));
40+
41+
expect(await screen.findByText("Grounded brief")).toBeInTheDocument();
42+
expect(await screen.findByText("The operator should compare the staged diff before accepting the run.")).toBeInTheDocument();
43+
expect(screen.getByText("The last proof pack is stale.")).toBeInTheDocument();
44+
expect(screen.getByText("Scope: run_detail")).toBeInTheDocument();
45+
expect(screen.getByText("Subject: run-123")).toBeInTheDocument();
46+
expect(screen.getByText("Truth surfaces: run_detail | proof_pack")).toBeInTheDocument();
47+
expect(screen.getByText("Limitations: review not started")).toBeInTheDocument();
48+
expect(screen.getByText("Compare the staged diff against the last approved run.")).toBeInTheDocument();
49+
expect(screen.getByText("Keep the queue paused until proof is current.")).toBeInTheDocument();
50+
expect(screen.getByText("Refresh proof")).toBeInTheDocument();
51+
expect(screen.getByText("queue drift")).toBeInTheDocument();
52+
expect(screen.getByRole("button", { name: "Regenerate brief" })).toBeInTheDocument();
53+
54+
expect(loadBrief).toHaveBeenCalledTimes(1);
55+
});
56+
57+
it("covers flight-plan fallback labels and empty action/risk lists", async () => {
58+
const loadBrief = vi.fn().mockResolvedValue({
59+
report_type: "flight_plan_copilot_brief",
60+
status: "UNAVAILABLE",
61+
summary: "The plan is still advisory because execution has not started yet.",
62+
risk_takeaway: "Approval is still blocked on a missing operator confirmation.",
63+
capability_takeaway: "Runtime capability is unresolved until the runner binds.",
64+
approval_takeaway: "An operator must confirm the start gate before execution.",
65+
used_truth_surfaces: ["execution_plan_preview"],
66+
recommended_actions: ["", " "],
67+
top_risks: [],
68+
limitations: undefined,
69+
});
70+
71+
render(<DesktopCopilotPanel title="Flight plan panel" intro={undefined} questionSet={["Why this plan?"]} loadBrief={loadBrief} />);
72+
73+
fireEvent.click(screen.getByRole("button", { name: "Generate operator brief" }));
74+
75+
expect(await screen.findByText("Unavailable")).toBeInTheDocument();
76+
expect(screen.getByText("Scope: flight_plan")).toBeInTheDocument();
77+
expect(screen.getByText("Subject: execution_plan_report")).toBeInTheDocument();
78+
expect(screen.getByText("Truth surfaces: execution_plan_preview")).toBeInTheDocument();
79+
expect(screen.getByText("Limitations: -")).toBeInTheDocument();
80+
expect(screen.getAllByText("Approval is still blocked on a missing operator confirmation.").length).toBeGreaterThan(0);
81+
expect(screen.getByText("This brief stays advisory until a run actually starts.")).toBeInTheDocument();
82+
expect(screen.getByText("No recommended actions were returned.")).toBeInTheDocument();
83+
expect(screen.getByText("No explicit risks were returned.")).toBeInTheDocument();
84+
});
85+
86+
it("surfaces load failures without leaving the panel in generating state", async () => {
87+
const loadBrief = vi.fn().mockRejectedValue("brief backend unavailable");
88+
89+
render(<DesktopCopilotPanel questionSet={["Why did this fail?"]} loadBrief={loadBrief} />);
90+
91+
fireEvent.click(screen.getByRole("button", { name: "Generate operator brief" }));
92+
93+
expect(await screen.findByText("brief backend unavailable")).toBeInTheDocument();
94+
await waitFor(() => {
95+
expect(screen.getByRole("button", { name: "Generate operator brief" })).toBeEnabled();
96+
});
97+
});
98+
});

apps/desktop/src/hooks/useDesktopData.test.tsx

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,9 @@ describe("useDesktopData", () => {
104104
const user = userEvent.setup();
105105
render(<HookHarness activePage="overview" />);
106106
await waitFor(() => {
107-
expect(screen.getByTestId("live-error")).toHaveTextContent("总览数据拉取失败");
107+
expect(screen.getByTestId("live-error")).toHaveTextContent(
108+
"Failed to refresh overview data: the service is temporarily unavailable. Try again in a moment.",
109+
);
108110
});
109111

110112
overviewFail = false;
@@ -131,7 +133,9 @@ describe("useDesktopData", () => {
131133
);
132134
render(<HookHarness activePage="sessions" />);
133135
await waitFor(() => {
134-
expect(screen.getByTestId("live-error")).toHaveTextContent("会话列表拉取失败");
136+
expect(screen.getByTestId("live-error")).toHaveTextContent(
137+
"Failed to refresh the session list: the service is temporarily unavailable. Try again in a moment.",
138+
);
135139
});
136140
});
137141

@@ -154,7 +158,9 @@ describe("useDesktopData", () => {
154158
);
155159
render(<HookHarness activePage="sessions" />);
156160
await waitFor(() => {
157-
expect(screen.getByTestId("live-error")).toHaveTextContent("后端暂不可达,已进入退避重试");
161+
expect(screen.getByTestId("live-error")).toHaveTextContent(
162+
"The backend is currently unreachable. Backoff retry is active and local actions can continue.",
163+
);
158164
});
159165
});
160166

@@ -233,7 +239,9 @@ describe("useDesktopData", () => {
233239
try {
234240
render(<HookHarness activePage="sessions" />);
235241
await waitFor(() => {
236-
expect(screen.getByTestId("live-error")).toHaveTextContent("当前网络离线,已暂停实时拉取。恢复联网后将自动重试。");
242+
expect(screen.getByTestId("live-error")).toHaveTextContent(
243+
"The network is offline. Live polling is paused and will retry automatically when connectivity returns.",
244+
);
237245
});
238246
} finally {
239247
Object.defineProperty(window.navigator, "onLine", { configurable: true, value: originalOnLine });
@@ -260,7 +268,9 @@ describe("useDesktopData", () => {
260268

261269
render(<HookHarness activePage="sessions" />);
262270
await waitFor(() => {
263-
expect(screen.getByTestId("live-error")).toHaveTextContent("会话列表拉取失败:权限或认证异常,请确认登录状态。");
271+
expect(screen.getByTestId("live-error")).toHaveTextContent(
272+
"Failed to refresh the session list: authentication or permission check failed. Confirm your sign-in state.",
273+
);
264274
});
265275
});
266276

@@ -380,7 +390,7 @@ describe("useDesktopData", () => {
380390
try {
381391
render(<HookHarness activePage="gates" />);
382392
await waitFor(() => {
383-
expect(screen.getByTestId("live-error")).toHaveTextContent("策略告警拉取失败");
393+
expect(screen.getByTestId("live-error")).toHaveTextContent("Failed to refresh policy alerts");
384394
});
385395
expect(consoleSpy).toHaveBeenCalled();
386396
} finally {

apps/desktop/src/lib/desktopUi.test.ts

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ describe("desktopUi seed timeline", () => {
4949
),
5050
);
5151

52-
fireEvent.click(screen.getByRole("button", { name: "查看完整 Diff" }));
52+
fireEvent.click(screen.getByRole("button", { name: "View full diff" }));
5353
expect(onViewDiff).toHaveBeenCalledWith("report-1");
5454
});
5555

@@ -108,8 +108,8 @@ describe("desktopUi seed timeline", () => {
108108

109109
render(createElement("div", null, renderChatEmbed(message as any, embed as any, chooseDecision)));
110110

111-
expect(screen.getByText("推荐")).toBeInTheDocument();
112-
fireEvent.click(screen.getByRole("button", { name: "选择" }));
111+
expect(screen.getByText("Recommended")).toBeInTheDocument();
112+
fireEvent.click(screen.getByRole("button", { name: "Choose" }));
113113
expect(chooseDecision).toHaveBeenCalledWith("msg-decision", "decision-1", "fast");
114114
});
115115

@@ -157,10 +157,11 @@ describe("desktopUi seed timeline", () => {
157157
)
158158
);
159159

160-
expect(screen.getByText("任务:")).toBeInTheDocument();
161-
expect(screen.getAllByText("进行中")).toHaveLength(2);
162-
expect(screen.getByText("等待")).toBeInTheDocument();
163-
expect(screen.getByText("完成")).toBeInTheDocument();
164-
expect(screen.getByLabelText("警报卡片")).toHaveClass("is-critical");
160+
expect(screen.getByText("Task:")).toBeInTheDocument();
161+
expect(screen.getByText("进行中")).toBeInTheDocument();
162+
expect(screen.getAllByText("In progress")).toHaveLength(1);
163+
expect(screen.getByText("Waiting")).toBeInTheDocument();
164+
expect(screen.getByText("Done")).toBeInTheDocument();
165+
expect(screen.getByLabelText("Alert card")).toHaveClass("is-critical");
165166
});
166167
});

apps/desktop/src/lib/uiError.test.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7,21 +7,21 @@ describe("uiError", () => {
77
});
88

99
it("maps network-style messages", () => {
10-
expect(sanitizeUiError(new Error("Network timeout"), "加载失败")).toContain("未连接到本地服务");
11-
expect(sanitizeUiError(new Error("fetch failed"), "加载失败")).toContain("未连接到本地服务");
10+
expect(sanitizeUiError(new Error("Network timeout"), "Load failed")).toContain("unable to reach the local service");
11+
expect(sanitizeUiError(new Error("fetch failed"), "Load failed")).toContain("unable to reach the local service");
1212
});
1313

1414
it("maps auth-style messages", () => {
15-
expect(sanitizeUiError(new Error("401 unauthorized"), "加载失败")).toContain("权限或认证异常");
16-
expect(sanitizeUiError(new Error("token invalid"), "加载失败")).toContain("权限或认证异常");
15+
expect(sanitizeUiError(new Error("401 unauthorized"), "Load failed")).toContain("authentication or permission check failed");
16+
expect(sanitizeUiError(new Error("token invalid"), "Load failed")).toContain("authentication or permission check failed");
1717
});
1818

1919
it("keeps generic fallback for unknown errors", () => {
2020
expect(sanitizeUiError(new Error("boom"), "加载失败")).toBe("加载失败");
2121
});
2222

2323
it("maps backend 5xx-style messages", () => {
24-
expect(sanitizeUiError(new Error("API /path failed: 503"), "加载失败")).toContain("服务暂时不可用");
24+
expect(sanitizeUiError(new Error("API /path failed: 503"), "Load failed")).toContain("service is temporarily unavailable");
2525
});
2626

2727
it("extracts detail from unknown payload", () => {

0 commit comments

Comments
 (0)