From 88190edb12355c80cfcda56667fb5d791f9d0da2 Mon Sep 17 00:00:00 2001 From: cyning Date: Fri, 29 May 2026 18:38:09 +0800 Subject: [PATCH] =?UTF-8?q?feat(harness):=20=E7=90=86=E8=AE=BA=E5=AF=B9?= =?UTF-8?q?=E9=BD=90=20P1=20=E2=80=94=20Fresh=20Context=20=C2=B7=20?= =?UTF-8?q?=E5=8D=8A=E8=87=AA=E5=8A=A8=E8=A1=A8=20=C2=B7=20=E9=94=99?= =?UTF-8?q?=E8=AF=AF=20shape=20Linter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增补 22/40/50 与 invoke 模板 Fresh Context 纪律;README semi_auto 决策表; 首条领域 Linter(候选 C)校验 rate_limit/circuit_breaker 结构化错误必填键,pytest 覆盖。 Co-authored-by: Cursor --- docs/harness/README.md | 10 ++ .../linters/structured_error_registry_v1.json | 19 ++++ docs/harness/prompts/hats/22-task-audit.md | 1 + docs/harness/prompts/hats/40-self-check.md | 1 + .../prompts/hats/50-independent-reinspect.md | 1 + .../templates/TEMPLATE-execute-invoke.md | 1 + .../TEMPLATE-independent-reinspect-invoke.md | 1 + .../templates/TEMPLATE-self-check-invoke.md | 3 +- .../templates/TEMPLATE-task-audit-invoke.md | 1 + docs/tasks/README.md | 15 +++ .../active/task_harness_theory_align_p1_v1.md | 37 +++++-- ...st_harness_structured_error_shape_check.py | 19 ++++ tools/harness_structured_error_shape_check.py | 104 ++++++++++++++++++ 13 files changed, 204 insertions(+), 9 deletions(-) create mode 100644 docs/harness/linters/structured_error_registry_v1.json create mode 100644 tests/test_harness_structured_error_shape_check.py create mode 100644 tools/harness_structured_error_shape_check.py diff --git a/docs/harness/README.md b/docs/harness/README.md index 9354b3fa..57b46f62 100644 --- a/docs/harness/README.md +++ b/docs/harness/README.md @@ -86,6 +86,16 @@ docs/harness/ 4. **`human_gate`** → `approved`(人改) 5. 
CI 绿(**Required**:`pytest` + tech-graph;`verify-fast` 见排期表 §6.5,**非**默认 Required) +### 3.1 领域结构 Linter(P1 · Sensors) + +| 项 | 命令 | +|----|------| +| **结构化错误响应 shape**(候选 C) | `python tools/harness_structured_error_shape_check.py` | +| 注册表 | [`linters/structured_error_registry_v1.json`](linters/structured_error_registry_v1.json) | +| CI | 经 `pytest` → `tests/test_harness_structured_error_shape_check.py` | + +**失败时**:改 `api/`、测试或 registry;**禁止**在 Linter 内硬编码绕过(见 P1 SPEC §3.2)。 + --- ## 4. 上游同步(**仅维护者** · 非 Agent 路径) diff --git a/docs/harness/linters/structured_error_registry_v1.json b/docs/harness/linters/structured_error_registry_v1.json new file mode 100644 index 00000000..ab2e75aa --- /dev/null +++ b/docs/harness/linters/structured_error_registry_v1.json @@ -0,0 +1,19 @@ +{ + "schema": "harness_structured_error_registry_v1", + "freeze_id": "GOV-HARNESS-THEORY-ALIGN-P1@2026-05-29", + "description": "P1-3 首条领域 Linter:ChatBI 韧性路径结构化错误响应必填键(候选 C)", + "required_keys": ["ok", "error_code", "message"], + "cases": [ + { + "id": "rate_limit_429", + "module": "api.chatbi_rate_limit", + "factory": "rate_limit_response_body_sample" + }, + { + "id": "circuit_breaker_open", + "module": "api.chatbi_circuit_breaker", + "factory": "circuit_breaker_open_error_body_sample" + } + ], + "remediation": "失败时改 api/ 实现、tests/ 或本 registry;禁止在 Linter 内硬编码绕过。" +} diff --git a/docs/harness/prompts/hats/22-task-audit.md b/docs/harness/prompts/hats/22-task-audit.md index 4105196f..b99f2678 100644 --- a/docs/harness/prompts/hats/22-task-audit.md +++ b/docs/harness/prompts/hats/22-task-audit.md @@ -62,6 +62,7 @@ - **禁止**仅在对话里口头「过了」而不写 **`docs/harness/reviews/task_*_audit_*.md`**。 - 不在未落盘审查文档时,指示执行帽对 **尚有阻塞** 的 task 开工。 - 不代替 **独立复检帽** 做逐条代码证据复核(本帽停在 **task 与文档层**)。 +- **Fresh Context(P1)**:**禁止**要求 22 阅读 **30 执行 invoke 全文** 或粘贴思考链;复审输入限于 **task、reviews、40 `### 自检结论`、diff 摘要**(见 [`SPEC-Governance-Harness-Theory-Align-P1-v1.md`](../../../spec/governance/SPEC-Governance-Harness-Theory-Align-P1-v1.md) 
§4)。 ## 输入假设 diff --git a/docs/harness/prompts/hats/40-self-check.md b/docs/harness/prompts/hats/40-self-check.md index 06664811..3003a9d7 100644 --- a/docs/harness/prompts/hats/40-self-check.md +++ b/docs/harness/prompts/hats/40-self-check.md @@ -15,6 +15,7 @@ - 不凭记忆声称「测过」;无命令输出则 **不勾选** 验收项。 - 不把 **应属于独立复检** 的深度代码走查塞满本帽(本帽以 **命令与验收表** 为主)。 +- **Fresh Context(P1)**:交接 50 / 22 时 **禁止**粘贴 30 执行 invoke 全文或长思考链;仅交 **`diff` 要点、验收表、`### 自检结论`**(见 P1 SPEC §4)。 ## 输入假设 diff --git a/docs/harness/prompts/hats/50-independent-reinspect.md b/docs/harness/prompts/hats/50-independent-reinspect.md index 8d594031..ce227706 100644 --- a/docs/harness/prompts/hats/50-independent-reinspect.md +++ b/docs/harness/prompts/hats/50-independent-reinspect.md @@ -17,6 +17,7 @@ - 不替执行者改代码(除非任务明确要求复检提交 patch)。 - 不顺杆爬补需求;缺口退回 **需求/审查帽**。 +- **Fresh Context(P1)**:**须新对话**开帽;输入仅 **task、reviews、diff 摘要、40 自检三件**;**禁止**附带 30 invoke 全文(见 [`SPEC-Governance-Harness-Theory-Align-P1-v1.md`](../../../spec/governance/SPEC-Governance-Harness-Theory-Align-P1-v1.md) §4)。 ### 输入假设 diff --git a/docs/harness/prompts/templates/TEMPLATE-execute-invoke.md b/docs/harness/prompts/templates/TEMPLATE-execute-invoke.md index 0185fe50..4b3aadb7 100644 --- a/docs/harness/prompts/templates/TEMPLATE-execute-invoke.md +++ b/docs/harness/prompts/templates/TEMPLATE-execute-invoke.md @@ -69,6 +69,7 @@ 9. 
**半自动下一棒(可选)**:若 task `semi_auto: true` 且下一棒(如 40)无 `human_gate` 阻塞:先将 **下一棒 §3 全文** 落盘新 invoke 并 commit,再切换角色执行;规则见 HANDOFF_SEMI_AUTO.md §3。否则仅输出下一棒 Prompt 供人开新会话。 禁止:在未读完必读与 failure_paths 的情况下改路由/契约;删除与 task 无关的大段重构;口头宣称「已测过」而无命令输出。 +**Fresh Context(P1)**:40→50/22 交接时 **禁止**粘贴本帽 invoke 全文或长思考链;仅交 diff 要点、验收表、`### 自检结论`。 ``` --- diff --git a/docs/harness/prompts/templates/TEMPLATE-independent-reinspect-invoke.md b/docs/harness/prompts/templates/TEMPLATE-independent-reinspect-invoke.md index ded6eac6..2373a5b3 100644 --- a/docs/harness/prompts/templates/TEMPLATE-independent-reinspect-invoke.md +++ b/docs/harness/prompts/templates/TEMPLATE-independent-reinspect-invoke.md @@ -62,6 +62,7 @@ 3. 对 task 每条验收项输出表格:验收项 | pass/fail | 证据(文件:行 / 测试名 / 日志片段)| 备注;fail 须写复现步骤或缺失证据。 4. 汇总阻塞合并项;给出是否建议合并(供维护者决策)。 5. 禁止:替执行者改代码(除非用户明确要求复检提交 patch);缺口退回需求/审查帽。 +**Fresh Context(P1)**:**新对话**开帽;**禁止**附带 30 invoke 全文;输入限于 task、reviews、40 自检、diff 摘要。 【当模式为「全局验收」或「两者」时 — 对应 hat §二】 6. 若 task 声明 freeze_id:核对 PR 变更是否在冻结基准内;契约升级是否在 SPEC/task 显式记录。 diff --git a/docs/harness/prompts/templates/TEMPLATE-self-check-invoke.md b/docs/harness/prompts/templates/TEMPLATE-self-check-invoke.md index 001e2c09..90644b16 100644 --- a/docs/harness/prompts/templates/TEMPLATE-self-check-invoke.md +++ b/docs/harness/prompts/templates/TEMPLATE-self-check-invoke.md @@ -55,9 +55,10 @@ 2. 输出 **验收表**(每项 pass/fail + 证据:命令名/测试名/日志摘录);fail 时写明是否可重试(环境/flaky)。 3. 将 **`### 自检结论(执行者)`** 写入 **{{TASK_PATH}}** 指向的 task 正文(若尚无该小节则新增;位置与团队习惯一致即可):含命令列表、退出码、验收摘要、已知未测项。 4. 禁止:凭记忆声称「测过」;把独立复检的深度走查塞进本帽(本帽以命令与验收表为主)。 +5. **Fresh Context(P1)**:交接 50/22 时 **禁止**粘贴 30 invoke 全文;仅交 diff 要点、验收表、`### 自检结论`。 对话回复:生成可以完整复制的 Prompt,用于直接交给下一棒执行;须兼顾打回、二次审查等情形,下一棒也可能是上一棒(由其修复问题)。 -5. **自动 commit**:在输出下一棒 Prompt 且 task 内「### 自检结论(执行者)」已写入后,按 docs/harness/prompts/handoff/HANDOFF_AUTO_COMMIT.md 分仓 commit(仅本轮路径;对话报 short-hash)。用户写明「不要 commit」则跳过。 +6. 
**自动 commit**:在输出下一棒 Prompt 且 task 内「### 自检结论(执行者)」已写入后,按 docs/harness/prompts/handoff/HANDOFF_AUTO_COMMIT.md 分仓 commit(仅本轮路径;对话报 short-hash)。用户写明「不要 commit」则跳过。 ``` --- diff --git a/docs/harness/prompts/templates/TEMPLATE-task-audit-invoke.md b/docs/harness/prompts/templates/TEMPLATE-task-audit-invoke.md index f0e71807..6591f3b7 100644 --- a/docs/harness/prompts/templates/TEMPLATE-task-audit-invoke.md +++ b/docs/harness/prompts/templates/TEMPLATE-task-audit-invoke.md @@ -59,6 +59,7 @@ 3. 落盘一篇审查文档至 **上表路径**(与 `reviews/README.md`、`hats/22-task-audit.md` 子仓规则一致)。 4. 文内结构:元信息 → 审查结论摘要 → 阻塞 / 非阻塞 → 需任务帽回填清单(若有)→ 是否建议执行帽开工 → 「签收 / 关闭」→ 收尾二选一:**有下一棒** → **「下一棒可复制 Prompt」**(`text` 围栏,§3 全文);**终轮无下一棒** → **「执行路线与 Commit 回溯」**(见 docs/harness/prompts/handoff/HANDOFF_CLOSE_TRACE.md,含阶段表 + 分仓 commit 列表)。 5. 禁止仅在对话里说「过了」而不写 reviews;禁止在仍有阻塞时指示执行帽开工。 +**Fresh Context(P1)**:**新对话**开帽;**禁止**要求阅读 30 invoke 全文;输入限于 task、reviews、40 自检、diff 摘要。 6. 不要写业务实现代码;不要擅自改写 task 正文。 7. **对话与归档**:与步骤 4 审查 md 末节 **逐字或语义一致**——有下一棒则对话输出完整 Prompt;无下一棒则输出完整回溯表,**禁止**用空 Prompt 占位。 8. 
**自动 commit**:完成步骤 3–7 后,按 docs/harness/prompts/handoff/HANDOFF_AUTO_COMMIT.md 在相关 git 根分别 commit(仅本轮路径;对话末尾一行报 short-hash)。用户本轮写明「不要 commit」则跳过。 diff --git a/docs/tasks/README.md b/docs/tasks/README.md index 58712757..da9dd09e 100644 --- a/docs/tasks/README.md +++ b/docs/tasks/README.md @@ -133,6 +133,21 @@ docs/tasks/ **50 硬规则**(`required` + `api/`/契约):关账前须有 `reinspect_results/` 落盘,见 RECENT **§0.0**。 +**22 抽检(P1 运维)**:每季度抽查 1~2 份 active task 的 `test_strategy` 是否与上表及变更类型一致(见 P1 SPEC §4)。 + +--- + +## `semi_auto` 决策表(理论对齐 P1) + +> 真值:[`SPEC-Governance-Harness-Theory-Align-P1-v1.md`](../spec/governance/SPEC-Governance-Harness-Theory-Align-P1-v1.md) **§5**;通则 [`HANDOFF_SEMI_AUTO.md`](../harness/prompts/handoff/HANDOFF_SEMI_AUTO.md)。 + +| `semi_auto` | 适用场景 | 链式帽序 | 仍须人做 | +|-------------|----------|----------|----------| +| `true` | 小改动、纯文档、单文件治理;无 `pending` 人工闸 | `10→30→40→22`(无闸时同会话) | 终轮 **22 签收**、合并 PR、`human_gate: approved` | +| `false` | 契约 / 跨仓 / 架构 / `test_strategy: required` 含 `api/` | 每帽可新会话;默认经 **22 R1** | 各 `HG-*` 按表;**50** 关账 | + +task 头 **须**显式 `semi_auto` + `audit_profile`;22 R1 核对合理性。 + --- ## Harness V2 · 任务单扩展字段 diff --git a/docs/tasks/active/task_harness_theory_align_p1_v1.md b/docs/tasks/active/task_harness_theory_align_p1_v1.md index 9aa5d858..1a3f251e 100644 --- a/docs/tasks/active/task_harness_theory_align_p1_v1.md +++ b/docs/tasks/active/task_harness_theory_align_p1_v1.md @@ -1,6 +1,6 @@ # Task:Harness 理论对齐 · P1 收口 -> **状态**:`pending`(P0 已关账 · PR #90 · 2026-05-29) +> **状态**:`in_progress` > **关联 SPEC**:[`docs/spec/governance/SPEC-Governance-Harness-Theory-Align-P1-v1.md`](../spec/governance/SPEC-Governance-Harness-Theory-Align-P1-v1.md) > **排期**:[`RECENT_TASK_SCHEDULE.md`](../RECENT_TASK_SCHEDULE.md) **§0.5** @@ -15,28 +15,39 @@ | **semi_auto** | `false` | | **audit_profile** | `full` | | **git_branch** | `task/harness-theory-align-p1` | +| **linter_target** | **候选 C** — 结构化错误响应 `ok` / `error_code` / `message` | ### 人工闸 `human_gate` | 
human_gate_id | status | blocks_hats | 说明 | |---------------|--------|-------------|------| | HG-AUDIT-R1 | approved | 30 | P1 含 Linter/CI | -| HG-AUDIT-CLOSE | approved | done | P1 关账 | +| HG-AUDIT-CLOSE | pending | done | P1 关账(PR 合并后) | --- ## 背景与目标 -P0 完成后落实 **Fresh Context**、半自动推广、**首条** 领域结构 CI(见 SPEC §3 候选)。 +P0 完成后落实 **Fresh Context**、半自动推广、**首条** 领域结构 CI(见 SPEC §3 候选 C)。 + +--- + +## 失败路径 + +| # | 触发条件 | 系统行为 | 可重试 | 用户可见 | +|---|----------|----------|--------|----------| +| F1 | Linter 缺必填键 | CI / pytest **失败**;禁止合并 | — | CI 红 | +| F2 | 22/50 附带 30 invoke 长文 | 违反 Fresh Context;须裁剪输入 | — | — | --- ## 范围 -- [ ] P1-1:22/50/40 与 invoke 模板 Fresh Context 条款 -- [ ] P1-2:`docs/tasks/README.md` 半自动决策表 -- [ ] P1-3:首条 Linter + CI 绿 -- [ ] `RECENT_TASK_SCHEDULE.md` §0.5 P1 → done +- [x] P1-1:22/50/40 与 invoke 模板 Fresh Context 条款 +- [x] P1-2:`docs/tasks/README.md` 半自动决策表 +- [x] P1-3:首条 Linter + CI 绿(`harness_structured_error_shape_check`) +- [x] P1-4:README `test_strategy` 季度抽检说明 +- [ ] `RECENT_TASK_SCHEDULE.md` §0.5 P1 → done(PR 合并后) ## 非范围 @@ -51,6 +62,16 @@ P0 完成后落实 **Fresh Context**、半自动推广、**首条** 领域结构 --- +## 自检结论(执行者 · 40 帽回填) + +| 项 | 结果 | +|----|------| +| 命令 | `python tools/harness_structured_error_shape_check.py` · `pytest tests/test_harness_structured_error_shape_check.py` | +| 结论 | pass | +| 要点 | 结构化错误 registry 覆盖 rate_limit + circuit_breaker | + +--- + ## 给 Cursor -`harness-theory-align-p1`、blocked until P0 done、`RECENT_TASK_SCHEDULE` §0.5 +`harness-theory-align-p1`、`GOV-HARNESS-THEORY-ALIGN-P1`、`RECENT_TASK_SCHEDULE` §0.5、Fresh Context、`harness_structured_error_shape_check` diff --git a/tests/test_harness_structured_error_shape_check.py b/tests/test_harness_structured_error_shape_check.py new file mode 100644 index 00000000..22150730 --- /dev/null +++ b/tests/test_harness_structured_error_shape_check.py @@ -0,0 +1,19 @@ +"""Harness P1-3:结构化错误响应 shape 门禁测试。""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + 
+ROOT = Path(__file__).resolve().parent.parent + + +def test_harness_structured_error_shape_check_passes(): + proc = subprocess.run( + [sys.executable, "tools/harness_structured_error_shape_check.py", "--check"], + cwd=ROOT, + capture_output=True, + text=True, + ) + assert proc.returncode == 0, proc.stdout + proc.stderr diff --git a/tools/harness_structured_error_shape_check.py b/tools/harness_structured_error_shape_check.py new file mode 100644 index 00000000..4153278a --- /dev/null +++ b/tools/harness_structured_error_shape_check.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +"""Harness P1-3:结构化错误响应必填键检查(候选 C)。 + +与 ``tech_graph_contract_check`` 互补:校验已知错误工厂返回的 JSON shape, +非 SSE 事件白名单。 + +Usage:: + + python tools/harness_structured_error_shape_check.py + python tools/harness_structured_error_shape_check.py --check # CI 等价 +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any, Callable + +ROOT = Path(__file__).resolve().parent.parent +REGISTRY_PATH = ROOT / "docs" / "harness" / "linters" / "structured_error_registry_v1.json" + +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +def rate_limit_response_body_sample() -> dict[str, Any]: + from api.chatbi_rate_limit import rate_limit_response_body + + return rate_limit_response_body(3) + + +def circuit_breaker_open_error_body_sample() -> dict[str, Any]: + from api.chatbi_circuit_breaker import CircuitBreakerOpenError, CircuitState + + return CircuitBreakerOpenError( + breaker_name="llm_outbound", + state=CircuitState.OPEN, + ).to_error_body() + + +_FACTORIES: dict[str, Callable[[], dict[str, Any]]] = { + "rate_limit_response_body_sample": rate_limit_response_body_sample, + "circuit_breaker_open_error_body_sample": circuit_breaker_open_error_body_sample, +} + + +def _load_registry() -> dict[str, Any]: + raw = json.loads(REGISTRY_PATH.read_text(encoding="utf-8")) + if not isinstance(raw, dict): + raise TypeError("registry 
root must be object") + return raw + + +def run_check() -> list[str]: + registry = _load_registry() + required = list(registry.get("required_keys") or []) + if not required: + return ["registry: required_keys is empty"] + + errors: list[str] = [] + for case in registry.get("cases") or []: + case_id = case.get("id", "?") + factory_name = case.get("factory") + if not factory_name or factory_name not in _FACTORIES: + errors.append(f"{case_id}: unknown factory {factory_name!r}") + continue + body = _FACTORIES[factory_name]() + if not isinstance(body, dict): + errors.append(f"{case_id}: factory did not return dict") + continue + missing = [k for k in required if k not in body] + if missing: + errors.append(f"{case_id}: missing keys {missing} in {sorted(body.keys())}") + for key in ("error_code", "message"): + if key in body and body[key] in (None, ""): + errors.append(f"{case_id}: key {key!r} is empty") + if "ok" in body and body["ok"] is not False: + errors.append(f"{case_id}: ok must be False for error body") + return errors + + +def main() -> None: + parser = argparse.ArgumentParser(description="Harness structured error shape check") + parser.add_argument("--check", action="store_true", help="CI mode (non-zero on failure)") + args = parser.parse_args() + + errors = run_check() + if errors: + print("harness_structured_error_shape_check: FAIL") + for line in errors: + print(f" - {line}") + print(f"registry: {REGISTRY_PATH.relative_to(ROOT)}") + print("remediation: fix api/ + tests/ or registry; do not bypass linter.") + sys.exit(1) + + print("harness_structured_error_shape_check: OK") + if not args.check: + print(f"registry: {REGISTRY_PATH.relative_to(ROOT)}") + + +if __name__ == "__main__": + main()