feat(notify): structured artifact summaries in review notifications

PPPPanda · PPPPanda · commit 27acbdd963f0 · 2026-03-13T15:35:36.000+08:00
Add _build_node_summary() — pure Python parsers for each node's
official artifact (no LLM calls):
- scout: title, angle, reference count, keywords (from topic.yaml)
- researcher: verification stats, findings count, unique source count (from research.yaml)
- writer: char count, section count (from article_edited.md, falling back to article_draft.md)
- director: style, cover, placement count (from visual_plan.json)
- formatter: HTML size, image count (from formatted.html + images/)

notify_review_needed() now accepts an optional state dict and, when it is
provided, embeds a structured summary in the [REVIEW] notification message.
diff --git a/scripts/web/notify.py b/scripts/web/notify.py
@@ -2,14 +2,16 @@
 ContentPipe — Discord 通知集成
 
 通过 OpenClaw Gateway 的 message API 向 Discord 频道推送 Pipeline 事件。
+审核通知内嵌结构化摘要 + 操作指引，Agent 自动识别进入桥接模式。
 """
 
 from __future__ import annotations
 
 import os
 import json
 import httpx
-from typing import Optional
+from pathlib import Path
+from typing import Any, Dict, List, Optional
 
 from gateway_auth import build_gateway_headers
 from logutil import get_logger
@@ -53,6 +55,209 @@ def _read_config_val(key: str, default: str = "") -> str:
 }
 
 
+# ── Artifact summary builders (pure Python parsing, no LLM calls) ──────────
+
+def _title_only_summary(state: Dict[str, Any]) -> str:
+    """Return the minimal ``标题: <title>`` line, or "" when no title is usable.
+
+    Never raises: a missing, null, or non-mapping ``topic`` entry yields "".
+    """
+    topic = state.get("topic") if isinstance(state, dict) else None
+    title = topic.get("title", "") if isinstance(topic, dict) else ""
+    return f"标题: {title}" if title else ""
+
+
+def _build_node_summary(run_id: str, node: str, state: Dict[str, Any]) -> str:
+    """Build a human-readable summary for a node from state / artifact files.
+
+    Dispatches to the per-node ``_summary_*`` builder. Nodes without a
+    builder, and builders that raise, degrade to a title-only line.
+
+    Args:
+        run_id: Run identifier, passed through to the builder.
+        node: Node ID used to select the builder.
+        state: Pipeline state dict.
+
+    Returns:
+        A (possibly multi-line) string to embed in a notification message;
+        "" when nothing could be produced.
+    """
+    builders = {
+        "scout": _summary_scout,
+        "researcher": _summary_researcher,
+        "writer": _summary_writer,
+        "de_ai_editor": _summary_writer,  # shares the writer summary
+        "director": _summary_director,
+        "formatter": _summary_formatter,
+    }
+    builder = builders.get(node)
+    if builder is None:
+        return _title_only_summary(state)
+    try:
+        return builder(run_id, state)
+    except Exception as e:
+        logger.warning("Summary build failed for %s/%s: %s", run_id, node, e)
+        # The fallback must not raise for the same reason the builder did
+        # (e.g. a null ``topic``), otherwise the exception escapes this function.
+        return _title_only_summary(state)
+
+
+def _runs_dir() -> Path:
+    """Return the ``output/runs`` directory located three levels above this file."""
+    module_dir = Path(__file__).parent
+    project_root = module_dir.parent.parent
+    return project_root.joinpath("output", "runs")
+
+
+def _summary_scout(run_id: str, state: Dict[str, Any]) -> str:
+    """Summarize the scout node: title, content angle, reference count, keywords.
+
+    Topic fields are taken from ``topic.yaml`` in the run directory when that
+    file parses to a mapping; otherwise ``state["topic"]`` is used.
+
+    Args:
+        run_id: Run identifier; names the run directory under ``_runs_dir()``.
+        state: Pipeline state dict.
+
+    Returns:
+        Newline-joined summary lines; "" when nothing is available.
+    """
+    lines: List[str] = []
+
+    # Parse topic.yaml once; both the topic fields and the reference list use it.
+    topic_path = _runs_dir() / run_id / "topic.yaml"
+    raw: Dict[str, Any] = {}
+    if topic_path.exists():
+        try:
+            import yaml
+            loaded = yaml.safe_load(topic_path.read_text(encoding="utf-8"))
+            if isinstance(loaded, dict):
+                raw = loaded
+        except Exception as e:
+            # Best effort: fall back to state, but leave a trace for debugging.
+            logger.debug("Could not read %s: %s", topic_path, e)
+
+    state_topic = state.get("topic")
+    topic_data: Dict[str, Any] = state_topic if isinstance(state_topic, dict) else {}
+    if raw:
+        # The file may nest the fields under "topic" or keep them at top level.
+        nested = raw.get("topic", raw)
+        if isinstance(nested, dict):
+            topic_data = nested
+
+    title = topic_data.get("title", "")
+    if title:
+        lines.append(f"📌 标题: {title}")
+
+    angle = topic_data.get("content_angle", "")
+    if angle:
+        lines.append(f"🎯 角度: {angle}")
+
+    # Reference count: state first, then the artifact file.
+    refs = state.get("reference_articles") or []
+    if not refs:
+        ref_index = raw.get("reference_index")
+        indexed = ref_index.get("all_links") if isinstance(ref_index, dict) else None
+        refs = raw.get("reference_articles") or indexed or []
+    if isinstance(refs, (list, tuple, dict)) and refs:
+        lines.append(f"📎 参考文章: {len(refs)} 篇")
+
+    # Keywords: artifact/topic first, then the user's requirements in state.
+    keywords = topic_data.get("required_keywords") or topic_data.get("keywords") or []
+    if not keywords:
+        user_req = state.get("user_requirements")
+        keywords = user_req.get("required_keywords") if isinstance(user_req, dict) else None
+    if isinstance(keywords, str):
+        # A bare string would otherwise be sliced character by character.
+        keywords = [keywords]
+    if isinstance(keywords, (list, tuple)) and keywords:
+        shown = ", ".join(str(k) for k in keywords[:6])
+        lines.append(f"🏷️ 关键词: {shown}")
+
+    return "\n".join(lines)
+
+
+def _summary_researcher(run_id: str, state: Dict[str, Any]) -> str:
+    """Summarize the researcher node: verification stats, findings, unique sources.
+
+    Reads ``research.yaml`` from the run directory. Missing, unreadable, or
+    malformed sections are skipped instead of aborting the whole summary.
+
+    Args:
+        run_id: Run identifier; names the run directory under ``_runs_dir()``.
+        state: Pipeline state dict (only ``topic.title`` is read).
+
+    Returns:
+        Newline-joined summary lines; "" when nothing is available.
+    """
+    lines: List[str] = []
+    research_path = _runs_dir() / run_id / "research.yaml"
+    data: Dict[str, Any] = {}
+    if research_path.exists():
+        try:
+            import yaml
+            loaded = yaml.safe_load(research_path.read_text(encoding="utf-8"))
+            if isinstance(loaded, dict):
+                data = loaded
+        except Exception as e:
+            # Best effort: continue with an empty document.
+            logger.debug("Could not read %s: %s", research_path, e)
+
+    topic = state.get("topic")
+    title = topic.get("title", "") if isinstance(topic, dict) else ""
+    if title:
+        lines.append(f"📌 标题: {title}")
+
+    def _dict_entries(key: str) -> List[Dict[str, Any]]:
+        # A null or non-list section counts as empty; non-mapping entries are dropped.
+        section = data.get(key)
+        if not isinstance(section, list):
+            return []
+        return [entry for entry in section if isinstance(entry, dict)]
+
+    verifications = _dict_entries("verification_results")
+    if verifications:
+        verified = sum(1 for v in verifications if v.get("status") == "verified")
+        lines.append(f"✅ 核查结果: {verified}/{len(verifications)} 条已验证")
+
+    findings = _dict_entries("research_findings")
+    if findings:
+        lines.append(f"🔬 研究发现: {len(findings)} 条")
+
+    # Count distinct source URLs across verifications and findings.
+    unique_urls: set = set()
+    for entry in (*verifications, *findings):
+        sources = entry.get("sources")
+        if not isinstance(sources, list):
+            continue
+        for source in sources:
+            url = source.get("url", "") if isinstance(source, dict) else ""
+            if url and isinstance(url, str):
+                unique_urls.add(url)
+    if unique_urls:
+        lines.append(f"📚 引用来源: {len(unique_urls)} 个")
+
+    return "\n".join(lines)
+
+
+def _summary_writer(run_id: str, state: Dict[str, Any]) -> str:
+    """Summarize the writer node: title, character count, section count.
+
+    The article text comes from ``article_edited.md`` or, failing that,
+    ``article_draft.md`` in the run directory. Lines that start with ``## ``
+    after stripping are counted as sections.
+    """
+    summary: List[str] = []
+
+    topic_title = state.get("topic", {}).get("title", "")
+    if topic_title:
+        summary.append(f"📌 标题: {topic_title}")
+
+    run_dir = _runs_dir() / run_id
+    for candidate in ("article_edited.md", "article_draft.md"):
+        candidate_path = run_dir / candidate
+        if not candidate_path.exists():
+            continue
+        try:
+            body = candidate_path.read_text(encoding="utf-8")
+        except Exception:
+            # Unreadable file: try the next candidate.
+            continue
+        heading_count = sum(
+            1 for row in body.splitlines() if row.strip().startswith("## ")
+        )
+        summary.append(f"📝 字数: {len(body.strip())} 字")
+        if heading_count:
+            summary.append(f"📑 章节: {heading_count} 节")
+        break
+
+    return "\n".join(summary)
+
+
+def _summary_director(run_id: str, state: Dict[str, Any]) -> str:
+    """Summarize the director node: title, visual style, cover, placement count.
+
+    Reads ``visual_plan.json`` from the run directory. A missing or malformed
+    file, or fields of an unexpected type (e.g. ``"cover": null``), are skipped
+    instead of aborting the whole summary.
+
+    Args:
+        run_id: Run identifier; names the run directory under ``_runs_dir()``.
+        state: Pipeline state dict (only ``topic.title`` is read).
+
+    Returns:
+        Newline-joined summary lines; "" when nothing is available.
+    """
+    lines: List[str] = []
+    vp_path = _runs_dir() / run_id / "visual_plan.json"
+    data: Dict[str, Any] = {}
+    if vp_path.exists():
+        try:
+            loaded = json.loads(vp_path.read_text(encoding="utf-8"))
+        except Exception as e:
+            # Best effort: continue with an empty plan.
+            logger.debug("Could not read %s: %s", vp_path, e)
+        else:
+            if isinstance(loaded, dict):
+                data = loaded
+
+    topic = state.get("topic")
+    title = topic.get("title", "") if isinstance(topic, dict) else ""
+    if title:
+        lines.append(f"📌 标题: {title}")
+
+    style = data.get("style", "")
+    if style:
+        lines.append(f"🎨 风格: {style}")
+
+    # "cover" may be null or a non-object in the JSON; only a mapping has a title.
+    cover = data.get("cover")
+    cover_title = cover.get("title") if isinstance(cover, dict) else None
+    if cover_title:
+        lines.append(f"🖼️ 封面: {cover_title}")
+
+    placements = data.get("placements")
+    if isinstance(placements, (list, dict)) and placements:
+        lines.append(f"📸 配图: {len(placements)} 张")
+
+    return "\n".join(lines)
+
+
+def _summary_formatter(run_id: str, state: Dict[str, Any]) -> str:
+    """Summarize the formatter node: title, HTML size, image count.
+
+    Args:
+        run_id: Run identifier; names the run directory under ``_runs_dir()``.
+        state: Pipeline state dict (only ``topic.title`` is read).
+
+    Returns:
+        Newline-joined summary lines; "" when nothing is available.
+    """
+    lines: List[str] = []
+    topic = state.get("topic")
+    title = topic.get("title", "") if isinstance(topic, dict) else ""
+    if title:
+        lines.append(f"📌 标题: {title}")
+
+    run_dir = _runs_dir() / run_id
+
+    html_path = run_dir / "formatted.html"
+    if html_path.is_file():
+        size_kb = html_path.stat().st_size / 1024
+        lines.append(f"📐 HTML: {size_kb:.1f} KB")
+
+    images_dir = run_dir / "images"
+    if images_dir.is_dir():
+        image_suffixes = {".png", ".jpg", ".jpeg", ".webp"}
+        # Compare suffixes case-insensitively so "cover.PNG" is counted, and
+        # ignore sub-directories that happen to carry an image-like name.
+        img_count = sum(
+            1
+            for entry in images_dir.iterdir()
+            if entry.is_file() and entry.suffix.lower() in image_suffixes
+        )
+        if img_count:
+            lines.append(f"🖼️ 图片: {img_count} 张")
+
+    return "\n".join(lines)
+
+
+# ── Discord notifications ───────────────────────────────────────────
+
 async def notify_discord(
     message: str,
     *,
@@ -107,15 +312,36 @@ async def notify_node_complete(run_id: str, node: str, title: str = "", summary:
     await notify_discord(msg, run_id=run_id, node=node)
 
 
-async def notify_review_needed(run_id: str, node: str, output_summary: str = ""):
-    """需要人工审核通知（内嵌审核指引，agent 自动识别进入桥接模式）"""
+async def notify_review_needed(
+    run_id: str,
+    node: str,
+    output_summary: str = "",
+    state: Optional[Dict[str, Any]] = None,
+):
+    """需要人工审核通知（内嵌结构化摘要 + 审核指引）。
+
+    Args:
+        run_id: Run ID
+        node: 当前节点 ID
+        output_summary: 旧的纯文本摘要（兼容 fallback）
+        state: Pipeline state dict（有则生成结构化摘要）
+    """
     emoji = NODE_EMOJI.get(node, "📌")
     lines = [
         f"⏸️ **{emoji} {node} 等待审核**  `[REVIEW]`",
         f"`run_id: {run_id}` · `node: {node}`",
     ]
-    if output_summary:
+
+    # 结构化摘要（纯 Python 解析产物文件）
+    if state:
+        summary = _build_node_summary(run_id, node, state)
+        if summary:
+            lines.append("")
+            for sl in summary.splitlines():
+                lines.append(sl)
+    elif output_summary:
         lines.append(f"> {output_summary[:300]}")
+
     lines.append("")
     lines.append(f"💬 直接回复审核意见 → `contentpipe_chat({run_id})`")
     lines.append(f"✅ 说「通过/OK」→ `contentpipe_approve({run_id})`")
diff --git a/scripts/web/routes/api.py b/scripts/web/routes/api.py
@@ -1796,11 +1796,10 @@ async def _execute_pipeline(run_id: str):
                     state["_node_done"] = True
                     _save_state(state)
                     emit_review_needed(run_id, node_id, node_id)
-                    # Discord 通知
+                    # Discord 通知（含结构化产物摘要）
                     try:
                         from web.notify import notify_review_needed as _discord_notify
-                        title = state.get("topic", {}).get("title", "")
-                        await _discord_notify(run_id, node_id, title[:200])
+                        await _discord_notify(run_id, node_id, state=state)
                     except Exception:
                         pass
                     return  # 暂停，等用户 approve