Skip to content

Commit 456bc4a

Browse files
committed
feat(director): closed style enum + YAML template mapping + caption
- style field is now a closed enum: tech-digital / business-finance / news-insight / lifestyle / education
- Add style_variant for a free-form variant description
- Move style→template mapping from hardcoded Python to config/template-mapping.yaml
- Support exact / aliases / prefixes matching in the YAML config
- Add caption field to placements (reader-visible; rendered instead of the internal purpose)
- Validate style against ALLOWED_VISUAL_STYLES from config
- Add validate_chat_article_markdown() (reserved for future use)
- Remove SequenceMatcher similarity check from de-ai validator
1 parent a4ab7d3 commit 456bc4a

4 files changed

Lines changed: 114 additions & 32 deletions

File tree

config/template-mapping.yaml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,29 @@
22
# Pipeline Formatter 阶段根据话题分类自动匹配模板
33

44
wechat:
5+
styles:
6+
exact:
7+
tech-digital: tech-digital.html
8+
business-finance: business-finance.html
9+
news-insight: news-insight.html
10+
lifestyle: lifestyle.html
11+
education: education.html
12+
aliases:
13+
tech-flat: tech-digital.html
14+
tech-minimal: tech-digital.html
15+
tech-satirical: tech-digital.html
16+
tech-editorial: tech-digital.html
17+
cyberpunk: tech-digital.html
18+
watercolor: lifestyle.html
19+
chinese-ink: lifestyle.html
20+
business: business-finance.html
21+
news: news-insight.html
22+
prefixes:
23+
tech: tech-digital.html
24+
business: business-finance.html
25+
news: news-insight.html
26+
edu: education.html
27+
life: lifestyle.html
528
mapping:
629
# 科技类
730
- keywords: ["科技", "AI", "人工智能", "数码", "编程", "互联网", "区块链", "Web3", "芯片", "手机", "电脑", "软件", "开源"]

prompts/art-director.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323

2424
### 1. 确定全局视觉风格
2525

26-
根据文章主题和平台,选择或定义:
27-
- `style`:风格标识(如 tech-flat / watercolor / cyberpunk / chinese-ink)
26+
根据文章主题和平台,输出:
27+
- `style`:**封闭枚举,只能从以下值中选择一个**:`tech-digital` / `business-finance` / `news-insight` / `lifestyle` / `education`
28+
- `style_variant`:该模板家族下的自由变体说明(例如 satirical / flat / editorial / clean / magazine)
2829
- `global_tone`:一段话描述整体视觉基调(配色、氛围、密度等)
2930

3031
### 2. 先设计封面,再规划正文配图
@@ -55,7 +56,8 @@
5556
| 字段 | 要求 |
5657
|------|------|
5758
| `description` | **详细的画面描述**,50-150字。包含主体、构图、背景、氛围、配色、要避免的元素。必须详细到足以让人类理解你想要什么画面。 |
58-
| `purpose` | 这张图在文章叙事中的作用(1-2句话) |
59+
| `purpose` | 这张图在文章叙事中的作用(内部字段,给系统/编辑看,不直接给读者看)(1-2句话) |
60+
| `caption` | **给读者看的短图注**,1句话,尽量自然、像编辑写的,不要解释“这张图在干什么”。禁止使用“具象化/可视化/强化/呼应/增强可读性/给读者一个锚点”等解释性 AI 话语。 |
5961
| `type` | illustration / infographic / photo / diagram |
6062
| `aspect_ratio` | 公众号用 16:9 或 4:3,小红书用 3:4 或 1:1 |
6163
| `size_hint` | full_width / half / thumbnail |
@@ -81,7 +83,8 @@
8183

8284
```json
8385
{
84-
"style": "tech-flat",
86+
"style": "tech-digital",
87+
"style_variant": "satirical",
8588
"global_tone": "现代科技感,配色以蓝绿为主,留白多,信息密度低,整体氛围安静专注",
8689
"cover": {
8790
"title": "500元上门安装,299元上门卸载",
@@ -98,6 +101,7 @@
98101
"type": "illustration",
99102
"description": "详细的画面描述(50-150字)...",
100103
"purpose": "在文章中的叙事作用...",
104+
"caption": "闲鱼上,装机和卸载已经同时开卖了。",
101105
"aspect_ratio": "16:9",
102106
"size_hint": "full_width"
103107
}

scripts/formatter.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -290,10 +290,11 @@ def insert_images(content_html: str, placements: list, image_map: dict, platform
290290
else:
291291
img_src = f"/api/runs/{run_id}/images/{pid}.png"
292292

293-
# 图注(深色模板用浅色文字)
293+
# 图注:只渲染给读者看的 caption,不渲染内部 purpose
294294
caption_html = ""
295-
if purpose:
296-
caption_html = f'\n<p style="text-align:center;font-size:12px;color:{caption_color};margin:4px 0 16px;">{purpose}</p>'
295+
caption = str(placement.get("caption", "")).strip()
296+
if caption:
297+
caption_html = f'\n<p style="text-align:center;font-size:12px;color:{caption_color};margin:4px 0 16px;">{caption}</p>'
297298

298299
img_tag = f'\n<img src="{img_src}" alt="{desc}" {style}>{caption_html}\n'
299300
blocks.insert(min(para_idx, len(blocks)), img_tag)
@@ -304,11 +305,11 @@ def insert_images(content_html: str, placements: list, image_map: dict, platform
304305
# ── 模板匹配 ─────────────────────────────────────────────────
305306

306307
def match_template(platform: str, keywords: list[str], director_style: str = "") -> str:
307-
"""根据 Director style + 关键词匹配排版模板
308+
"""根据 Director style + 关键词匹配排版模板
308309
309310
优先级:
310-
1. Director style 直接映射(如 style="tech-flat" → tech-digital.html)
311-
2. keywords 子串匹配("AI Agent" 包含 "AI" → tech-digital.html)
311+
1. YAML 中配置的 style exact / aliases / prefixes
312+
2. keywords 子串匹配
312313
3. fallback 到 default
313314
"""
314315
mapping_path = CONFIG_DIR / "template-mapping.yaml"
@@ -319,23 +320,25 @@ def match_template(platform: str, keywords: list[str], director_style: str = "")
319320
platform_config = mapping.get(platform, {})
320321
rules = platform_config.get("mapping", [])
321322
default = platform_config.get("default", "base.html")
323+
styles_cfg = platform_config.get("styles", {}) if isinstance(platform_config, dict) else {}
324+
exact_map = styles_cfg.get("exact", {}) if isinstance(styles_cfg, dict) else {}
325+
alias_map = styles_cfg.get("aliases", {}) if isinstance(styles_cfg, dict) else {}
326+
prefix_map = styles_cfg.get("prefixes", {}) if isinstance(styles_cfg, dict) else {}
322327

323-
# ── 1. Director style 直接映射 ──
324-
STYLE_TO_TEMPLATE = {
325-
"tech-flat": "tech-digital.html",
326-
"tech-minimal": "tech-digital.html",
327-
"tech-digital": "tech-digital.html",
328-
"cyberpunk": "tech-digital.html",
329-
"watercolor": "lifestyle.html",
330-
"chinese-ink": "lifestyle.html",
331-
"business": "business-finance.html",
332-
"news": "news-insight.html",
333-
"education": "education.html",
334-
}
328+
# ── 1. Director style 直接映射(来自 YAML 配置)──
335329
if director_style:
336330
style_lower = director_style.lower().strip()
337-
if style_lower in STYLE_TO_TEMPLATE:
338-
tpl = STYLE_TO_TEMPLATE[style_lower]
331+
tpl = None
332+
if style_lower in exact_map:
333+
tpl = exact_map[style_lower]
334+
elif style_lower in alias_map:
335+
tpl = alias_map[style_lower]
336+
else:
337+
for prefix, candidate_tpl in prefix_map.items():
338+
if style_lower.startswith(str(prefix).lower().strip()):
339+
tpl = candidate_tpl
340+
break
341+
if tpl:
339342
tpl_path = TEMPLATES_DIR / platform / tpl
340343
if tpl_path.exists():
341344
return tpl

scripts/validators.py

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
import json
44
import re
5+
from pathlib import Path
56
from dataclasses import dataclass, field
6-
from difflib import SequenceMatcher
77
from typing import Any, Callable
88

99
import yaml
@@ -153,6 +153,19 @@ def validate_research_yaml(text: str) -> ValidationResult:
153153
return ValidationResult(ok=True, parsed=parsed, normalized_text=normalized)
154154

155155

156+
def _allowed_visual_styles() -> set[str]:
157+
cfg_path = Path(__file__).resolve().parent.parent / "config" / "template-mapping.yaml"
158+
try:
159+
cfg = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
160+
exact = ((cfg.get("wechat") or {}).get("styles") or {}).get("exact") or {}
161+
return {str(k).strip() for k in exact.keys() if str(k).strip()}
162+
except Exception:
163+
return {"tech-digital", "business-finance", "news-insight", "lifestyle", "education"}
164+
165+
166+
# Style identifiers accepted for a visual plan's `style` field. Computed once
# when this module is imported, so later edits to the YAML config are not
# picked up until the module is loaded again.
ALLOWED_VISUAL_STYLES = _allowed_visual_styles()
167+
168+
156169
def validate_visual_plan_json(text: str) -> ValidationResult:
157170
raw = _strip_code_fence(text)
158171
try:
@@ -165,12 +178,17 @@ def validate_visual_plan_json(text: str) -> ValidationResult:
165178
return ValidationResult(ok=False, message="visual_plan.json top-level must be an object", details=[f"got {type(parsed).__name__}"])
166179

167180
style = parsed.get("style")
181+
style_variant = parsed.get("style_variant")
168182
global_tone = parsed.get("global_tone")
169183
cover = parsed.get("cover")
170184
placements = parsed.get("placements")
171185

172186
if not isinstance(style, str) or not style.strip():
173187
details.append("style must be a non-empty string")
188+
elif style.strip() not in ALLOWED_VISUAL_STYLES:
189+
details.append(f"style must be one of: {', '.join(sorted(ALLOWED_VISUAL_STYLES))}")
190+
if not isinstance(style_variant, str) or not style_variant.strip():
191+
details.append("style_variant must be a non-empty string")
174192
if not isinstance(global_tone, str) or not global_tone.strip():
175193
details.append("global_tone must be a non-empty string")
176194
if not isinstance(cover, dict):
@@ -196,7 +214,7 @@ def validate_visual_plan_json(text: str) -> ValidationResult:
196214
details.append(f"duplicate placement id: {pid}")
197215
else:
198216
seen_ids.add(pid)
199-
for field in ("after_section", "type", "description", "purpose"):
217+
for field in ("after_section", "type", "description", "purpose", "caption"):
200218
value = placement.get(field)
201219
if not isinstance(value, str) or not value.strip():
202220
details.append(f"{prefix}.{field} must be a non-empty string")
@@ -320,6 +338,46 @@ def validate_writer_markdown(text: str, min_chars: int = 1200) -> ValidationResu
320338
return ValidationResult(ok=True, parsed=raw, normalized_text=raw + "\n")
321339

322340

341+
def validate_chat_article_markdown(text: str, min_chars: int = 800) -> ValidationResult:
    """Run quality checks on a chat-produced Markdown article.

    The text is passed through ``_strip_code_fence`` and stripped of
    surrounding whitespace, then checked for:

    * a minimum length of ``min_chars`` characters;
    * known "meta" phrases within the first 500 characters;
    * at least four blank-line-separated paragraphs;
    * at least two lines that start with ``## `` after stripping.

    Args:
        text: Raw article text.
        min_chars: Minimum accepted length of the cleaned article.

    Returns:
        A failing ``ValidationResult`` listing every problem found, or a
        passing one carrying the cleaned article and the same text with a
        trailing newline as ``normalized_text``.
    """
    article = _strip_code_fence(text).strip()
    meta_markers = (
        "微信文章无法直接提取",
        "基于",
        "我直接修改",
        "根据你的要求",
        "下面是修改后的",
        "以下是修改后的",
        "我会按这个风格",
        "已按",
        "我已经根据",
        "让我根据",
        "已重写",
        "主要调整",
        "风格特点是",
    )
    problems: list[str] = []

    length = len(article)
    if length < min_chars:
        problems.append(f"content too short: {length} chars (< {min_chars})")

    # Only the opening of the article is scanned for meta phrases.
    opening = article[:500]
    problems.extend(
        f"meta marker near opening: {marker}"
        for marker in meta_markers
        if marker in opening
    )

    paragraph_count = sum(1 for chunk in re.split(r"\n\s*\n", article) if chunk.strip())
    if paragraph_count < 4:
        problems.append(f"too few paragraphs: {paragraph_count}")

    heading_count = len([ln for ln in article.splitlines() if ln.strip().startswith("## ")])
    if heading_count < 2:
        problems.append(f"too few section headings: {heading_count}")

    if not problems:
        return ValidationResult(ok=True, parsed=article, normalized_text=article + "\n")
    return ValidationResult(ok=False, message="chat article failed quality checks", details=problems)
379+
380+
323381
def validate_de_ai_markdown(text: str, original_text: str) -> ValidationResult:
324382
raw = _strip_code_fence(text).strip()
325383
details: list[str] = []
@@ -349,12 +407,6 @@ def validate_de_ai_markdown(text: str, original_text: str) -> ValidationResult:
349407
if ratio < 0.6 or ratio > 1.4:
350408
details.append(f"length ratio out of range: {ratio:.2f}x")
351409

352-
sim = SequenceMatcher(None, original[:5000], raw[:5000]).ratio()
353-
if sim < 0.30:
354-
details.append(f"too different from source article: similarity={sim:.2f}")
355-
if sim > 0.985:
356-
details.append(f"too similar to source article: similarity={sim:.2f}")
357-
358410
if details:
359411
return ValidationResult(ok=False, message="article_edited.md failed quality checks", details=details)
360412
return ValidationResult(ok=True, parsed=raw, normalized_text=raw + "\n")

0 commit comments

Comments
 (0)