Skip to content

Commit 9ac6c0d

Browse files
committed
feat(nodes): session keys + image-gen resilience + formatter unification
Session:
- Add _node_session_key() with generation support in nodes.py
- All node execution uses generation-aware session keys

Image generation:
- Reuse existing uploaded/generated images (skip redundant API calls)
- Reuse existing cover if already present
- Support user_image_url for cover download
- Add _clean_llm_prompt_text() for prompt sanitization
- Image path fallback across extensions (.jpg/.png/.webp)

Formatter:
- Delegate to formatter.py shared implementation (match_template, insert_images)
- Eliminate duplicate template matching logic between nodes.py and formatter.py

Publisher:
- Environment variables (WECHAT_APPID/SECRET) take priority over config
1 parent 456bc4a commit 9ac6c0d

3 files changed

Lines changed: 145 additions & 47 deletions

File tree

scripts/image_engines/api_pollinations.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from __future__ import annotations
1717

1818
import os
19+
import re
1920
import time
2021
from pathlib import Path
2122
from urllib.parse import quote
@@ -143,6 +144,33 @@ def is_available(self) -> bool:
143144
return True
144145

145146

147+
def _clean_llm_prompt_text(text: str, max_len: int = 180) -> str:
148+
"""把 LLM 返回的 prompt 清洗成适合图片接口的纯文本。"""
149+
if not text:
150+
return ""
151+
152+
cleaned = text.strip().strip('"').strip("'")
153+
154+
# 去 code fence
155+
cleaned = re.sub(r"^```[a-zA-Z0-9_-]*\\s*", "", cleaned)
156+
cleaned = re.sub(r"\\s*```$", "", cleaned)
157+
158+
# 去常见说明性前缀 / 标题
159+
cleaned = re.sub(r"^\*\*\s*Condensed\s+Prompt[^\n::]*[::]\*\*\s*", "", cleaned, flags=re.I)
160+
cleaned = re.sub(r"^Condensed\s+Prompt[^\n::]*[::]\s*", "", cleaned, flags=re.I)
161+
cleaned = re.sub(r"^\*\*\s*English\s+Prompt[^\n::]*[::]\*\*\s*", "", cleaned, flags=re.I)
162+
cleaned = re.sub(r"^English\s+Prompt[^\n::]*[::]\s*", "", cleaned, flags=re.I)
163+
cleaned = re.sub(r"^Prompt\s*[::]\s*", "", cleaned, flags=re.I)
164+
165+
# 去 markdown 强调符 / 项目符号
166+
cleaned = cleaned.replace("**", " ").replace("__", " ")
167+
cleaned = re.sub(r"^[\-•*]+\s*", "", cleaned)
168+
169+
# 多行压一行
170+
cleaned = re.sub(r"\s+", " ", cleaned).strip()
171+
return cleaned[:max_len]
172+
173+
146174
def _shorten_prompt(prompt: str, max_len: int = 180) -> str:
147175
"""
148176
精简长 prompt。先尝试 LLM 摘要,失败则硬截断。
@@ -160,9 +188,9 @@ def _shorten_prompt(prompt: str, max_len: int = 180) -> str:
160188
max_tokens=150,
161189
gateway_session_key=build_contentpipe_session_key("helper", "image-prompt", "shorten"),
162190
)
163-
short = result.strip().strip('"').strip("'")
191+
short = _clean_llm_prompt_text(result, max_len=max_len)
164192
if short and len(short) > 10:
165-
return short[:max_len]
193+
return short
166194
except Exception:
167195
pass
168196
return prompt[:max_len]
@@ -191,7 +219,7 @@ def _translate_prompt(chinese_prompt: str) -> str:
191219
max_tokens=200,
192220
gateway_session_key=build_contentpipe_session_key("helper", "image-prompt", "translate"),
193221
)
194-
translated = result.strip().strip('"').strip("'")
222+
translated = _clean_llm_prompt_text(result, max_len=240)
195223
if translated and len(translated) > 10:
196224
return translated
197225
except Exception as e:

scripts/nodes.py

Lines changed: 101 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import re
2121
import yaml
2222

23-
from gateway_auth import build_contentpipe_session_key
23+
from gateway_auth import build_contentpipe_node_session_key
2424
from logutil import get_logger
2525
from state import ContentState
2626
from tools import call_llm, load_pipeline_config
@@ -38,6 +38,12 @@
3838
logger = get_logger(__name__)
3939

4040

41+
def _node_session_key(state: ContentState, node_id: str, lane: str = "main") -> str:
    """Return the gateway session key for ``node_id`` in the current run.

    The key is built from ``state["run_id"]``, the node id, the lane and the
    node's session generation. The generation is read from the
    ``state["_session_gen"]`` mapping and falls back to 0 when the mapping is
    missing, is not a dict, or holds no (or a falsy) entry for the node.
    """
    generations = state.get("_session_gen", {})
    if not isinstance(generations, dict):
        generations = {}
    node_generation = int(generations.get(node_id, 0) or 0)
    return build_contentpipe_node_session_key(
        state["run_id"], node_id, lane, node_generation
    )
45+
46+
4147
def _strip_code_fence(text: str) -> str:
4248
"""去掉 LLM 返回的 ```yaml ... ``` 或 ```json ... ``` 包裹"""
4349
text = text.strip()
@@ -119,7 +125,7 @@ def _call_llm_with_session(
119125
recent = [{"role": m["role"], "content": m["content"]} for m in history[:-1]][-20:]
120126

121127
cfg = load_pipeline_config().get("pipeline", {})
122-
gateway_session_key = build_contentpipe_session_key(state["run_id"], node_id, "main")
128+
gateway_session_key = _node_session_key(state, node_id, "main")
123129
result = call_llm(
124130
prompt,
125131
context,
@@ -181,7 +187,7 @@ def _call_llm_to_file_with_session(
181187
max_tokens=max_tokens,
182188
chat_history=recent,
183189
system_prompt=prompt,
184-
gateway_session_key=build_contentpipe_session_key(state["run_id"], node_id, "main"),
190+
gateway_session_key=_node_session_key(state, node_id, "main"),
185191
gateway_agent_id=gateway_agent_id,
186192
)
187193
_append_node_session(state, node_id, "assistant", agent_reply, tag=f"{node_id}_exec", internal=True)
@@ -816,6 +822,30 @@ def image_gen_node(state: ContentState) -> ContentState:
816822
img_dir = OUTPUT_DIR / "runs" / run_id / "images"
817823
img_dir.mkdir(parents=True, exist_ok=True)
818824

825+
existing_generated = {}
826+
for img in state.get("generated_images", []) or []:
827+
pid = img.get("placement_id", "")
828+
fp = img.get("file_path", "")
829+
if pid and img.get("success") and fp and os.path.exists(fp):
830+
existing_generated[pid] = img
831+
832+
existing_cover = state.get("generated_cover", {}) if isinstance(state.get("generated_cover"), dict) else {}
833+
has_existing_cover = bool(existing_cover.get("success") and existing_cover.get("file_path") and os.path.exists(existing_cover.get("file_path")))
834+
835+
placement_ids = [str(p.get("id", f"img_{i+1:03d}")) for i, p in enumerate(placements)]
836+
all_placements_ready = bool(placement_ids) and all(pid in existing_generated for pid in placement_ids)
837+
if has_existing_cover and all_placements_ready:
838+
logger.info("image_gen: all assets already provided, skip generation")
839+
generated = [existing_generated[pid] for pid in placement_ids]
840+
state["generated_images"] = generated
841+
state["generated_cover"] = existing_cover
842+
state["selected_images"] = {g["placement_id"]: "A" for g in generated if g.get("success")}
843+
state["current_stage"] = "image_gen"
844+
_save_artifact(run_id, "generated_images.json", json.dumps(generated, ensure_ascii=False, indent=2))
845+
_save_artifact(run_id, "generated_cover.json", json.dumps(existing_cover, ensure_ascii=False, indent=2))
846+
_save_state(state)
847+
return state
848+
819849
engine = create_engine_from_config()
820850
logger.info("Image engine: %s", engine)
821851

@@ -825,10 +855,31 @@ def image_gen_node(state: ContentState) -> ContentState:
825855
"1:1": (1024, 1024), "9:16": (576, 1024), "2.35:1": (1410, 600),
826856
}
827857

828-
# 先生成封面(P0.5: 专门 cover,不再复用正文首图
858+
# 先处理封面(优先复用用户已替换/已存在的 cover)
829859
cover = visual_plan.get("cover", {}) if isinstance(visual_plan, dict) else {}
830-
generated_cover = {}
831-
if isinstance(cover, dict) and cover.get("description"):
860+
generated_cover = existing_cover if has_existing_cover else {}
861+
if has_existing_cover:
862+
logger.info("cover: reuse existing uploaded/generated cover")
863+
elif isinstance(cover, dict) and cover.get("user_image_url"):
864+
cover_path = img_dir / "cover.jpg"
865+
try:
866+
import httpx
867+
resp = httpx.get(cover.get("user_image_url"), timeout=30, follow_redirects=True)
868+
resp.raise_for_status()
869+
cover_path.write_bytes(resp.content)
870+
generated_cover = {
871+
"file_path": str(cover_path),
872+
"engine": "user_provided",
873+
"prompt_used": "",
874+
"generation_time_ms": 0,
875+
"success": True,
876+
"error": "",
877+
}
878+
logger.info("cover: user image saved")
879+
except Exception as e:
880+
generated_cover = {"success": False, "error": str(e)[:200], "file_path": "", "engine": "user_provided", "prompt_used": "", "generation_time_ms": 0}
881+
logger.warning("cover: download failed (%s), falling back to generation", e)
882+
if (not generated_cover or not generated_cover.get("success")) and isinstance(cover, dict) and cover.get("description"):
832883
cover_path = img_dir / "cover.jpg"
833884
cover_aspect = cover.get("aspect_ratio", "2.35:1")
834885
cover_width, cover_height = aspect_map.get(cover_aspect, (1410, 600))
@@ -868,6 +919,12 @@ def image_gen_node(state: ContentState) -> ContentState:
868919
width, height = aspect_map.get(aspect, (1024, 576))
869920
file_path = img_dir / f"{pid}.jpg"
870921

922+
# 优先复用已上传/已存在图片
923+
if pid in existing_generated:
924+
generated.append(existing_generated[pid])
925+
logger.info("%s: reuse existing uploaded/generated image", pid)
926+
continue
927+
871928
# 用户提供了图片 URL → 直接下载
872929
user_url = placement.get("user_image_url", "")
873930
if user_url:
@@ -930,16 +987,13 @@ def image_gen_node(state: ContentState) -> ContentState:
930987

931988
def formatter_node(state: ContentState) -> ContentState:
932989
"""
933-
排版:将文章 + 选中图片嵌入微信/小红书模板
990+
排版:将文章 + 选中图片嵌入微信/小红书模板
934991
935-
流程:
936-
1. Markdown → 微信兼容 HTML(段落、标题、引用、列表)
937-
2. 在正确位置插入选中的配图 <img> 标签
938-
3. 匹配模板,渲染完整 HTML
939-
4. 持久化到 formatted.html
992+
统一复用 scripts/formatter.py 中的共享实现,避免 nodes.py 与 formatter.py
993+
各自维护一套模板匹配 / 图片插入逻辑而产生漂移。
940994
"""
941995
import jinja2
942-
import re
996+
import formatter as cp_formatter
943997

944998
article_content = state.get("article_edited") or state.get("article", {}).get("content", "")
945999
article = state.get("article", {})
@@ -948,44 +1002,49 @@ def formatter_node(state: ContentState) -> ContentState:
9481002
visual_plan = state.get("visual_plan", {})
9491003
run_id = state["run_id"]
9501004
generated_cover = state.get("generated_cover", {})
1005+
generated = state.get("generated_images", [])
9511006

952-
# ── Step 1: Markdown → 微信兼容 HTML ──
953-
954-
content_html = _markdown_to_wechat_html(article_content or "", platform)
955-
956-
# ── Step 2: 插入选中的配图 ──
1007+
# ── Step 1: 先确定模板(要吃 director.style)──
1008+
director_style = visual_plan.get("style", "") if isinstance(visual_plan, dict) else ""
1009+
topic_keywords = state.get("topic", {}).get("keywords", []) or []
1010+
template_name = cp_formatter.match_template(platform, topic_keywords, director_style=director_style)
9571011

958-
placements = visual_plan.get("placements", [])
959-
generated = state.get("generated_images", [])
1012+
# ── Step 2: Markdown → HTML(模板感知)──
1013+
content_html = cp_formatter.markdown_to_wechat_html(article_content or "", platform, template_name=template_name)
9601014

961-
# 建立 placement_id → 图片路径的映射(每个 placement 只有一张图)
1015+
# ── Step 3: 插图(共享定位算法)──
1016+
placements = visual_plan.get("placements", []) if isinstance(visual_plan, dict) else []
9621017
image_map = {}
963-
for img in generated:
964-
pid = img.get("placement_id", "")
965-
if pid and img.get("success", True) and img.get("file_path"):
966-
image_map[pid] = img["file_path"]
9671018

968-
# 在 content_html 中根据段落位置插入图片
969-
if placements and image_map:
970-
content_html = _insert_images_into_html(
971-
content_html, placements, image_map, platform, run_id,
972-
)
1019+
# 先尝试 selected_images 匹配;单候选模式下 option=None 也允许 fallback
1020+
for pid, option in selected.items():
1021+
matched = None
1022+
for img in generated:
1023+
if img.get("placement_id") == pid and img.get("option") == option and img.get("success", True) and img.get("file_path"):
1024+
matched = img
1025+
break
1026+
if matched is None:
1027+
for img in generated:
1028+
if img.get("placement_id") == pid and img.get("success", True) and img.get("file_path"):
1029+
matched = img
1030+
break
1031+
if matched:
1032+
image_map[pid] = matched.get("file_path", "")
9731033

974-
# ── Step 3: 匹配模板 ──
1034+
if not image_map:
1035+
for img in generated:
1036+
if img.get("success", True) and img.get("placement_id") and img.get("file_path"):
1037+
image_map[img["placement_id"]] = img["file_path"]
9751038

976-
mapping_path = CONFIG_DIR / "template-mapping.yaml"
977-
mapping = yaml.safe_load(mapping_path.read_text(encoding="utf-8")) if mapping_path.exists() else {}
1039+
if placements and image_map:
1040+
content_html = cp_formatter.insert_images(content_html, placements, image_map, platform, run_id, template_name=template_name)
9781041

979-
template_name = _match_template(mapping, platform, state.get("topic", {}).get("keywords", []))
1042+
# ── Step 4: 渲染完整 HTML ──
9801043
template_path = PROJECT_ROOT / "templates" / platform / template_name
981-
9821044
if not template_path.exists():
9831045
template_path = PROJECT_ROOT / "templates" / platform / "base.html"
9841046

9851047
template_str = template_path.read_text(encoding="utf-8")
986-
987-
# ── Step 4: 渲染完整 HTML ──
988-
9891048
config = load_pipeline_config()
9901049
author = config.get("wechat", {}).get("author", "ContentPipe")
9911050

@@ -1001,7 +1060,7 @@ def formatter_node(state: ContentState) -> ContentState:
10011060
date=datetime.now().strftime("%Y-%m-%d"),
10021061
lead=article.get("subtitle", ""),
10031062
content=content_html,
1004-
category=", ".join(state.get("topic", {}).get("keywords", [])[:2]),
1063+
category=", ".join(topic_keywords[:2]),
10051064
cover_url=cover_url,
10061065
)
10071066

@@ -1010,7 +1069,7 @@ def formatter_node(state: ContentState) -> ContentState:
10101069
_save_artifact(run_id, "formatted.html", html)
10111070
_save_artifact(run_id, "content_body.html", content_html)
10121071
_save_state(state)
1013-
logger.info("Formatted: %s chars, %s images inserted", len(html), len(image_map))
1072+
logger.info("Formatted: %s chars, %s images inserted, template=%s", len(html), len(image_map), template_name)
10141073
return state
10151074

10161075

@@ -1356,8 +1415,8 @@ def _publish_wechat(state: dict, config: dict) -> dict:
13561415
from tools import wechat_get_token, wechat_upload_image, wechat_upload_permanent_image, wechat_create_draft
13571416

13581417
wechat_config = config.get("wechat", {})
1359-
app_id = wechat_config.get("app_id", "")
1360-
app_secret = wechat_config.get("app_secret", "")
1418+
app_id = os.getenv("WECHAT_APPID", "") or wechat_config.get("app_id", "")
1419+
app_secret = os.getenv("WECHAT_SECRET", "") or wechat_config.get("app_secret", "")
13611420

13621421
# 未配置微信凭证,仅本地保存
13631422
if not app_id or not app_secret:

scripts/web/run_manager.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,8 +415,19 @@ def get_run_artifact(run_id: str, filename: str) -> str | None:
415415

416416

417417
def get_run_image_path(run_id: str, image_name: str) -> Path | None:
    """Locate an image file inside a run's ``images`` directory.

    The exact ``image_name`` is tried first. If it is not found, the same
    stem is tried with each common image extension, because the front end
    usually requests ``.jpg`` while a user upload may have been stored as
    ``.png``/``.webp``/etc.

    Only regular files that resolve to a location inside the run's images
    directory are returned, so an ``image_name`` containing ``..`` segments
    (or an empty name, which would otherwise match the directory itself)
    yields ``None``.

    Args:
        run_id: Identifier of the run whose images are searched.
        image_name: Requested file name, e.g. ``"cover.jpg"``.

    Returns:
        Path to the matching image file, or ``None`` when nothing matches.
    """
    images_dir = OUTPUT_DIR / "runs" / run_id / "images"
    images_root = images_dir.resolve()
    stem = Path(image_name).stem

    candidates = [images_dir / image_name]
    candidates.extend(
        images_dir / f"{stem}{ext}"
        for ext in (".jpg", ".jpeg", ".png", ".webp", ".gif")
    )

    for candidate in candidates:
        if not candidate.is_file():
            continue
        try:
            # relative_to() raises ValueError when the resolved path escapes
            # the images directory (path traversal via "..").
            candidate.resolve().relative_to(images_root)
        except ValueError:
            continue
        return candidate
    return None
420431

421432

422433
def get_dashboard_stats() -> dict:

0 commit comments

Comments
 (0)