Skip to content

Commit 991e6d8

Browse files
committed
fix: auto-repair truncated YAML in topic/research validators
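LLM output sometimes gets cut off mid-string, causing YAML parse errors and infinite retry loops. The new _try_repair_truncated_yaml() drops up to 20 trailing lines, closing an unclosed double quote where needed, until the remaining text parses as a YAML mapping with at least three keys. It is applied in validate_topic_yaml and validate_research_yaml when the initial parse fails.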
1 parent 87d9322 commit 991e6d8

1 file changed

Lines changed: 45 additions & 2 deletions

File tree

scripts/validators.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,32 @@ def _yaml_error_details(exc: Exception) -> list[str]:
4848
return details
4949

5050

51+
def _try_repair_truncated_yaml(text: str) -> str | None:
52+
"""尝试修复 LLM 输出被截断的 YAML。
53+
54+
常见情况:字符串在引号内被截断,或列表项缺少结尾。
55+
策略:从末尾逐行删除直到 YAML 可以 parse。
56+
"""
57+
lines = text.rstrip().split("\n")
58+
if len(lines) < 5:
59+
return None
60+
61+
# 尝试从末尾删 1~20 行
62+
for trim in range(1, min(21, len(lines) - 3)):
63+
candidate = "\n".join(lines[:-trim]).rstrip()
64+
# 补全可能未闭合的引号
65+
open_quotes = candidate.count('"') % 2
66+
if open_quotes:
67+
candidate += '"'
68+
try:
69+
parsed = yaml.safe_load(candidate)
70+
if isinstance(parsed, dict) and len(parsed) >= 3:
71+
return candidate + "\n"
72+
except Exception:
73+
continue
74+
return None
75+
76+
5177
def _json_error_details(exc: json.JSONDecodeError, text: str) -> list[str]:
5278
details = [f"line {exc.lineno}, column {exc.colno}: {exc.msg}"]
5379
lines = text.splitlines()
@@ -76,7 +102,15 @@ def validate_topic_yaml(text: str) -> ValidationResult:
76102
try:
77103
parsed = yaml.safe_load(raw)
78104
except Exception as exc:
79-
return ValidationResult(ok=False, message="topic.yaml is not valid YAML", details=_yaml_error_details(exc))
105+
repaired = _try_repair_truncated_yaml(raw)
106+
if repaired:
107+
try:
108+
parsed = yaml.safe_load(repaired)
109+
raw = repaired
110+
except Exception:
111+
return ValidationResult(ok=False, message="topic.yaml is not valid YAML (repair failed)", details=_yaml_error_details(exc))
112+
else:
113+
return ValidationResult(ok=False, message="topic.yaml is not valid YAML", details=_yaml_error_details(exc))
80114

81115
details: list[str] = []
82116
if not isinstance(parsed, dict):
@@ -115,7 +149,16 @@ def validate_research_yaml(text: str) -> ValidationResult:
115149
try:
116150
parsed = yaml.safe_load(raw)
117151
except Exception as exc:
118-
return ValidationResult(ok=False, message="research.yaml is not valid YAML", details=_yaml_error_details(exc))
152+
# 尝试自动修复截断的 YAML
153+
repaired = _try_repair_truncated_yaml(raw)
154+
if repaired:
155+
try:
156+
parsed = yaml.safe_load(repaired)
157+
raw = repaired # 用修复后的版本继续校验
158+
except Exception:
159+
return ValidationResult(ok=False, message="research.yaml is not valid YAML (repair failed)", details=_yaml_error_details(exc))
160+
else:
161+
return ValidationResult(ok=False, message="research.yaml is not valid YAML", details=_yaml_error_details(exc))
119162

120163
details: list[str] = []
121164
if not isinstance(parsed, dict):

0 commit comments

Comments
 (0)