Skip to content

Commit 4fc59bb

Browse files
committed
fix: file locking for state.yaml concurrent access
Use fcntl.LOCK_EX for writes and fcntl.LOCK_SH for reads to prevent pipeline background tasks and API requests from corrupting state.yaml. Fixes the 'researcher failed' status flicker seen during the E2E test.
1 parent 4d549ae commit 4fc59bb

2 files changed

Lines changed: 20 additions & 5 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ settings.local.yaml
3434
dist/
3535
build/
3636
config/.setup_done
37+
.state.lock

scripts/web/run_manager.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -495,20 +495,34 @@ def _deep_merge(base: dict, override: dict) -> None:
495495
# ── 内部函数 ──────────────────────────────────────────────────
496496

497497
def _load_raw_state(run_id: str) -> dict | None:
498+
import fcntl
498499
state_file = OUTPUT_DIR / "runs" / run_id / "state.yaml"
499500
if not state_file.exists():
500501
return None
501-
return yaml.safe_load(state_file.read_text(encoding="utf-8"))
502+
lock_file = OUTPUT_DIR / "runs" / run_id / ".state.lock"
503+
with open(lock_file, "w") as lf:
504+
fcntl.flock(lf, fcntl.LOCK_SH) # 共享锁,允许并发读
505+
try:
506+
return yaml.safe_load(state_file.read_text(encoding="utf-8"))
507+
finally:
508+
fcntl.flock(lf, fcntl.LOCK_UN)
502509

503510

504511
def _save_state(state: dict) -> None:
512+
import fcntl
505513
run_id = state.get("run_id", "unknown")
506514
run_dir = OUTPUT_DIR / "runs" / run_id
507515
run_dir.mkdir(parents=True, exist_ok=True)
508-
(run_dir / "state.yaml").write_text(
509-
yaml.dump(state, allow_unicode=True, default_flow_style=False),
510-
encoding="utf-8",
511-
)
516+
state_file = run_dir / "state.yaml"
517+
content = yaml.dump(state, allow_unicode=True, default_flow_style=False)
518+
# 文件锁防止并发写入(pipeline 后台任务 vs API 请求)
519+
lock_file = run_dir / ".state.lock"
520+
with open(lock_file, "w") as lf:
521+
fcntl.flock(lf, fcntl.LOCK_EX)
522+
try:
523+
state_file.write_text(content, encoding="utf-8")
524+
finally:
525+
fcntl.flock(lf, fcntl.LOCK_UN)
512526

513527

514528
def _enrich_run(state: dict) -> dict:

0 commit comments

Comments
 (0)