From 1d56da2e4bc42e52b07f48118d6ece0790d2d734 Mon Sep 17 00:00:00 2001 From: pbean Date: Thu, 25 Jun 2026 12:13:25 -0700 Subject: [PATCH 1/3] fix(bmad-auto): auto-recover resolved escalations without manual rollback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resolve workflow corrects the frozen spec under the BMAD artifact folder (_bmad-output/..., tracked). On resume the resolved re-drive ran attempt_dirty against the frozen baseline_commit, read the spec edit as a failed attempt, and with scm.rollback_on_failure OFF (default) paused for a manual rollback. Because the check diffs the working tree against the frozen baseline, even committing the spec re-paused on the next resume — an endless loop. A latent sibling bug: with the flag ON, safe_rollback's `git reset --hard` would silently revert the spec. A resolved re-drive is human-initiated, so it now always auto-recovers regardless of the flag. The BMAD artifact folders are treated as orchestrator-owned on that path: excluded from the dirty check (attempt_dirty gains `exclude=`) and preserved through the reset (safe_rollback gains `preserve=`, snapshotting tracked content via `git stash create` and restoring the artifact paths after the hard reset). The failed attempt's source changes still revert to baseline. Unattended/stopped attempts are unchanged: OFF pauses, ON does a clean reset. _safe_reset takes opt-in `preserve=` so sweep's ledger recovery (same folder) still reverts a broken ledger. The manual-recovery notice is now stopped-only. Bump to 0.7.1. Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude-plugin/marketplace.json | 2 +- CHANGELOG.md | 18 +++ module.yaml | 2 +- pyproject.toml | 2 +- src/automator/__init__.py | 2 +- src/automator/data/settings/core.toml | 2 +- .../skills/bmad-auto-setup/assets/module.yaml | 2 +- src/automator/engine.py | 126 ++++++++++-------- src/automator/policy.py | 8 +- src/automator/verify.py | 54 +++++++- tests/test_engine.py | 93 +++++++++++-- tests/test_verify.py | 56 ++++++++ uv.lock | 2 +- 13 files changed, 289 insertions(+), 80 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 09c2cfe..dd335da 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "bauto", "source": "./src/automator/data/skills", "description": "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill", - "version": "0.7.0", + "version": "0.7.1", "author": { "name": "pinkyd" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 3708e60..3c21194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ All notable changes to `bmad-auto` are documented here. The format is based on [Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0, breaking changes may land in a minor release. +## [0.7.1] — 2026-06-25 + +### Fixed + +- **Resolving a CRITICAL escalation no longer loops on a manual-rollback prompt when the resolve + edited the spec.** 0.7.0 fixed the loop only for an already-clean tree, but the resolve workflow's + whole job is to correct the frozen spec under the BMAD artifact folder (`_bmad-output/...`, which is + tracked). So on resume the orchestrator saw a dirty tree and — with the default + `scm.rollback_on_failure = false` — paused for a manual reset; because the dirty check diffs against + the frozen `baseline_commit`, even committing the spec re-paused on the next resume, an endless loop. + A resolved re-drive is human-initiated, so it now always auto-recovers regardless of the flag: the + BMAD artifact folders are treated as orchestrator-owned — excluded from the dirty check and preserved + through the reset — so the spec correction survives while the failed attempt's source changes revert + to baseline. This also fixes a latent sibling bug: with `rollback_on_failure = true` the reset + previously reverted the just-corrected spec silently. `scm.rollback_on_failure` still defaults OFF and + now governs only unattended/stopped attempts; the manual-recovery notice (reached by stopped attempts + only now) drops its resolved-cause wording. + ## [0.7.0] — 2026-06-24 ### Changed diff --git a/module.yaml b/module.yaml index 2e37a2a..e2fa14d 100644 --- a/module.yaml +++ b/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill" -module_version: 0.7.0 +module_version: 0.7.1 default_selected: false module_greeting: > BMAD Auto installed — both the automation skills and the diff --git a/pyproject.toml b/pyproject.toml index 7e62505..ff926fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bmad-auto" -version = "0.7.0" +version = "0.7.1" description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase" readme = "README.md" license = "MIT" diff --git a/src/automator/__init__.py b/src/automator/__init__.py index 3b1c657..0db0738 100644 --- a/src/automator/__init__.py +++ b/src/automator/__init__.py @@ -6,4 +6,4 @@ spec files, and the per-run directory under .automator/runs/. """ -__version__ = "0.7.0" +__version__ = "0.7.1" diff --git a/src/automator/data/settings/core.toml b/src/automator/data/settings/core.toml index 328a7e0..80f6e7b 100644 --- a/src/automator/data/settings/core.toml +++ b/src/automator/data/settings/core.toml @@ -232,7 +232,7 @@ key = "rollback_on_failure" kind = "switch" default_ref = "ScmPolicy.rollback_on_failure" label = "auto-rollback failed attempts" -description = "⚠ in-place mode (isolation=none): when ON, a failed attempt's tracked changes are auto-reverted and the untracked files this run created are deleted (its uncommitted work is lost). When OFF (default), the orchestrator never touches your tree — it pauses with manual recovery steps. Prefer isolation=worktree to keep failures off your main checkout." +description = "⚠ in-place mode (isolation=none): when ON, a failed attempt's tracked changes are auto-reverted and the untracked files this run created are deleted (its uncommitted work is lost). When OFF (default), the orchestrator never touches your tree — it pauses with manual recovery steps. Governs unattended/stopped attempts only: a resolved escalation's re-drive always auto-recovers regardless (reverts the failed source, keeps the corrected spec). Prefer isolation=worktree to keep failures off your main checkout." [[section.field]] key = "seed_adapter_defaults" kind = "switch" diff --git a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml index 2e37a2a..e2fa14d 100644 --- a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml +++ b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill" -module_version: 0.7.0 +module_version: 0.7.1 default_selected: false module_greeting: > BMAD Auto installed — both the automation skills and the diff --git a/src/automator/engine.py b/src/automator/engine.py index e3b8688..410e4ba 100644 --- a/src/automator/engine.py +++ b/src/automator/engine.py @@ -637,82 +637,94 @@ def _pick_next(self): continue return story + def _protected_relpaths(self) -> tuple[str, ...]: + """Repo-relative posix paths of the BMAD artifact folders. These are + orchestrator-owned: never counted as a dev attempt's dirtiness (the + resolve workflow corrects the frozen spec here) and preserved through + rollback. Folders configured outside the repo are skipped — nothing to + protect there.""" + out: list[str] = [] + for protected in ( + self.workspace.paths.output_folder, + self.workspace.paths.implementation_artifacts, + self.workspace.paths.planning_artifacts, + ): + try: + out.append(protected.relative_to(self.workspace.root).as_posix()) + except ValueError: + pass # configured outside the repo; nothing to protect here + return tuple(out) + def _rollback_or_pause(self, task: StoryTask, *, cause: str = "stopped") -> None: """Recover from an in-place attempt that won't proceed. No-op when the tree is already at the attempt's baseline (nothing this - attempt touched): neither a reset nor a pause is needed. This is also - what lets the manual-recovery instructions terminate — after the operator - resets and resumes, the now-clean tree skips straight through instead of - re-pausing on the still-set ``baseline_commit``. - - Otherwise, with ``scm.rollback_on_failure`` OFF (default) the orchestrator - never touches the working tree: it emits a bold manual-recovery notice and - pauses the run (stop-and-wait), so nothing proceeds on a half-finished - tree. With it ON, it does the safest possible automatic rollback — - revert the attempt's tracked changes to baseline and delete only the - untracked files this run created (the whole BMAD output folder and every - pre-existing untracked file are preserved; there is no blanket - ``git clean``). ``cause`` tunes the manual notice's wording.""" + attempt touched, ignoring orchestrator-owned artifact folders): neither a + reset nor a pause is needed. This is also what lets the manual-recovery + instructions terminate — after the operator resets and resumes, the + now-clean tree skips straight through instead of re-pausing on the + still-set ``baseline_commit``. + + A ``cause="resolved"`` re-drive is human-initiated (the operator ran the + resolve workflow and re-armed the story), so it always auto-recovers and + never pauses, regardless of ``scm.rollback_on_failure``. Only then are the + BMAD artifact folders treated as orchestrator-owned: excluded from the + dirty check (the corrected spec must not read as a failed attempt) and + preserved through the reset (so the correction survives the rebuild). + + Otherwise (a stopped/abandoned attempt) the flag governs: OFF (default) + leaves the working tree untouched and emits a bold manual-recovery notice + that pauses the run (stop-and-wait); ON does a clean reset to baseline. + Either way pre-existing untracked files are preserved; there is no blanket + ``git clean``.""" + resolved = cause == "resolved" + protected = self._protected_relpaths() if resolved else () if task.baseline_commit and not verify.attempt_dirty( - self.workspace.root, task.baseline_commit, task.baseline_untracked + self.workspace.root, task.baseline_commit, task.baseline_untracked, exclude=protected ): self.journal.append("rollback-skipped-clean", story_key=task.story_key) return - if not self.policy.scm.rollback_on_failure: - self._pause_for_manual_recovery(task, task.baseline_commit or "", cause=cause) - return # unreachable: _pause_for_manual_recovery always raises - self.journal.append( - "rollback-auto", - story_key=task.story_key, - baseline=task.baseline_commit or "", - note="reverting tracked changes + run-created untracked files", - ) - self._safe_reset(task) + if resolved or self.policy.scm.rollback_on_failure: + self.journal.append( + "rollback-auto", + story_key=task.story_key, + baseline=task.baseline_commit or "", + note="reverting tracked changes + run-created untracked files", + ) + self._safe_reset(task, preserve=protected) + return + self._pause_for_manual_recovery(task, task.baseline_commit or "") + return # unreachable: _pause_for_manual_recovery always raises - def _safe_reset(self, task: StoryTask) -> None: + def _safe_reset(self, task: StoryTask, *, preserve: tuple[str, ...] = ()) -> None: """Revert tracked changes to the task baseline and remove only the untracked files this run created — never a blanket `git clean`. Used by - the gated rollback (when enabled) and by internal ledger recovery (sweep + the gated/resolved rollback and by internal ledger recovery (sweep migration), which restores the orchestrator's own state and must not - pause.""" - keep = [".automator"] - for protected in ( - self.workspace.paths.output_folder, - self.workspace.paths.implementation_artifacts, - self.workspace.paths.planning_artifacts, - ): - try: - keep.append(str(protected.relative_to(self.workspace.root))) - except ValueError: - pass # configured outside the repo; nothing to protect here + pause. The BMAD artifact folders are always kept from untracked deletion; + ``preserve`` (set only on a resolved re-drive) additionally keeps their + *tracked* content alive through the reset, so a just-corrected spec is not + reverted. Sweep passes no ``preserve`` — it wants the broken ledger gone.""" verify.safe_rollback( self.workspace.root, task.baseline_commit or "", baseline_untracked=task.baseline_untracked, - keep=tuple(keep), + keep=(".automator", *self._protected_relpaths()), + preserve=preserve, ) - def _pause_for_manual_recovery( - self, task: StoryTask, baseline: str, *, cause: str = "stopped" - ) -> None: - """OFF path: leave the tree untouched, surface bold manual-recovery - instructions, and pause the run. Always raises RunPaused. ``cause`` - selects the wording: ``"resolved"`` for an escalation re-armed into a - clean rebuild, anything else for a stopped/abandoned attempt.""" + def _pause_for_manual_recovery(self, task: StoryTask, baseline: str) -> None: + """OFF path for a stopped/abandoned in-place attempt: leave the tree + untouched, surface bold manual-recovery instructions, and pause the run. + Always raises RunPaused. A *resolved* escalation never reaches here — + `_rollback_or_pause` auto-recovers that human-initiated re-drive + regardless of `scm.rollback_on_failure`.""" short = baseline[:12] or "the run's baseline commit" - if cause == "resolved": - why = ( - f"Story **{task.story_key}**'s escalation was resolved; re-driving " - "it needs a clean baseline, but auto-rollback is OFF, so the " - "working tree was left exactly as-is for you to inspect.\n" - ) - else: - why = ( - f"Story **{task.story_key}**'s attempt was stopped and auto-rollback " - "is OFF, so the working tree was left exactly as-is for you to " - "inspect.\n" - ) + why = ( + f"Story **{task.story_key}**'s attempt was stopped and auto-rollback " + "is OFF, so the working tree was left exactly as-is for you to " + "inspect.\n" + ) notice = ( "**ACTION REQUIRED — manual rollback needed**\n" f"{why}" diff --git a/src/automator/policy.py b/src/automator/policy.py index 6ff4de3..f015069 100644 --- a/src/automator/policy.py +++ b/src/automator/policy.py @@ -221,7 +221,11 @@ class ScmPolicy: # untracked files and the whole _bmad-output/ are preserved) — convenient but # it discards the attempt's uncommitted work, so a warning is journalled when # it fires. Worktree isolation sidesteps this entirely (failed work stays in - # its worktree), so this knob only matters for isolation = "none". + # its worktree), so this knob only matters for isolation = "none". This flag + # governs unattended/stopped attempts only: a human-initiated escalation + # resolve re-drive always auto-recovers regardless — it reverts the failed + # attempt's source but preserves the corrected spec under the BMAD artifact + # folders, which it treats as orchestrator-owned. rollback_on_failure: bool = False # failed_diff_max_mb caps the per-file size (MB) of untracked files captured # into a kept-failed unit's forensic changes.patch, so a stray build dir or @@ -715,7 +719,7 @@ def _fold_deprecated_engine( merge_strategy = "merge" # ff | merge | squash (worktree mode merges the unit branch into target locally) delete_branch = true # delete the unit branch after a successful merge keep_failed = true # keep a failed unit's worktree+branch for inspection -rollback_on_failure = false # in-place (isolation="none") recovery after a failed attempt. false = never touch the tree; pause with manual recovery steps. true = auto-revert the attempt's tracked changes + remove only the untracked files this run created (WARNING: discards the attempt's uncommitted work; never a blanket git clean). Prefer isolation="worktree" to avoid touching your main checkout. +rollback_on_failure = false # in-place (isolation="none") recovery after a failed attempt. false = never touch the tree; pause with manual recovery steps. true = auto-revert the attempt's tracked changes + remove only the untracked files this run created (WARNING: discards the attempt's uncommitted work; never a blanket git clean). Governs unattended/stopped attempts only: a resolved escalation's re-drive always auto-recovers regardless (reverts the failed source, keeps the corrected spec). Prefer isolation="worktree" to avoid touching your main checkout. failed_diff_max_mb = 5 # per-file size cap (MB) for untracked files in a kept-failed unit's changes.patch; oversized files are skipped with a marker failed_diff_unlimited = false # true = capture the failed-unit diff with no size cap (may produce very large patches; warns when active) # commit_message_template: when set, the commit message dev sessions use for a diff --git a/src/automator/verify.py b/src/automator/verify.py index aaf3982..9cd6042 100644 --- a/src/automator/verify.py +++ b/src/automator/verify.py @@ -131,19 +131,45 @@ def has_changes_since(repo: Path, baseline: str) -> bool: return rc == 0 and out != "" -def attempt_dirty(repo: Path, baseline: str, baseline_untracked: list[str] | None) -> bool: +def attempt_dirty( + repo: Path, + baseline: str, + baseline_untracked: list[str] | None, + exclude: tuple[str, ...] = (), +) -> bool: """True if a `safe_rollback` to `baseline` would change anything: tracked changes since baseline, or untracked files created since the baseline snapshot. `baseline_untracked=None` (a pre-snapshot run) means untracked files are never this attempt's to remove, so only tracked diff counts. This mirrors `safe_rollback`'s notion of what *this attempt* touched, so callers - can skip a no-op reset/pause when the tree is already at baseline.""" - rc, _ = _git(repo, "diff", "--quiet", baseline, "--") + can skip a no-op reset/pause when the tree is already at baseline. + + `exclude` is repo-relative posix dir prefixes (e.g. the BMAD artifact + folders) whose changes are orchestrator-owned and never count as a dev + attempt's dirtiness — they pair with `safe_rollback`'s `preserve`, so a + change confined to those folders reads as clean.""" + if exclude: + rc, _ = _git(repo, "diff", "--quiet", baseline, "--", ".", *_exclude_specs(exclude)) + else: + rc, _ = _git(repo, "diff", "--quiet", baseline, "--") if rc != 0: return True if baseline_untracked is None: return False - return bool(untracked_files(repo) - set(baseline_untracked)) + created = untracked_files(repo) - set(baseline_untracked) + if exclude: + created = {p for p in created if not _path_under_any(p, exclude)} + return bool(created) + + +def _exclude_specs(dirs: tuple[str, ...]) -> list[str]: + """git pathspec `:(exclude)` args for each repo-relative dir prefix.""" + return [f":(exclude){d}" for d in dirs] + + +def _path_under_any(path: str, prefixes: tuple[str, ...]) -> bool: + """True if repo-relative posix `path` equals or sits under any `prefixes` dir.""" + return any(path == p or path.startswith(p.rstrip("/") + "/") for p in prefixes) def untracked_files(repo: Path) -> set[str]: @@ -162,6 +188,7 @@ def safe_rollback( *, baseline_untracked: list[str] | None, keep: tuple[str, ...] = (".automator",), + preserve: tuple[str, ...] = (), ) -> None: """Undo a failed attempt WITHOUT a blanket `git clean`. @@ -172,10 +199,29 @@ def safe_rollback( therefore never runs `git clean -fd`, so it can't eat a user's pre-existing untracked work. `baseline_untracked` is the snapshot taken when the baseline was captured; None (a pre-upgrade run with no snapshot) removes nothing. + + `preserve` is repo-relative posix dir prefixes (the BMAD artifact folders) + whose *tracked* content must survive the hard reset — e.g. a frozen spec the + resolve workflow just corrected. The `git reset --hard` would otherwise + revert them (keep only guards untracked deletion). We snapshot the current + tree with `git stash create`, reset, then restore those paths from the + snapshot. Untracked artifacts need no special handling: the reset leaves them + alone and the cleanup below skips `keep` dirs. """ + snapshot = "" + if preserve: + rc, out = _git(repo, "stash", "create") + if rc == 0: + snapshot = out.strip() rc, out = _git(repo, "reset", "--hard", baseline) if rc != 0: raise GitError(f"git reset --hard {baseline} failed: {out}") + if preserve and snapshot: + # Restore each artifact path's pre-reset content from the snapshot tree. + # Best-effort per-path: a dir with no tracked files in the snapshot makes + # `git checkout` exit non-zero ("pathspec did not match") — not an error. + for d in preserve: + _git(repo, "checkout", snapshot, "--", d) if baseline_untracked is None: return # no snapshot to diff against: never delete untracked files created = untracked_files(repo) - set(baseline_untracked) diff --git a/tests/test_engine.py b/tests/test_engine.py index a3fd168..d39fa2c 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -621,9 +621,10 @@ def test_rollback_or_pause_skips_clean_tree(project): assert "rollback-manual-required" not in kinds -def test_manual_recovery_wording_resolved_vs_stopped(project): - """The manual-recovery notice never claims the story 'failed', and reflects - the real cause: a resolved escalation re-driving vs. a stopped attempt.""" +def test_manual_recovery_wording_stopped(project): + """Only the stopped/abandoned path reaches the manual-recovery notice now (a + resolved escalation auto-recovers instead). The notice never claims the story + 'failed' and names the real cause.""" policy = Policy( gates=GatesPolicy(mode="none"), notify=QUIET, @@ -633,18 +634,43 @@ def test_manual_recovery_wording_resolved_vs_stopped(project): task = StoryTask(story_key="1-1-a", epic=1) baseline = rev_parse_head(project.project) - with pytest.raises(RunPaused) as resolved: - engine._pause_for_manual_recovery(task, baseline, cause="resolved") with pytest.raises(RunPaused) as stopped: - engine._pause_for_manual_recovery(task, baseline, cause="stopped") + engine._pause_for_manual_recovery(task, baseline) - for exc in (resolved, stopped): - assert "failed" not in exc.value.reason - assert "manual rollback" in exc.value.reason.lower() - assert "escalation was resolved" in resolved.value.reason + assert "failed" not in stopped.value.reason + assert "manual rollback" in stopped.value.reason.lower() assert "attempt was stopped" in stopped.value.reason +def test_rollback_or_pause_resolved_auto_recovers(project): + """A resolved escalation re-drive (human-initiated) auto-recovers even with + rollback_on_failure OFF: it reverts the failed attempt's source change but + preserves the corrected spec under the BMAD artifact folder, and never pauses + for manual rollback.""" + policy = Policy( + gates=GatesPolicy(mode="none"), + notify=QUIET, + scm=ScmPolicy(rollback_on_failure=False), # OFF: stopped attempts would pause + ) + engine, _ = make_engine(project, [], policy=policy) + repo = project.project + task = StoryTask(story_key="1-1-a", epic=1) + task.baseline_commit = rev_parse_head(repo) + task.baseline_untracked = [] # clean fixture at baseline + + (repo / "src.txt").write_text("partial dev work\n") # failed attempt's source + spec = project.implementation_artifacts / "spec-1-1-a.md" + spec.write_text("frozen: corrected by resolve\n") # spec under the artifact folder + + engine._rollback_or_pause(task, cause="resolved") # must NOT raise RunPaused + + assert (repo / "src.txt").read_text() == "original\n" # source reverted + assert spec.read_text() == "frozen: corrected by resolve\n" # spec preserved + kinds = [e["kind"] for e in engine.journal.entries()] + assert "rollback-auto" in kinds + assert "rollback-manual-required" not in kinds + + def test_resolved_escalation_resume_skips_clean_rollback(project): """End-to-end regression for the resume loop: a CRITICAL escalation that left a clean tree, once resolved (re-armed) and resumed, must NOT demand a manual @@ -683,6 +709,53 @@ def test_resolved_escalation_resume_skips_clean_rollback(project): assert "rollback-manual-required" not in kinds +def test_resolved_escalation_resume_dirty_tree_auto_recovers(project): + """End-to-end regression for the reported loop: a CRITICAL escalation that left + the tree dirty (a partial source edit plus a spec under the artifact folder), + once resolved (re-armed) and resumed with rollback_on_failure OFF, must + auto-recover — NOT demand a manual rollback — and re-drive the story to done.""" + write_sprint(project, {"1-1-a": "ready-for-dev"}) + + def escalate_dirty(spec): + baseline = rev_parse_head(project.project) + (project.project / "src.txt").write_text("partial dev work\n") # source debris + sp = spec_path(project, "1-1-a") + write_spec(sp, "blocked", baseline) # spec under the artifact folder + return SessionResult( + status="completed", + result_json={ + "workflow": "auto-dev", + "story_key": "1-1-a", + "spec_file": str(sp), + "escalations": [{"type": "blocked", "severity": "CRITICAL", "detail": "boom"}], + }, + ) + + policy = Policy( + gates=GatesPolicy(mode="none"), + notify=QUIET, + scm=ScmPolicy(rollback_on_failure=False), # production default + ) + engine, _ = make_engine(project, [escalate_dirty], policy=policy) + summary = engine.run() + assert summary.paused and summary.escalated == 1 + + rearm_escalation(engine.run_dir) # the resolve workflow's re-arm step + + resumed, _ = resume_engine( + project, + engine, + [dev_effect(project, "1-1-a"), review_effect(project, "1-1-a", clean=True)], + policy=policy, + ) + summary2 = resumed.run() + + assert summary2.done == 1 and not summary2.paused # no manual-rollback loop + kinds = [e["kind"] for e in resumed.journal.entries()] + assert "rollback-auto" in kinds # auto-recovered despite OFF + assert "rollback-manual-required" not in kinds + + def test_dev_escalation_records_spec_for_rearm(project): """A dev session that HALTs with a `blocked` spec still records task.spec_file, so rearm_escalation can flip the spec to `ready-for-dev` for the re-drive. diff --git a/tests/test_verify.py b/tests/test_verify.py index 8bcb933..b8a7a7d 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -55,6 +55,35 @@ def test_attempt_dirty_none_snapshot_ignores_untracked(project): assert verify.attempt_dirty(project.project, baseline, None) is True +def test_attempt_dirty_excludes_untracked_artifact(project): + """A new untracked spec under an orchestrator-owned artifact folder is not the + dev attempt's dirtiness when that folder is excluded — but counts otherwise.""" + repo = project.project + artifact_rel = project.implementation_artifacts.relative_to(repo).as_posix() + baseline = verify.rev_parse_head(repo) + (project.implementation_artifacts / "spec-1-1-a.md").write_text("corrected\n") + assert verify.attempt_dirty(repo, baseline, [], exclude=(artifact_rel,)) is False + assert verify.attempt_dirty(repo, baseline, []) is True + + +def test_attempt_dirty_excludes_tracked_artifact(project): + """A tracked edit confined to the artifact folder reads as clean when excluded; + a source edit alongside it still counts.""" + repo = project.project + spec = project.implementation_artifacts / "spec-1-1-a.md" + spec.write_text("orig\n") + git(repo, "add", "-A") + git(repo, "commit", "-q", "-m", "spec") + baseline = verify.rev_parse_head(repo) + artifact_rel = project.implementation_artifacts.relative_to(repo).as_posix() + + spec.write_text("corrected by resolve\n") # tracked artifact edit + assert verify.attempt_dirty(repo, baseline, [], exclude=(artifact_rel,)) is False + + (repo / "src.txt").write_text("dev work\n") # real source change + assert verify.attempt_dirty(repo, baseline, [], exclude=(artifact_rel,)) is True + + def test_verify_dev_happy(project): write_sprint(project, {"1-1-a": "review"}) task = make_task(project) @@ -339,6 +368,33 @@ def test_safe_rollback_prunes_emptied_dirs(project): assert not (repo / "tmpdir").exists() # emptied parent dirs pruned +def test_safe_rollback_preserves_tracked_artifact(project): + """`preserve` keeps a *tracked* artifact edit (the resolve workflow's corrected + spec) alive through the hard reset, while a tracked source edit is still + reverted — `keep` alone only guards untracked deletion, not the reset.""" + repo = project.project + spec = project.implementation_artifacts / "spec-1-1-a.md" + spec.write_text("frozen: original\n") + git(repo, "add", "-A") + git(repo, "commit", "-q", "-m", "spec") + baseline = verify.rev_parse_head(repo) + snap = sorted(verify.untracked_files(repo)) + artifact_rel = project.implementation_artifacts.relative_to(repo).as_posix() + + (repo / "src.txt").write_text("dev attempt\n") # tracked source edit + spec.write_text("frozen: corrected\n") # tracked artifact edit (resolve) + + verify.safe_rollback( + repo, + baseline, + baseline_untracked=snap, + keep=(".automator", artifact_rel), + preserve=(artifact_rel,), + ) + assert (repo / "src.txt").read_text() == "original\n" # source reverted + assert spec.read_text() == "frozen: corrected\n" # spec correction preserved + + def test_worktree_clean_ignores_policy_file(project): # A tracked-but-modified .automator/policy.toml (rewritten by the TUI # settings editor) must not count as a dirty tree, or every settings edit diff --git a/uv.lock b/uv.lock index e361ab7..9875155 100644 --- a/uv.lock +++ b/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.11" [[package]] name = "bmad-auto" -version = "0.7.0" +version = "0.7.1" source = { editable = "." } dependencies = [ { name = "pyyaml" }, From 5b58652fef775b353bf05bf15b7716b15ac95523 Mon Sep 17 00:00:00 2001 From: pbean Date: Thu, 25 Jun 2026 13:34:45 -0700 Subject: [PATCH 2/3] fix(tui): strip private-marker SGR so the Log tab isn't all underlined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Log tab replays raw pipe-pane captures through pyte. Modern CLIs (Claude Code et al.) emit XTMODKEYS at startup — `CSI > 4 ; 2 m` (modifyOtherKeys) — a private-marker CSI sequence that is not an SGR. pyte 0.8.2 ignores the `>` marker (only `?` sets private; `>` is `pass`-ed) and dispatches it to SGR anyway, reading the `4` as underline-on. The matching disable (`CSI > 4 ; 0 m`) only fires at app exit, which a live or killed capture never contains, so every rendered line stayed underlined. LogView.read_new now strips private-marker SGR (`CSI [<>=?] ... m`) before feeding pyte. A legitimate SGR never carries such a marker, so the filter can only remove non-display sequences — never printable text or genuine color/bold/ underline styling. A held-back trailing-CSI keeps it correct when a sequence straddles two reads (the file is re-read from the held offset); the file seek/offset bookkeeping is unchanged and the approximate log_pos->line mapping is unaffected. Verified over 110 real run logs through the real LogView: worst underline 38-97% -> 0.0%, reverse 0.0%, color coverage preserved, plain text unchanged. Adds regression, legitimate-underline-preserved, and split-across-reads tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 6 ++++++ src/automator/tui/data.py | 44 +++++++++++++++++++++++++++++++++++---- tests/test_tui_data.py | 42 +++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c21194..f0af982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ breaking changes may land in a minor release. ### Fixed +- **The Log tab no longer renders whole CLI sessions underlined.** Modern CLIs emit an XTMODKEYS + sequence (`CSI > 4 ; 2 m`, "modifyOtherKeys") at startup that the pane emulator (pyte) misread as + SGR 4 / underline-on — with no matching off present in a live capture — so every line came out + underlined and hard to read. The log view now strips private-marker CSI sequences before emulation; + genuine color, bold, and properly-closed underline styling is preserved. + - **Resolving a CRITICAL escalation no longer loops on a manual-rollback prompt when the resolve edited the spec.** 0.7.0 fixed the loop only for an already-clean tree, but the resolve workflow's whole job is to correct the frozen spec under the BMAD artifact folder (`_bmad-output/...`, which is diff --git a/src/automator/tui/data.py b/src/automator/tui/data.py index 5c245de..8a976fc 100644 --- a/src/automator/tui/data.py +++ b/src/automator/tui/data.py @@ -15,6 +15,7 @@ import bisect import json +import re from collections import deque from dataclasses import dataclass from pathlib import Path @@ -316,6 +317,30 @@ def _render_row(row: dict) -> Text: return text +# CSI sequences with a private/secondary marker (< > = ?) are terminal capability +# negotiation, never display SGR. pyte 0.8.2 ignores the marker and misdispatches +# them to SGR anyway: e.g. XTMODKEYS `CSI > 4 ; 2 m` (modifyOtherKeys, emitted at +# session start by Claude Code et al.) is read as SGR 4 = underline-on, leaving the +# whole log underlined until an exit-time disable a live capture never contains. +# Strip them before pyte sees them; a legitimate SGR carries no marker, so this can +# only remove non-display sequences (never printable text or genuine styling). +_PRIVATE_MARKER_SGR = re.compile(rb"\x1b\[[<>=?][0-9;:]*m") +# A trailing, not-yet-terminated CSI at the end of a read: ESC, or ESC[ followed by +# only param/marker bytes with no final letter. Held back so a marker sequence split +# across two reads is filtered whole next time (the filter must see it complete). +_INCOMPLETE_CSI_TAIL = re.compile(rb"\x1b(?:\[[0-9;:<>=?]*)?\Z") + + +def _strip_private_marker_sgr(chunk: bytes) -> tuple[bytes, int]: + """(filtered chunk, bytes held back). Drops private-marker SGR sequences and + returns the length of an unterminated trailing CSI that should not be consumed + yet, so the caller can re-read it next time and see the sequence whole.""" + m = _INCOMPLETE_CSI_TAIL.search(chunk) + held = len(m.group()) if m else 0 + body = chunk[: len(chunk) - held] if held else chunk + return _PRIVATE_MARKER_SGR.sub(b"", body), held + + class _CountingDeque(deque): """history.top replacement that counts rows permanently gone above the window: maxlen evictions plus the clear() from pyte's reset()/ED-3. @@ -441,19 +466,30 @@ def read_new(self) -> bool: with self.path.open("rb") as f: f.seek(self._offset) chunk = f.read(size - self._offset) + # Drop private-marker SGR (pyte misreads it as underline); hold back an + # unterminated trailing CSI so a sequence split across reads is filtered + # whole next time. `consumed` is the original byte span we commit here — + # the file offset advances over it while pyte sees only the filtered bytes. + filtered, held = _strip_private_marker_sgr(chunk) + consumed = len(chunk) - held + base, total = self._offset, len(filtered) top = self._screen.history.top - for start in range(0, len(chunk), self._checkpoint_bytes): - piece = chunk[start : start + self._checkpoint_bytes] + for start in range(0, total, self._checkpoint_bytes): + piece = filtered[start : start + self._checkpoint_bytes] # ByteStream buffers escape sequences split across feeds self._stream.feed(piece) - self._offset += len(piece) + # map back to an original-file offset (filtering only removes a few + # marker bytes; the log_pos->line mapping is already approximate) + end = start + len(piece) + self._offset = base + (consumed if end >= total else round(end / total * consumed)) line = top.dropped + len(top) + self._screen.cursor.y self._checkpoints.append((self._offset, line)) + self._offset = base + consumed # drop checkpoints whose lines evicted past the history horizon; # their offsets would clamp to line 0 anyway while len(self._checkpoints) > 1 and self._checkpoints[1][1] <= top.dropped: self._checkpoints.pop(0) - return True + return consumed > 0 def index(self) -> LogIndex: """Snapshot for log_pos -> rendered-line lookups; reflects the most diff --git a/tests/test_tui_data.py b/tests/test_tui_data.py index 77a726f..d0942ba 100644 --- a/tests/test_tui_data.py +++ b/tests/test_tui_data.py @@ -364,6 +364,48 @@ def test_log_view_styles(tmp_path): assert styled["X"].name == "#ff0000" +def test_log_view_strips_private_marker_sgr(tmp_path): + # XTMODKEYS `CSI > 4 ; 2 m` (modifyOtherKeys, emitted at session start by + # Claude Code et al.) is not an SGR. pyte 0.8.2 ignores the `>` marker and + # misreads the `4` as underline-on with no matching off, underlining the whole + # log; we strip private-marker sequences before pyte sees them. + path = tmp_path / "task.log" + path.write_bytes(b"\x1b[>4;2mhello world\r\n") + view = data.LogView(path) + assert view.read_new() is True + line = view.render() + assert "hello world" in line.plain + assert not any(style.underline for _, _, style in line.spans) + + +def test_log_view_preserves_legitimate_underline(tmp_path): + # A real, properly-closed underline still renders — the fix removes only the + # misparsed private-marker sequences, not genuine SGR styling. + path = tmp_path / "task.log" + path.write_bytes(b"\x1b[4mUP\x1b[24m DOWN\r\n") + view = data.LogView(path) + assert view.read_new() is True + line = view.render() + underlined = "".join(line.plain[s:e] for s, e, st in line.spans if st.underline) + assert "UP" in underlined + assert "DOWN" not in underlined + + +def test_log_view_strips_private_marker_sgr_split_across_reads(tmp_path): + # The marker sequence straddles two reads; the held-back trailing CSI lets the + # filter see it whole on the next read instead of leaking past pyte. + path = tmp_path / "task.log" + path.write_bytes(b"\x1b[>4") + view = data.LogView(path) + view.read_new() + with path.open("ab") as f: + f.write(b";2mhello\r\n") + assert view.read_new() is True + line = view.render() + assert "hello" in line.plain + assert not any(style.underline for _, _, style in line.spans) + + def test_log_view_history_beyond_screen(tmp_path): path = tmp_path / "task.log" path.write_bytes(b"".join(f"row {i:03d}\r\n".encode() for i in range(1, 81))) From 05135d97e15065963e59bb83de096770f8ff38a6 Mon Sep 17 00:00:00 2001 From: pbean Date: Thu, 25 Jun 2026 14:52:43 -0700 Subject: [PATCH 3/3] fix(bmad-auto): address PR #13 review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two automated review findings validated and fixed; one validated as harmless and documented. - engine/model: carry resolved-spec preservation through the WHOLE resolved re-drive, not just the resume-time reset. A new latched StoryTask.resolved_redrive flag keeps the BMAD artifact folders preserved/excluded on every rollback until the correction commits, so a later mid-re-drive retry/defer reset (with rollback_on_failure=true) can no longer silently revert the human correction and loop the re-drive. (coderabbitai) - verify.safe_rollback: a genuine `git checkout` failure while restoring the preserved folders now raises GitError instead of being swallowed alongside the benign empty-dir "pathspec did not match" case — so a corrected spec can't vanish without a trace. (augmentcode) - tui/data.py: documented why an all-held incomplete-CSI chunk is intentionally not consumed (validated harmless, no safe advance possible). (augmentcode) Tests: model round-trip, engine later-rollback preservation regression, verify genuine-failure-raises + empty-dir-tolerated. Full suite green; trunk clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 17 +++++++++---- src/automator/engine.py | 21 ++++++++++++---- src/automator/model.py | 8 +++++++ src/automator/tui/data.py | 6 +++++ src/automator/verify.py | 10 +++++--- tests/test_engine.py | 31 ++++++++++++++++++++++++ tests/test_model.py | 11 +++++++++ tests/test_verify.py | 50 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 141 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0af982..1f5e2d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,11 +23,18 @@ breaking changes may land in a minor release. the frozen `baseline_commit`, even committing the spec re-paused on the next resume, an endless loop. A resolved re-drive is human-initiated, so it now always auto-recovers regardless of the flag: the BMAD artifact folders are treated as orchestrator-owned — excluded from the dirty check and preserved - through the reset — so the spec correction survives while the failed attempt's source changes revert - to baseline. This also fixes a latent sibling bug: with `rollback_on_failure = true` the reset - previously reverted the just-corrected spec silently. `scm.rollback_on_failure` still defaults OFF and - now governs only unattended/stopped attempts; the manual-recovery notice (reached by stopped attempts - only now) drops its resolved-cause wording. + through every reset of the re-drive (not just the resume-time cleanup) — so the spec correction + survives while the failed attempt's source changes revert to baseline. This closes a latent sibling + bug: with `rollback_on_failure = true` a _later_ mid-re-drive retry/defer reset previously ran with no + preserve set and reverted the just-corrected spec silently, looping the re-drive. + `scm.rollback_on_failure` still defaults OFF and now governs only unattended/stopped attempts; the + manual-recovery notice (reached by stopped attempts only now) drops its resolved-cause wording. + +- **A failed artifact restore during rollback now surfaces instead of silently dropping the + correction.** When `safe_rollback` restores the preserved BMAD folders from its pre-reset snapshot, a + genuine `git checkout` failure (corrupt snapshot, lock, IO) was swallowed alongside the benign + empty-dir "pathspec did not match" case — so a corrected spec could vanish with no error and loop the + re-drive. Real failures now raise; the empty-dir case stays tolerated. ## [0.7.0] — 2026-06-24 diff --git a/src/automator/engine.py b/src/automator/engine.py index 410e4ba..401ac34 100644 --- a/src/automator/engine.py +++ b/src/automator/engine.py @@ -667,10 +667,12 @@ def _rollback_or_pause(self, task: StoryTask, *, cause: str = "stopped") -> None A ``cause="resolved"`` re-drive is human-initiated (the operator ran the resolve workflow and re-armed the story), so it always auto-recovers and - never pauses, regardless of ``scm.rollback_on_failure``. Only then are the - BMAD artifact folders treated as orchestrator-owned: excluded from the - dirty check (the corrected spec must not read as a failed attempt) and - preserved through the reset (so the correction survives the rebuild). + never pauses, regardless of ``scm.rollback_on_failure``. For the entire + re-drive (``task.resolved_redrive``, latched at resume and cleared once the + correction is committed) the BMAD artifact folders are treated as + orchestrator-owned: excluded from the dirty check (the corrected spec must + not read as a failed attempt) and preserved through every reset — so a + later mid-re-drive retry/defer reset can't silently revert the correction. Otherwise (a stopped/abandoned attempt) the flag governs: OFF (default) leaves the working tree untouched and emits a bold manual-recovery notice @@ -678,7 +680,10 @@ def _rollback_or_pause(self, task: StoryTask, *, cause: str = "stopped") -> None Either way pre-existing untracked files are preserved; there is no blanket ``git clean``.""" resolved = cause == "resolved" - protected = self._protected_relpaths() if resolved else () + # preserve the corrected spec for the whole re-drive, not just the first + # reset; the auto-recover (pause-vs-reset) decision below is unaffected. + redrive = resolved or task.resolved_redrive + protected = self._protected_relpaths() if redrive else () if task.baseline_commit and not verify.attempt_dirty( self.workspace.root, task.baseline_commit, task.baseline_untracked, exclude=protected ): @@ -779,6 +784,9 @@ def _finish_inflight(self) -> None: task.worktree_path = "" task.branch = "" elif task.baseline_commit: + # latch resolved_redrive so the corrected spec stays protected + # through every reset of this re-drive, not just this first one + task.resolved_redrive = task.resolved_redrive or task.rearmed self._rollback_or_pause(task, cause="resolved" if task.rearmed else "stopped") task.rearmed = False # past rollback (only reached when not paused) task.phase = Phase.PENDING # deliberate reset, not a normal transition @@ -1248,6 +1256,9 @@ def _commit(self, task: StoryTask) -> None: # was nothing to finalize (NO_VCS, or the tree already at baseline). sha = verify.finalize_commit(self.workspace.root, task.baseline_commit, message) task.commit_sha = sha or task.baseline_commit + # the corrected spec is now durable in HEAD; later attempts need no + # special preservation, so drop the re-drive latch. + task.resolved_redrive = False except verify.GitError as e: self._escalate(task, f"commit failed: {e}") advance(task, Phase.DONE) diff --git a/src/automator/model.py b/src/automator/model.py index 76c84ca..f1c3b3a 100644 --- a/src/automator/model.py +++ b/src/automator/model.py @@ -141,6 +141,12 @@ class StoryTask: # resume-time manual-recovery notice describe the real cause; cleared once the # rebuild proceeds. Survives the resume serialization round-trip. rearmed: bool = False + # latched True for the lifetime of a resolved-escalation re-drive (set when + # _finish_inflight re-drives a `rearmed` task, cleared once the corrected spec + # is committed). While set, every rollback preserves the BMAD artifact folders' + # tracked content, so a mid-re-drive retry/defer reset can't silently revert + # the human correction. Survives the resume serialization round-trip. + resolved_redrive: bool = False # sweep bundles only: the deferred-work ids this task closes and the # rendered intent file handed to dev sessions dw_ids: list[str] = field(default_factory=list) @@ -176,6 +182,7 @@ def to_dict(self) -> dict[str, Any]: "commit_sha": self.commit_sha, "defer_reason": self.defer_reason, "rearmed": self.rearmed, + "resolved_redrive": self.resolved_redrive, "dw_ids": self.dw_ids, "bundle_file": self.bundle_file, "worktree_path": self.worktree_path, @@ -215,6 +222,7 @@ def from_dict(cls, d: dict[str, Any]) -> "StoryTask": commit_sha=d.get("commit_sha"), defer_reason=d.get("defer_reason"), rearmed=bool(d.get("rearmed", False)), + resolved_redrive=bool(d.get("resolved_redrive", False)), dw_ids=[str(i) for i in d.get("dw_ids", [])], bundle_file=d.get("bundle_file"), worktree_path=str(d.get("worktree_path", "")), diff --git a/src/automator/tui/data.py b/src/automator/tui/data.py index 8a976fc..ecf1452 100644 --- a/src/automator/tui/data.py +++ b/src/automator/tui/data.py @@ -485,6 +485,12 @@ def read_new(self) -> bool: line = top.dropped + len(top) + self._screen.cursor.y self._checkpoints.append((self._offset, line)) self._offset = base + consumed + # When a chunk is entirely an unterminated trailing CSI, consumed == 0 and + # _offset holds short of EOF until more bytes complete the sequence — a few + # held bytes re-read per poll. We deliberately don't advance past them: a + # log that's still streaming self-heals on the next write, and one that + # permanently ends mid-escape has nothing renderable there anyway (we'd + # only risk eating a legitimately split sequence by forcing it through). # drop checkpoints whose lines evicted past the history horizon; # their offsets would clamp to line 0 anyway while len(self._checkpoints) > 1 and self._checkpoints[1][1] <= top.dropped: diff --git a/src/automator/verify.py b/src/automator/verify.py index 9cd6042..e29fe60 100644 --- a/src/automator/verify.py +++ b/src/automator/verify.py @@ -218,10 +218,14 @@ def safe_rollback( raise GitError(f"git reset --hard {baseline} failed: {out}") if preserve and snapshot: # Restore each artifact path's pre-reset content from the snapshot tree. - # Best-effort per-path: a dir with no tracked files in the snapshot makes - # `git checkout` exit non-zero ("pathspec did not match") — not an error. + # A dir with no tracked files in the snapshot makes `git checkout` exit + # non-zero ("pathspec did not match") — that's benign. Any other failure + # means the corrected artifact wasn't restored: raise instead of silently + # losing it (which would regress the resolved re-drive into a recovery loop). for d in preserve: - _git(repo, "checkout", snapshot, "--", d) + rc, out = _git(repo, "checkout", snapshot, "--", d) + if rc != 0 and "did not match" not in out: + raise GitError(f"git checkout {snapshot[:12]} -- {d} failed: {out}") if baseline_untracked is None: return # no snapshot to diff against: never delete untracked files created = untracked_files(repo) - set(baseline_untracked) diff --git a/tests/test_engine.py b/tests/test_engine.py index d39fa2c..9e49372 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -671,6 +671,37 @@ def test_rollback_or_pause_resolved_auto_recovers(project): assert "rollback-manual-required" not in kinds +def test_resolved_redrive_preserves_spec_on_later_rollback(project): + """Regression: once a resolved re-drive latches `resolved_redrive`, a *later* + mid-re-drive rollback (default cause="stopped", rollback_on_failure ON) must + still preserve the corrected spec under the artifact folder — not just the + first resume-time reset. Without the latch this reset ran with preserve=() + and silently reverted the human correction, looping the re-drive.""" + policy = Policy( + gates=GatesPolicy(mode="none"), + notify=QUIET, + scm=ScmPolicy(rollback_on_failure=True), # ON: a stopped attempt resets + ) + engine, _ = make_engine(project, [], policy=policy) + repo = project.project + task = StoryTask(story_key="1-1-a", epic=1) + task.baseline_commit = rev_parse_head(repo) + task.baseline_untracked = [] + task.resolved_redrive = True # latched by _finish_inflight on the re-drive + + (repo / "src.txt").write_text("re-drive dev work\n") # this attempt's source + spec = project.implementation_artifacts / "spec-1-1-a.md" + spec.write_text("frozen: corrected by resolve\n") # the human correction + + engine._rollback_or_pause(task) # default cause="stopped"; must NOT pause + + assert (repo / "src.txt").read_text() == "original\n" # source reverted + assert spec.read_text() == "frozen: corrected by resolve\n" # correction kept + kinds = [e["kind"] for e in engine.journal.entries()] + assert "rollback-auto" in kinds + assert "rollback-manual-required" not in kinds + + def test_resolved_escalation_resume_skips_clean_rollback(project): """End-to-end regression for the resume loop: a CRITICAL escalation that left a clean tree, once resolved (re-armed) and resumed, must NOT demand a manual diff --git a/tests/test_model.py b/tests/test_model.py index 56d3fd8..10c43e0 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -18,6 +18,17 @@ def test_followup_review_recommended_defaults_false_for_legacy_state(): assert StoryTask.from_dict(doc).followup_review_recommended is False +def test_resolved_redrive_round_trips(): + task = StoryTask(story_key="1-1-a", epic=1, resolved_redrive=True) + assert StoryTask.from_dict(task.to_dict()).resolved_redrive is True + + +def test_resolved_redrive_defaults_false_for_legacy_state(): + doc = StoryTask(story_key="1-1-a", epic=1).to_dict() + del doc["resolved_redrive"] # state.json from before the field existed + assert StoryTask.from_dict(doc).resolved_redrive is False + + def test_stopped_round_trips(): state = _state(stopped=True) assert RunState.from_dict(state.to_dict()).stopped is True diff --git a/tests/test_verify.py b/tests/test_verify.py index b8a7a7d..ea909ef 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -395,6 +395,56 @@ def test_safe_rollback_preserves_tracked_artifact(project): assert spec.read_text() == "frozen: corrected\n" # spec correction preserved +def test_safe_rollback_raises_on_genuine_restore_failure(project, monkeypatch): + """A non-benign `git checkout` failure while restoring a `preserve` path must + raise — not silently drop the correction (which would loop the re-drive). The + benign 'pathspec did not match' case is tolerated; anything else is loud.""" + repo = project.project + spec = project.implementation_artifacts / "spec-1-1-a.md" + spec.write_text("frozen: original\n") + git(repo, "add", "-A") + git(repo, "commit", "-q", "-m", "spec") + baseline = verify.rev_parse_head(repo) + snap = sorted(verify.untracked_files(repo)) + artifact_rel = project.implementation_artifacts.relative_to(repo).as_posix() + spec.write_text("frozen: corrected\n") + + real_git = verify._git + + def fake_git(r, *args): + if args[:1] == ("checkout",): # the restore step only + return 1, "fatal: unable to read tree (something broke)" + return real_git(r, *args) + + monkeypatch.setattr(verify, "_git", fake_git) + with pytest.raises(verify.GitError, match="git checkout"): + verify.safe_rollback( + repo, + baseline, + baseline_untracked=snap, + keep=(".automator", artifact_rel), + preserve=(artifact_rel,), + ) + + +def test_safe_rollback_tolerates_empty_preserve_dir(project): + """A `preserve` dir with no tracked content in the snapshot makes checkout exit + non-zero ('did not match') — benign, must NOT raise.""" + repo = project.project + baseline = verify.rev_parse_head(repo) + snap = sorted(verify.untracked_files(repo)) + (repo / "src.txt").write_text("dev attempt\n") + + verify.safe_rollback( + repo, + baseline, + baseline_untracked=snap, + keep=(".automator", "_bmad-output"), + preserve=("_bmad-output",), # no tracked files here at snapshot time + ) + assert (repo / "src.txt").read_text() == "original\n" # source still reverted + + def test_worktree_clean_ignores_policy_file(project): # A tracked-but-modified .automator/policy.toml (rewritten by the TUI # settings editor) must not count as a dirty tree, or every settings edit