diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 09c2cfe..dd335da 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "bauto", "source": "./src/automator/data/skills", "description": "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill", - "version": "0.7.0", + "version": "0.7.1", "author": { "name": "pinkyd" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 3708e60..1f5e2d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,37 @@ All notable changes to `bmad-auto` are documented here. The format is based on [Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0, breaking changes may land in a minor release. +## [0.7.1] — 2026-06-25 + +### Fixed + +- **The Log tab no longer renders whole CLI sessions underlined.** Modern CLIs emit an XTMODKEYS + sequence (`CSI > 4 ; 2 m`, "modifyOtherKeys") at startup that the pane emulator (pyte) misread as + SGR 4 / underline-on — with no matching off present in a live capture — so every line came out + underlined and hard to read. The log view now strips private-marker CSI sequences before emulation; + genuine color, bold, and properly-closed underline styling is preserved. + +- **Resolving a CRITICAL escalation no longer loops on a manual-rollback prompt when the resolve + edited the spec.** 0.7.0 fixed the loop only for an already-clean tree, but the resolve workflow's + whole job is to correct the frozen spec under the BMAD artifact folder (`_bmad-output/...`, which is + tracked). So on resume the orchestrator saw a dirty tree and — with the default + `scm.rollback_on_failure = false` — paused for a manual reset; because the dirty check diffs against + the frozen `baseline_commit`, even committing the spec re-paused on the next resume, an endless loop. + A resolved re-drive is human-initiated, so it now always auto-recovers regardless of the flag: the + BMAD artifact folders are treated as orchestrator-owned — excluded from the dirty check and preserved + through every reset of the re-drive (not just the resume-time cleanup) — so the spec correction + survives while the failed attempt's source changes revert to baseline. This closes a latent sibling + bug: with `rollback_on_failure = true` a _later_ mid-re-drive retry/defer reset previously ran with no + preserve set and reverted the just-corrected spec silently, looping the re-drive. + `scm.rollback_on_failure` still defaults OFF and now governs only unattended/stopped attempts; the + manual-recovery notice (reached by stopped attempts only now) drops its resolved-cause wording. + +- **A failed artifact restore during rollback now surfaces instead of silently dropping the + correction.** When `safe_rollback` restores the preserved BMAD folders from its pre-reset snapshot, a + genuine `git checkout` failure (corrupt snapshot, lock, IO) was swallowed alongside the benign + empty-dir "pathspec did not match" case — so a corrected spec could vanish with no error and loop the + re-drive. Real failures now raise; the empty-dir case stays tolerated. + ## [0.7.0] — 2026-06-24 ### Changed diff --git a/module.yaml b/module.yaml index 2e37a2a..e2fa14d 100644 --- a/module.yaml +++ b/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill" -module_version: 0.7.0 +module_version: 0.7.1 default_selected: false module_greeting: > BMAD Auto installed — both the automation skills and the diff --git a/pyproject.toml b/pyproject.toml index 7e62505..ff926fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bmad-auto" -version = "0.7.0" +version = "0.7.1" description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase" readme = "README.md" license = "MIT" diff --git a/src/automator/__init__.py b/src/automator/__init__.py index 3b1c657..0db0738 100644 --- a/src/automator/__init__.py +++ b/src/automator/__init__.py @@ -6,4 +6,4 @@ spec files, and the per-run directory under .automator/runs/. """ -__version__ = "0.7.0" +__version__ = "0.7.1" diff --git a/src/automator/data/settings/core.toml b/src/automator/data/settings/core.toml index 328a7e0..80f6e7b 100644 --- a/src/automator/data/settings/core.toml +++ b/src/automator/data/settings/core.toml @@ -232,7 +232,7 @@ key = "rollback_on_failure" kind = "switch" default_ref = "ScmPolicy.rollback_on_failure" label = "auto-rollback failed attempts" -description = "⚠ in-place mode (isolation=none): when ON, a failed attempt's tracked changes are auto-reverted and the untracked files this run created are deleted (its uncommitted work is lost). When OFF (default), the orchestrator never touches your tree — it pauses with manual recovery steps. Prefer isolation=worktree to keep failures off your main checkout." +description = "⚠ in-place mode (isolation=none): when ON, a failed attempt's tracked changes are auto-reverted and the untracked files this run created are deleted (its uncommitted work is lost). When OFF (default), the orchestrator never touches your tree — it pauses with manual recovery steps. Governs unattended/stopped attempts only: a resolved escalation's re-drive always auto-recovers regardless (reverts the failed source, keeps the corrected spec). Prefer isolation=worktree to keep failures off your main checkout." [[section.field]] key = "seed_adapter_defaults" kind = "switch" diff --git a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml index 2e37a2a..e2fa14d 100644 --- a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml +++ b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill" -module_version: 0.7.0 +module_version: 0.7.1 default_selected: false module_greeting: > BMAD Auto installed — both the automation skills and the diff --git a/src/automator/engine.py b/src/automator/engine.py index e3b8688..401ac34 100644 --- a/src/automator/engine.py +++ b/src/automator/engine.py @@ -637,82 +637,99 @@ def _pick_next(self): continue return story + def _protected_relpaths(self) -> tuple[str, ...]: + """Repo-relative posix paths of the BMAD artifact folders. These are + orchestrator-owned: never counted as a dev attempt's dirtiness (the + resolve workflow corrects the frozen spec here) and preserved through + rollback. Folders configured outside the repo are skipped — nothing to + protect there.""" + out: list[str] = [] + for protected in ( + self.workspace.paths.output_folder, + self.workspace.paths.implementation_artifacts, + self.workspace.paths.planning_artifacts, + ): + try: + out.append(protected.relative_to(self.workspace.root).as_posix()) + except ValueError: + pass # configured outside the repo; nothing to protect here + return tuple(out) + def _rollback_or_pause(self, task: StoryTask, *, cause: str = "stopped") -> None: """Recover from an in-place attempt that won't proceed. No-op when the tree is already at the attempt's baseline (nothing this - attempt touched): neither a reset nor a pause is needed. This is also - what lets the manual-recovery instructions terminate — after the operator - resets and resumes, the now-clean tree skips straight through instead of - re-pausing on the still-set ``baseline_commit``. - - Otherwise, with ``scm.rollback_on_failure`` OFF (default) the orchestrator - never touches the working tree: it emits a bold manual-recovery notice and - pauses the run (stop-and-wait), so nothing proceeds on a half-finished - tree. With it ON, it does the safest possible automatic rollback — - revert the attempt's tracked changes to baseline and delete only the - untracked files this run created (the whole BMAD output folder and every - pre-existing untracked file are preserved; there is no blanket - ``git clean``). ``cause`` tunes the manual notice's wording.""" + attempt touched, ignoring orchestrator-owned artifact folders): neither a + reset nor a pause is needed. This is also what lets the manual-recovery + instructions terminate — after the operator resets and resumes, the + now-clean tree skips straight through instead of re-pausing on the + still-set ``baseline_commit``. + + A ``cause="resolved"`` re-drive is human-initiated (the operator ran the + resolve workflow and re-armed the story), so it always auto-recovers and + never pauses, regardless of ``scm.rollback_on_failure``. For the entire + re-drive (``task.resolved_redrive``, latched at resume and cleared once the + correction is committed) the BMAD artifact folders are treated as + orchestrator-owned: excluded from the dirty check (the corrected spec must + not read as a failed attempt) and preserved through every reset — so a + later mid-re-drive retry/defer reset can't silently revert the correction. + + Otherwise (a stopped/abandoned attempt) the flag governs: OFF (default) + leaves the working tree untouched and emits a bold manual-recovery notice + that pauses the run (stop-and-wait); ON does a clean reset to baseline. + Either way pre-existing untracked files are preserved; there is no blanket + ``git clean``.""" + resolved = cause == "resolved" + # preserve the corrected spec for the whole re-drive, not just the first + # reset; the auto-recover (pause-vs-reset) decision below is unaffected. + redrive = resolved or task.resolved_redrive + protected = self._protected_relpaths() if redrive else () if task.baseline_commit and not verify.attempt_dirty( - self.workspace.root, task.baseline_commit, task.baseline_untracked + self.workspace.root, task.baseline_commit, task.baseline_untracked, exclude=protected ): self.journal.append("rollback-skipped-clean", story_key=task.story_key) return - if not self.policy.scm.rollback_on_failure: - self._pause_for_manual_recovery(task, task.baseline_commit or "", cause=cause) - return # unreachable: _pause_for_manual_recovery always raises - self.journal.append( - "rollback-auto", - story_key=task.story_key, - baseline=task.baseline_commit or "", - note="reverting tracked changes + run-created untracked files", - ) - self._safe_reset(task) + if resolved or self.policy.scm.rollback_on_failure: + self.journal.append( + "rollback-auto", + story_key=task.story_key, + baseline=task.baseline_commit or "", + note="reverting tracked changes + run-created untracked files", + ) + self._safe_reset(task, preserve=protected) + return + self._pause_for_manual_recovery(task, task.baseline_commit or "") + return # unreachable: _pause_for_manual_recovery always raises - def _safe_reset(self, task: StoryTask) -> None: + def _safe_reset(self, task: StoryTask, *, preserve: tuple[str, ...] = ()) -> None: """Revert tracked changes to the task baseline and remove only the untracked files this run created — never a blanket `git clean`. Used by - the gated rollback (when enabled) and by internal ledger recovery (sweep + the gated/resolved rollback and by internal ledger recovery (sweep migration), which restores the orchestrator's own state and must not - pause.""" - keep = [".automator"] - for protected in ( - self.workspace.paths.output_folder, - self.workspace.paths.implementation_artifacts, - self.workspace.paths.planning_artifacts, - ): - try: - keep.append(str(protected.relative_to(self.workspace.root))) - except ValueError: - pass # configured outside the repo; nothing to protect here + pause. The BMAD artifact folders are always kept from untracked deletion; + ``preserve`` (set only on a resolved re-drive) additionally keeps their + *tracked* content alive through the reset, so a just-corrected spec is not + reverted. Sweep passes no ``preserve`` — it wants the broken ledger gone.""" verify.safe_rollback( self.workspace.root, task.baseline_commit or "", baseline_untracked=task.baseline_untracked, - keep=tuple(keep), + keep=(".automator", *self._protected_relpaths()), + preserve=preserve, ) - def _pause_for_manual_recovery( - self, task: StoryTask, baseline: str, *, cause: str = "stopped" - ) -> None: - """OFF path: leave the tree untouched, surface bold manual-recovery - instructions, and pause the run. Always raises RunPaused. ``cause`` - selects the wording: ``"resolved"`` for an escalation re-armed into a - clean rebuild, anything else for a stopped/abandoned attempt.""" + def _pause_for_manual_recovery(self, task: StoryTask, baseline: str) -> None: + """OFF path for a stopped/abandoned in-place attempt: leave the tree + untouched, surface bold manual-recovery instructions, and pause the run. + Always raises RunPaused. A *resolved* escalation never reaches here — + `_rollback_or_pause` auto-recovers that human-initiated re-drive + regardless of `scm.rollback_on_failure`.""" short = baseline[:12] or "the run's baseline commit" - if cause == "resolved": - why = ( - f"Story **{task.story_key}**'s escalation was resolved; re-driving " - "it needs a clean baseline, but auto-rollback is OFF, so the " - "working tree was left exactly as-is for you to inspect.\n" - ) - else: - why = ( - f"Story **{task.story_key}**'s attempt was stopped and auto-rollback " - "is OFF, so the working tree was left exactly as-is for you to " - "inspect.\n" - ) + why = ( + f"Story **{task.story_key}**'s attempt was stopped and auto-rollback " + "is OFF, so the working tree was left exactly as-is for you to " + "inspect.\n" + ) notice = ( "**ACTION REQUIRED — manual rollback needed**\n" f"{why}" @@ -767,6 +784,9 @@ def _finish_inflight(self) -> None: task.worktree_path = "" task.branch = "" elif task.baseline_commit: + # latch resolved_redrive so the corrected spec stays protected + # through every reset of this re-drive, not just this first one + task.resolved_redrive = task.resolved_redrive or task.rearmed self._rollback_or_pause(task, cause="resolved" if task.rearmed else "stopped") task.rearmed = False # past rollback (only reached when not paused) task.phase = Phase.PENDING # deliberate reset, not a normal transition @@ -1236,6 +1256,9 @@ def _commit(self, task: StoryTask) -> None: # was nothing to finalize (NO_VCS, or the tree already at baseline). sha = verify.finalize_commit(self.workspace.root, task.baseline_commit, message) task.commit_sha = sha or task.baseline_commit + # the corrected spec is now durable in HEAD; later attempts need no + # special preservation, so drop the re-drive latch. + task.resolved_redrive = False except verify.GitError as e: self._escalate(task, f"commit failed: {e}") advance(task, Phase.DONE) diff --git a/src/automator/model.py b/src/automator/model.py index 76c84ca..f1c3b3a 100644 --- a/src/automator/model.py +++ b/src/automator/model.py @@ -141,6 +141,12 @@ class StoryTask: # resume-time manual-recovery notice describe the real cause; cleared once the # rebuild proceeds. Survives the resume serialization round-trip. rearmed: bool = False + # latched True for the lifetime of a resolved-escalation re-drive (set when + # _finish_inflight re-drives a `rearmed` task, cleared once the corrected spec + # is committed). While set, every rollback preserves the BMAD artifact folders' + # tracked content, so a mid-re-drive retry/defer reset can't silently revert + # the human correction. Survives the resume serialization round-trip. + resolved_redrive: bool = False # sweep bundles only: the deferred-work ids this task closes and the # rendered intent file handed to dev sessions dw_ids: list[str] = field(default_factory=list) @@ -176,6 +182,7 @@ def to_dict(self) -> dict[str, Any]: "commit_sha": self.commit_sha, "defer_reason": self.defer_reason, "rearmed": self.rearmed, + "resolved_redrive": self.resolved_redrive, "dw_ids": self.dw_ids, "bundle_file": self.bundle_file, "worktree_path": self.worktree_path, @@ -215,6 +222,7 @@ def from_dict(cls, d: dict[str, Any]) -> "StoryTask": commit_sha=d.get("commit_sha"), defer_reason=d.get("defer_reason"), rearmed=bool(d.get("rearmed", False)), + resolved_redrive=bool(d.get("resolved_redrive", False)), dw_ids=[str(i) for i in d.get("dw_ids", [])], bundle_file=d.get("bundle_file"), worktree_path=str(d.get("worktree_path", "")), diff --git a/src/automator/policy.py b/src/automator/policy.py index 6ff4de3..f015069 100644 --- a/src/automator/policy.py +++ b/src/automator/policy.py @@ -221,7 +221,11 @@ class ScmPolicy: # untracked files and the whole _bmad-output/ are preserved) — convenient but # it discards the attempt's uncommitted work, so a warning is journalled when # it fires. Worktree isolation sidesteps this entirely (failed work stays in - # its worktree), so this knob only matters for isolation = "none". + # its worktree), so this knob only matters for isolation = "none". This flag + # governs unattended/stopped attempts only: a human-initiated escalation + # resolve re-drive always auto-recovers regardless — it reverts the failed + # attempt's source but preserves the corrected spec under the BMAD artifact + # folders, which it treats as orchestrator-owned. rollback_on_failure: bool = False # failed_diff_max_mb caps the per-file size (MB) of untracked files captured # into a kept-failed unit's forensic changes.patch, so a stray build dir or @@ -715,7 +719,7 @@ def _fold_deprecated_engine( merge_strategy = "merge" # ff | merge | squash (worktree mode merges the unit branch into target locally) delete_branch = true # delete the unit branch after a successful merge keep_failed = true # keep a failed unit's worktree+branch for inspection -rollback_on_failure = false # in-place (isolation="none") recovery after a failed attempt. false = never touch the tree; pause with manual recovery steps. true = auto-revert the attempt's tracked changes + remove only the untracked files this run created (WARNING: discards the attempt's uncommitted work; never a blanket git clean). Prefer isolation="worktree" to avoid touching your main checkout. +rollback_on_failure = false # in-place (isolation="none") recovery after a failed attempt. false = never touch the tree; pause with manual recovery steps. true = auto-revert the attempt's tracked changes + remove only the untracked files this run created (WARNING: discards the attempt's uncommitted work; never a blanket git clean). Governs unattended/stopped attempts only: a resolved escalation's re-drive always auto-recovers regardless (reverts the failed source, keeps the corrected spec). Prefer isolation="worktree" to avoid touching your main checkout. failed_diff_max_mb = 5 # per-file size cap (MB) for untracked files in a kept-failed unit's changes.patch; oversized files are skipped with a marker failed_diff_unlimited = false # true = capture the failed-unit diff with no size cap (may produce very large patches; warns when active) # commit_message_template: when set, the commit message dev sessions use for a diff --git a/src/automator/tui/data.py b/src/automator/tui/data.py index 5c245de..ecf1452 100644 --- a/src/automator/tui/data.py +++ b/src/automator/tui/data.py @@ -15,6 +15,7 @@ import bisect import json +import re from collections import deque from dataclasses import dataclass from pathlib import Path @@ -316,6 +317,30 @@ def _render_row(row: dict) -> Text: return text +# CSI sequences with a private/secondary marker (< > = ?) are terminal capability +# negotiation, never display SGR. pyte 0.8.2 ignores the marker and misdispatches +# them to SGR anyway: e.g. XTMODKEYS `CSI > 4 ; 2 m` (modifyOtherKeys, emitted at +# session start by Claude Code et al.) is read as SGR 4 = underline-on, leaving the +# whole log underlined until an exit-time disable a live capture never contains. +# Strip them before pyte sees them; a legitimate SGR carries no marker, so this can +# only remove non-display sequences (never printable text or genuine styling). +_PRIVATE_MARKER_SGR = re.compile(rb"\x1b\[[<>=?][0-9;:]*m") +# A trailing, not-yet-terminated CSI at the end of a read: ESC, or ESC[ followed by +# only param/marker bytes with no final letter. Held back so a marker sequence split +# across two reads is filtered whole next time (the filter must see it complete). +_INCOMPLETE_CSI_TAIL = re.compile(rb"\x1b(?:\[[0-9;:<>=?]*)?\Z") + + +def _strip_private_marker_sgr(chunk: bytes) -> tuple[bytes, int]: + """(filtered chunk, bytes held back). Drops private-marker SGR sequences and + returns the length of an unterminated trailing CSI that should not be consumed + yet, so the caller can re-read it next time and see the sequence whole.""" + m = _INCOMPLETE_CSI_TAIL.search(chunk) + held = len(m.group()) if m else 0 + body = chunk[: len(chunk) - held] if held else chunk + return _PRIVATE_MARKER_SGR.sub(b"", body), held + + class _CountingDeque(deque): """history.top replacement that counts rows permanently gone above the window: maxlen evictions plus the clear() from pyte's reset()/ED-3. @@ -441,19 +466,36 @@ def read_new(self) -> bool: with self.path.open("rb") as f: f.seek(self._offset) chunk = f.read(size - self._offset) + # Drop private-marker SGR (pyte misreads it as underline); hold back an + # unterminated trailing CSI so a sequence split across reads is filtered + # whole next time. `consumed` is the original byte span we commit here — + # the file offset advances over it while pyte sees only the filtered bytes. + filtered, held = _strip_private_marker_sgr(chunk) + consumed = len(chunk) - held + base, total = self._offset, len(filtered) top = self._screen.history.top - for start in range(0, len(chunk), self._checkpoint_bytes): - piece = chunk[start : start + self._checkpoint_bytes] + for start in range(0, total, self._checkpoint_bytes): + piece = filtered[start : start + self._checkpoint_bytes] # ByteStream buffers escape sequences split across feeds self._stream.feed(piece) - self._offset += len(piece) + # map back to an original-file offset (filtering only removes a few + # marker bytes; the log_pos->line mapping is already approximate) + end = start + len(piece) + self._offset = base + (consumed if end >= total else round(end / total * consumed)) line = top.dropped + len(top) + self._screen.cursor.y self._checkpoints.append((self._offset, line)) + self._offset = base + consumed + # When a chunk is entirely an unterminated trailing CSI, consumed == 0 and + # _offset holds short of EOF until more bytes complete the sequence — a few + # held bytes re-read per poll. We deliberately don't advance past them: a + # log that's still streaming self-heals on the next write, and one that + # permanently ends mid-escape has nothing renderable there anyway (we'd + # only risk eating a legitimately split sequence by forcing it through). # drop checkpoints whose lines evicted past the history horizon; # their offsets would clamp to line 0 anyway while len(self._checkpoints) > 1 and self._checkpoints[1][1] <= top.dropped: self._checkpoints.pop(0) - return True + return consumed > 0 def index(self) -> LogIndex: """Snapshot for log_pos -> rendered-line lookups; reflects the most diff --git a/src/automator/verify.py b/src/automator/verify.py index aaf3982..e29fe60 100644 --- a/src/automator/verify.py +++ b/src/automator/verify.py @@ -131,19 +131,45 @@ def has_changes_since(repo: Path, baseline: str) -> bool: return rc == 0 and out != "" -def attempt_dirty(repo: Path, baseline: str, baseline_untracked: list[str] | None) -> bool: +def attempt_dirty( + repo: Path, + baseline: str, + baseline_untracked: list[str] | None, + exclude: tuple[str, ...] = (), +) -> bool: """True if a `safe_rollback` to `baseline` would change anything: tracked changes since baseline, or untracked files created since the baseline snapshot. `baseline_untracked=None` (a pre-snapshot run) means untracked files are never this attempt's to remove, so only tracked diff counts. This mirrors `safe_rollback`'s notion of what *this attempt* touched, so callers - can skip a no-op reset/pause when the tree is already at baseline.""" - rc, _ = _git(repo, "diff", "--quiet", baseline, "--") + can skip a no-op reset/pause when the tree is already at baseline. + + `exclude` is repo-relative posix dir prefixes (e.g. the BMAD artifact + folders) whose changes are orchestrator-owned and never count as a dev + attempt's dirtiness — they pair with `safe_rollback`'s `preserve`, so a + change confined to those folders reads as clean.""" + if exclude: + rc, _ = _git(repo, "diff", "--quiet", baseline, "--", ".", *_exclude_specs(exclude)) + else: + rc, _ = _git(repo, "diff", "--quiet", baseline, "--") if rc != 0: return True if baseline_untracked is None: return False - return bool(untracked_files(repo) - set(baseline_untracked)) + created = untracked_files(repo) - set(baseline_untracked) + if exclude: + created = {p for p in created if not _path_under_any(p, exclude)} + return bool(created) + + +def _exclude_specs(dirs: tuple[str, ...]) -> list[str]: + """git pathspec `:(exclude)