From e2a8636e746628e3388d98dc26ecba507d30ef47 Mon Sep 17 00:00:00 2001 From: pbean Date: Fri, 26 Jun 2026 11:31:20 -0700 Subject: [PATCH 1/5] fix(bmad-auto): stop deferred-work sweep from deferring every bundle it finished MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the migration to the generic upstream bmad-dev-auto primitive, bundle dev sessions completed the work but verify_dev_bundle rejected them with "result.json dw_ids [] do not match the bundle's […]", retried to budget, deferred, and rolled the work back — so a sweep could never close a bundled entry. The retired dev fork echoed the dw ids; the generic skill does not, and the adapter synthesized result.json without them. The orchestrator already owns the bundle→dw-id binding (_post_dev_state_sync), so verify_dev_bundle now enforces the dw_ids cross-check only when the session actually claims ids — an absent claim is the normal generic path and passes. As hardening, the run exports BMAD_AUTO_DW_IDS and the generic adapter stamps them onto the synthesized result so the cross-check stays live. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/automator/adapters/generic.py | 8 +++++++- src/automator/engine.py | 6 ++++++ src/automator/verify.py | 8 ++++++-- tests/test_verify.py | 14 ++++++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/automator/adapters/generic.py b/src/automator/adapters/generic.py index 5515b50..16b626e 100644 --- a/src/automator/adapters/generic.py +++ b/src/automator/adapters/generic.py @@ -325,7 +325,13 @@ def _result_json(self, handle: SessionHandle, spec: SessionSpec, *, wait: bool) spec_path = devcontract.find_result_artifact(artifacts, since_ns=handle.launched_ns) if spec_path is not None: story_key = spec.env.get("BMAD_AUTO_STORY_KEY") or None - return devcontract.synthesize_result(spec_path, story_key=story_key).result_json + # Bundle dev sessions: the orchestrator exports the bundle's + # owned dw ids (the generic skill never authors them). Stamp + # them onto the result so verify_dev_bundle's cross-check passes. + dw_ids = [i for i in spec.env.get("BMAD_AUTO_DW_IDS", "").split(",") if i] + return devcontract.synthesize_result( + spec_path, story_key=story_key, dw_ids=dw_ids or None + ).result_json if not wait or time.monotonic() >= deadline: return None time.sleep(RESULT_POLL_S) diff --git a/src/automator/engine.py b/src/automator/engine.py index 401ac34..f473b47 100644 --- a/src/automator/engine.py +++ b/src/automator/engine.py @@ -1381,6 +1381,12 @@ def _run_session( "BMAD_AUTO_TASK_ID": task_id, "BMAD_AUTO_STORY_KEY": task.story_key, } + if task.dw_ids: + # Deferred-work bundle: the orchestrator owns the bundle→dw-id binding + # (the generic bmad-dev-auto primitive knows nothing of dw ids). Export + # them so the generic adapter can stamp them onto the synthesized + # result.json, keeping verify_dev_bundle's dw_ids cross-check live. 
+ env["BMAD_AUTO_DW_IDS"] = ",".join(task.dw_ids) if role == "dev" and not self.policy.review.enabled: # signals that the orchestrator will run no follow-up review session. # bmad-dev-auto always self-reviews inline (step-03 → step-04) and diff --git a/src/automator/verify.py b/src/automator/verify.py index 35e34a4..85fa164 100644 --- a/src/automator/verify.py +++ b/src/automator/verify.py @@ -702,7 +702,11 @@ def verify_dev_bundle( review_enabled: bool = True, ) -> VerifyOutcome: """verify_dev for a deferred-work bundle: bundles have no sprint-status - entry, but the session must claim exactly the dw ids the bundle owns.""" + entry. The orchestrator owns the bundle→dw-id binding (``task.dw_ids``, + marked done by ``SweepEngine._post_dev_state_sync``); the generic + ``bmad-dev-auto`` primitive never authors dw ids. So the dw_ids cross-check + is enforced only when the session actually claims them — an empty/absent + claim is the normal generic path and passes.""" rj = result_json or {} spec_file = rj.get("spec_file") if not spec_file: @@ -740,7 +744,7 @@ def verify_dev_bundle( return VerifyOutcome.escalate(str(e)) claimed_ids = {str(i) for i in rj.get("dw_ids", [])} - if claimed_ids != set(task.dw_ids): + if claimed_ids and claimed_ids != set(task.dw_ids): return VerifyOutcome.retry( f"result.json dw_ids {sorted(claimed_ids)} do not match the bundle's " f"{sorted(task.dw_ids)}" diff --git a/tests/test_verify.py b/tests/test_verify.py index d29991e..ceb9e6c 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -275,6 +275,20 @@ def test_verify_dev_bundle_dw_ids_mismatch(project): assert not out.ok and "dw_ids" in out.reason +def test_verify_dev_bundle_absent_dw_ids_passes(project): + # Generic bmad-dev-auto path: the primitive authors no dw ids, so result.json + # omits them. The orchestrator owns the bundle→dw-id binding, so verify must + # pass on an absent claim (regression: this used to always retry → defer). 
+ task = make_bundle_task(project) + sp = project.implementation_artifacts / "spec-dw-test-bundle.md" + write_spec(sp, "in-review", task.baseline_commit) + (project.project / "src.txt").write_text("changed\n") + rj = {"workflow": "auto-dev", "spec_file": str(sp)} + out = verify.verify_dev_bundle(task, project, rj) + assert out.ok + assert task.spec_file == str(sp) + + def test_verify_review_bundle_ledger_gate(project): task = make_bundle_task(project) sp = project.implementation_artifacts / "spec-dw-test-bundle.md" From 4276c67af274e05a0c8aead7fd50a02adeda9e75 Mon Sep 17 00:00:00 2001 From: pbean Date: Fri, 26 Jun 2026 11:38:49 -0700 Subject: [PATCH 2/5] =?UTF-8?q?chore(release):=200.7.4=20=E2=80=94=20Defer?= =?UTF-8?q?red-work=20sweep=20no=20longer=20defers=20every=20bundle=20it?= =?UTF-8?q?=20just=20fi=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .claude-plugin/marketplace.json | 2 +- CHANGELOG.md | 14 ++++++++++++++ module.yaml | 2 +- pyproject.toml | 2 +- src/automator/__init__.py | 2 +- .../data/skills/bmad-auto-setup/assets/module.yaml | 2 +- uv.lock | 2 +- 7 files changed, 20 insertions(+), 6 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index a766a21..d52867a 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "bauto", "source": "./src/automator/data/skills", "description": "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill", - "version": "0.7.3", + "version": "0.7.4", "author": { "name": "pinkyd" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ff7ffe..c301a57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ All notable changes to `bmad-auto` are documented here. 
The format is based on [Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0, breaking changes may land in a minor release. +## [0.7.4] — 2026-06-26 + +### Fixed + +- **Deferred-work sweep no longer defers every bundle it just finished.** After the migration to + the generic upstream `bmad-dev-auto` primitive, bundle dev sessions completed the work but were + rejected by `verify_dev_bundle` with `result.json dw_ids [] do not match the bundle's […]`, + retried to budget, deferred, and rolled the work back — so a sweep could never close a bundled + entry. The retired dev fork used to echo the dw ids; the generic skill doesn't. The orchestrator + already owns the bundle→dw-id binding, so the cross-check now passes when the session claims no + ids, and the run exports `BMAD_AUTO_DW_IDS` so the synthesized result still carries them and the + check stays live. + ## [0.7.3] — 2026-06-26 ### Fixed @@ -667,6 +680,7 @@ enforced in CI. implementation phase, driven by a Python control loop with hook-based session transport and resumable on-disk run state. 
+[0.7.4]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.4 [0.7.3]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.3 [0.7.2]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.2 [0.7.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.0 diff --git a/module.yaml b/module.yaml index 752af0a..38998aa 100644 --- a/module.yaml +++ b/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill" -module_version: 0.7.3 +module_version: 0.7.4 default_selected: false module_greeting: > BMAD Auto installed — both the automation skills and the diff --git a/pyproject.toml b/pyproject.toml index d98feac..1f196f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bmad-auto" -version = "0.7.3" +version = "0.7.4" description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase" readme = "README.md" license = "MIT" diff --git a/src/automator/__init__.py b/src/automator/__init__.py index 447fcd2..8136e31 100644 --- a/src/automator/__init__.py +++ b/src/automator/__init__.py @@ -6,4 +6,4 @@ spec files, and the per-run directory under .automator/runs/. 
""" -__version__ = "0.7.3" +__version__ = "0.7.4" diff --git a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml index 752af0a..38998aa 100644 --- a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml +++ b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml @@ -1,7 +1,7 @@ code: bauto name: BMAD Auto Skills description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill" -module_version: 0.7.3 +module_version: 0.7.4 default_selected: false module_greeting: > BMAD Auto installed — both the automation skills and the diff --git a/uv.lock b/uv.lock index bcc6581..818456f 100644 --- a/uv.lock +++ b/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.11" [[package]] name = "bmad-auto" -version = "0.7.3" +version = "0.7.4" source = { editable = "." } dependencies = [ { name = "pyyaml" }, From 4ae716fa4f499186482be8990e144c54c0911748 Mon Sep 17 00:00:00 2001 From: pbean Date: Fri, 26 Jun 2026 11:52:08 -0700 Subject: [PATCH 3/5] fix(bmad-auto): trim whitespace when parsing BMAD_AUTO_DW_IDS The orchestrator joins the env value without spaces, but a hook-rewritten or hand-set "DW-1, DW-2" would parse to " DW-2" and trip verify_dev_bundle's dw_ids cross-check into a spurious defer. Strip each token. Adds a generic- adapter test covering the whitespace-tolerant parse. Addresses PR #17 review feedback. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/automator/adapters/generic.py | 3 ++- tests/test_generic_tmux.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/automator/adapters/generic.py b/src/automator/adapters/generic.py index 16b626e..5dcecbe 100644 --- a/src/automator/adapters/generic.py +++ b/src/automator/adapters/generic.py @@ -328,7 +328,8 @@ def _result_json(self, handle: SessionHandle, spec: SessionSpec, *, wait: bool) # Bundle dev sessions: the orchestrator exports the bundle's # owned dw ids (the generic skill never authors them). Stamp # them onto the result so verify_dev_bundle's cross-check passes. - dw_ids = [i for i in spec.env.get("BMAD_AUTO_DW_IDS", "").split(",") if i] + raw_dw_ids = spec.env.get("BMAD_AUTO_DW_IDS", "").split(",") + dw_ids = [tok for tok in (i.strip() for i in raw_dw_ids) if tok] return devcontract.synthesize_result( spec_path, story_key=story_key, dw_ids=dw_ids or None ).result_json diff --git a/tests/test_generic_tmux.py b/tests/test_generic_tmux.py index da6fafd..319e4be 100644 --- a/tests/test_generic_tmux.py +++ b/tests/test_generic_tmux.py @@ -234,6 +234,27 @@ def test_generic_dev_synthesizes_done_spec(tmp_path): assert rj["baseline_commit"] == "abc123" # mapped from baseline_revision assert rj["story_key"] == "3-1" assert rj["escalations"] == [] + assert "dw_ids" not in rj # a normal story exports no BMAD_AUTO_DW_IDS + + +def test_generic_dev_bundle_stamps_dw_ids_from_env(tmp_path): + # The orchestrator exports the bundle's owned dw ids; the generic skill never + # authors them. The adapter stamps them onto the synthesized result, tolerant + # of whitespace in the env value (e.g. a hand-set or hook-rewritten "DW-1, DW-2"). 
+ adapter, impl = make_dev_adapter(tmp_path) + (impl / "spec-dw-bundle.md").write_text( + "---\nstatus: done\nbaseline_revision: abc123\n---\n\n" + "## Auto Run Result\n\nStatus: done\nResolved the bundle.\n" + ) + spec = SessionSpec( + task_id="3-1-dev-1", + role="dev", + prompt="/bmad-dev-auto bundle", + cwd=tmp_path, + env={"BMAD_AUTO_STORY_KEY": "dw-bundle", "BMAD_AUTO_DW_IDS": "DW-1, DW-2"}, + ) + rj = adapter._result_json(_dev_handle(), spec, wait=True) + assert rj["dw_ids"] == ["DW-1", "DW-2"] def test_generic_dev_finds_spec_in_worktree(tmp_path): From e277c34b462aef7c86021b85af318349e0b9b764 Mon Sep 17 00:00:00 2001 From: pbean Date: Fri, 26 Jun 2026 12:17:34 -0700 Subject: [PATCH 4/5] fix(bmad-auto): guard BMAD_AUTO_DW_IDS parse against a non-string env value A plugin/session hook that sets BMAD_AUTO_DW_IDS to None (rather than deleting it) would make `.split(",")` raise and crash result synthesis, false-stalling a completed session. Coerce to "" the same way the adjacent story_key line guards. Adds tests: a None-valued env no longer crashes synthesis, and an explicit empty dw_ids list (the literal payload that deferred in production) passes verify. Addresses PR #17 review feedback. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/automator/adapters/generic.py | 2 +- tests/test_generic_tmux.py | 21 +++++++++++++++++++++ tests/test_verify.py | 10 ++++++---- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/automator/adapters/generic.py b/src/automator/adapters/generic.py index 5dcecbe..4dcd663 100644 --- a/src/automator/adapters/generic.py +++ b/src/automator/adapters/generic.py @@ -328,7 +328,7 @@ def _result_json(self, handle: SessionHandle, spec: SessionSpec, *, wait: bool) # Bundle dev sessions: the orchestrator exports the bundle's # owned dw ids (the generic skill never authors them). Stamp # them onto the result so verify_dev_bundle's cross-check passes. 
- raw_dw_ids = spec.env.get("BMAD_AUTO_DW_IDS", "").split(",") + raw_dw_ids = (spec.env.get("BMAD_AUTO_DW_IDS") or "").split(",") dw_ids = [tok for tok in (i.strip() for i in raw_dw_ids) if tok] return devcontract.synthesize_result( spec_path, story_key=story_key, dw_ids=dw_ids or None diff --git a/tests/test_generic_tmux.py b/tests/test_generic_tmux.py index 319e4be..6bd42cf 100644 --- a/tests/test_generic_tmux.py +++ b/tests/test_generic_tmux.py @@ -257,6 +257,27 @@ def test_generic_dev_bundle_stamps_dw_ids_from_env(tmp_path): assert rj["dw_ids"] == ["DW-1", "DW-2"] +def test_generic_dev_dw_ids_none_env_does_not_crash(tmp_path): + # A misbehaving plugin/hook could set BMAD_AUTO_DW_IDS to None instead of + # deleting it; synthesis must not crash (it would false-stall a completed + # session), and emits no dw ids. + adapter, impl = make_dev_adapter(tmp_path) + (impl / "spec-3-1-foo.md").write_text( + "---\nstatus: done\nbaseline_revision: abc123\n---\n\n" + "## Auto Run Result\n\nStatus: done\nImplemented the thing.\n" + ) + spec = SessionSpec( + task_id="3-1-dev-1", + role="dev", + prompt="/bmad-dev-auto 3-1", + cwd=tmp_path, + env={"BMAD_AUTO_STORY_KEY": "3-1", "BMAD_AUTO_DW_IDS": None}, + ) + rj = adapter._result_json(_dev_handle(), spec, wait=True) + assert rj["status"] == "done" + assert "dw_ids" not in rj + + def test_generic_dev_finds_spec_in_worktree(tmp_path): # Under worktree isolation the skill runs with cwd set to the worktree and # leaves its terminal spec in the worktree's rebased implementation-artifacts diff --git a/tests/test_verify.py b/tests/test_verify.py index ceb9e6c..89c497f 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -275,15 +275,17 @@ def test_verify_dev_bundle_dw_ids_mismatch(project): assert not out.ok and "dw_ids" in out.reason -def test_verify_dev_bundle_absent_dw_ids_passes(project): +@pytest.mark.parametrize("claim", [{}, {"dw_ids": []}], ids=["missing-key", "empty-list"]) +def 
test_verify_dev_bundle_absent_dw_ids_passes(project, claim): # Generic bmad-dev-auto path: the primitive authors no dw ids, so result.json - # omits them. The orchestrator owns the bundle→dw-id binding, so verify must - # pass on an absent claim (regression: this used to always retry → defer). + # omits them (missing key) or carries an empty list. The orchestrator owns the + # bundle→dw-id binding, so verify must pass on an unclaimed bundle. The empty + # list is the literal payload that defered in production ("dw_ids []"). task = make_bundle_task(project) sp = project.implementation_artifacts / "spec-dw-test-bundle.md" write_spec(sp, "in-review", task.baseline_commit) (project.project / "src.txt").write_text("changed\n") - rj = {"workflow": "auto-dev", "spec_file": str(sp)} + rj = {"workflow": "auto-dev", "spec_file": str(sp), **claim} out = verify.verify_dev_bundle(task, project, rj) assert out.ok assert task.spec_file == str(sp) From 3b55fa82603818f54e314d58177ac5a20895044b Mon Sep 17 00:00:00 2001 From: pbean Date: Fri, 26 Jun 2026 12:49:58 -0700 Subject: [PATCH 5/5] fix(bmad-auto): guard verify_dev_bundle against an explicit dw_ids null rj.get("dw_ids", []) returns None on a "dw_ids": null payload (the default only covers an absent key), so the set comprehension raised TypeError and crashed bundle verification. Coerce with `or []`. Extends the parametrized test with the null case. Addresses PR #17 review feedback. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/automator/verify.py | 2 +- tests/test_verify.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/automator/verify.py b/src/automator/verify.py index 85fa164..143785d 100644 --- a/src/automator/verify.py +++ b/src/automator/verify.py @@ -743,7 +743,7 @@ def verify_dev_bundle( except GitError as e: return VerifyOutcome.escalate(str(e)) - claimed_ids = {str(i) for i in rj.get("dw_ids", [])} + claimed_ids = {str(i) for i in (rj.get("dw_ids") or [])} if claimed_ids and claimed_ids != set(task.dw_ids): return VerifyOutcome.retry( f"result.json dw_ids {sorted(claimed_ids)} do not match the bundle's " diff --git a/tests/test_verify.py b/tests/test_verify.py index 89c497f..c5b0496 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -275,12 +275,17 @@ def test_verify_dev_bundle_dw_ids_mismatch(project): assert not out.ok and "dw_ids" in out.reason -@pytest.mark.parametrize("claim", [{}, {"dw_ids": []}], ids=["missing-key", "empty-list"]) +@pytest.mark.parametrize( + "claim", + [{}, {"dw_ids": []}, {"dw_ids": None}], + ids=["missing-key", "empty-list", "null"], +) def test_verify_dev_bundle_absent_dw_ids_passes(project, claim): # Generic bmad-dev-auto path: the primitive authors no dw ids, so result.json - # omits them (missing key) or carries an empty list. The orchestrator owns the - # bundle→dw-id binding, so verify must pass on an unclaimed bundle. The empty - # list is the literal payload that defered in production ("dw_ids []"). + # omits them (missing key), carries an empty list, or an explicit null. The + # orchestrator owns the bundle→dw-id binding, so verify must pass on an + # unclaimed bundle without crashing. The empty list is the literal payload + # that defered in production ("dw_ids []"). task = make_bundle_task(project) sp = project.implementation_artifacts / "spec-dw-test-bundle.md" write_spec(sp, "in-review", task.baseline_commit)