From e2a8636e746628e3388d98dc26ecba507d30ef47 Mon Sep 17 00:00:00 2001
From: pbean <paulbeanjr@gmail.com>
Date: Fri, 26 Jun 2026 11:31:20 -0700
Subject: [PATCH 1/5] fix(bmad-auto): stop deferred-work sweep from deferring
 every bundle it finished
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After the migration to the generic upstream bmad-dev-auto primitive, bundle dev
sessions completed the work but verify_dev_bundle rejected them with
"result.json dw_ids [] do not match the bundle's […]", retried to budget,
deferred, and rolled the work back — so a sweep could never close a bundled
entry. The retired dev fork echoed the dw ids; the generic skill does not, and
the adapter synthesized result.json without them.

The orchestrator already owns the bundle→dw-id binding (_post_dev_state_sync),
so verify_dev_bundle now enforces the dw_ids cross-check only when the session
actually claims ids — an absent claim is the normal generic path and passes. As
hardening, the run exports BMAD_AUTO_DW_IDS and the generic adapter stamps them
onto the synthesized result so the cross-check stays live.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/automator/adapters/generic.py |  8 +++++++-
 src/automator/engine.py           |  6 ++++++
 src/automator/verify.py           |  8 ++++++--
 tests/test_verify.py              | 14 ++++++++++++++
 4 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/automator/adapters/generic.py b/src/automator/adapters/generic.py
index 5515b50..16b626e 100644
--- a/src/automator/adapters/generic.py
+++ b/src/automator/adapters/generic.py
@@ -325,7 +325,13 @@ def _result_json(self, handle: SessionHandle, spec: SessionSpec, *, wait: bool)
                 spec_path = devcontract.find_result_artifact(artifacts, since_ns=handle.launched_ns)
                 if spec_path is not None:
                     story_key = spec.env.get("BMAD_AUTO_STORY_KEY") or None
-                    return devcontract.synthesize_result(spec_path, story_key=story_key).result_json
+                    # Bundle dev sessions: the orchestrator exports the bundle's
+                    # owned dw ids (the generic skill never authors them). Stamp
+                    # them onto the result so verify_dev_bundle's cross-check passes.
+                    dw_ids = [i for i in spec.env.get("BMAD_AUTO_DW_IDS", "").split(",") if i]
+                    return devcontract.synthesize_result(
+                        spec_path, story_key=story_key, dw_ids=dw_ids or None
+                    ).result_json
             if not wait or time.monotonic() >= deadline:
                 return None
             time.sleep(RESULT_POLL_S)
diff --git a/src/automator/engine.py b/src/automator/engine.py
index 401ac34..f473b47 100644
--- a/src/automator/engine.py
+++ b/src/automator/engine.py
@@ -1381,6 +1381,12 @@ def _run_session(
             "BMAD_AUTO_TASK_ID": task_id,
             "BMAD_AUTO_STORY_KEY": task.story_key,
         }
+        if task.dw_ids:
+            # Deferred-work bundle: the orchestrator owns the bundle→dw-id binding
+            # (the generic bmad-dev-auto primitive knows nothing of dw ids). Export
+            # them so the generic adapter can stamp them onto the synthesized
+            # result.json, keeping verify_dev_bundle's dw_ids cross-check live.
+            env["BMAD_AUTO_DW_IDS"] = ",".join(task.dw_ids)
         if role == "dev" and not self.policy.review.enabled:
             # signals that the orchestrator will run no follow-up review session.
             # bmad-dev-auto always self-reviews inline (step-03 → step-04) and
diff --git a/src/automator/verify.py b/src/automator/verify.py
index 35e34a4..85fa164 100644
--- a/src/automator/verify.py
+++ b/src/automator/verify.py
@@ -702,7 +702,11 @@ def verify_dev_bundle(
     review_enabled: bool = True,
 ) -> VerifyOutcome:
     """verify_dev for a deferred-work bundle: bundles have no sprint-status
-    entry, but the session must claim exactly the dw ids the bundle owns."""
+    entry. The orchestrator owns the bundle→dw-id binding (``task.dw_ids``,
+    marked done by ``SweepEngine._post_dev_state_sync``); the generic
+    ``bmad-dev-auto`` primitive never authors dw ids. So the dw_ids cross-check
+    is enforced only when the session actually claims them — an empty/absent
+    claim is the normal generic path and passes."""
     rj = result_json or {}
     spec_file = rj.get("spec_file")
     if not spec_file:
@@ -740,7 +744,7 @@ def verify_dev_bundle(
             return VerifyOutcome.escalate(str(e))
 
     claimed_ids = {str(i) for i in rj.get("dw_ids", [])}
-    if claimed_ids != set(task.dw_ids):
+    if claimed_ids and claimed_ids != set(task.dw_ids):
         return VerifyOutcome.retry(
             f"result.json dw_ids {sorted(claimed_ids)} do not match the bundle's "
             f"{sorted(task.dw_ids)}"
diff --git a/tests/test_verify.py b/tests/test_verify.py
index d29991e..ceb9e6c 100644
--- a/tests/test_verify.py
+++ b/tests/test_verify.py
@@ -275,6 +275,20 @@ def test_verify_dev_bundle_dw_ids_mismatch(project):
     assert not out.ok and "dw_ids" in out.reason
 
 
+def test_verify_dev_bundle_absent_dw_ids_passes(project):
+    # Generic bmad-dev-auto path: the primitive authors no dw ids, so result.json
+    # omits them. The orchestrator owns the bundle→dw-id binding, so verify must
+    # pass on an absent claim (regression: this used to always retry → defer).
+    task = make_bundle_task(project)
+    sp = project.implementation_artifacts / "spec-dw-test-bundle.md"
+    write_spec(sp, "in-review", task.baseline_commit)
+    (project.project / "src.txt").write_text("changed\n")
+    rj = {"workflow": "auto-dev", "spec_file": str(sp)}
+    out = verify.verify_dev_bundle(task, project, rj)
+    assert out.ok
+    assert task.spec_file == str(sp)
+
+
 def test_verify_review_bundle_ledger_gate(project):
     task = make_bundle_task(project)
     sp = project.implementation_artifacts / "spec-dw-test-bundle.md"

From 4276c67af274e05a0c8aead7fd50a02adeda9e75 Mon Sep 17 00:00:00 2001
From: pbean <paulbeanjr@gmail.com>
Date: Fri, 26 Jun 2026 11:38:49 -0700
Subject: [PATCH 2/5] =?UTF-8?q?chore(release):=200.7.4=20=E2=80=94=20Defer?=
 =?UTF-8?q?red-work=20sweep=20no=20longer=20defers=20every=20bundle=20it?=
 =?UTF-8?q?=20just=20fi=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .claude-plugin/marketplace.json                    |  2 +-
 CHANGELOG.md                                       | 14 ++++++++++++++
 module.yaml                                        |  2 +-
 pyproject.toml                                     |  2 +-
 src/automator/__init__.py                          |  2 +-
 .../data/skills/bmad-auto-setup/assets/module.yaml |  2 +-
 uv.lock                                            |  2 +-
 7 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index a766a21..d52867a 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -12,7 +12,7 @@
       "name": "bauto",
       "source": "./src/automator/data/skills",
       "description": "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill",
-      "version": "0.7.3",
+      "version": "0.7.4",
       "author": {
         "name": "pinkyd"
       },
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ff7ffe..c301a57 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,19 @@ All notable changes to `bmad-auto` are documented here. The format is based on
 [Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0,
 breaking changes may land in a minor release.
 
+## [0.7.4] — 2026-06-26
+
+### Fixed
+
+- **Deferred-work sweep no longer defers every bundle it just finished.** After the migration to
+  the generic upstream `bmad-dev-auto` primitive, bundle dev sessions completed the work but were
+  rejected by `verify_dev_bundle` with `result.json dw_ids [] do not match the bundle's […]`,
+  retried to budget, deferred, and rolled the work back — so a sweep could never close a bundled
+  entry. The retired dev fork used to echo the dw ids; the generic skill doesn't. The orchestrator
+  already owns the bundle→dw-id binding, so the cross-check now passes when the session claims no
+  ids, and the run exports `BMAD_AUTO_DW_IDS` so the synthesized result still carries them and the
+  check stays live.
+
 ## [0.7.3] — 2026-06-26
 
 ### Fixed
@@ -667,6 +680,7 @@ enforced in CI.
   implementation phase, driven by a Python control loop with hook-based session transport and
   resumable on-disk run state.
 
+[0.7.4]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.4
 [0.7.3]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.3
 [0.7.2]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.2
 [0.7.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.0
diff --git a/module.yaml b/module.yaml
index 752af0a..38998aa 100644
--- a/module.yaml
+++ b/module.yaml
@@ -1,7 +1,7 @@
 code: bauto
 name: BMAD Auto Skills
 description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill"
-module_version: 0.7.3
+module_version: 0.7.4
 default_selected: false
 module_greeting: >
   BMAD Auto installed — both the automation skills and the
diff --git a/pyproject.toml b/pyproject.toml
index d98feac..1f196f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "bmad-auto"
-version = "0.7.3"
+version = "0.7.4"
 description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase"
 readme = "README.md"
 license = "MIT"
diff --git a/src/automator/__init__.py b/src/automator/__init__.py
index 447fcd2..8136e31 100644
--- a/src/automator/__init__.py
+++ b/src/automator/__init__.py
@@ -6,4 +6,4 @@
 spec files, and the per-run directory under .automator/runs/.
 """
 
-__version__ = "0.7.3"
+__version__ = "0.7.4"
diff --git a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml
index 752af0a..38998aa 100644
--- a/src/automator/data/skills/bmad-auto-setup/assets/module.yaml
+++ b/src/automator/data/skills/bmad-auto-setup/assets/module.yaml
@@ -1,7 +1,7 @@
 code: bauto
 name: BMAD Auto Skills
 description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill"
-module_version: 0.7.3
+module_version: 0.7.4
 default_selected: false
 module_greeting: >
   BMAD Auto installed — both the automation skills and the
diff --git a/uv.lock b/uv.lock
index bcc6581..818456f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4,7 +4,7 @@ requires-python = ">=3.11"
 
 [[package]]
 name = "bmad-auto"
-version = "0.7.3"
+version = "0.7.4"
 source = { editable = "." }
 dependencies = [
     { name = "pyyaml" },

From 4ae716fa4f499186482be8990e144c54c0911748 Mon Sep 17 00:00:00 2001
From: pbean <paulbeanjr@gmail.com>
Date: Fri, 26 Jun 2026 11:52:08 -0700
Subject: [PATCH 3/5] fix(bmad-auto): trim whitespace when parsing
 BMAD_AUTO_DW_IDS

The orchestrator joins the env value without spaces, but a hook-rewritten or
hand-set "DW-1, DW-2" would parse to " DW-2" and trip verify_dev_bundle's
dw_ids cross-check into a spurious defer. Strip each token. Adds a generic-
adapter test covering the whitespace-tolerant parse.

Addresses PR #17 review feedback.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/automator/adapters/generic.py |  3 ++-
 tests/test_generic_tmux.py        | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/automator/adapters/generic.py b/src/automator/adapters/generic.py
index 16b626e..5dcecbe 100644
--- a/src/automator/adapters/generic.py
+++ b/src/automator/adapters/generic.py
@@ -328,7 +328,8 @@ def _result_json(self, handle: SessionHandle, spec: SessionSpec, *, wait: bool)
                     # Bundle dev sessions: the orchestrator exports the bundle's
                     # owned dw ids (the generic skill never authors them). Stamp
                     # them onto the result so verify_dev_bundle's cross-check passes.
-                    dw_ids = [i for i in spec.env.get("BMAD_AUTO_DW_IDS", "").split(",") if i]
+                    raw_dw_ids = spec.env.get("BMAD_AUTO_DW_IDS", "").split(",")
+                    dw_ids = [tok for tok in (i.strip() for i in raw_dw_ids) if tok]
                     return devcontract.synthesize_result(
                         spec_path, story_key=story_key, dw_ids=dw_ids or None
                     ).result_json
diff --git a/tests/test_generic_tmux.py b/tests/test_generic_tmux.py
index da6fafd..319e4be 100644
--- a/tests/test_generic_tmux.py
+++ b/tests/test_generic_tmux.py
@@ -234,6 +234,27 @@ def test_generic_dev_synthesizes_done_spec(tmp_path):
     assert rj["baseline_commit"] == "abc123"  # mapped from baseline_revision
     assert rj["story_key"] == "3-1"
     assert rj["escalations"] == []
+    assert "dw_ids" not in rj  # a normal story exports no BMAD_AUTO_DW_IDS
+
+
+def test_generic_dev_bundle_stamps_dw_ids_from_env(tmp_path):
+    # The orchestrator exports the bundle's owned dw ids; the generic skill never
+    # authors them. The adapter stamps them onto the synthesized result, tolerant
+    # of whitespace in the env value (e.g. a hand-set or hook-rewritten "DW-1, DW-2").
+    adapter, impl = make_dev_adapter(tmp_path)
+    (impl / "spec-dw-bundle.md").write_text(
+        "---\nstatus: done\nbaseline_revision: abc123\n---\n\n"
+        "## Auto Run Result\n\nStatus: done\nResolved the bundle.\n"
+    )
+    spec = SessionSpec(
+        task_id="3-1-dev-1",
+        role="dev",
+        prompt="/bmad-dev-auto bundle",
+        cwd=tmp_path,
+        env={"BMAD_AUTO_STORY_KEY": "dw-bundle", "BMAD_AUTO_DW_IDS": "DW-1, DW-2"},
+    )
+    rj = adapter._result_json(_dev_handle(), spec, wait=True)
+    assert rj["dw_ids"] == ["DW-1", "DW-2"]
 
 
 def test_generic_dev_finds_spec_in_worktree(tmp_path):

From e277c34b462aef7c86021b85af318349e0b9b764 Mon Sep 17 00:00:00 2001
From: pbean <paulbeanjr@gmail.com>
Date: Fri, 26 Jun 2026 12:17:34 -0700
Subject: [PATCH 4/5] fix(bmad-auto): guard BMAD_AUTO_DW_IDS parse against a
 non-string env value

A plugin/session hook that sets BMAD_AUTO_DW_IDS to None (rather than deleting
it) would make `.split(",")` raise and crash result synthesis, false-stalling a
completed session. Coerce to "" using the same `or` fallback idiom as the
adjacent story_key line.
Adds tests: a None-valued env no longer crashes synthesis, and an explicit empty
dw_ids list (the literal payload that deferred in production) passes verify.

Addresses PR #17 review feedback.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/automator/adapters/generic.py |  2 +-
 tests/test_generic_tmux.py        | 21 +++++++++++++++++++++
 tests/test_verify.py              | 10 ++++++----
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/automator/adapters/generic.py b/src/automator/adapters/generic.py
index 5dcecbe..4dcd663 100644
--- a/src/automator/adapters/generic.py
+++ b/src/automator/adapters/generic.py
@@ -328,7 +328,7 @@ def _result_json(self, handle: SessionHandle, spec: SessionSpec, *, wait: bool)
                     # Bundle dev sessions: the orchestrator exports the bundle's
                     # owned dw ids (the generic skill never authors them). Stamp
                     # them onto the result so verify_dev_bundle's cross-check passes.
-                    raw_dw_ids = spec.env.get("BMAD_AUTO_DW_IDS", "").split(",")
+                    raw_dw_ids = (spec.env.get("BMAD_AUTO_DW_IDS") or "").split(",")
                     dw_ids = [tok for tok in (i.strip() for i in raw_dw_ids) if tok]
                     return devcontract.synthesize_result(
                         spec_path, story_key=story_key, dw_ids=dw_ids or None
diff --git a/tests/test_generic_tmux.py b/tests/test_generic_tmux.py
index 319e4be..6bd42cf 100644
--- a/tests/test_generic_tmux.py
+++ b/tests/test_generic_tmux.py
@@ -257,6 +257,27 @@ def test_generic_dev_bundle_stamps_dw_ids_from_env(tmp_path):
     assert rj["dw_ids"] == ["DW-1", "DW-2"]
 
 
+def test_generic_dev_dw_ids_none_env_does_not_crash(tmp_path):
+    # A misbehaving plugin/hook could set BMAD_AUTO_DW_IDS to None instead of
+    # deleting it; synthesis must not crash (it would false-stall a completed
+    # session), and emits no dw ids.
+    adapter, impl = make_dev_adapter(tmp_path)
+    (impl / "spec-3-1-foo.md").write_text(
+        "---\nstatus: done\nbaseline_revision: abc123\n---\n\n"
+        "## Auto Run Result\n\nStatus: done\nImplemented the thing.\n"
+    )
+    spec = SessionSpec(
+        task_id="3-1-dev-1",
+        role="dev",
+        prompt="/bmad-dev-auto 3-1",
+        cwd=tmp_path,
+        env={"BMAD_AUTO_STORY_KEY": "3-1", "BMAD_AUTO_DW_IDS": None},
+    )
+    rj = adapter._result_json(_dev_handle(), spec, wait=True)
+    assert rj["status"] == "done"
+    assert "dw_ids" not in rj
+
+
 def test_generic_dev_finds_spec_in_worktree(tmp_path):
     # Under worktree isolation the skill runs with cwd set to the worktree and
     # leaves its terminal spec in the worktree's rebased implementation-artifacts
diff --git a/tests/test_verify.py b/tests/test_verify.py
index ceb9e6c..89c497f 100644
--- a/tests/test_verify.py
+++ b/tests/test_verify.py
@@ -275,15 +275,17 @@ def test_verify_dev_bundle_dw_ids_mismatch(project):
     assert not out.ok and "dw_ids" in out.reason
 
 
-def test_verify_dev_bundle_absent_dw_ids_passes(project):
+@pytest.mark.parametrize("claim", [{}, {"dw_ids": []}], ids=["missing-key", "empty-list"])
+def test_verify_dev_bundle_absent_dw_ids_passes(project, claim):
     # Generic bmad-dev-auto path: the primitive authors no dw ids, so result.json
-    # omits them. The orchestrator owns the bundle→dw-id binding, so verify must
-    # pass on an absent claim (regression: this used to always retry → defer).
+    # omits them (missing key) or carries an empty list. The orchestrator owns the
+    # bundle→dw-id binding, so verify must pass on an unclaimed bundle. The empty
+    # list is the literal payload that defered in production ("dw_ids []").
     task = make_bundle_task(project)
     sp = project.implementation_artifacts / "spec-dw-test-bundle.md"
     write_spec(sp, "in-review", task.baseline_commit)
     (project.project / "src.txt").write_text("changed\n")
-    rj = {"workflow": "auto-dev", "spec_file": str(sp)}
+    rj = {"workflow": "auto-dev", "spec_file": str(sp), **claim}
     out = verify.verify_dev_bundle(task, project, rj)
     assert out.ok
     assert task.spec_file == str(sp)

From 3b55fa82603818f54e314d58177ac5a20895044b Mon Sep 17 00:00:00 2001
From: pbean <paulbeanjr@gmail.com>
Date: Fri, 26 Jun 2026 12:49:58 -0700
Subject: [PATCH 5/5] fix(bmad-auto): guard verify_dev_bundle against an
 explicit dw_ids null

rj.get("dw_ids", []) returns None on a "dw_ids": null payload (the default only
covers an absent key), so the set comprehension raised TypeError and crashed
bundle verification. Coerce with `or []`. Extends the parametrized test with the
null case.

Addresses PR #17 review feedback.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/automator/verify.py |  2 +-
 tests/test_verify.py    | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/automator/verify.py b/src/automator/verify.py
index 85fa164..143785d 100644
--- a/src/automator/verify.py
+++ b/src/automator/verify.py
@@ -743,7 +743,7 @@ def verify_dev_bundle(
         except GitError as e:
             return VerifyOutcome.escalate(str(e))
 
-    claimed_ids = {str(i) for i in rj.get("dw_ids", [])}
+    claimed_ids = {str(i) for i in (rj.get("dw_ids") or [])}
     if claimed_ids and claimed_ids != set(task.dw_ids):
         return VerifyOutcome.retry(
             f"result.json dw_ids {sorted(claimed_ids)} do not match the bundle's "
diff --git a/tests/test_verify.py b/tests/test_verify.py
index 89c497f..c5b0496 100644
--- a/tests/test_verify.py
+++ b/tests/test_verify.py
@@ -275,12 +275,17 @@ def test_verify_dev_bundle_dw_ids_mismatch(project):
     assert not out.ok and "dw_ids" in out.reason
 
 
-@pytest.mark.parametrize("claim", [{}, {"dw_ids": []}], ids=["missing-key", "empty-list"])
+@pytest.mark.parametrize(
+    "claim",
+    [{}, {"dw_ids": []}, {"dw_ids": None}],
+    ids=["missing-key", "empty-list", "null"],
+)
 def test_verify_dev_bundle_absent_dw_ids_passes(project, claim):
     # Generic bmad-dev-auto path: the primitive authors no dw ids, so result.json
-    # omits them (missing key) or carries an empty list. The orchestrator owns the
-    # bundle→dw-id binding, so verify must pass on an unclaimed bundle. The empty
-    # list is the literal payload that defered in production ("dw_ids []").
+    # omits them (missing key), carries an empty list, or an explicit null. The
+    # orchestrator owns the bundle→dw-id binding, so verify must pass on an
+    # unclaimed bundle without crashing. The empty list is the literal payload
+    # that deferred in production ("dw_ids []").
     task = make_bundle_task(project)
     sp = project.implementation_artifacts / "spec-dw-test-bundle.md"
     write_spec(sp, "in-review", task.baseline_commit)