Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude-plugin/marketplace.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"name": "bauto",
"source": "./src/automator/data/skills",
"description": "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill",
"version": "0.7.3",
"version": "0.7.4",
"author": {
"name": "pinkyd"
},
Expand Down
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ All notable changes to `bmad-auto` are documented here. The format is based on
[Semantic Versioning](https://semver.org/spec/v2.0.0.html). While the project is pre-1.0,
breaking changes may land in a minor release.

## [0.7.4] — 2026-06-26

### Fixed

- **Deferred-work sweep no longer defers every bundle it just finished.** After the migration to
the generic upstream `bmad-dev-auto` primitive, bundle dev sessions completed the work but were
rejected by `verify_dev_bundle` with `result.json dw_ids [] do not match the bundle's […]`,
retried to budget, deferred, and rolled the work back — so a sweep could never close a bundled
entry. The retired dev fork used to echo the dw ids; the generic skill doesn't. The orchestrator
already owns the bundle→dw-id binding, so the cross-check now passes when the session claims no
ids, and the run exports `BMAD_AUTO_DW_IDS` so the synthesized result still carries them and the
check stays live.

## [0.7.3] — 2026-06-26

### Fixed
Expand Down Expand Up @@ -667,6 +680,7 @@ enforced in CI.
implementation phase, driven by a Python control loop with hook-based session transport and
resumable on-disk run state.

[0.7.4]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.4
[0.7.3]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.3
[0.7.2]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.2
[0.7.0]: https://github.com/bmad-code-org/bmad-auto/releases/tag/v0.7.0
Expand Down
2 changes: 1 addition & 1 deletion module.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
code: bauto
name: BMAD Auto Skills
description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill"
module_version: 0.7.3
module_version: 0.7.4
default_selected: false
module_greeting: >
BMAD Auto installed — both the automation skills and the
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "bmad-auto"
version = "0.7.3"
version = "0.7.4"
description = "Deterministic ralph-loop orchestrator for the BMAD implementation phase"
readme = "README.md"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion src/automator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
spec files, and the per-run directory under .automator/runs/.
"""

__version__ = "0.7.3"
__version__ = "0.7.4"
9 changes: 8 additions & 1 deletion src/automator/adapters/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,14 @@ def _result_json(self, handle: SessionHandle, spec: SessionSpec, *, wait: bool)
spec_path = devcontract.find_result_artifact(artifacts, since_ns=handle.launched_ns)
if spec_path is not None:
story_key = spec.env.get("BMAD_AUTO_STORY_KEY") or None
return devcontract.synthesize_result(spec_path, story_key=story_key).result_json
# Bundle dev sessions: the orchestrator exports the bundle's
# owned dw ids (the generic skill never authors them). Stamp
# them onto the result so verify_dev_bundle's cross-check passes.
raw_dw_ids = (spec.env.get("BMAD_AUTO_DW_IDS") or "").split(",")

@augmentcode augmentcode Bot Jun 26, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

src/automator/adapters/generic.py:331: (spec.env.get("BMAD_AUTO_DW_IDS") or "").split(",") still assumes a string; if a hook sets a truthy non-string (e.g., list/int), this will raise and can incorrectly mark a completed session as stalled.

Severity: low

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Declining this one, by design. spec.env is typed dict[str, str] and is consumed as the process environment ({**self.profile.env, **spec.env} at generic.py:118/142), which only accepts string values — a list/int there would fail at session launch, so the session could never reach the Stop event that calls _result_json. The guarded state is unreachable. And str()-coercing would be actively wrong: a list ["DW-1","DW-2"] stringifies to "['DW-1', 'DW-2']", which split(",") turns into garbage tokens → a spurious dw_ids mismatch, the exact false-defer this PR fixes. A crash on a hard type-contract violation is preferable to silently-wrong behavior; the shipped or "" guard already covers None, the only idiomatic unset sentinel.

dw_ids = [tok for tok in (i.strip() for i in raw_dw_ids) if tok]
return devcontract.synthesize_result(
spec_path, story_key=story_key, dw_ids=dw_ids or None
).result_json
if not wait or time.monotonic() >= deadline:
return None
time.sleep(RESULT_POLL_S)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
code: bauto
name: BMAD Auto Skills
description: "Automation-mode skills driven by the bmad-auto orchestrator: interactive escalation resolution (bmad-auto-resolve) and deferred-work sweep triage (bmad-auto-sweep) — the inner dev primitive (which self-reviews and commits) is the upstream bmad-dev-auto skill"
module_version: 0.7.3
module_version: 0.7.4
default_selected: false
module_greeting: >
BMAD Auto installed — both the automation skills and the
Expand Down
6 changes: 6 additions & 0 deletions src/automator/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,12 @@ def _run_session(
"BMAD_AUTO_TASK_ID": task_id,
"BMAD_AUTO_STORY_KEY": task.story_key,
}
if task.dw_ids:
# Deferred-work bundle: the orchestrator owns the bundle→dw-id binding
# (the generic bmad-dev-auto primitive knows nothing of dw ids). Export
# them so the generic adapter can stamp them onto the synthesized
# result.json, keeping verify_dev_bundle's dw_ids cross-check live.
env["BMAD_AUTO_DW_IDS"] = ",".join(task.dw_ids)
if role == "dev" and not self.policy.review.enabled:
# signals that the orchestrator will run no follow-up review session.
# bmad-dev-auto always self-reviews inline (step-03 → step-04) and
Expand Down
10 changes: 7 additions & 3 deletions src/automator/verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,11 @@ def verify_dev_bundle(
review_enabled: bool = True,
) -> VerifyOutcome:
"""verify_dev for a deferred-work bundle: bundles have no sprint-status
entry, but the session must claim exactly the dw ids the bundle owns."""
entry. The orchestrator owns the bundle→dw-id binding (``task.dw_ids``,
marked done by ``SweepEngine._post_dev_state_sync``); the generic
``bmad-dev-auto`` primitive never authors dw ids. So the dw_ids cross-check
is enforced only when the session actually claims them — an empty/absent
claim is the normal generic path and passes."""
rj = result_json or {}
spec_file = rj.get("spec_file")
if not spec_file:
Expand Down Expand Up @@ -739,8 +743,8 @@ def verify_dev_bundle(
except GitError as e:
return VerifyOutcome.escalate(str(e))

claimed_ids = {str(i) for i in rj.get("dw_ids", [])}
if claimed_ids != set(task.dw_ids):
claimed_ids = {str(i) for i in (rj.get("dw_ids") or [])}
if claimed_ids and claimed_ids != set(task.dw_ids):

@augmentcode augmentcode Bot Jun 26, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

src/automator/verify.py:747: claimed_ids = {str(i) for i in rj.get("dw_ids", [])} will raise if a result payload includes "dw_ids": null (or any non-iterable), which can crash bundle verification even though empty/absent claims are now treated as valid.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Valid — fixed in 3b55fa8. Our synthesize_result never emits a null dw_ids (it's gated if dw_ids:), but verify_dev_bundle is a standalone dict consumer with no launch-time protection, so rj.get("dw_ids", []) returning None on an explicit "dw_ids": null would crash the set comprehension. Now coerced with or []; the parametrized test_verify_dev_bundle_absent_dw_ids_passes gains a null case.

return VerifyOutcome.retry(
f"result.json dw_ids {sorted(claimed_ids)} do not match the bundle's "
f"{sorted(task.dw_ids)}"
Expand Down
42 changes: 42 additions & 0 deletions tests/test_generic_tmux.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,48 @@ def test_generic_dev_synthesizes_done_spec(tmp_path):
assert rj["baseline_commit"] == "abc123" # mapped from baseline_revision
assert rj["story_key"] == "3-1"
assert rj["escalations"] == []
assert "dw_ids" not in rj # a normal story exports no BMAD_AUTO_DW_IDS


def test_generic_dev_bundle_stamps_dw_ids_from_env(tmp_path):
    # Bundle dw ids are owned by the orchestrator and handed to the session via
    # BMAD_AUTO_DW_IDS; the generic skill never writes them itself. The adapter
    # must copy them onto the synthesized result and strip stray whitespace
    # around each id (the env value below is deliberately "DW-1, DW-2", as a
    # hand-set or hook-rewritten value might be).
    adapter, impl = make_dev_adapter(tmp_path)
    done_spec_text = (
        "---\nstatus: done\nbaseline_revision: abc123\n---\n\n"
        "## Auto Run Result\n\nStatus: done\nResolved the bundle.\n"
    )
    (impl / "spec-dw-bundle.md").write_text(done_spec_text)
    bundle_env = {
        "BMAD_AUTO_STORY_KEY": "dw-bundle",
        "BMAD_AUTO_DW_IDS": "DW-1, DW-2",
    }
    spec = SessionSpec(
        task_id="3-1-dev-1",
        role="dev",
        prompt="/bmad-dev-auto bundle",
        cwd=tmp_path,
        env=bundle_env,
    )
    result = adapter._result_json(_dev_handle(), spec, wait=True)
    assert result["dw_ids"] == ["DW-1", "DW-2"]


def test_generic_dev_dw_ids_none_env_does_not_crash(tmp_path):
    # BMAD_AUTO_DW_IDS can show up as None when a misbehaving plugin/hook nulls
    # it instead of removing the key. Result synthesis must survive that (a
    # crash would false-stall a session that actually completed) and the
    # synthesized result must carry no dw ids.
    adapter, impl = make_dev_adapter(tmp_path)
    done_spec_text = (
        "---\nstatus: done\nbaseline_revision: abc123\n---\n\n"
        "## Auto Run Result\n\nStatus: done\nImplemented the thing.\n"
    )
    (impl / "spec-3-1-foo.md").write_text(done_spec_text)
    session_env = {"BMAD_AUTO_STORY_KEY": "3-1", "BMAD_AUTO_DW_IDS": None}
    spec = SessionSpec(
        task_id="3-1-dev-1",
        role="dev",
        prompt="/bmad-dev-auto 3-1",
        cwd=tmp_path,
        env=session_env,
    )
    result = adapter._result_json(_dev_handle(), spec, wait=True)
    assert result["status"] == "done"
    assert "dw_ids" not in result
assert "dw_ids" not in rj


def test_generic_dev_finds_spec_in_worktree(tmp_path):
Expand Down
21 changes: 21 additions & 0 deletions tests/test_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,27 @@ def test_verify_dev_bundle_dw_ids_mismatch(project):
assert not out.ok and "dw_ids" in out.reason


@pytest.mark.parametrize(
    "claim",
    [{}, {"dw_ids": []}, {"dw_ids": None}],
    ids=["missing-key", "empty-list", "null"],
)
def test_verify_dev_bundle_absent_dw_ids_passes(project, claim):
    # Generic bmad-dev-auto path: the primitive authors no dw ids, so the
    # result.json either omits the key, carries an empty list, or carries an
    # explicit null. The bundle→dw-id binding belongs to the orchestrator, so
    # verification of an unclaimed bundle must pass and must not crash. The
    # empty-list case is the literal payload that was deferred in production
    # ("dw_ids []").
    task = make_bundle_task(project)
    spec_path = project.implementation_artifacts / "spec-dw-test-bundle.md"
    write_spec(spec_path, "in-review", task.baseline_commit)
    (project.project / "src.txt").write_text("changed\n")
    result_json = {"workflow": "auto-dev", "spec_file": str(spec_path)}
    result_json.update(claim)
    outcome = verify.verify_dev_bundle(task, project, result_json)
    assert outcome.ok
    assert task.spec_file == str(spec_path)


def test_verify_review_bundle_ledger_gate(project):
task = make_bundle_task(project)
sp = project.implementation_artifacts / "spec-dw-test-bundle.md"
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading