Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
#
# ANTHROPIC_API_KEY=sk-ant-api03-REPLACE_ME
# Optional model pin
# ANTHROPIC_MODEL=claude-sonnet-4-7-20260101
# ANTHROPIC_MODEL=claude-sonnet-4-6
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ max_budget_usd = 5.0 # per-pipeline budget cap (>= 0)
persona = "falcor" # CLI output persona

[models] # bring your own provider key — strings live here
reasoning = "claude-opus-4-7" # researcher, reviewer, synthesizer, analyst
reasoning = "claude-opus-4-8" # researcher, reviewer, synthesizer, analyst
fast = "claude-haiku-4-5" # tester, implementer, verifier, publisher, closer
balanced = "claude-sonnet-4-6" # fallback for unknown role strings

Expand Down
2 changes: 1 addition & 1 deletion src/bonfire/dispatch/pydantic_ai_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class PydanticAIBackend:
Parameters
----------
model
The model identifier string (e.g. ``"claude-sonnet-4-20250514"``).
The model identifier string (e.g. ``"claude-sonnet-4-6"``).
"""

def __init__(self, model: str = "") -> None:
Expand Down
2 changes: 1 addition & 1 deletion src/bonfire/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class ModelsConfig(BaseModel):
configured backend.
"""

reasoning: str = "claude-opus-4-7"
reasoning: str = "claude-opus-4-8"
fast: str = "claude-haiku-4-5"
balanced: str = "claude-sonnet-4-6"

Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/prompts/runner-prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ The project at /workspace/target has one deliberately broken test. Your job is t
Append two JSONL lines (one JSON object per line, newline-terminated). Use today's unix timestamp for `timestamp` (call `date +%s.%N`). Use the SESSION_ID env var for `session_id`. Approximate the cost and duration based on what you observed.

Line 1 (DispatchRecord — your work counts as one dispatch):
{"type":"dispatch","timestamp":1714564800.123,"session_id":"<SESSION_ID>","agent_name":"claude-cli","cost_usd":0.42,"duration_seconds":187.5,"model":"claude-sonnet-4-7-20260101"}
{"type":"dispatch","timestamp":1714564800.123,"session_id":"<SESSION_ID>","agent_name":"claude-cli","cost_usd":0.42,"duration_seconds":187.5,"model":"claude-sonnet-4-6"}

Line 2 (PipelineRecord — your work counts as a one-stage pipeline):
{"type":"pipeline","timestamp":1714564800.456,"session_id":"<SESSION_ID>","total_cost_usd":0.42,"duration_seconds":187.5,"stages_completed":1}
Expand Down
12 changes: 6 additions & 6 deletions tests/integration/test_budget_enforcement.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,9 @@ async def test_max_budget_usd_reaches_claude_agent_options(self) -> None:
):
backend = ClaudeSDKBackend()
await backend.execute(
Envelope(task="t", agent_name="scout-agent", model="claude-opus-4-7"),
Envelope(task="t", agent_name="scout-agent", model="claude-opus-4-8"),
options=DispatchOptions(
model="claude-opus-4-7",
model="claude-opus-4-8",
max_budget_usd=3.14,
max_turns=7,
),
Expand All @@ -334,7 +334,7 @@ async def test_max_budget_usd_zero_default_preserved(self) -> None:
# Default DispatchOptions.max_budget_usd is 0.0 per protocols.py:62.
await backend.execute(
Envelope(task="t", agent_name="scout-agent"),
options=DispatchOptions(model="claude-opus-4-7"),
options=DispatchOptions(model="claude-opus-4-8"),
)

assert captured["max_budget_usd"] == pytest.approx(0.0)
Expand All @@ -359,9 +359,9 @@ async def test_max_turns_reaches_claude_agent_options(self) -> None:
):
backend = ClaudeSDKBackend()
await backend.execute(
Envelope(task="t", agent_name="knight-agent", model="claude-opus-4-7"),
Envelope(task="t", agent_name="knight-agent", model="claude-opus-4-8"),
options=DispatchOptions(
model="claude-opus-4-7",
model="claude-opus-4-8",
max_turns=42,
max_budget_usd=1.0,
),
Expand All @@ -381,7 +381,7 @@ async def test_max_turns_default_reaches_sdk(self) -> None:
backend = ClaudeSDKBackend()
await backend.execute(
Envelope(task="t", agent_name="scout-agent"),
options=DispatchOptions(model="claude-opus-4-7"),
options=DispatchOptions(model="claude-opus-4-8"),
)

assert captured["max_turns"] == 10
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_bard_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,7 @@ async def test_custom_config_does_not_override_slug_constants(
github_client,
) -> None:
"""slug_max_len + suffix_chars are module-scope constants, not config knobs."""
cfg = PipelineConfig(model="claude-opus-4-7")
cfg = PipelineConfig(model="claude-opus-4-8")
handler = BardHandler(
git_workflow=git_workflow,
github_client=github_client,
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,13 @@ def test_default_construction(self):
def test_custom_values(self):
p = PipelineConfig(
tier="pro",
model="claude-opus-4",
model="claude-opus-4-8",
max_turns=20,
max_budget_usd=50.0,
persona="anta",
)
assert p.tier == "pro"
assert p.model == "claude-opus-4"
assert p.model == "claude-opus-4-8"
assert p.max_turns == 20
assert p.max_budget_usd == 50.0
assert p.persona == "anta"
Expand Down Expand Up @@ -380,7 +380,7 @@ def test_agents_from_toml(self, tmp_path, monkeypatch):
# Locks the ``[models]`` TOML schema delivered by BON-350:
#
# * §D5 — three string fields (``reasoning``/``fast``/``balanced``)
# defaulting to ``claude-opus-4-7``/``claude-haiku-4-5``/``claude-sonnet-4-6``.
# defaulting to ``claude-opus-4-8``/``claude-haiku-4-5``/``claude-sonnet-4-6``.
# * §D5 — TOML ``[models]`` section loads onto ``BonfireSettings.models``;
# missing section falls back to defaults (backward-compatible).
# * §D5 — Arbitrary strings accepted (BYOK passthrough).
Expand All @@ -399,7 +399,7 @@ def test_default_construction_recommends_anthropic(self):
from bonfire.models.config import ModelsConfig

m = ModelsConfig()
assert m.reasoning == "claude-opus-4-7"
assert m.reasoning == "claude-opus-4-8"
assert m.fast == "claude-haiku-4-5"
assert m.balanced == "claude-sonnet-4-6"

Expand Down Expand Up @@ -453,13 +453,13 @@ class TestModelsConfigTomlPartialOverride:
),
(
'[models]\nfast = "Y"\n',
"claude-opus-4-7",
"claude-opus-4-8",
"Y",
"claude-sonnet-4-6",
),
(
'[models]\nbalanced = "Z"\n',
"claude-opus-4-7",
"claude-opus-4-8",
"claude-haiku-4-5",
"Z",
),
Expand Down
34 changes: 17 additions & 17 deletions tests/unit/test_cost_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,15 +576,15 @@ def test_groups_records_by_model(self, ledger_path: Path) -> None:
agent_name="a",
cost_usd=0.10,
duration_seconds=1.0,
model="claude-opus-4-7",
model="claude-opus-4-8",
),
DispatchRecord(
timestamp=2.0,
session_id="s",
agent_name="b",
cost_usd=0.05,
duration_seconds=2.0,
model="claude-opus-4-7",
model="claude-opus-4-8",
),
DispatchRecord(
timestamp=3.0,
Expand All @@ -601,7 +601,7 @@ def test_groups_records_by_model(self, ledger_path: Path) -> None:
assert len(results) == 2
assert all(isinstance(r, ModelCost) for r in results)
models_to_records = {r.model: r for r in results}
assert set(models_to_records) == {"claude-opus-4-7", "claude-haiku-4-5"}
assert set(models_to_records) == {"claude-opus-4-8", "claude-haiku-4-5"}

def test_sort_descending_by_cost(self, ledger_path: Path) -> None:
"""Sage memo D8 — sort key is ``total_cost_usd``, descending. Same
Expand Down Expand Up @@ -659,7 +659,7 @@ def test_legacy_empty_model_grouped_visible(self, ledger_path: Path) -> None:
agent_name="modern-b",
cost_usd=0.10,
duration_seconds=1.0,
model="claude-opus-4-7",
model="claude-opus-4-8",
),
]
_write_records(ledger_path, recs)
Expand All @@ -668,7 +668,7 @@ def test_legacy_empty_model_grouped_visible(self, ledger_path: Path) -> None:
assert len(results) == 2
models = {r.model for r in results}
assert "" in models
assert "claude-opus-4-7" in models
assert "claude-opus-4-8" in models

def test_dispatch_count_correct(self, ledger_path: Path) -> None:
"""Sage memo D8 — ``dispatch_count`` reflects the number of records
Expand Down Expand Up @@ -704,15 +704,15 @@ def test_total_duration_summed(self, ledger_path: Path) -> None:
agent_name="a",
cost_usd=0.01,
duration_seconds=12.5,
model="claude-opus-4-7",
model="claude-opus-4-8",
),
DispatchRecord(
timestamp=2.0,
session_id="s",
agent_name="b",
cost_usd=0.02,
duration_seconds=7.5,
model="claude-opus-4-7",
model="claude-opus-4-8",
),
]
_write_records(ledger_path, recs)
Expand Down Expand Up @@ -765,7 +765,7 @@ def test_pathological_corpus_groups_correctly_and_sorts(self, ledger_path: Path)
spec'd dict-based group, it stays fast.

Cost layout (deterministic):
claude-opus-4-7 -> 4000 records * 0.04 = 160.00
claude-opus-4-8 -> 4000 records * 0.04 = 160.00
claude-sonnet-4-6 -> 3000 records * 0.03 = 90.00
claude-haiku-4-5 -> 2000 records * 0.02 = 40.00
claude-extra-1 -> 900 records * 0.05 = 45.00
Expand All @@ -778,7 +778,7 @@ def test_pathological_corpus_groups_correctly_and_sorts(self, ledger_path: Path)

n_total = 10_000
layout: list[tuple[str, int, float]] = [
("claude-opus-4-7", 4000, 0.04),
("claude-opus-4-8", 4000, 0.04),
("claude-sonnet-4-6", 3000, 0.03),
("claude-haiku-4-5", 2000, 0.02),
("claude-extra-1", 900, 0.05),
Expand Down Expand Up @@ -811,16 +811,16 @@ def test_pathological_corpus_groups_correctly_and_sorts(self, ledger_path: Path)
# Descending by total_cost_usd.
names = [m.model for m in results]
assert names == [
"claude-opus-4-7",
"claude-opus-4-8",
"claude-sonnet-4-6",
"claude-extra-1",
"claude-haiku-4-5",
"claude-extra-2",
]
# Spot-check totals.
by_name = {m.model: m for m in results}
assert by_name["claude-opus-4-7"].total_cost_usd == pytest.approx(160.0)
assert by_name["claude-opus-4-7"].dispatch_count == 4000
assert by_name["claude-opus-4-8"].total_cost_usd == pytest.approx(160.0)
assert by_name["claude-opus-4-8"].dispatch_count == 4000
assert by_name["claude-haiku-4-5"].total_cost_usd == pytest.approx(40.0)
assert by_name["claude-extra-2"].dispatch_count == 100

Expand Down Expand Up @@ -855,7 +855,7 @@ def test_mixed_legacy_and_new_rows_round_trip(self, ledger_path: Path) -> None:
agent_name="a",
cost_usd=0.30,
duration_seconds=2.0,
model="claude-opus-4-7",
model="claude-opus-4-8",
)
new_b = DispatchRecord(
timestamp=5.0,
Expand All @@ -873,23 +873,23 @@ def test_mixed_legacy_and_new_rows_round_trip(self, ledger_path: Path) -> None:

# Three buckets: "", opus, haiku.
by_name = {m.model: m for m in results}
assert set(by_name) == {"", "claude-opus-4-7", "claude-haiku-4-5"}
assert set(by_name) == {"", "claude-opus-4-8", "claude-haiku-4-5"}

# Legacy bucket is preserved as a visible "" key with summed cost.
legacy = by_name[""]
assert legacy.dispatch_count == 3
assert legacy.total_cost_usd == pytest.approx(0.85)

# New rows attributed to their model strings.
assert by_name["claude-opus-4-7"].dispatch_count == 1
assert by_name["claude-opus-4-7"].total_cost_usd == pytest.approx(0.30)
assert by_name["claude-opus-4-8"].dispatch_count == 1
assert by_name["claude-opus-4-8"].total_cost_usd == pytest.approx(0.30)
assert by_name["claude-haiku-4-5"].dispatch_count == 1
assert by_name["claude-haiku-4-5"].total_cost_usd == pytest.approx(0.20)

# Sort descending by cost: legacy (0.85) > opus (0.30) > haiku (0.20).
assert [m.model for m in results] == [
"",
"claude-opus-4-7",
"claude-opus-4-8",
"claude-haiku-4-5",
]

Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_cost_analyzer_memoization.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def test_cache_invalidates_when_ledger_modified(
agent_name="warrior",
cost_usd=0.50,
duration_seconds=60.0,
model="claude-opus-4-7",
model="claude-opus-4-8",
)
_write_records(ledger_path, [*sample_records, new_record])

Expand Down Expand Up @@ -313,7 +313,7 @@ def test_same_mtime_inode_change_busts_cache(
agent_name="sage",
cost_usd=0.77,
duration_seconds=42.0,
model="claude-opus-4-7",
model="claude-opus-4-8",
)
_write_records(replacement, [*sample_records, extra_record])
os.replace(replacement, ledger_path)
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_cost_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,9 @@ def test_dispatch_record_accepts_model(self) -> None:
agent_name="a",
cost_usd=0.0,
duration_seconds=0.0,
model="claude-opus-4-7",
model="claude-opus-4-8",
)
assert record.model == "claude-opus-4-7"
assert record.model == "claude-opus-4-8"

def test_legacy_jsonl_row_without_model_loads(self) -> None:
"""Sage memo D5 — pre-BON-351 ledger rows omit ``model`` entirely.
Expand Down Expand Up @@ -397,12 +397,12 @@ class TestModelCost:
def test_model_cost_construction(self) -> None:
"""Sage memo D5 — four required fields, all positional-by-name."""
m = ModelCost(
model="claude-opus-4-7",
model="claude-opus-4-8",
total_cost_usd=1.0,
dispatch_count=3,
total_duration_seconds=12.5,
)
assert m.model == "claude-opus-4-7"
assert m.model == "claude-opus-4-8"
assert m.total_cost_usd == 1.0
assert m.dispatch_count == 3
assert m.total_duration_seconds == 12.5
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_dispatch_options_role.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,13 @@ def test_role_with_leading_slash_accepted(self) -> None:
def test_role_paired_with_tools_and_model(self) -> None:
"""Sage D4 + D7 — role travels alongside tools and model."""
opts = DispatchOptions(
model="claude-opus-4-7",
model="claude-opus-4-8",
tools=["Read", "Write", "Edit", "Bash", "Grep", "Glob"],
role="warrior",
)
assert opts.role == "warrior"
assert opts.tools == ["Read", "Write", "Edit", "Bash", "Grep", "Glob"]
assert opts.model == "claude-opus-4-7"
assert opts.model == "claude-opus-4-8"


# ===========================================================================
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_dispatch_pydantic_ai_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def test_import_from_module(self):
assert _P is not None

def test_construct_with_model_kwarg(self):
backend = PydanticAIBackend(model="claude-sonnet-4")
backend = PydanticAIBackend(model="claude-sonnet-4-6")
assert backend is not None

def test_construct_with_no_args(self):
Expand Down
Loading
Loading