Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion file-budget.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@
"src/bonfire/handlers": 3126,
"src/bonfire/onboard": 2973,
"src/bonfire/prompt": 855,
"tests/unit": 58858
"tests/unit": 59014
}
}
4 changes: 2 additions & 2 deletions src/bonfire/knowledge/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ async def get_by_source(self, source_path: str) -> list[VaultEntry]:
)
return [self._record_to_entry(r) for r in results]
except Exception as exc:
logger.exception("Vault get_by_source failed: %s", exc)
return []
msg = f"Vault get_by_source failed: {exc}"
raise RetrievalError(msg, context={"source_path": source_path}) from exc

def _ensure_connected(self) -> None:
"""Lazy connect. Runs migration if vault_v2 doesn't exist."""
Expand Down
156 changes: 156 additions & 0 deletions tests/unit/test_knowledge_backend_get_by_source_typed_failure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2026 BonfireAI

"""Elegance-Law contract for ``LanceDBBackend.get_by_source()``.

``get_by_source()`` is the source-path retrieval path on the ``VaultBackend``
contract: callers ask "give me every entry that originated from this file" and
treat the returned list as the complete, authoritative answer. A real backend
failure — ``search().where().to_list()`` raising because the table is corrupt,
the store is unreachable, or a vector op blows up — was previously swallowed
into ``return []``. That ``[]`` is **indistinguishable from a legitimate
"no entries for this source path"**, so a failed lookup masqueraded as "nothing
is stored from here": the caller proceeds on partial truth with no typed signal,
no retryable flag, no context. That is exactly the swallowed-failure the Elegance
Law forbids — and the exact masquerade its structural sibling ``query()`` already
had removed (``query`` now raises a typed ``RetrievalError`` instead of ``[]``).

``get_by_source()`` is the overlooked sibling: same ``table.search(...).where(
...).to_list()`` shape, same "list of entries" return type, same indistinguishable
``[]``. This test pins the matching contract: a backend failure inside
``get_by_source()`` must surface as a typed :class:`bonfire.errors.RetrievalError`
(the one vocabulary; ``code == "retrieval"``, ``retryable is True``) carrying the
originating exception as its ``__cause__`` and naming the source path in its
structured context — NOT a bare ``[]``. A genuine empty result (zero matching
entries) stays a legitimate success and returns ``[]``.

Note the deliberate divergence from the *other* sibling, ``exists()``: that method
fail-opens to ``False`` by documented design (its caller treats a false-negative
as a harmless dedup miss). ``get_by_source()`` has no such fail-open caller — its
list IS the answer — so, like ``query()``, it must raise rather than degrade.

``lancedb`` is not a test dependency, so we never connect to a real store: we
pre-set the backend's private ``_table``/``_db`` so ``_ensure_connected()`` is a
no-op and inject a fake table whose ``.search(...).where(...).to_list()`` raises.
That exercises the exact ``except`` arm under test without any LanceDB runtime.
"""

from __future__ import annotations

import pytest

from bonfire.errors import BonfireError, RetrievalError
from bonfire.knowledge.backend import LanceDBBackend


class _FakeEmbedder:
"""Minimal embedder stub — ``get_by_source()`` only reads ``dim`` for the
zero-vector probe; it never embeds free text."""

dim = 4

def embed(self, texts: list[str]) -> list[list[float]]:
return [[0.0] * self.dim for _ in texts]


class _ExplodingSearch:
"""A search builder whose terminal ``.to_list()`` blows up."""

def limit(self, _n: int) -> _ExplodingSearch:
return self

def where(self, _clause: str) -> _ExplodingSearch:
return self

def to_list(self) -> list[dict[str, object]]:
raise RuntimeError("simulated LanceDB to_list failure")


class _ExplodingTable:
"""Non-empty table whose source-path query fails at ``to_list()``."""

def count_rows(self) -> int:
# Non-empty so ``get_by_source()`` proceeds past the early empty-table
# guard and reaches the protected ``search(...).where(...).to_list()``.
return 1

def search(self, _vector: object) -> _ExplodingSearch:
return _ExplodingSearch()


class _EmptyHitSearch:
def limit(self, _n: int) -> _EmptyHitSearch:
return self

def where(self, _clause: str) -> _EmptyHitSearch:
return self

def to_list(self) -> list[dict[str, object]]:
return []


class _EmptyHitTable:
"""Non-empty table whose source-path query legitimately matches nothing."""

def count_rows(self) -> int:
return 1

def search(self, _vector: object) -> _EmptyHitSearch:
return _EmptyHitSearch()


def _make_backend(table: object) -> LanceDBBackend:
backend = LanceDBBackend(vault_path=":memory:", embedder=_FakeEmbedder())
# Pre-wire the lazy-connect fields so ``_ensure_connected()`` short-circuits
# (it returns early when ``_table is not None``) and never imports lancedb.
backend._table = table
backend._db = object()
return backend


@pytest.mark.asyncio
async def test_get_by_source_backend_failure_raises_typed_retrieval_error() -> None:
"""A failed ``get_by_source()`` lookup must speak the one vocabulary, not ``[]``."""
backend = _make_backend(_ExplodingTable())

with pytest.raises(RetrievalError) as exc_info:
await backend.get_by_source("/src/whatever.py")

err = exc_info.value
assert isinstance(err, BonfireError), "must be in the one Bonfire vocabulary"
assert err.code == "retrieval"
assert err.retryable is True, "a transient retrieval failure is retryable"


@pytest.mark.asyncio
async def test_get_by_source_failure_chains_the_original_exception() -> None:
"""The typed error must carry the originating exception (no lost traceback)."""
backend = _make_backend(_ExplodingTable())

with pytest.raises(RetrievalError) as exc_info:
await backend.get_by_source("/src/whatever.py")

cause = exc_info.value.__cause__
assert isinstance(cause, RuntimeError)
assert "simulated LanceDB to_list failure" in str(cause)


@pytest.mark.asyncio
async def test_get_by_source_failure_context_carries_the_source_path() -> None:
"""Structured context must name the source path so the failure is self-describing."""
backend = _make_backend(_ExplodingTable())

with pytest.raises(RetrievalError) as exc_info:
await backend.get_by_source("/src/needle.py")

assert exc_info.value.context.get("source_path") == "/src/needle.py"


@pytest.mark.asyncio
async def test_get_by_source_zero_hits_is_a_success_not_an_error() -> None:
"""A genuine empty result (no matching entries) stays a success — returns []."""
backend = _make_backend(_EmptyHitTable())

result = await backend.get_by_source("/src/unstored.py")

assert result == []
Loading