feat(bob): implement manifest-first entity recall (#224)

visahak · claude · visahak · commit 4fc47a203cbd · 2026-04-28T09:56:54.000-04:00
Replace full-body entity injection with human-readable manifest output
in Bob's recall script. Uses shared load_manifest and dedupe helpers
from entity_io.py. Output format is markdown lines with path, type,
and trigger — Bob reads full files on demand via read_file.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite:recall/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite:recall/SKILL.md
@@ -16,10 +16,10 @@ Entities can come from multiple sources:
 
 ## How It Works
 
-1. List all `.md` files under `.evolve/entities/`, `.evolve/public/`, and their subdirectories
-2. Read each file — the YAML frontmatter contains `type` and `trigger`, the body contains the entity content and rationale
-3. Review each entity for relevance to the current task
-4. Apply relevant entities as additional context for your work
+1. The script scans `.evolve/entities/` and `.evolve/public/` and emits a compact manifest containing only `path`, `type`, and `trigger` for each entity
+2. Review the manifest and identify entities whose trigger looks relevant to the current task
+3. Use `read_file` to read the full content of relevant entity files on demand
+4. Apply the retrieved guidance as additional context for your work
 
 **Directory structure**:
 - `.evolve/entities/guideline/` - Your private entities
@@ -54,7 +54,14 @@ Entities are stored as individual markdown files in `.evolve/entities/`, organiz
         code-review.md
 ```
 
-Each file uses markdown with YAML frontmatter:
+The manifest output is human-readable:
+
+```
+- `.evolve/entities/guideline/use-context-managers.md` [guideline] — When processing files or managing resources
+- `.evolve/entities/subscribed/alice/guideline/error-handling.md` [guideline] — When writing error handlers
+```
+
+Each file still uses markdown with YAML frontmatter:
 
 ```markdown
 ---
@@ -71,11 +78,6 @@ Use context managers for file operations
 Ensures proper resource cleanup
 ```
 
-## Entity Annotations
+## On-Demand Expansion
 
-Subscribed entities are annotated with their source:
-```
-- **[guideline]** [from: alice] Use context managers for file operations
-  - _Rationale: Ensures proper resource cleanup_
-  - _When: When processing files or managing resources_
-```
+When a manifest entry's trigger matches the current task, use `read_file` to load the full entity. The file body contains the guideline content and an optional `## Rationale` section.
diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite:recall/scripts/retrieve_entities.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite:recall/scripts/retrieve_entities.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Retrieve and output entities for Bob to filter."""
+"""Retrieve and output an entity manifest for Bob to expand on demand."""
 
 import sys
 from pathlib import Path
@@ -12,99 +12,47 @@
         sys.path.insert(0, str(lib_path))
         break
 
-from entity_io import find_entities_dir, get_evolve_dir, markdown_to_entity, log as _log  # noqa: E402
+from entity_io import dedupe_manifest_entries, find_recall_entity_dirs, load_manifest, log as _log  # noqa: E402
 
 
 def log(message):
     _log("retrieve", message)
 
 
 def format_entities(entities):
-    """Format all entities for Bob to review.
+    """Format a manifest of entities as human-readable markdown for Bob."""
+    header = """## Evolve entity manifest for this task
 
-    Entities that came from a subscribed source have their path recorded in
-    the private ``_source`` key (set by load_entities_with_source). These are
-    annotated with ``[from: {name}]`` so Bob knows their provenance.
-    """
-    header = """## Entities for this task
-
-Review these entities and apply any relevant ones:
+These stored entities are available for this repo. Read only the files whose trigger looks relevant to the user's request:
 
 """
-    items = []
+    lines = []
     for e in entities:
-        content = e.get("content")
-        if not content:
-            continue
-        source = e.get("_source")
-        if source:
-            content = f"[from: {source}] {content}"
-        item = f"- **[{e.get('type', 'general')}]** {content}"
-        if e.get("rationale"):
-            item += f"\n  - _Rationale: {e['rationale']}_"
-        if e.get("trigger"):
-            item += f"\n  - _When: {e['trigger']}_"
-        items.append(item)
-
-    return header + "\n".join(items)
-
-
-def load_entities_with_source(entities_dir):
-    """Glob all .md files under entities_dir and parse each.
-
-    Entities stored under entities/subscribed/{name}/ have ``_source`` set to
-    the subscription name so format_entities can annotate them. The owner field
-    written by publish.py is preserved; _source is just a routing key used
-    internally and is never written to disk.
-    """
-    entities_dir = Path(entities_dir)
-    entities = []
-    for md in sorted(entities_dir.glob("**/*.md")):
-        if md.is_symlink():
-            continue
-        try:
-            entity = markdown_to_entity(md)
-            entity.pop("_source", None)
-            if not entity.get("content"):
-                continue
-            try:
-                rel_parts = md.relative_to(entities_dir).parts
-            except ValueError:
-                rel_parts = md.parts
-            if rel_parts[0] == "subscribed" and len(rel_parts) > 1:
-                entity["_source"] = rel_parts[1]
-            entities.append(entity)
-        except (OSError, UnicodeDecodeError):
-            pass
-    return entities
+        lines.append(f"- `{e['path']}` [{e['type']}] \u2014 {e['trigger']}")
+    return header + "\n".join(lines)
 
 
 def main():
     log("Script started")
 
-    entities_dir = find_entities_dir()
-    log(f"Entities dir: {entities_dir}")
-
     entities = []
-    if entities_dir:
-        entities = load_entities_with_source(entities_dir)
+    recall_dirs = find_recall_entity_dirs()
+    log(f"Recall dirs: {recall_dirs}")
+    for root_dir in recall_dirs:
+        entities.extend(load_manifest(root_dir))
 
-    public_dir = get_evolve_dir() / "public"
-    if public_dir.is_dir():
-        log(f"Loading public entities from: {public_dir}")
-        entities += load_entities_with_source(public_dir)
+    entities = dedupe_manifest_entries(entities)
 
     if not entities:
         log("No entities found")
         return
 
     log(f"Loaded {len(entities)} entities")
+
     output = format_entities(entities)
     print(output)
     log(f"Output {len(output)} chars to stdout")
 
 
 if __name__ == "__main__":
     main()
-
-# Made with Bob
diff --git a/tests/platform_integrations/test_bob_sharing.py b/tests/platform_integrations/test_bob_sharing.py
@@ -610,51 +610,52 @@ def test_output_reports_added_count(self, temp_project_dir):
 class TestBobRetrieveEntities:
     """Tests for Bob's retrieve_entities.py script.
 
-    Note: Bob's retrieve script outputs markdown for Bob's UI, not JSON.
+    Bob outputs human-readable manifest markdown (not JSON like Claude/Codex).
     """
 
     def test_returns_entities_from_private_dir(self, temp_project_dir):
         evolve_dir = temp_project_dir / ".evolve"
         entities_dir = evolve_dir / "entities" / "guideline"
         entities_dir.mkdir(parents=True)
-        (entities_dir / "tip.md").write_text("---\ntype: guideline\n---\n\nPrivate tip.\n")
+        (entities_dir / "tip.md").write_text("---\ntype: guideline\ntrigger: when writing private code\n---\n\nPrivate tip.\n")
 
         result = run_script(RETRIEVE_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
-        # Bob outputs markdown, not JSON
-        assert "Private tip" in result.stdout
-        assert "## Entities for this task" in result.stdout
+        assert "Evolve entity manifest for this task" in result.stdout
+        assert "[guideline]" in result.stdout
+        assert "when writing private code" in result.stdout
+        assert "Private tip." not in result.stdout
 
     def test_returns_entities_from_public_dir(self, temp_project_dir):
         evolve_dir = temp_project_dir / ".evolve"
         public_dir = evolve_dir / "public" / "guideline"
         public_dir.mkdir(parents=True)
-        (public_dir / "tip.md").write_text("---\ntype: guideline\nvisibility: public\n---\n\nPublic tip.\n")
+        (public_dir / "tip.md").write_text("---\ntype: guideline\ntrigger: when sharing guidelines\nvisibility: public\n---\n\nPublic tip.\n")
 
         result = run_script(RETRIEVE_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
-        assert "Public tip" in result.stdout
+        assert "when sharing guidelines" in result.stdout
+        assert "Public tip." not in result.stdout
 
     def test_returns_entities_from_subscribed_dir(self, temp_project_dir):
         evolve_dir = temp_project_dir / ".evolve"
         subscribed_dir = evolve_dir / "entities" / "subscribed" / "alice" / "guideline"
         subscribed_dir.mkdir(parents=True)
-        (subscribed_dir / "tip.md").write_text("---\ntype: guideline\n---\n\nSubscribed tip.\n")
+        (subscribed_dir / "tip.md").write_text("---\ntype: guideline\ntrigger: when adding coverage\n---\n\nSubscribed tip.\n")
 
         result = run_script(RETRIEVE_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
-        assert "Subscribed tip" in result.stdout
-        assert "[from: alice]" in result.stdout
+        assert "when adding coverage" in result.stdout
+        assert ".evolve/entities/subscribed/alice/guideline/tip.md" in result.stdout
+        assert "Subscribed tip." not in result.stdout
 
     def test_retrieve_filters_symlinked_entities(self, temp_project_dir):
         evolve_dir = temp_project_dir / ".evolve"
         subscribed_dir = evolve_dir / "entities" / "subscribed" / "alice" / "guideline"
         subscribed_dir.mkdir(parents=True)
         real_file = subscribed_dir / "real.md"
-        real_file.write_text("---\ntype: guideline\n---\n\nReal content.\n")
+        real_file.write_text("---\ntype: guideline\ntrigger: when testing\n---\n\nReal content.\n")
         link_file = subscribed_dir / "link.md"
         link_file.symlink_to(real_file)
 
         result = run_script(RETRIEVE_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
-        assert "Real content" in result.stdout
-        assert result.stdout.count("Real content") == 1, "Symlinked duplicate should be filtered out"
-
-
-# Made with Bob
+        assert "when testing" in result.stdout
+        assert result.stdout.count("when testing") == 1, "Symlinked duplicate should be filtered out"
+        assert "Real content." not in result.stdout