From 06898b22b017c270741196661008eadd646136fa Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 08:22:38 +0300
Subject: [PATCH 01/11] docs: design spec for snippet skeletonization +
 content-aware rendering

Ports headroom's AST structure handler + StructureMask idea into the
retrieval layer: focus skeletons (signatures + matched lines, bodies
elided) so more results fit the token budget. Reversible, content-aware
(code/markdown/structured), retrieval-time only, raw-fallback safe.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 ...26-06-24-snippet-skeletonization-design.md | 260 ++++++++++++++++++
 1 file changed, 260 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-06-24-snippet-skeletonization-design.md

diff --git a/docs/superpowers/specs/2026-06-24-snippet-skeletonization-design.md b/docs/superpowers/specs/2026-06-24-snippet-skeletonization-design.md
new file mode 100644
index 0000000..e0f5860
--- /dev/null
+++ b/docs/superpowers/specs/2026-06-24-snippet-skeletonization-design.md
@@ -0,0 +1,260 @@
+# Snippet Skeletonization & Content-Aware Rendering — Design
+
+- **Date:** 2026-06-24
+- **Status:** Approved (design); pending implementation plan
+- **Author:** denfry (with Claude)
+- **Inspiration:** [`headroomlabs-ai/headroom`](https://github.com/headroomlabs-ai/headroom) — its
+  AST structure handler (`compression/handlers/code_handler.py`), `StructureMask`
+  (`compression/masks.py`), and content-type routing (`compression/detector.py`).
+
+## 1. Summary
+
+Make retrieval snippets carry **structure, not bulk**. Today a result's `snippet` is the raw
+text of a code chunk. This design transforms that text, at retrieval time, into a
+**skeleton**: import/signature/class/type lines are kept, function/method bodies are elided to a
+marker (`… 47 lines elided (read 88–134)`), and the line(s) that actually match the query are
+always preserved (**focus skeleton**). The transform is content-aware — code, markdown, and
+structured config each get an appropriate line classifier; everything else is left untouched.
+
+The win is the skill's reason to exist: more relevant results fit in the same `token_budget`.
+The transform is **reversible** — `recommended_reads` and each result's `line_start`/`line_end`
+remain the path to the full body — and **lossless-safe** — it never produces a worse result than
+today (guards fall back to the raw snippet).
+
+This ports the *idea* at the heart of headroom (separate structural tokens from compressible
+ones) but adapts it for a **retrieval** system rather than a generic compression middleware: we
+preserve the matched line, we route by file extension instead of an ML detector, and we operate
+at line granularity instead of token granularity.
+
+## 2. Goals / Non-goals
+
+**Goals**
+
+- Reduce per-snippet token cost so `apply_budget` attaches snippets to **more** ranked results.
+- Preserve the query-matching line(s) in every code snippet (focus skeleton).
+- Route by content type: code → AST skeleton, markdown → heading skeleton, structured config →
+  key skeleton, everything else → unchanged.
+- Be reversible and never degrade output (raw fallback on any failure or non-win).
+- Tell Claude what was compressed (`skeletonized`, `elided_lines`) and where to expand.
+
+**Non-goals (YAGNI)**
+
+- No ML content detector (Magika). We already know the path → `detect_language`.
+- No index-time skeleton sidecar (storing skeletons in the DB). Retrieval-time only; revisit only
+  if parsing cost shows up in profiling; a tree-sitter parse of a small chunk takes only milliseconds.
+- No log-dedup renderer — a code index does not carry logs.
+- No change to indexing, chunking, FTS, or vector storage. Recall is untouched.
+
+## 3. Background — current snippet flow
+
+`query → intent → retrievers → RRF fuse → rerank → budget → payload`
+(`retrieval/pipeline.py:search`).
+
+- `Candidate` (`retrieval/types.py`) carries `path`, `line_start`, `line_end`, `source`,
+  `content`, `token_est`, plus graph/score fields.
+- `content` is populated per retriever (raw chunk text only for the first two):
+  - `fts_candidates` → `content = row["content"]` (a line-window chunk; **raw code**).
+  - `vector_candidates` → `content = row["content"]` (chunk text; **raw code**).
+  - `symbol_candidates` → `content = row["signature"]` (already one line; effectively
+    pre-skeletonized — the size guard will skip it).
+  - `path_candidates` → no content.
+- `apply_budget` (`retrieval/budget.py`) greedily attaches `redact_snippet(c.content)` to the
+  top-ranked results until `token_budget` is spent; the rest become `recommended_reads`.
+
+The transform plugs in **at `apply_budget`**, between the raw `content` and the emitted `snippet`.
+Index-time transformation is rejected: skeletonizing before storage would strip body text from
+FTS/vector indexes and collapse recall.
+
+Reused infrastructure (no new parsing stack):
+
+- `discovery/classify.py:detect_language(path)` and `_TREE_SITTER_LANGS` — content-type routing.
+- `parsers/treesitter.py:parse_file(lang, text)` → `Symbol`s with `line_start`, `line_end`,
+  `signature`, `kind`, `parent_index` (already computes the signature line per def).
+- `parsers/line_chunker.py:estimate_tokens` — recompute token cost of the skeleton.
+- `output/redact.py:redact_snippet` — applied **after** skeletonization.
+
+## 4. Design
+
+### 4.1 Core abstraction: line-level structure mask
+
+headroom marks **tokens** structural-vs-compressible (`StructureMask`). We port the idea to
+**lines** — more robust for partial chunks and aligned with the line-range vocabulary used
+everywhere else in the codebase.
+
+New module `src/codebase_index/retrieval/skeleton.py`:
+
+```python
+@dataclass
+class Compacted:
+    text: str
+    token_est: int
+    elided_lines: int
+    skeletonized: bool        # False => text is the original content (raw fallback / no win)
+
+def classify_lines(content: str, *, lang: str | None,
+                   query_terms: list[str], ctx_lines: int) -> list[bool]:
+    """Return one bool per line: True = keep (structural or focus), False = elide."""
+
+def render_skeleton(content: str, keep: list[bool], *, line_start: int) -> tuple[str, int]:
+    """Collapse consecutive elided runs into a marker using ABSOLUTE file line numbers.
+    Returns (skeleton_text, elided_line_count)."""
+
+def compact(content: str, *, path: str, line_start: int,
+            intent: Intent, query_terms: list[str],
+            min_reduction: float) -> Compacted:
+    """Full pipeline: route → classify → render → guard. Never raises."""
+```
+
+`render_skeleton` emits markers like `… 47 lines elided (read 88–134)` where `88–134` are
+**absolute** file lines (`line_start` offset applied), so Claude can expand precisely with `Read`.
+
+### 4.2 Content-type classifiers (feature №2)
+
+Type from `detect_language(path)`:
+
+- **Code** (`lang in _TREE_SITTER_LANGS`): `parse_file(lang, content)` → symbols.
+  A line is `keep` if it is **outside** every function/method body (imports, class/interface
+  headers, decorators, module-level statements) **or** it is a symbol's signature line(s).
+  Function/method **bodies** are `elide`. Nested methods inside a class are handled naturally:
+  the class header + each method signature stay, each method body elides.
+  Parse failure / non-tree-sitter language → regex signature fallback (§5).
+- **Markdown** (`markdown`): `keep` heading lines (`^#{1,6}\s`) + the first non-blank line of each
+  section; elide long prose runs.
+- **Structured** (`json`, `yaml`, `toml`, `ini`): `keep` key-introducing lines (per-family regex)
+  and structural brackets; elide long value/array bodies.
+- **Other** (`sql`, `terraform`, `hcl`, `dockerfile`, `make`, unknown/`None`): all `keep` →
+  identical to the raw snippet (no transform).
+
+### 4.3 Policy & focus
+
+- **Focus is always on** when `query_terms` is non-empty: any line containing a query term is
+  force-`keep`, plus `ctx_lines` of surrounding context. **A matched line is never elided.**
+- **Intent tunes `ctx_lines`** (aggressiveness), not on/off:
+  - `ARCHITECTURE`, `HOW_IT_WORKS`, `DATA_FLOW` → `ctx_lines = 0` (pure signatures; the *shape*
+    is the answer).
+  - `LOCATE_IMPL`, `KEYWORD`, `DEBUG_ERROR`, `IMPACT`, `FIND_REFS` → `ctx_lines = 2` (keep the
+    matched line in context).
+- **Savings guard:** adopt the skeleton only if it saves ≥ `min_reduction` (default `0.25`) of the
+  estimated tokens; otherwise return the raw content with `skeletonized=False`. This alone makes
+  the transform a no-op on already-minimal content (e.g. symbol-signature candidates), with no
+  special-casing.
+
+### 4.4 Budget integration
+
+`apply_budget(candidates, *, token_budget, compactor=None)` gains an injected `compactor`
+(dependency injection keeps `budget.py` decoupled and unit-testable):
+
+```python
+comp = compactor(c) if (compactor and c.content) else None
+text = comp.text if comp else c.content
+tok  = comp.token_est if comp else c.token_est
+# fit `tok` against the budget; snippet = redact_snippet(text)
+meta["skeletonized"] = bool(comp and comp.skeletonized)
+meta["elided_lines"] = comp.elided_lines if comp else 0
+meta["token_est"]    = tok                      # reflects the compacted size
+```
+
+`pipeline.search` builds the compactor once and passes it in:
+
+```python
+compactor = make_compactor(intent=plan.intent, query=query,
+                           enabled=not raw, min_reduction=cfg_min_reduction)
+all_results, all_recommended = apply_budget(ranked, token_budget=scaled_budget,
+                                            compactor=compactor)
+```
+
+Because each compacted snippet costs fewer tokens, the greedy loop reaches **more** candidates
+before exhausting `token_budget` — the concrete win.
+
+### 4.5 Output schema additions
+
+Each `results[]` entry gains:
+
+- `skeletonized: bool` — true when the snippet is a skeleton.
+- `elided_lines: int` — count of source lines folded away (`0` when not skeletonized).
+- `token_est` — now reports the **compacted** estimate.
+
+`recommended_reads` semantics are unchanged: a skeletonized snippet is still "useful" so it is not
+forced into `recommended_reads`, but every result already carries `line_start`/`line_end`, so
+expansion is always one `Read` away. No payload field is removed; consumers that ignore the new
+fields keep working.
+
+### 4.6 Surface: CLI / MCP / config / SKILL.md
+
+- **CLI:** `--raw` flag on `search` / `explain` / `architecture` disables compaction. Default = on.
+- **MCP** (`mcp/server.py`): add `raw: bool = false` to the search-family tools.
+- **Config** (`config.py:RetrievalConfig`):
+  - `compact_snippets: bool = True`
+  - `compact_min_reduction: float = 0.25`
+  These are retrieval-time fields — **not** added to `config_hash`, so no reindex is triggered.
+- **SKILL.md:** document `skeletonized` / `elided_lines` (snippet may be signatures + matched
+  lines with bodies elided; Read `line_start–line_end` to expand a body) and the `--raw` escape.
+
+## 5. Error handling & safety guarantees
+
+- **Never raises.** `compact` wraps classification; any exception → raw content,
+  `skeletonized=False`.
+- **Parse fallback chain:** tree-sitter parse → on failure or non-tree-sitter language, a
+  regex signature detector (ports headroom's `_SIGNATURE_PATTERNS`) → on empty, raw content.
+- **Preserve bias:** a line that cannot be attributed to a function/method body is kept. Partial
+  window chunks (a body cut mid-function) therefore degrade to "keep more", never "elide the
+  wrong thing".
+- **Focus invariant:** a line containing a query term is never elided.
+- **Savings guard:** never emit a skeleton that isn't meaningfully smaller (§4.3).
+- **Redaction order:** skeletonize → `redact_snippet`. Bodies (where secrets usually live) are
+  already dropped; surviving structural lines are still redacted.
+- **Determinism:** no randomness, stable ordering — identical input yields identical output
+  (required by golden tests).
+
+## 6. Performance
+
+The compactor runs only on candidates that actually receive a snippet (top handful per query,
+bounded by `token_budget`). Each is a tree-sitter parse of a small chunk (~ms). Parsers are
+created per call today; if profiling shows cost, adopt the thread-local parser-cache pattern
+(headroom's `_get_parser`). No measurable impact expected for interactive use.
+
+## 7. Testing strategy (TDD)
+
+**Unit (`tests/test_skeleton.py`)**
+
+- Code classifier: Python / TypeScript / Go samples → signatures kept, bodies elided,
+  `elided_lines` exact.
+- Focus: a body line containing a query term (+ context) survives; surrounding body elided.
+- Markdown classifier: headings + first section line kept.
+- Structured classifier: JSON / TOML key lines kept, long values elided.
+- Fallback: unparseable content → raw, `skeletonized=False`; tree-sitter-absent path → regex.
+- Savings guard: skeleton not ≥25% smaller → raw returned.
+- `render_skeleton`: marker carries correct **absolute** line range; adjacent elide runs merge.
+- Determinism: repeated calls byte-identical.
+
+**Integration (`tests/test_budget.py`, `tests/test_pipeline.py`)**
+
+- With a compactor, more candidates receive snippets at the same `token_budget` than without.
+- `redact_snippet` still applied to skeletonized text.
+- `skeletonized` / `elided_lines` / compacted `token_est` present and correct.
+- Pagination and `recommended_reads` filtering unchanged.
+- `--raw` / `compact_snippets=False` → byte-identical to pre-feature output (regression guard).
+
+**Golden**
+
+- One representative `search` payload captured before/after, asserting more results carry snippets
+  and the matched line is present in each skeleton.
+
+## 8. Rollout & backward compatibility
+
+- Additive fields only; no field removed or renamed. Pre-feature consumers ignore the new keys.
+- `--raw` and `compact_snippets=False` reproduce exact current behavior (escape hatch + test oracle).
+- No schema migration, no reindex (config fields excluded from `config_hash`).
+- CHANGELOG entry under a new minor version.
+
+## 9. Resolved decisions
+
+| Question | Decision |
+|---|---|
+| Index-time vs retrieval-time | Retrieval-time (index-time kills FTS/vector recall). |
+| Signatures-only vs focus | Focus skeleton — matched line always preserved. |
+| Default on vs opt-in | Default on, `--raw` escape (headroom-style: compress by default). |
+| Content detector | `detect_language(path)` by extension — no ML detector. |
+| Mask granularity | Lines, not tokens — robust for partial chunks, matches codebase idiom. |
+| When to skip | Savings guard (≥25%) auto-skips minimal/signature content. |
+

From a6c1f7d098a6378945565151b94ccf0b6d03d14a Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 08:42:25 +0300
Subject: [PATCH 02/11] docs: implementation plan for snippet skeletonization

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../2026-06-24-snippet-skeletonization.md     | 1140 +++++++++++++++++
 1 file changed, 1140 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-06-24-snippet-skeletonization.md

diff --git a/docs/superpowers/plans/2026-06-24-snippet-skeletonization.md b/docs/superpowers/plans/2026-06-24-snippet-skeletonization.md
new file mode 100644
index 0000000..733f2e8
--- /dev/null
+++ b/docs/superpowers/plans/2026-06-24-snippet-skeletonization.md
@@ -0,0 +1,1140 @@
+# Snippet Skeletonization & Content-Aware Rendering Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Transform retrieval snippets into focus skeletons (signatures + matched lines kept, function bodies elided) at retrieval time so more ranked results fit the token budget, reversibly and content-aware.
+
+**Architecture:** A new `retrieval/skeleton.py` computes a per-line keep/elide mask (a line-granularity port of headroom's `StructureMask`), routed by `detect_language(path)`: code → AST signatures via the existing `parse_file`, markdown → headings, structured config → key lines, everything else untouched. A `compactor` callable is injected into `apply_budget`, which uses the compacted text + reduced token estimate when (and only when) it is a meaningful win; otherwise it falls back byte-identically to today's raw snippet. The flag threads CLI `--raw` / MCP `raw` → `service.search_payload` → `pipeline.search`.
+
+**Tech Stack:** Python 3.11+, tree-sitter (`tree_sitter`, `tree_sitter_language_pack` — already core deps), Typer (CLI), FastMCP (MCP), pytest.
+
+## Global Constraints
+
+- Python ≥ 3.11 (repo floor); `from __future__ import annotations` at the top of every module.
+- Never raise from the skeletonizer — any failure returns the raw snippet (`skeletonized=False`).
+- Output additive only: no existing payload field renamed or removed.
+- `compactor=None` / `compact=False` / `--raw` must reproduce **byte-identical** current output (regression oracle).
+- Skeletonize **then** `redact_snippet` — never the reverse.
+- Deterministic: identical input → identical output (no randomness, stable iteration).
+- Retrieval-time only — no indexing, chunking, FTS, vector, or schema changes; new config fields stay out of `config_hash` (no reindex).
+- Conventional-commit messages (`feat:`, `docs:`, `test:`); end each with the `Co-Authored-By` trailer shown in Step 5 of Task 1.
+
+---
+
+## File Structure
+
+- **Create** `src/codebase_index/retrieval/skeleton.py` — the whole skeletonizer: `Compacted`, `render_skeleton`, `classify_lines` (+ per-type classifiers), `compact`, `make_compactor`. One responsibility: turn raw snippet text into a compacted snippet.
+- **Create** `tests/test_skeleton.py` — unit tests for the skeletonizer.
+- **Modify** `src/codebase_index/retrieval/budget.py` — inject the `compactor`.
+- **Modify** `src/codebase_index/retrieval/pipeline.py` — build the compactor, pass it to `apply_budget`.
+- **Modify** `src/codebase_index/config.py` — two `RetrievalConfig` fields.
+- **Modify** `src/codebase_index/service.py` — thread `raw`.
+- **Modify** `src/codebase_index/cli.py` — `--raw` on `search` and `explain`.
+- **Modify** `src/codebase_index/mcp/server.py` — `raw` param on `search_code` and `explain_code`.
+- **Modify** `tests/test_budget.py` — compactor integration tests.
+- **Modify** `tests/test_pipeline_search.py` — end-to-end skeleton-on/off test.
+- **Modify** `skill/SKILL.md`, `CHANGELOG.md` — docs.
+
+> **Scope note vs. spec:** the spec named `architecture` among the `--raw` surfaces. `architecture_payload` returns cached graph analytics and emits **no snippets**, so `--raw` there would be a no-op. This plan scopes `--raw` to `search` and `explain` (the two commands that flow through `apply_budget`).
+
+---
+
+### Task 1: `render_skeleton` — collapse a keep/elide mask into text
+
+**Files:**
+- Create: `src/codebase_index/retrieval/skeleton.py`
+- Test: `tests/test_skeleton.py`
+
+**Interfaces:**
+- Consumes: `from ..parsers.line_chunker import estimate_tokens` (existing: `estimate_tokens(text: str) -> int`).
+- Produces:
+  - `render_skeleton(content: str, keep: list[bool], *, line_start: int) -> tuple[str, int]` — returns `(skeleton_text, elided_line_count)`. Consecutive `False` lines collapse into one marker `... {n} lines elided (read {a}-{b})` where `a`/`b` are **absolute** file line numbers (`line_start` is the absolute line of `content`'s first line).
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# tests/test_skeleton.py
+from codebase_index.retrieval.skeleton import render_skeleton
+
+
+def test_render_collapses_elided_run_with_absolute_lines():
+    content = "def f():\n    a = 1\n    b = 2\n    return a + b"
+    keep = [True, False, False, False]
+    text, elided = render_skeleton(content, keep, line_start=10)
+    assert text == "def f():\n... 3 lines elided (read 11-13)"
+    assert elided == 3
+
+
+def test_render_all_keep_is_unchanged():
+    content = "a\nb\nc"
+    text, elided = render_skeleton(content, [True, True, True], line_start=1)
+    assert text == content
+    assert elided == 0
+
+
+def test_render_merges_adjacent_runs_but_keeps_separated_ones():
+    content = "h1\nx\nh2\ny\nz"
+    keep = [True, False, True, False, False]
+    text, elided = render_skeleton(content, keep, line_start=1)
+    assert text == "h1\n... 1 lines elided (read 2-2)\nh2\n... 2 lines elided (read 4-5)"
+    assert elided == 3
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `python -m pytest tests/test_skeleton.py -v`
+Expected: FAIL with `ModuleNotFoundError: No module named 'codebase_index.retrieval.skeleton'`.
+
+- [ ] **Step 3: Write minimal implementation**
+
+```python
+# src/codebase_index/retrieval/skeleton.py
+"""Retrieval-time snippet skeletonization (line-granularity StructureMask).
+
+Turns a raw code/text snippet into a compact skeleton: signature/structural
+lines are kept, function bodies (and other compressible runs) collapse into a
+marker that points at the absolute line range to read for the full body. A
+line-granularity port of headroom's StructureMask, adapted for a retrieval
+system: the query-matching line is always preserved, routing is by file
+extension, and the transform never makes output worse than the raw snippet.
+"""
+
+from __future__ import annotations
+
+from ..parsers.line_chunker import estimate_tokens
+
+
+def render_skeleton(
+    content: str, keep: list[bool], *, line_start: int
+) -> tuple[str, int]:
+    """Collapse consecutive ``keep=False`` lines into one elision marker.
+
+    ``line_start`` is the absolute file line number of ``content``'s first line,
+    so markers cite the real range to ``Read``. Returns (text, elided_count).
+    """
+    lines = content.split("\n")
+    if len(keep) != len(lines):
+        # Defensive: mask/line mismatch must never corrupt output.
+        return content, 0
+
+    out: list[str] = []
+    elided_total = 0
+    i = 0
+    n = len(lines)
+    while i < n:
+        if keep[i]:
+            out.append(lines[i])
+            i += 1
+            continue
+        run_start = i
+        while i < n and not keep[i]:
+            i += 1
+        run_len = i - run_start
+        elided_total += run_len
+        a = line_start + run_start
+        b = line_start + i - 1
+        out.append(f"... {run_len} lines elided (read {a}-{b})")
+    return "\n".join(out), elided_total
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `python -m pytest tests/test_skeleton.py -v`
+Expected: PASS (3 tests).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/codebase_index/retrieval/skeleton.py tests/test_skeleton.py
+git commit -m "$(cat <<'EOF'
+feat(skeleton): render_skeleton collapses keep/elide mask into markers
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 2: Code classifier + `compact` orchestration
+
+**Files:**
+- Modify: `src/codebase_index/retrieval/skeleton.py`
+- Test: `tests/test_skeleton.py`
+
+**Interfaces:**
+- Consumes: `from ..discovery.classify import detect_language`; `from ..parsers.treesitter import parse_file` (routing uses `detect_language` plus a local code-language set, so no other parser import is needed); `render_skeleton`, `estimate_tokens` (Task 1).
+- Produces:
+  - `@dataclass class Compacted: text: str; token_est: int; elided_lines: int; skeletonized: bool`
+  - `classify_lines(content: str, *, lang: str | None, query_terms: list[str], ctx_lines: int) -> list[bool]` — one bool per `content.split("\n")` line.
+  - `compact(content: str, *, path: str, line_start: int, ctx_lines: int, query_terms: list[str], min_reduction: float) -> Compacted` — full pipeline; **never raises**.
+
+> Design note vs. spec §4.1: `compact` takes the already-resolved `ctx_lines` (int), not `intent`. The intent→`ctx_lines` policy lives in `make_compactor` (Task 4), keeping `compact` pure and unit-testable.
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# tests/test_skeleton.py  (append)
+from codebase_index.retrieval.skeleton import Compacted, compact
+
+
+PY_SAMPLE = (
+    "import os\n"
+    "\n"
+    "class Store:\n"
+    "    def refresh(self, tok):\n"
+    "        decoded = decode(tok)\n"
+    "        validate(decoded)\n"
+    "        return decoded\n"
+    "    def revoke(self, tok):\n"
+    "        self.blocklist.add(tok)\n"
+    "        log('revoked')\n"
+)
+
+
+def test_code_skeleton_keeps_signatures_and_elides_bodies():
+    r = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert r.skeletonized is True
+    assert "def refresh(self, tok):" in r.text
+    assert "def revoke(self, tok):" in r.text
+    assert "class Store:" in r.text
+    assert "import os" in r.text
+    assert "decoded = decode(tok)" not in r.text   # body elided
+    assert r.elided_lines >= 3
+    assert r.token_est < estimate_tokens_helper(PY_SAMPLE)
+
+
+def estimate_tokens_helper(text):
+    from codebase_index.parsers.line_chunker import estimate_tokens
+    return estimate_tokens(text)
+
+
+def test_focus_keeps_matched_body_line_and_context():
+    r = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=1, query_terms=["blocklist"], min_reduction=0.25)
+    assert "self.blocklist.add(tok)" in r.text       # matched line preserved
+    assert "decoded = decode(tok)" not in r.text      # unrelated body still elided
+
+
+def test_unparseable_or_unknown_type_falls_back_to_raw():
+    blob = "%%% not code %%%\n@@@@@\n!!!!!"
+    r = compact(blob, path="notes.bin", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert r.skeletonized is False
+    assert r.text == blob
+    assert r.elided_lines == 0
+
+
+def test_savings_guard_returns_raw_when_not_enough_win():
+    tiny = "def f(): pass"     # one line, nothing to elide
+    r = compact(tiny, path="f.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert r.skeletonized is False
+    assert r.text == tiny
+
+
+def test_compact_is_deterministic():
+    a = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    b = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert (a.text, a.token_est, a.elided_lines, a.skeletonized) == \
+           (b.text, b.token_est, b.elided_lines, b.skeletonized)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `python -m pytest tests/test_skeleton.py -v`
+Expected: FAIL with `ImportError: cannot import name 'Compacted'` (the first missing name in the import).
+
+- [ ] **Step 3: Write minimal implementation**
+
+Append to `src/codebase_index/retrieval/skeleton.py`:
+
+```python
+from dataclasses import dataclass
+
+# Languages we skeletonize via tree-sitter signatures. Mirrors
+# discovery.classify._TREE_SITTER_LANGS (kept local to avoid a private import).
+_CODE_LANGS = frozenset({
+    "python", "typescript", "javascript", "go", "java", "rust",
+    "c", "cpp", "csharp", "ruby", "php", "kotlin", "lua",
+})
+# Brace languages: the body opens at a line containing '{'; any other language is matched on a line ending in ':'.
+_BRACE_LANGS = frozenset({
+    "typescript", "javascript", "go", "java", "rust",
+    "c", "cpp", "csharp", "php", "kotlin",
+})
+_MAX_SIG_SCAN = 5  # bound the multi-line-signature lookahead
+
+
+@dataclass
+class Compacted:
+    text: str
+    token_est: int
+    elided_lines: int
+    skeletonized: bool
+
+
+def _raw(content: str) -> Compacted:
+    return Compacted(text=content, token_est=estimate_tokens(content),
+                     elided_lines=0, skeletonized=False)
+
+
+def _signature_end(lines: list[str], start: int, lang: str | None, end: int) -> int:
+    """0-based index of the last signature line for a def starting at ``start``.
+
+    Scans forward (bounded) for the line that opens the body so multi-line
+    signatures stay visible; defaults to ``start`` when nothing matches.
+    """
+    limit = min(end, start + _MAX_SIG_SCAN)
+    for i in range(start, limit + 1):
+        s = lines[i].strip()
+        if lang in _BRACE_LANGS and "{" in s:
+            return i
+        if lang not in _BRACE_LANGS and s.endswith(":"):
+            return i
+    return start
+
+
+def _classify_code(content: str, lines: list[str], lang: str) -> list[bool] | None:
+    """Keep imports/signatures/headers; elide function & method bodies.
+
+    Returns None when parsing yields no usable symbols (caller falls back).
+    """
+    from ..parsers.treesitter import parse_file
+
+    try:
+        result = parse_file(lang, content)
+    except Exception:
+        return None
+    symbols = result.symbols
+    if not symbols:
+        return None
+
+    n = len(lines)
+    keep = [True] * n
+    # Pass 1: elide the interior of every callable body.
+    for sym in symbols:
+        if sym.kind not in ("function", "method"):
+            continue
+        start0 = sym.line_start - 1
+        end0 = sym.line_end - 1
+        if not (0 <= start0 < n):
+            continue
+        end0 = min(end0, n - 1)
+        sig_end = _signature_end(lines, start0, lang, end0)
+        for i in range(sig_end + 1, end0 + 1):
+            keep[i] = False
+    # Pass 2: re-keep every symbol's signature line(s) (restores nested defs).
+    for sym in symbols:
+        start0 = sym.line_start - 1
+        end0 = min(sym.line_end - 1, n - 1)
+        if not (0 <= start0 < n):
+            continue
+        sig_end = _signature_end(lines, start0, lang, end0)
+        for i in range(start0, sig_end + 1):
+            keep[i] = True
+    return keep
+
+
+def _apply_focus(lines: list[str], keep: list[bool],
+                 query_terms: list[str], ctx_lines: int) -> None:
+    """Force-keep any line containing a query term, plus +/- ctx_lines."""
+    if not query_terms:
+        return
+    n = len(lines)
+    for i, line in enumerate(lines):
+        low = line.lower()
+        if any(t in low for t in query_terms):
+            for j in range(max(0, i - ctx_lines), min(n, i + ctx_lines + 1)):
+                keep[j] = True
+
+
+def classify_lines(content: str, *, lang: str | None,
+                   query_terms: list[str], ctx_lines: int) -> list[bool]:
+    lines = content.split("\n")
+    keep: list[bool] | None = None
+    if lang in _CODE_LANGS:
+        keep = _classify_code(content, lines, lang)
+    if keep is None:
+        keep = [True] * len(lines)        # unknown / parse miss -> keep all (raw)
+    _apply_focus(lines, keep, query_terms, ctx_lines)
+    return keep
+
+
+def compact(content: str, *, path: str, line_start: int, ctx_lines: int,
+            query_terms: list[str], min_reduction: float) -> Compacted:
+    """Route -> classify -> render -> guard. Never raises; raw fallback on any miss."""
+    if not content.strip():
+        return _raw(content)
+    try:
+        from ..discovery.classify import detect_language
+        lang = detect_language(path)
+        keep = classify_lines(content, lang=lang,
+                              query_terms=[t.lower() for t in query_terms],
+                              ctx_lines=ctx_lines)
+        if all(keep):
+            return _raw(content)
+        text, elided = render_skeleton(content, keep, line_start=line_start)
+        if elided == 0:
+            return _raw(content)
+        new_tok = estimate_tokens(text)
+        raw_tok = estimate_tokens(content)
+        if new_tok > raw_tok * (1.0 - min_reduction):
+            return _raw(content)          # not a meaningful win
+        return Compacted(text=text, token_est=new_tok,
+                         elided_lines=elided, skeletonized=True)
+    except Exception:
+        return _raw(content)
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `python -m pytest tests/test_skeleton.py -v`
+Expected: PASS (all Task 1 + Task 2 tests).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/codebase_index/retrieval/skeleton.py tests/test_skeleton.py
+git commit -m "$(cat <<'EOF'
+feat(skeleton): code classifier + compact() with focus, guard, raw fallback
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 3: Markdown + structured-config classifiers
+
+**Files:**
+- Modify: `src/codebase_index/retrieval/skeleton.py`
+- Test: `tests/test_skeleton.py`
+
+**Interfaces:**
+- Consumes: `classify_lines` routing from Task 2.
+- Produces: routing additions inside `classify_lines` (no new public signature). Markdown (`markdown`) and structured (`json`, `yaml`, `toml`, `ini`) get keep-line heuristics.
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# tests/test_skeleton.py  (append)
+
+MD_SAMPLE = (
+    "# Title\n"
+    "Intro line one.\n"
+    "More prose that is not structural and should be dropped.\n"
+    "Even more prose.\n"
+    "## Section\n"
+    "Section body line.\n"
+    "Trailing prose to elide here too.\n"
+)
+
+JSON_SAMPLE = (
+    '{\n'
+    '  "name": "demo",\n'
+    '  "description": "a long value that is mostly prose and can be elided away",\n'
+    '  "nested": {\n'
+    '    "key": "value"\n'
+    '  }\n'
+    '}\n'
+)
+
+
+def test_markdown_keeps_headings_and_first_section_line():
+    r = compact(MD_SAMPLE, path="README.md", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert r.skeletonized is True
+    assert "# Title" in r.text
+    assert "## Section" in r.text
+    assert "Intro line one." in r.text          # first line after heading kept
+    assert "Even more prose." not in r.text
+
+
+def test_structured_keeps_key_lines():
+    r = compact(JSON_SAMPLE, path="pkg.json", line_start=1,
+                ctx_lines=0, query_terms=["nested"], min_reduction=0.10)
+    assert '"name": "demo"' in r.text
+    assert '"nested"' in r.text                 # focus term line kept
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `python -m pytest tests/test_skeleton.py::test_markdown_keeps_headings_and_first_section_line -v`
+Expected: FAIL (markdown currently keeps all lines → `skeletonized is False`).
+
+- [ ] **Step 3: Write minimal implementation**
+
+In `skeleton.py`, add the classifiers and wire them into `classify_lines`:
+
+```python
+import re
+
+_STRUCT_LANGS = frozenset({"json", "yaml", "toml", "ini"})
+_HEADING_RE = re.compile(r"^\s{0,3}#{1,6}\s")
+_SECTION_RE = re.compile(r"^\s*\[.*\]\s*$")        # toml/ini section header
+_KEY_RE = re.compile(r"[:=]")                       # key/value introducer
+_BRACKET = {"{", "}", "[", "]", "{}", "[]", "},", "],"}
+
+
+def _classify_markdown(lines: list[str]) -> list[bool]:
+    keep = [False] * len(lines)
+    for i, line in enumerate(lines):
+        if _HEADING_RE.match(line):
+            keep[i] = True
+            # keep the first non-blank line of the section
+            for j in range(i + 1, len(lines)):
+                if lines[j].strip():
+                    keep[j] = True
+                    break
+    return keep
+
+
+def _classify_structured(lines: list[str]) -> list[bool]:
+    keep = [False] * len(lines)
+    for i, line in enumerate(lines):
+        s = line.strip()
+        if not s or s in _BRACKET or _SECTION_RE.match(s) or _KEY_RE.search(s):
+            keep[i] = True
+    return keep
+```
+
+Then modify `classify_lines` (replace the routing block from Task 2):
+
+```python
+def classify_lines(content: str, *, lang: str | None,
+                   query_terms: list[str], ctx_lines: int) -> list[bool]:
+    lines = content.split("\n")
+    keep: list[bool] | None = None
+    if lang in _CODE_LANGS:
+        keep = _classify_code(content, lines, lang)
+    elif lang == "markdown":
+        keep = _classify_markdown(lines)
+    elif lang in _STRUCT_LANGS:
+        keep = _classify_structured(lines)
+    if keep is None:
+        keep = [True] * len(lines)
+    _apply_focus(lines, keep, query_terms, ctx_lines)
+    return keep
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `python -m pytest tests/test_skeleton.py -v`
+Expected: PASS (all skeleton tests).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/codebase_index/retrieval/skeleton.py tests/test_skeleton.py
+git commit -m "$(cat <<'EOF'
+feat(skeleton): markdown heading + structured key classifiers
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 4: `make_compactor` factory (intent policy + query terms)
+
+**Files:**
+- Modify: `src/codebase_index/retrieval/skeleton.py`
+- Test: `tests/test_skeleton.py`
+
+**Interfaces:**
+- Consumes: `compact` (Task 2); `from .types import Candidate, Intent`.
+- Produces:
+  - `make_compactor(*, intent: Intent, query: str, enabled: bool, min_reduction: float) -> Callable[[Candidate], Compacted] | None` — returns `None` when `enabled is False`; otherwise a closure mapping a `Candidate` (reads `.content`, `.path`, `.line_start`) to `Compacted`, with `ctx_lines` resolved from `intent` (`0` for shape-first intents, else `2`) and `query` tokenized once.
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# tests/test_skeleton.py  (append)
+from codebase_index.retrieval.skeleton import make_compactor
+from codebase_index.retrieval.types import Candidate, Intent
+
+
+def _cand(content):
+    return Candidate(path="store.py", line_start=1, line_end=10,
+                     source="fts", score=1.0, content=content, token_est=99)
+
+
+def test_make_compactor_disabled_returns_none():
+    assert make_compactor(intent=Intent.KEYWORD, query="x",
+                           enabled=False, min_reduction=0.25) is None
+
+
+def test_make_compactor_shape_intent_uses_zero_context():
+    comp = make_compactor(intent=Intent.ARCHITECTURE, query="blocklist",
+                          enabled=True, min_reduction=0.25)
+    r = comp(_cand(PY_SAMPLE))
+    # ctx 0 => even a matched line's neighbours are not force-kept
+    assert r.skeletonized is True
+
+
+def test_make_compactor_locate_intent_keeps_matched_line():
+    comp = make_compactor(intent=Intent.LOCATE_IMPL, query="blocklist",
+                          enabled=True, min_reduction=0.25)
+    r = comp(_cand(PY_SAMPLE))
+    assert "self.blocklist.add(tok)" in r.text
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `python -m pytest tests/test_skeleton.py::test_make_compactor_disabled_returns_none -v`
+Expected: FAIL with `ImportError: cannot import name 'make_compactor'`.
+
+- [ ] **Step 3: Write minimal implementation**
+
+Append to `skeleton.py`:
+
+```python
+from typing import Callable, Optional
+
+from .types import Candidate, Intent
+
+# Shape-first intents want pure signatures (no context around matches).
+_SHAPE_INTENTS = frozenset({Intent.ARCHITECTURE, Intent.HOW_IT_WORKS, Intent.DATA_FLOW})
+_TERM_RE = re.compile(r"[A-Za-z0-9_]+")
+_STOPWORDS = frozenset({
+    "the", "a", "an", "is", "are", "how", "does", "do", "what", "where",
+    "which", "to", "of", "in", "on", "for", "and", "or", "with", "from",
+})
+
+
+def _query_terms(query: str) -> list[str]:
+    out: list[str] = []
+    for t in _TERM_RE.findall(query):
+        tl = t.lower()
+        if len(tl) >= 3 and tl not in _STOPWORDS:
+            out.append(tl)
+    return list(dict.fromkeys(out))
+
+
+def make_compactor(*, intent: Intent, query: str, enabled: bool,
+                   min_reduction: float) -> Optional[Callable[[Candidate], Compacted]]:
+    if not enabled:
+        return None
+    ctx_lines = 0 if intent in _SHAPE_INTENTS else 2
+    terms = _query_terms(query)
+
+    def _compact(c: Candidate) -> Compacted:
+        return compact(c.content or "", path=c.path, line_start=c.line_start,
+                       ctx_lines=ctx_lines, query_terms=terms,
+                       min_reduction=min_reduction)
+
+    return _compact
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `python -m pytest tests/test_skeleton.py -v`
+Expected: PASS (all skeleton tests).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/codebase_index/retrieval/skeleton.py tests/test_skeleton.py
+git commit -m "$(cat <<'EOF'
+feat(skeleton): make_compactor factory with intent->context policy
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 5: Inject the compactor into `apply_budget`
+
+**Files:**
+- Modify: `src/codebase_index/retrieval/budget.py`
+- Test: `tests/test_budget.py`
+
+**Interfaces:**
+- Consumes: `Compacted` and the `Callable[[Candidate], Compacted]` shape from Task 4.
+- Produces: `apply_budget(candidates, *, token_budget, compactor=None) -> tuple[list[dict], list[dict]]`. New per-result keys `skeletonized: bool` and `elided_lines: int` (always present; `False` / `0` when the result is not skeletonized); `token_est` reflects the compacted size. When `compactor is None` **or** a candidate's `Compacted.skeletonized is False`, the snippet text and `token_est` are identical to today (uses `c.content` / `c.token_est`); only the two new keys are added.
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# tests/test_budget.py  (append)
+from codebase_index.retrieval.skeleton import Compacted
+
+
+def test_compactor_lets_more_results_fit_budget():
+    cands = [_c(f"f{i}.py", 1, 50, "x" * 4000, 1000) for i in range(5)]
+
+    def fake_compactor(c):
+        return Compacted(text="sig\n... 49 lines elided (read 2-50)",
+                         token_est=10, elided_lines=49, skeletonized=True)
+
+    no_comp, _ = apply_budget(cands, token_budget=1500)
+    with_comp, _ = apply_budget(cands, token_budget=1500, compactor=fake_compactor)
+    fit_no = sum(1 for r in no_comp if r["snippet"] is not None)
+    fit_yes = sum(1 for r in with_comp if r["snippet"] is not None)
+    assert fit_yes > fit_no
+    assert all(r["skeletonized"] for r in with_comp if r["snippet"])
+    assert all(r["elided_lines"] == 49 for r in with_comp if r["snippet"])
+
+
+def test_compactor_output_is_still_redacted():
+    secret = "key = 'AKIAIOSFODNN7EXAMPLE'\nbody line\nbody line"
+    cand = _c("s.py", 1, 3, secret, 50)
+
+    def fake_compactor(c):
+        return Compacted(text=secret, token_est=50, elided_lines=0, skeletonized=True)
+
+    results, _ = apply_budget([cand], token_budget=1000, compactor=fake_compactor)
+    assert "AKIAIOSFODNN7EXAMPLE" not in results[0]["snippet"]
+
+
+def test_none_compactor_is_unchanged_behavior():
+    cands = [_c("a.py", 1, 5, "y" * 400, 100)]
+    results, _ = apply_budget(cands, token_budget=1000, compactor=None)
+    assert results[0]["skeletonized"] is False
+    assert results[0]["elided_lines"] == 0
+    assert results[0]["token_est"] == 100        # original, untouched
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `python -m pytest tests/test_budget.py::test_compactor_lets_more_results_fit_budget -v`
+Expected: FAIL with `TypeError: apply_budget() got an unexpected keyword argument 'compactor'`.
+
+- [ ] **Step 3: Write minimal implementation**
+
+Replace the body of `apply_budget` in `src/codebase_index/retrieval/budget.py`:
+
+```python
+from typing import Callable, Optional
+
+from ..output.redact import redact_snippet
+from .types import Candidate
+
+_MIN_USEFUL_TOKENS = 40
+
+
+def _meta(c: Candidate) -> dict:
+    return {
+        "path": c.path,
+        "line_start": c.line_start,
+        "line_end": c.line_end,
+        "symbols": [c.symbol] if c.symbol else [],
+        "score": round(c.score, 4),
+        "reason": c.reason if c.reason else c.source,
+        "token_est": c.token_est,
+    }
+
+
+def apply_budget(
+    candidates: list[Candidate],
+    *,
+    token_budget: int,
+    compactor: Optional[Callable[[Candidate], "object"]] = None,
+) -> tuple[list[dict], list[dict]]:
+    results: list[dict] = []
+    recommended: list[dict] = []
+    spent = 0
+
+    for rank, c in enumerate(candidates, start=1):
+        meta = _meta(c)
+        meta["rank"] = rank
+        meta["skeletonized"] = False
+        meta["elided_lines"] = 0
+
+        # Resolve the snippet text + cost. A compactor only changes anything
+        # when it returns a real skeleton; otherwise we keep today's raw path.
+        text = c.content
+        cost = c.token_est
+        if compactor is not None and c.content:
+            comp = compactor(c)
+            if getattr(comp, "skeletonized", False):
+                text = comp.text
+                cost = comp.token_est
+                meta["skeletonized"] = True
+                meta["elided_lines"] = comp.elided_lines
+
+        snippet = None
+        snippet_is_useful = False
+        if text and spent + cost <= token_budget:
+            snippet = redact_snippet(text)
+            spent += cost
+            meta["token_est"] = cost
+            snippet_is_useful = cost >= _MIN_USEFUL_TOKENS
+
+        if not snippet_is_useful:
+            recommended.append(
+                {"path": c.path, "line_start": c.line_start, "line_end": c.line_end}
+            )
+        meta["snippet"] = snippet
+        results.append(meta)
+
+    return results, recommended
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `python -m pytest tests/test_budget.py -v`
+Expected: PASS — existing tests (`test_snippets_stop_at_budget`, `test_secrets_are_redacted`, `test_metadata_always_present_even_when_budget_zero`) plus the three new ones.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/codebase_index/retrieval/budget.py tests/test_budget.py
+git commit -m "$(cat <<'EOF'
+feat(budget): inject snippet compactor; emit skeletonized/elided_lines
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 6: Build the compactor in the pipeline + config knobs
+
+**Files:**
+- Modify: `src/codebase_index/config.py:24-29` (`RetrievalConfig`)
+- Modify: `src/codebase_index/retrieval/pipeline.py` (`search`)
+- Test: `tests/test_config.py`, `tests/test_pipeline_search.py`
+
+**Interfaces:**
+- Consumes: `make_compactor` (Task 4), `apply_budget(..., compactor=...)` (Task 5), `plan.intent`.
+- Produces: `RetrievalConfig.compact_snippets: bool = True`, `RetrievalConfig.compact_min_reduction: float = 0.25`; `pipeline.search(..., compact: bool = True, compact_min_reduction: float = 0.25)`.
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# tests/test_config.py  (append)
+def test_retrieval_config_has_compaction_defaults():
+    from codebase_index.config import Config
+    cfg = Config()
+    assert cfg.retrieval.compact_snippets is True
+    assert cfg.retrieval.compact_min_reduction == 0.25
+
+
+def test_compaction_fields_do_not_change_config_hash():
+    from codebase_index.config import Config
+    base = Config()
+    h1 = base.config_hash()
+    base.retrieval.compact_snippets = False          # retrieval-time only
+    assert base.config_hash() == h1                  # no reindex triggered
+```
+
+```python
+# tests/test_pipeline_search.py  (append — mirrors existing search-pipeline tests there)
+def test_search_skeletonizes_by_default_and_raw_disables(tmp_path):
+    # Reuse the module's existing index fixture/helper to build a small repo and
+    # connection. Pseudocode for the assertion shape:
+    #   payload_default = search(conn, "blocklist revoke", mode="hybrid",
+    #                            limit=5, token_budget=1500, no_fallback=True)
+    #   payload_raw     = search(conn, "blocklist revoke", mode="hybrid",
+    #                            limit=5, token_budget=1500, no_fallback=True,
+    #                            compact=False)
+    #   assert any(r.get("skeletonized") for r in payload_default["results"])
+    #   assert all(not r.get("skeletonized") for r in payload_raw["results"])
+    ...
+```
+
+> Implementation note: `tests/test_pipeline_search.py` already constructs an indexed SQLite connection — follow its existing fixture (do not invent a new one). Fill the `...` with the two `search(...)` calls and the two assertions above.
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `python -m pytest tests/test_config.py::test_retrieval_config_has_compaction_defaults -v`
+Expected: FAIL with `AttributeError: 'RetrievalConfig' object has no attribute 'compact_snippets'`.
+
+- [ ] **Step 3: Write minimal implementation**
+
+In `src/codebase_index/config.py`, extend `RetrievalConfig`:
+
+```python
+class RetrievalConfig(BaseModel):
+    default_mode: Literal["hybrid", "fts", "symbol", "vector"] = "hybrid"
+    rrf_k: int = 60
+    token_budget: int = 1500
+    limit: int = 10
+    compact_snippets: bool = True
+    compact_min_reduction: float = 0.25
+```
+
+(`config_hash` already lists only indexing-relevant fields and does not include `retrieval`, so no change is needed there.)
+
+In `src/codebase_index/retrieval/pipeline.py`, update `search`'s signature and the budget call:
+
+```python
+def search(
+    conn: sqlite3.Connection,
+    query: str,
+    *,
+    mode: str,
+    limit: int,
+    token_budget: int,
+    no_fallback: bool,
+    backend=None,
+    root: Optional[Path] = None,
+    config: Optional[Config] = None,
+    offset: int = 0,
+    compact: bool = True,
+    compact_min_reduction: float = 0.25,
+) -> dict:
+```
+
+Replace the `apply_budget(...)` call (currently `pipeline.py:151`) with:
+
+```python
+    from .skeleton import make_compactor
+
+    compactor = make_compactor(
+        intent=plan.intent, query=query,
+        enabled=compact, min_reduction=compact_min_reduction,
+    )
+    all_results, all_recommended = apply_budget(
+        ranked, token_budget=scaled_budget, compactor=compactor
+    )
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `python -m pytest tests/test_config.py tests/test_pipeline_search.py -v`
+Expected: PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/codebase_index/config.py src/codebase_index/retrieval/pipeline.py tests/test_config.py tests/test_pipeline_search.py
+git commit -m "$(cat <<'EOF'
+feat(pipeline): build snippet compactor; add compact config knobs
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 7: Thread `--raw` / `raw` through service, CLI, and MCP
+
+**Files:**
+- Modify: `src/codebase_index/service.py:67-98` (`search_payload`)
+- Modify: `src/codebase_index/cli.py:371-410` (`search`), `:478-499` (`explain`)
+- Modify: `src/codebase_index/mcp/server.py:120-154` (`search_code`), `:236-264` (`explain_code`)
+- Test: `tests/test_cli.py`, `tests/test_mcp_server.py`
+
+**Interfaces:**
+- Consumes: `pipeline.search(..., compact=..., compact_min_reduction=...)` (Task 6); `cfg.retrieval.compact_snippets`, `cfg.retrieval.compact_min_reduction` (Task 6).
+- Produces: `service.search_payload(..., raw: bool = False)`; CLI `--raw` flag on `search`/`explain`; MCP `raw: bool = False` on `search_code`/`explain_code`. Effective enable = `cfg.retrieval.compact_snippets and not raw`.
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# tests/test_cli.py  (append — mirrors existing CliRunner tests in this file)
+def test_search_raw_flag_present():
+    from typer.testing import CliRunner
+    from codebase_index.cli import app
+    res = CliRunner().invoke(app, ["search", "--help"])
+    assert res.exit_code == 0
+    assert "--raw" in res.stdout
+```
+
+```python
+# tests/test_mcp_server.py  (append — mirrors existing tool-signature tests)
+def test_search_code_accepts_raw_param():
+    import inspect
+    from codebase_index.mcp import server
+    assert "raw" in inspect.signature(server.search_code.fn).parameters
+```
+
+> Implementation note: `tests/test_mcp_server.py` already accesses tool functions; if its existing tests use a different accessor than `.fn` for a FastMCP tool, copy that accessor here instead.
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `python -m pytest tests/test_cli.py::test_search_raw_flag_present -v`
+Expected: FAIL — `--raw` not in help output.
+
+- [ ] **Step 3: Write minimal implementation**
+
+**`service.py`** — add `raw` and compute the effective enable:
+
+```python
+def search_payload(
+    db_path: Path,
+    cfg: "Config",
+    query: str,
+    *,
+    mode: str = "hybrid",
+    limit: int = 10,
+    offset: int = 0,
+    token_budget: int = 1500,
+    no_fallback: bool = False,
+    backend: Any = None,
+    raw: bool = False,
+) -> dict:
+    from .retrieval.pipeline import search as run_search
+    from .storage.db import Database
+
+    compact = cfg.retrieval.compact_snippets and not raw
+    with Database(db_path) as db:
+        if backend is not None and getattr(backend, "enabled", False):
+            db.enable_vectors()
+        return run_search(
+            db.conn,
+            query,
+            mode=mode,
+            limit=limit,
+            offset=offset,
+            token_budget=token_budget,
+            no_fallback=no_fallback,
+            backend=backend,
+            root=Path(cfg.root),
+            config=cfg,
+            compact=compact,
+            compact_min_reduction=cfg.retrieval.compact_min_reduction,
+        )
+```
+
+**`cli.py`** — `search`: add the option and pass it through. Add after the `no_fallback` option (line 381):
+
+```python
+    raw: bool = typer.Option(
+        False, "--raw",
+        help="Disable snippet skeletonization; return full raw snippets.",
+    ),
+```
+
+and in the `search_payload(...)` call add `raw=raw,`. For `explain`, add the same `raw` option and pass `raw=raw,` to its `search_payload(...)` call.
+
+**`mcp/server.py`** — `search_code`: add `raw: bool = False,` to the signature (after `offset`), document it in the docstring (`raw: If true, return full raw snippets instead of skeletons.`), and pass `raw=raw,` into `search_payload(...)`. Do the same for `explain_code`.
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `python -m pytest tests/test_cli.py tests/test_mcp_server.py -v`
+Expected: PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/codebase_index/service.py src/codebase_index/cli.py src/codebase_index/mcp/server.py tests/test_cli.py tests/test_mcp_server.py
+git commit -m "$(cat <<'EOF'
+feat(cli,mcp): --raw / raw flag to disable snippet skeletonization
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 8: Document the contract — SKILL.md + CHANGELOG
+
+**Files:**
+- Modify: `skill/SKILL.md` (the "Token-budgeted output interpretation" section, ~lines 98-128)
+- Modify: `CHANGELOG.md` (top, under a new Unreleased/next-minor heading)
+
+**Interfaces:**
+- Consumes: the `skeletonized` / `elided_lines` fields (Task 5) and the `--raw` flag (Task 7).
+- Produces: documentation only.
+
+- [ ] **Step 1: Update SKILL.md**
+
+In the `## Token-budgeted output interpretation` list, after the `snippet` bullet (`SKILL.md:108`), add:
+
+```markdown
+- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class
+  lines and the line(s) matching your query are kept; function bodies are collapsed to a marker
+  like `... 47 lines elided (read 88-134)`. Read that line range (or the result's
+  `line_start`/`line_end`) when you need a full body.
+- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
+```
+
+In `## Token efficiency rules`, add a bullet:
+
+```markdown
+- Snippets are skeletonized by default to fit more results in the budget. The matched line is
+  always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full
+  bodies inline instead of reading the cited line range.
+```
+
+- [ ] **Step 2: Update CHANGELOG.md**
+
+Add at the top of the changelog (under the standard next-version heading used by this repo):
+
+```markdown
+### Added
+- **Snippet skeletonization & content-aware rendering.** Search/explain snippets are now focus
+  skeletons — signatures and the query-matching line are kept while function bodies collapse to a
+  `... N lines elided (read A-B)` marker — so more ranked results fit the same token budget.
+  Content-aware (code via tree-sitter, markdown headings, structured-config keys), reversible via
+  `recommended_reads`, and safe (raw fallback on any parse miss). New `skeletonized` /
+  `elided_lines` result fields; new `retrieval.compact_snippets` / `retrieval.compact_min_reduction`
+  config knobs (no reindex); disable per-call with `--raw` (CLI) or `raw: true` (MCP).
+```
+
+- [ ] **Step 3: Verify docs render and reference real fields**
+
+Run: `python -m pytest tests/test_plugin_skill_parity.py -v`
+Expected: PASS (skill text stays consistent across installed targets; if this test pins skill content, update its fixture as it directs).
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add skill/SKILL.md CHANGELOG.md
+git commit -m "$(cat <<'EOF'
+docs: document snippet skeletonization fields and --raw flag
+
+Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+### Task 9: Full-suite verification
+
+**Files:** none (verification only).
+
+- [ ] **Step 1: Run the whole test suite**
+
+Run: `python -m pytest -q`
+Expected: all pass (no regressions in `test_budget`, `test_pipeline_search`, `test_cli`, `test_mcp_server`, `test_output`, golden tests).
+
+- [ ] **Step 2: Run the linters/type-checks the repo uses**
+
+Run: `python -m ruff check src tests && python -m mypy src`
+Expected: clean (match the repo's CI; fix any new findings in the touched files).
+
+- [ ] **Step 3: Manual smoke check**
+
+Run:
+```bash
+python -m codebase_index index
+python -m codebase_index search "apply budget snippet" --json
+python -m codebase_index search "apply budget snippet" --json --raw
+```
+Expected: the first JSON shows `"skeletonized": true` on at least one code result, with
+`elided_lines` greater than 0 and a `... N lines elided (read A-B)` marker in its snippet; the `--raw` run
+shows `"skeletonized": false` everywhere with full snippet bodies.
+
+---
+
+## Self-Review
+
+**1. Spec coverage**
+- §4.1 core abstraction (`render_skeleton`, `classify_lines`, `compact`, `Compacted`) → Tasks 1, 2.
+- §4.2 content-aware classifiers (code/markdown/structured/other) → Tasks 2, 3.
+- §4.3 policy & focus (intent→ctx, focus invariant, savings guard) → Tasks 2, 4.
+- §4.4 budget integration (`compactor` DI, reduced cost → more snippets) → Task 5.
+- §4.5 output fields (`skeletonized`, `elided_lines`, compacted `token_est`) → Task 5.
+- §4.6 surface (CLI `--raw`, MCP `raw`, config knobs, SKILL.md) → Tasks 6, 7, 8. *(architecture scoped out — no snippets; noted in File Structure.)*
+- §5 error handling (never raises, fallback chain, preserve bias, focus invariant, redact order, determinism) → Tasks 2, 5; tests in Task 2.
+- §7 testing → Tasks 1-9. §8 backward compat (`--raw`/`compact=False` identical) → Task 5 `test_none_compactor_is_unchanged_behavior`, Task 6 raw assertion.
+
+**2. Placeholder scan** — the only `...` literals are: (a) the documented Python `def f(): pass`/marker strings, and (b) `tests/test_pipeline_search.py` Step 1, which is explicitly flagged as "fill using the file's existing index fixture" with the exact two calls + assertions to insert. No "TBD/handle edge cases/add validation" placeholders.
+
+**3. Type consistency** — `compact(...)` is defined with `ctx_lines: int` in Task 2 and always called with `ctx_lines=` (Tasks 2, 4); `make_compactor(...)` signature matches its callers (Tasks 4, 6); `apply_budget(..., compactor=None)` matches Tasks 5, 6; `Compacted` fields (`text`, `token_est`, `elided_lines`, `skeletonized`) are read identically in Tasks 4, 5. `search_payload(..., raw=False)` matches CLI/MCP callers (Task 7). Intent members (`ARCHITECTURE`, `HOW_IT_WORKS`, `DATA_FLOW`, `LOCATE_IMPL`, `KEYWORD`) exist in `retrieval/types.py:Intent`.

From 384a1f42654119f6bfbdfd6655f69ea3fad052b2 Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 08:47:35 +0300
Subject: [PATCH 03/11] feat(skeleton): render_skeleton collapses keep/elide
 mask into markers

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/retrieval/skeleton.py | 46 ++++++++++++++++++++++++
 tests/test_skeleton.py                   | 24 +++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 src/codebase_index/retrieval/skeleton.py
 create mode 100644 tests/test_skeleton.py

diff --git a/src/codebase_index/retrieval/skeleton.py b/src/codebase_index/retrieval/skeleton.py
new file mode 100644
index 0000000..3ce9a3e
--- /dev/null
+++ b/src/codebase_index/retrieval/skeleton.py
@@ -0,0 +1,46 @@
+"""Retrieval-time snippet skeletonization (line-granularity StructureMask).
+
+Turns a raw code/text snippet into a compact skeleton: signature/structural
+lines are kept, function bodies (and other compressible runs) collapse into a
+marker that points at the absolute line range to read for the full body. A
+line-granularity port of headroom's StructureMask, adapted for a retrieval
+system: the query-matching line is always preserved, routing is by file
+extension, and the transform never makes output worse than the raw snippet.
+"""
+
+from __future__ import annotations
+
+from ..parsers.line_chunker import estimate_tokens
+
+
+def render_skeleton(
+    content: str, keep: list[bool], *, line_start: int
+) -> tuple[str, int]:
+    """Collapse consecutive ``keep=False`` lines into one elision marker.
+
+    ``line_start`` is the absolute file line number of ``content``'s first line,
+    so markers cite the real range to ``Read``. Returns (text, elided_count).
+    """
+    lines = content.split("\n")
+    if len(keep) != len(lines):
+        # Defensive: mask/line mismatch must never corrupt output.
+        return content, 0
+
+    out: list[str] = []
+    elided_total = 0
+    i = 0
+    n = len(lines)
+    while i < n:
+        if keep[i]:
+            out.append(lines[i])
+            i += 1
+            continue
+        run_start = i
+        while i < n and not keep[i]:
+            i += 1
+        run_len = i - run_start
+        elided_total += run_len
+        a = line_start + run_start
+        b = line_start + i - 1
+        out.append(f"... {run_len} lines elided (read {a}-{b})")
+    return "\n".join(out), elided_total
diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py
new file mode 100644
index 0000000..de51aed
--- /dev/null
+++ b/tests/test_skeleton.py
@@ -0,0 +1,24 @@
+from codebase_index.retrieval.skeleton import render_skeleton
+
+
+def test_render_collapses_elided_run_with_absolute_lines():
+    content = "def f():\n    a = 1\n    b = 2\n    return a + b"
+    keep = [True, False, False, False]
+    text, elided = render_skeleton(content, keep, line_start=10)
+    assert text == "def f():\n... 3 lines elided (read 11-13)"
+    assert elided == 3
+
+
+def test_render_all_keep_is_unchanged():
+    content = "a\nb\nc"
+    text, elided = render_skeleton(content, [True, True, True], line_start=1)
+    assert text == content
+    assert elided == 0
+
+
+def test_render_merges_adjacent_runs_but_keeps_separated_ones():
+    content = "h1\nx\nh2\ny\nz"
+    keep = [True, False, True, False, False]
+    text, elided = render_skeleton(content, keep, line_start=1)
+    assert text == "h1\n... 1 lines elided (read 2-2)\nh2\n... 2 lines elided (read 4-5)"
+    assert elided == 3

From 1bff1e2dd58fb1dfc369477e2d81dc5fccbb7a34 Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 08:50:12 +0300
Subject: [PATCH 04/11] feat(skeleton): code classifier + compact() with focus,
 guard, raw fallback

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/retrieval/skeleton.py | 136 +++++++++++++++++++++++
 tests/test_skeleton.py                   |  72 ++++++++++++
 2 files changed, 208 insertions(+)

diff --git a/src/codebase_index/retrieval/skeleton.py b/src/codebase_index/retrieval/skeleton.py
index 3ce9a3e..b4216f8 100644
--- a/src/codebase_index/retrieval/skeleton.py
+++ b/src/codebase_index/retrieval/skeleton.py
@@ -10,6 +10,8 @@
 
 from __future__ import annotations
 
+from dataclasses import dataclass
+
 from ..parsers.line_chunker import estimate_tokens
 
 
@@ -44,3 +46,137 @@ def render_skeleton(
         b = line_start + i - 1
         out.append(f"... {run_len} lines elided (read {a}-{b})")
     return "\n".join(out), elided_total
+
+
+# Languages we skeletonize via tree-sitter signatures. Mirrors
+# discovery.classify._TREE_SITTER_LANGS (kept local to avoid a private import).
+_CODE_LANGS = frozenset({
+    "python", "typescript", "javascript", "go", "java", "rust",
+    "c", "cpp", "csharp", "ruby", "php", "kotlin", "lua",
+})
+# Languages whose body opens at a line containing '{' vs. one ending in ':'.
+_BRACE_LANGS = frozenset({
+    "typescript", "javascript", "go", "java", "rust",
+    "c", "cpp", "csharp", "php", "kotlin",
+})
+_MAX_SIG_SCAN = 5  # bound the multi-line-signature lookahead
+
+
+@dataclass
+class Compacted:
+    text: str           # snippet text: the skeleton, or the raw content on fallback
+    token_est: int      # estimate_tokens() of ``text``
+    elided_lines: int   # source lines collapsed into elision markers (0 when raw)
+    skeletonized: bool  # True when compact() produced a skeleton, False when raw
+
+
+def _raw(content: str) -> Compacted:  # fallback result: text untouched, nothing elided
+    return Compacted(text=content, token_est=estimate_tokens(content),
+                     elided_lines=0, skeletonized=False)
+
+
+def _signature_end(lines: list[str], start: int, lang: str | None, end: int) -> int:
+    """Return the 0-based index of the line that opens the body of a def.
+
+    Checks ``start`` plus up to ``_MAX_SIG_SCAN`` more lines (never past
+    ``end``); falls back to ``start`` when no line opens a body.
+    """
+    braces = lang in _BRACE_LANGS
+    last = min(end, start + _MAX_SIG_SCAN)
+    for idx in range(start, last + 1):
+        text = lines[idx].strip()
+        opens_body = "{" in text if braces else text.endswith(":")
+        if opens_body:
+            return idx
+    return start
+
+
+def _classify_code(content: str, lines: list[str], lang: str) -> list[bool] | None:
+    """Build the keep-mask for a code chunk from its tree-sitter symbols.
+
+    Function/method bodies are elided; all else (incl. every signature) stays.
+    Returns None on a parse failure or when no symbols are found.
+    """
+    from ..parsers.treesitter import parse_file
+
+    try:
+        parsed = parse_file(lang, content)
+    except Exception:
+        return None
+    symbols = parsed.symbols
+    if not symbols:
+        return None
+
+    total = len(lines)
+    # (is_callable, first line, last signature line, last line), all 0-based,
+    # for each symbol that starts inside this chunk.
+    spans = []
+    for sym in symbols:
+        first = sym.line_start - 1
+        if 0 <= first < total:
+            last = min(sym.line_end - 1, total - 1)
+            sig = _signature_end(lines, first, lang, last)
+            spans.append((sym.kind in ("function", "method"), first, sig, last))
+
+    keep = [True] * total
+    # Elide the interior of every callable body first ...
+    for is_callable, _, sig, last in spans:
+        if is_callable:
+            for i in range(sig + 1, last + 1):
+                keep[i] = False
+    # ... then restore every symbol's signature so nested defs stay visible.
+    for _, first, sig, _ in spans:
+        for i in range(first, sig + 1):
+            keep[i] = True
+    return keep
+
+
+def _apply_focus(lines: list[str], keep: list[bool],
+                 query_terms: list[str], ctx_lines: int) -> None:
+    """Force-keep any line containing a query term, plus +/- ctx_lines."""
+    terms = [t for t in query_terms if t]  # "" is in every line; drop it
+    if not terms:
+        return
+    n = len(lines)
+    for i, low in enumerate(map(str.lower, lines)):
+        if any(t in low for t in terms):
+            for j in range(max(0, i - ctx_lines), min(n, i + ctx_lines + 1)):
+                keep[j] = True
+
+
+def classify_lines(content: str, *, lang: str | None,
+                   query_terms: list[str], ctx_lines: int) -> list[bool]:
+    lines = content.split("\n")
+    keep: list[bool] | None = None
+    if lang in _CODE_LANGS:
+        keep = _classify_code(content, lines, lang)
+    if keep is None:
+        keep = [True] * len(lines)        # unknown / parse miss -> keep all (raw)
+    _apply_focus(lines, keep, query_terms, ctx_lines)
+    return keep
+
+
+def compact(content: str, *, path: str, line_start: int, ctx_lines: int,
+            query_terms: list[str], min_reduction: float) -> Compacted:
+    """Skeletonize ``content``; return it raw on any miss, weak win, or error."""
+    if not content.strip():
+        return _raw(content)
+    try:
+        from ..discovery.classify import detect_language
+        lowered = [term.lower() for term in query_terms]
+        keep = classify_lines(content, lang=detect_language(path),
+                              query_terms=lowered, ctx_lines=ctx_lines)
+        if all(keep):
+            return _raw(content)
+        text, elided = render_skeleton(content, keep, line_start=line_start)
+        if elided == 0:
+            return _raw(content)
+        new_tok = estimate_tokens(text)
+        # Savings guard: the skeleton must be >= min_reduction cheaper than raw.
+        ceiling = estimate_tokens(content) * (1.0 - min_reduction)
+        if new_tok > ceiling:
+            return _raw(content)
+        return Compacted(text=text, token_est=new_tok,
+                         elided_lines=elided, skeletonized=True)
+    except Exception:
+        return _raw(content)
diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py
index de51aed..b5a3e28 100644
--- a/tests/test_skeleton.py
+++ b/tests/test_skeleton.py
@@ -22,3 +22,75 @@ def test_render_merges_adjacent_runs_but_keeps_separated_ones():
     text, elided = render_skeleton(content, keep, line_start=1)
     assert text == "h1\n... 1 lines elided (read 2-2)\nh2\n... 2 lines elided (read 4-5)"
     assert elided == 3
+
+
+from codebase_index.retrieval.skeleton import Compacted, compact  # noqa: E402
+
+
+PY_SAMPLE = (
+    "import os\n"
+    "\n"
+    "class Store:\n"
+    "    def refresh(self, tok):\n"
+    "        decoded = decode(tok)\n"
+    "        validate(decoded)\n"
+    "        return decoded\n"
+    "    def revoke(self, tok):\n"
+    "        self.blocklist.add(tok)\n"
+    "        log('revoked')\n"
+)
+
+
+def estimate_tokens_helper(text):
+    from codebase_index.parsers.line_chunker import estimate_tokens
+    return estimate_tokens(text)
+
+
+def test_code_skeleton_keeps_signatures_and_elides_bodies():
+    r = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert r.skeletonized is True
+    assert "def refresh(self, tok):" in r.text
+    assert "def revoke(self, tok):" in r.text
+    assert "class Store:" in r.text
+    assert "import os" in r.text
+    assert "decoded = decode(tok)" not in r.text   # body elided
+    assert r.elided_lines >= 3
+    assert r.token_est < estimate_tokens_helper(PY_SAMPLE)
+
+
+def test_focus_keeps_matched_body_line_and_context():
+    # Low threshold isolates the focus behaviour: on this tiny sample only the
+    # unrelated `refresh` body is elided (the matched `revoke` body is kept by
+    # focus), a sub-25% win the guard would otherwise reject. The 25% guard
+    # itself is covered by test_savings_guard_returns_raw_when_not_enough_win.
+    r = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=1, query_terms=["blocklist"], min_reduction=0.10)
+    assert "self.blocklist.add(tok)" in r.text       # matched line preserved
+    assert "decoded = decode(tok)" not in r.text      # unrelated body still elided
+
+
+def test_unparseable_or_unknown_type_falls_back_to_raw():
+    blob = "%%% not code %%%\n@@@@@\n!!!!!"
+    r = compact(blob, path="notes.bin", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert r.skeletonized is False
+    assert r.text == blob
+    assert r.elided_lines == 0
+
+
+def test_savings_guard_returns_raw_when_not_enough_win():
+    # Bodies *can* be elided here, but never by 99%: the guard must reject it.
+    r = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.99)
+    assert r.skeletonized is False
+    assert r.text == PY_SAMPLE
+
+
+def test_compact_is_deterministic():
+    a = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    b = compact(PY_SAMPLE, path="store.py", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert (a.text, a.token_est, a.elided_lines, a.skeletonized) == \
+           (b.text, b.token_est, b.elided_lines, b.skeletonized)

From 54768b3d547ff908dd274d7e8717986ea23a5cee Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 08:54:45 +0300
Subject: [PATCH 05/11] feat(skeleton): markdown heading + structured key
 classifiers

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/retrieval/skeleton.py | 34 +++++++++++++++++++++
 tests/test_skeleton.py                   | 38 ++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/src/codebase_index/retrieval/skeleton.py b/src/codebase_index/retrieval/skeleton.py
index b4216f8..fdbc261 100644
--- a/src/codebase_index/retrieval/skeleton.py
+++ b/src/codebase_index/retrieval/skeleton.py
@@ -10,6 +10,7 @@
 
 from __future__ import annotations
 
+import re
 from dataclasses import dataclass
 
 from ..parsers.line_chunker import estimate_tokens
@@ -144,12 +145,45 @@ def _apply_focus(lines: list[str], keep: list[bool],
                 keep[j] = True
 
 
+_STRUCT_LANGS = frozenset({"json", "yaml", "toml", "ini"})
+_HEADING_RE = re.compile(r"^\s{0,3}#{1,6}\s")
+_SECTION_RE = re.compile(r"^\s*\[.*\]\s*$")        # toml/ini section header
+_KEY_RE = re.compile(r"[:=]")                       # key/value introducer
+_BRACKET = {"{", "}", "[", "]", "{}", "[]", "},", "],"}
+
+
+def _classify_markdown(lines: list[str]) -> list[bool]:
+    """Keep headings and the first non-blank line after each heading."""
+    heads = [i for i, line in enumerate(lines) if _HEADING_RE.match(line)]
+    keep = [not heads] * len(lines)  # no heading: keep all, never elide whole
+    for i in heads:
+        keep[i] = True
+        for j in range(i + 1, len(lines)):
+            if lines[j].strip():
+                keep[j] = True
+                break
+    return keep
+
+
+def _classify_structured(lines: list[str]) -> list[bool]:
+    """Keep blank, bracket-only, section-header and key/value lines; drop the rest."""
+    stripped = [line.strip() for line in lines]
+    return [
+        not s or s in _BRACKET or bool(_SECTION_RE.match(s) or _KEY_RE.search(s))
+        for s in stripped
+    ]
+
+
 def classify_lines(content: str, *, lang: str | None,
                    query_terms: list[str], ctx_lines: int) -> list[bool]:
     lines = content.split("\n")
     keep: list[bool] | None = None
     if lang in _CODE_LANGS:
         keep = _classify_code(content, lines, lang)
+    elif lang == "markdown":
+        keep = _classify_markdown(lines)
+    elif lang in _STRUCT_LANGS:
+        keep = _classify_structured(lines)
     if keep is None:
         keep = [True] * len(lines)        # unknown / parse miss -> keep all (raw)
     _apply_focus(lines, keep, query_terms, ctx_lines)
diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py
index b5a3e28..88efdad 100644
--- a/tests/test_skeleton.py
+++ b/tests/test_skeleton.py
@@ -94,3 +94,41 @@ def test_compact_is_deterministic():
                 ctx_lines=0, query_terms=[], min_reduction=0.25)
     assert (a.text, a.token_est, a.elided_lines, a.skeletonized) == \
            (b.text, b.token_est, b.elided_lines, b.skeletonized)
+
+
+MD_SAMPLE = (
+    "# Title\n"
+    "Intro line one.\n"
+    "More prose that is not structural and should be dropped.\n"
+    "Even more prose.\n"
+    "## Section\n"
+    "Section body line.\n"
+    "Trailing prose to elide here too.\n"
+)
+
+JSON_SAMPLE = (
+    '{\n'
+    '  "name": "demo",\n'
+    '  "description": "a long value that is mostly prose and can be elided away",\n'
+    '  "nested": {\n'
+    '    "key": "value"\n'
+    '  }\n'
+    '}\n'
+)
+
+
+def test_markdown_keeps_headings_and_first_section_line():
+    r = compact(MD_SAMPLE, path="README.md", line_start=1,
+                ctx_lines=0, query_terms=[], min_reduction=0.25)
+    assert r.skeletonized is True
+    assert "# Title" in r.text
+    assert "## Section" in r.text
+    assert "Intro line one." in r.text          # first line after heading kept
+    assert "Even more prose." not in r.text
+
+
+def test_structured_keeps_key_lines():
+    r = compact(JSON_SAMPLE, path="pkg.json", line_start=1,
+                ctx_lines=0, query_terms=["nested"], min_reduction=0.10)
+    assert '"name": "demo"' in r.text
+    assert '"nested"' in r.text                 # focus term line kept

From 8715ad13097b72a0ce769908bbf54cb408a5e924 Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 08:59:48 +0300
Subject: [PATCH 06/11] feat(skeleton): make_compactor factory with
 intent->context policy

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/retrieval/skeleton.py | 35 ++++++++++++++++++++++++
 tests/test_skeleton.py                   | 35 ++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/src/codebase_index/retrieval/skeleton.py b/src/codebase_index/retrieval/skeleton.py
index fdbc261..54763c9 100644
--- a/src/codebase_index/retrieval/skeleton.py
+++ b/src/codebase_index/retrieval/skeleton.py
@@ -12,8 +12,10 @@
 
 import re
 from dataclasses import dataclass
+from typing import Callable, Optional
 
 from ..parsers.line_chunker import estimate_tokens
+from .types import Candidate, Intent
 
 
 def render_skeleton(
@@ -214,3 +216,36 @@ def compact(content: str, *, path: str, line_start: int, ctx_lines: int,
                          elided_lines=elided, skeletonized=True)
     except Exception:
         return _raw(content)
+
+
+# Shape-first intents want pure signatures (no context around matches).
+_SHAPE_INTENTS = frozenset({Intent.ARCHITECTURE, Intent.HOW_IT_WORKS, Intent.DATA_FLOW})
+_TERM_RE = re.compile(r"[A-Za-z0-9_]+")
+_STOPWORDS = frozenset({
+    "the", "a", "an", "is", "are", "how", "does", "do", "what", "where",
+    "which", "to", "of", "in", "on", "for", "and", "or", "with", "from",
+})
+
+
+def _query_terms(query: str) -> list[str]:
+    """Lowercased query tokens (>= 3 chars, no stopwords), first-seen order."""
+    seen: dict[str, None] = {}
+    for token in map(str.lower, _TERM_RE.findall(query)):
+        if len(token) >= 3 and token not in _STOPWORDS:
+            seen.setdefault(token, None)
+    return list(seen)
+
+
+def make_compactor(*, intent: Intent, query: str, enabled: bool,
+                   min_reduction: float) -> Optional[Callable[[Candidate], Compacted]]:
+    if not enabled:
+        return None  # disabled: no compactor is built
+    ctx_lines = 0 if intent in _SHAPE_INTENTS else 2  # shape intents: no context
+    terms = _query_terms(query)  # tokenized once, reused for every candidate
+
+    def _compact(c: Candidate) -> Compacted:
+        return compact(c.content or "", path=c.path, line_start=c.line_start,
+                       ctx_lines=ctx_lines, query_terms=terms,
+                       min_reduction=min_reduction)
+
+    return _compact
diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py
index 88efdad..01da537 100644
--- a/tests/test_skeleton.py
+++ b/tests/test_skeleton.py
@@ -132,3 +132,38 @@ def test_structured_keeps_key_lines():
                 ctx_lines=0, query_terms=["nested"], min_reduction=0.10)
     assert '"name": "demo"' in r.text
     assert '"nested"' in r.text                 # focus term line kept
+
+
+from codebase_index.retrieval.skeleton import make_compactor  # noqa: E402
+from codebase_index.retrieval.types import Candidate, Intent  # noqa: E402
+
+
+def _cand(content):
+    return Candidate(path="store.py", line_start=1, line_end=10,
+                     source="fts", score=1.0, content=content, token_est=99)
+
+
+def test_make_compactor_disabled_returns_none():
+    assert make_compactor(intent=Intent.KEYWORD, query="x",
+                           enabled=False, min_reduction=0.25) is None
+
+
+def test_make_compactor_shape_intent_uses_zero_context():
+    # min_reduction=0.0 isolates the ctx policy from the savings guard: a shape
+    # intent uses ctx 0, so the matched line is kept but its neighbour is not.
+    comp = make_compactor(intent=Intent.ARCHITECTURE, query="blocklist",
+                          enabled=True, min_reduction=0.0)
+    r = comp(_cand(PY_SAMPLE))
+    assert r.skeletonized is True
+    assert "self.blocklist.add(tok)" in r.text       # matched line kept
+    assert "log('revoked')" not in r.text             # neighbour elided (ctx 0)
+
+
+def test_make_compactor_locate_intent_keeps_matched_line_and_context():
+    # A locate intent uses ctx 2, so the matched line AND its neighbour stay.
+    comp = make_compactor(intent=Intent.LOCATE_IMPL, query="blocklist",
+                          enabled=True, min_reduction=0.0)
+    r = comp(_cand(PY_SAMPLE))
+    assert "self.blocklist.add(tok)" in r.text        # matched line kept
+    assert "log('revoked')" in r.text                  # neighbour kept (ctx 2)
+    assert "decoded = decode(tok)" not in r.text       # unrelated body elided

From 4fb5164a372af92dc921748ae634e497fb1700ae Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 09:03:47 +0300
Subject: [PATCH 07/11] feat(budget): inject snippet compactor; emit
 skeletonized/elided_lines

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/retrieval/budget.py | 33 ++++++++++++++++++----
 tests/test_budget.py                   | 38 ++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/src/codebase_index/retrieval/budget.py b/src/codebase_index/retrieval/budget.py
index ec50e63..d832709 100644
--- a/src/codebase_index/retrieval/budget.py
+++ b/src/codebase_index/retrieval/budget.py
@@ -12,6 +12,8 @@
 
 from __future__ import annotations
 
+from typing import Callable, Optional
+
 from ..output.redact import redact_snippet
 from .types import Candidate
 
@@ -33,7 +35,10 @@ def _meta(c: Candidate) -> dict:
 
 
 def apply_budget(
-    candidates: list[Candidate], *, token_budget: int
+    candidates: list[Candidate],
+    *,
+    token_budget: int,
+    compactor: Optional[Callable[[Candidate], "object"]] = None,
 ) -> tuple[list[dict], list[dict]]:
     results: list[dict] = []
     recommended: list[dict] = []
@@ -42,13 +47,29 @@ def apply_budget(
     for rank, c in enumerate(candidates, start=1):
         meta = _meta(c)
         meta["rank"] = rank
+        meta["skeletonized"] = False
+        meta["elided_lines"] = 0
+
+        # Resolve the snippet text + cost. A compactor only changes anything
+        # when it returns a real skeleton; otherwise we keep today's raw path
+        # byte-for-byte (uses c.content / c.token_est).
+        text = c.content
+        cost = c.token_est
+        if compactor is not None and c.content:
+            comp = compactor(c)
+            if getattr(comp, "skeletonized", False):
+                text = comp.text
+                cost = comp.token_est
+                meta["skeletonized"] = True
+                meta["elided_lines"] = comp.elided_lines
+
         snippet = None
         snippet_is_useful = False
-
-        if c.content and spent + c.token_est <= token_budget:
-            snippet = redact_snippet(c.content)
-            spent += c.token_est
-            snippet_is_useful = c.token_est >= _MIN_USEFUL_TOKENS
+        if text and spent + cost <= token_budget:
+            snippet = redact_snippet(text)
+            spent += cost
+            meta["token_est"] = cost
+            snippet_is_useful = cost >= _MIN_USEFUL_TOKENS
 
         if not snippet_is_useful:
             recommended.append(
diff --git a/tests/test_budget.py b/tests/test_budget.py
index 7bb6100..52e258e 100644
--- a/tests/test_budget.py
+++ b/tests/test_budget.py
@@ -28,3 +28,41 @@ def test_metadata_always_present_even_when_budget_zero():
     cands = [_c("a.py", 1, 2, "content", 50)]
     results, recommended = apply_budget(cands, token_budget=0)
     assert results[0]["path"] == "a.py" and results[0]["snippet"] is None
+
+
+from codebase_index.retrieval.skeleton import Compacted
+
+
+def test_compactor_lets_more_results_fit_budget():
+    cands = [_c(f"f{i}.py", 1, 50, "x" * 4000, 1000) for i in range(5)]
+
+    def fake_compactor(c):
+        return Compacted(text="sig\n... 49 lines elided (read 2-50)",
+                         token_est=10, elided_lines=49, skeletonized=True)
+
+    no_comp, _ = apply_budget(cands, token_budget=1500)
+    with_comp, _ = apply_budget(cands, token_budget=1500, compactor=fake_compactor)
+    fit_no = sum(1 for r in no_comp if r["snippet"] is not None)
+    fit_yes = sum(1 for r in with_comp if r["snippet"] is not None)
+    assert fit_yes > fit_no
+    assert all(r["skeletonized"] for r in with_comp if r["snippet"])
+    assert all(r["elided_lines"] == 49 for r in with_comp if r["snippet"])
+
+
+def test_compactor_output_is_still_redacted():
+    secret = "key = 'AKIAIOSFODNN7EXAMPLE'\nbody line\nbody line"
+    cand = _c("s.py", 1, 3, secret, 50)
+
+    def fake_compactor(c):
+        return Compacted(text=secret, token_est=50, elided_lines=0, skeletonized=True)
+
+    results, _ = apply_budget([cand], token_budget=1000, compactor=fake_compactor)
+    assert "AKIAIOSFODNN7EXAMPLE" not in results[0]["snippet"]
+
+
+def test_none_compactor_is_unchanged_behavior():
+    cands = [_c("a.py", 1, 5, "y" * 400, 100)]
+    results, _ = apply_budget(cands, token_budget=1000, compactor=None)
+    assert results[0]["skeletonized"] is False
+    assert results[0]["elided_lines"] == 0
+    assert results[0]["token_est"] == 100        # original, untouched

From adc92738d1f564f94f5d327a38f4a194766d4f73 Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 09:08:14 +0300
Subject: [PATCH 08/11] feat(pipeline): build snippet compactor; add compact
 config knobs

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/config.py             |  2 ++
 src/codebase_index/retrieval/pipeline.py | 12 +++++++++++-
 tests/conftest.py                        | 24 ++++++++++++++++++++++++
 tests/test_config.py                     | 15 +++++++++++++++
 tests/test_pipeline_search.py            | 18 ++++++++++++++++++
 5 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/codebase_index/config.py b/src/codebase_index/config.py
index 6e41b3b..217cad7 100644
--- a/src/codebase_index/config.py
+++ b/src/codebase_index/config.py
@@ -26,6 +26,8 @@ class RetrievalConfig(BaseModel):
     rrf_k: int = 60
     token_budget: int = 1500
     limit: int = 10
+    compact_snippets: bool = True
+    compact_min_reduction: float = 0.25
 
 
 class EmbeddingsConfig(BaseModel):
diff --git a/src/codebase_index/retrieval/pipeline.py b/src/codebase_index/retrieval/pipeline.py
index 8a82096..d270e30 100644
--- a/src/codebase_index/retrieval/pipeline.py
+++ b/src/codebase_index/retrieval/pipeline.py
@@ -134,6 +134,8 @@ def search(
     root: Optional[Path] = None,
     config: Optional[Config] = None,
     offset: int = 0,
+    compact: bool = True,
+    compact_min_reduction: float = 0.25,
 ) -> dict:
     plan = detect_intent(query)
     if token_budget <= 0:
@@ -148,7 +150,15 @@ def search(
     confidence = _confidence(ranked)
     # Scale budget proportionally so later pages receive snippet coverage.
     scaled_budget = token_budget * fetch_limit // max(limit, 1) if offset > 0 else token_budget
-    all_results, all_recommended = apply_budget(ranked, token_budget=scaled_budget)
+    from .skeleton import make_compactor
+
+    compactor = make_compactor(
+        intent=plan.intent, query=query,
+        enabled=compact, min_reduction=compact_min_reduction,
+    )
+    all_results, all_recommended = apply_budget(
+        ranked, token_budget=scaled_budget, compactor=compactor
+    )
 
     # Paginate: slice results and filter recommended_reads to the current page.
     paginated = all_results[offset:offset + limit]
diff --git a/tests/conftest.py b/tests/conftest.py
index 5b2ef0e..5929df0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -97,6 +97,30 @@ def seeded_index(tmp_path) -> Database:
     _insert_symbol(conn, gen, name="Token", kind="type", line_start=1, line_end=2,
                    signature="type Token")
 
+    # A large, parseable Python body so skeletonization has something to elide
+    # (the other fixtures are too small to cross the savings guard). Queried by
+    # its unique name in tests so it never collides with the auth/token queries.
+    api = _insert_file(conn, path="src/api/ratelimit.py", lang="python", mtime_ns=7000)
+    _rl_body = (
+        "def ratelimit_bucket_refill(bucket, moment):\n"
+        "    elapsed = moment - bucket.last\n"
+        "    gained = elapsed * bucket.rate\n"
+        "    bucket.level = min(bucket.cap, bucket.level + gained)\n"
+        "    bucket.last = moment\n"
+        "    if bucket.level < 1:\n"
+        "        bucket.denied = True\n"
+        "        emit_denied(bucket)\n"
+        "        return False\n"
+        "    bucket.level -= 1\n"
+        "    bucket.denied = False\n"
+        "    emit_allowed(bucket)\n"
+        "    return True\n"
+    )
+    _insert_chunk(conn, api, line_start=1, line_end=13, content=_rl_body, kind="symbol_body")
+    _insert_symbol(conn, api, name="ratelimit_bucket_refill", kind="function",
+                   line_start=1, line_end=13,
+                   signature="def ratelimit_bucket_refill(bucket, moment)", in_degree=1)
+
     conn.commit()
     yield db
     db.close()
diff --git a/tests/test_config.py b/tests/test_config.py
index fe341bb..7ac6f0f 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -75,3 +75,18 @@ def test_config_hash_ignores_external_endpoint():
     b.embeddings.endpoint = "https://example.test/embed"
     b.embeddings.allow_external = True
     assert a.config_hash() == b.config_hash()
+
+
+def test_retrieval_config_has_compaction_defaults():
+    from codebase_index.config import Config
+    cfg = Config()
+    assert cfg.retrieval.compact_snippets is True
+    assert cfg.retrieval.compact_min_reduction == 0.25
+
+
+def test_compaction_fields_do_not_change_config_hash():
+    from codebase_index.config import Config
+    base = Config()
+    h1 = base.config_hash()
+    base.retrieval.compact_snippets = False          # retrieval-time only
+    assert base.config_hash() == h1                  # no reindex triggered
diff --git a/tests/test_pipeline_search.py b/tests/test_pipeline_search.py
index fc79cbc..5a05c62 100644
--- a/tests/test_pipeline_search.py
+++ b/tests/test_pipeline_search.py
@@ -90,3 +90,21 @@ def test_recommended_reads_within_page(seeded_index):
     result_keys = {(r["path"], r["line_start"], r["line_end"]) for r in payload["results"]}
     for rec in payload["recommended_reads"]:
         assert (rec["path"], rec["line_start"], rec["line_end"]) in result_keys
+
+
+# ── snippet skeletonization ─────────────────────────────────────────────────
+
+def test_search_skeletonizes_code_by_default(seeded_index):
+    # mode=fts so the fts candidate (full body) is not fused with the
+    # signature-only symbol candidate; the large ratelimit body is skeletonized.
+    payload = search(seeded_index.conn, "ratelimit_bucket_refill", mode="fts",
+                     limit=5, token_budget=1500, no_fallback=True)
+    assert payload["results"]
+    assert any(r.get("skeletonized") for r in payload["results"])
+
+
+def test_search_compact_false_disables_skeleton(seeded_index):
+    payload = search(seeded_index.conn, "ratelimit_bucket_refill", mode="fts",
+                     limit=5, token_budget=1500, no_fallback=True, compact=False)
+    assert payload["results"]
+    assert all(not r.get("skeletonized") for r in payload["results"])

From dc74e9bae610e5ca44eceb9fd8f10c7f24104c30 Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 09:13:53 +0300
Subject: [PATCH 09/11] feat(cli,mcp): --raw / raw flag to disable snippet
 skeletonization

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/cli.py        | 12 ++++++++++--
 src/codebase_index/mcp/server.py |  7 ++++++-
 src/codebase_index/service.py    |  9 ++++++++-
 tests/test_cli.py                | 17 +++++++++++++++++
 tests/test_mcp_server.py         | 12 ++++++++++++
 5 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/src/codebase_index/cli.py b/src/codebase_index/cli.py
index 4906446..3ad5f3b 100644
--- a/src/codebase_index/cli.py
+++ b/src/codebase_index/cli.py
@@ -379,6 +379,10 @@ def search(
     token_budget: int = typer.Option(1500, "--token-budget"),
     mode: str = typer.Option("hybrid", "--mode", help="hybrid|fts|symbol|vector"),
     no_fallback: bool = typer.Option(False, "--no-fallback"),
+    raw: bool = typer.Option(
+        False, "--raw",
+        help="Disable snippet skeletonization; return full raw snippets.",
+    ),
     json_out: bool = typer.Option(False, "--json", help="Emit machine-readable JSON."),
 ) -> None:
     """Hybrid ranked search; returns compact results + recommended_reads."""
@@ -403,7 +407,7 @@ def search(
     db_path, cfg = _ensure_index(ctx)
     payload = search_payload(
         db_path, cfg, query, mode=mode, limit=limit, offset=offset,
-        token_budget=token_budget, no_fallback=no_fallback, backend=backend,
+        token_budget=token_budget, no_fallback=no_fallback, backend=backend, raw=raw,
     )
 
     want_json = json_out or (ctx.obj and ctx.obj.get("json"))
@@ -480,6 +484,10 @@ def explain(
     ctx: typer.Context,
     query: str = typer.Argument(...),
     token_budget: int = typer.Option(2200, "--token-budget"),
+    raw: bool = typer.Option(
+        False, "--raw",
+        help="Disable snippet skeletonization; return full raw snippets.",
+    ),
     json_out: bool = typer.Option(False, "--json", help="Emit machine-readable JSON."),
 ) -> None:
     """Intent-aware bundle for 'how does X work' / overview questions."""
@@ -492,7 +500,7 @@ def explain(
 
     payload = search_payload(
         db_path, cfg, normalize_explain_query(query), mode="hybrid", limit=10,
-        token_budget=token_budget, no_fallback=False, backend=backend,
+        token_budget=token_budget, no_fallback=False, backend=backend, raw=raw,
     )
 
     want_json = json_out or (ctx.obj and ctx.obj.get("json"))
diff --git a/src/codebase_index/mcp/server.py b/src/codebase_index/mcp/server.py
index cf708c7..0f7be5f 100644
--- a/src/codebase_index/mcp/server.py
+++ b/src/codebase_index/mcp/server.py
@@ -124,6 +124,7 @@ def search_code(
     limit: int = 10,
     token_budget: int = 1500,
     offset: int = 0,
+    raw: bool = False,
 ) -> str:
     """Hybrid search over the codebase index.
 
@@ -140,6 +141,7 @@ def search_code(
         token_budget: Token budget for the response payload.
         offset: Result offset for pagination. Pass ``next_offset`` from a
                 previous response to fetch the next page.
+        raw: If true, return full raw snippets instead of skeletons.
     """
     db_path, cfg = _resolve_db()
     if not db_path.exists():
@@ -150,6 +152,7 @@ def search_code(
     payload = search_payload(
         db_path, cfg, query, mode=mode, limit=limit, offset=offset,
         token_budget=token_budget, no_fallback=False, backend=_search_backend(cfg),
+        raw=raw,
     )
     return _emit("search_code", payload)
 
@@ -238,6 +241,7 @@ def explain_code(
     query: str,
     token_budget: int = 2200,
     offset: int = 0,
+    raw: bool = False,
 ) -> str:
     """Intent-aware retrieval for architecture / how-does-X-work questions.
 
@@ -249,6 +253,7 @@ def explain_code(
         token_budget: Token budget for the response payload.
         offset: Result offset for pagination. Pass ``next_offset`` from a
                 previous response to fetch the next page.
+        raw: If true, return full raw snippets instead of skeletons.
     """
     db_path, cfg = _resolve_db()
     if not db_path.exists():
@@ -259,7 +264,7 @@ def explain_code(
     payload = search_payload(
         db_path, cfg, normalize_explain_query(query), mode="hybrid", limit=10,
         offset=offset, token_budget=token_budget, no_fallback=False,
-        backend=_search_backend(cfg),
+        backend=_search_backend(cfg), raw=raw,
     )
     return _emit("explain_code", payload)
 
diff --git a/src/codebase_index/service.py b/src/codebase_index/service.py
index b481bf2..60bec24 100644
--- a/src/codebase_index/service.py
+++ b/src/codebase_index/service.py
@@ -75,12 +75,17 @@ def search_payload(
     token_budget: int = 1500,
     no_fallback: bool = False,
     backend: Any = None,
+    raw: bool = False,
 ) -> dict:
     """One search session: open the DB (vector-enabled when the backend is
-    live), run retrieval, return the payload dict both surfaces serialize."""
+    live), run retrieval, return the payload dict both surfaces serialize.
+
+    ``raw`` forces full snippets; otherwise snippets are skeletonized when
+    ``cfg.retrieval.compact_snippets`` is on (the default)."""
     from .retrieval.pipeline import search as run_search
     from .storage.db import Database
 
+    compact = cfg.retrieval.compact_snippets and not raw
     with Database(db_path) as db:
         if backend is not None and getattr(backend, "enabled", False):
             db.enable_vectors()
@@ -95,6 +100,8 @@ def search_payload(
             backend=backend,
             root=Path(cfg.root),
             config=cfg,
+            compact=compact,
+            compact_min_reduction=cfg.retrieval.compact_min_reduction,
         )
 
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index bfd393c..2c1cffd 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -35,3 +35,20 @@ def test_stats_and_doctor_accept_command_json(tmp_path):
     doctor = runner.invoke(app, ["--root", str(tmp_path), "doctor", "--json"])
     assert doctor.exit_code == 0, doctor.output
     assert "findings" in json.loads(doctor.output)
+
+
+def _strip_ansi(text: str) -> str:
+    import re
+    return re.sub(r"\x1b\[[0-9;]*m", "", text)
+
+
+def test_search_has_raw_flag():
+    result = runner.invoke(app, ["search", "--help"])
+    assert result.exit_code == 0
+    assert "--raw" in _strip_ansi(result.stdout)
+
+
+def test_explain_has_raw_flag():
+    result = runner.invoke(app, ["explain", "--help"])
+    assert result.exit_code == 0
+    assert "--raw" in _strip_ansi(result.stdout)
diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py
index 2a22b15..b53c536 100644
--- a/tests/test_mcp_server.py
+++ b/tests/test_mcp_server.py
@@ -194,3 +194,15 @@ def test_search_code_pagination_with_real_index(tmp_path):
 
 def test_run_function_exists():
     assert callable(mcp_server.run)
+
+
+def test_search_code_accepts_raw_parameter():
+    """search_code accepts raw without raising TypeError."""
+    result = _with_missing_db(lambda: _call(mcp_server.search_code, query="foo", raw=True))
+    assert "error" in result
+
+
+def test_explain_code_accepts_raw_parameter():
+    """explain_code accepts raw without raising TypeError."""
+    result = _with_missing_db(lambda: _call(mcp_server.explain_code, query="foo", raw=True))
+    assert "error" in result

From f566e5a47419259cda0c119ee2b476808e02a824 Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 09:16:05 +0300
Subject: [PATCH 10/11] docs: document snippet skeletonization fields and --raw
 flag

Edit canonical skill_template/SKILL.md + sync all copies; CHANGELOG entry.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .claude/skills/codebase-index/SKILL.md     |  3 +++
 .codex/skills/codebase-index/SKILL.md      |  3 +++
 .opencode/skills/codebase-index/SKILL.md   |  3 +++
 CHANGELOG.md                               | 10 ++++++++++
 skill/SKILL.md                             |  3 +++
 skills/codebase-index/SKILL.md             |  3 +++
 src/codebase_index/skill_template/SKILL.md |  3 +++
 7 files changed, 28 insertions(+)

diff --git a/.claude/skills/codebase-index/SKILL.md b/.claude/skills/codebase-index/SKILL.md
index 6b1e1b9..ee31b7d 100644
--- a/.claude/skills/codebase-index/SKILL.md
+++ b/.claude/skills/codebase-index/SKILL.md
@@ -106,6 +106,8 @@ The index returns a **ranked retrieval packet** with:
 - `score` — relevance score
 - `reason` — why this result ranked (e.g., "exact symbol match, 4 callers")
 - `snippet` — compact code excerpt (may already answer the question); `null` means budget was spent — read via `recommended_reads` instead
+- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class lines and the line(s) matching your query are kept, while function bodies collapse to a marker like `... 24 lines elided (read 88-134)`. Read that line range (or the result's `line_start`/`line_end`) when you need a full body.
+- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
 
 Top-level fields:
 
@@ -136,6 +138,7 @@ Top-level fields:
 - Don't re-run the query with trivially reworded text; refine with a different subcommand instead.
 - For broad questions (`confidence: low`, architecture, data-flow), raise the budget: `--token-budget 3000`.
 - Test files are demoted in ranking by default. Include "test" in the query to surface them.
+- Snippets are skeletonized by default to fit more results in the budget. The matched line is always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full bodies inline instead of reading the cited line range.
 
 ## Fallback behavior
 
diff --git a/.codex/skills/codebase-index/SKILL.md b/.codex/skills/codebase-index/SKILL.md
index 6b1e1b9..ee31b7d 100644
--- a/.codex/skills/codebase-index/SKILL.md
+++ b/.codex/skills/codebase-index/SKILL.md
@@ -106,6 +106,8 @@ The index returns a **ranked retrieval packet** with:
 - `score` — relevance score
 - `reason` — why this result ranked (e.g., "exact symbol match, 4 callers")
 - `snippet` — compact code excerpt (may already answer the question); `null` means budget was spent — read via `recommended_reads` instead
+- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class lines and the line(s) matching your query are kept, while function bodies collapse to a marker like `... 24 lines elided (read 88-134)`. Read that line range (or the result's `line_start`/`line_end`) when you need a full body.
+- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
 
 Top-level fields:
 
@@ -136,6 +138,7 @@ Top-level fields:
 - Don't re-run the query with trivially reworded text; refine with a different subcommand instead.
 - For broad questions (`confidence: low`, architecture, data-flow), raise the budget: `--token-budget 3000`.
 - Test files are demoted in ranking by default. Include "test" in the query to surface them.
+- Snippets are skeletonized by default to fit more results in the budget. The matched line is always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full bodies inline instead of reading the cited line range.
 
 ## Fallback behavior
 
diff --git a/.opencode/skills/codebase-index/SKILL.md b/.opencode/skills/codebase-index/SKILL.md
index 6b1e1b9..ee31b7d 100644
--- a/.opencode/skills/codebase-index/SKILL.md
+++ b/.opencode/skills/codebase-index/SKILL.md
@@ -106,6 +106,8 @@ The index returns a **ranked retrieval packet** with:
 - `score` — relevance score
 - `reason` — why this result ranked (e.g., "exact symbol match, 4 callers")
 - `snippet` — compact code excerpt (may already answer the question); `null` means budget was spent — read via `recommended_reads` instead
+- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class lines and the line(s) matching your query are kept, while function bodies collapse to a marker like `... 24 lines elided (read 88-134)`. Read that line range (or the result's `line_start`/`line_end`) when you need a full body.
+- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
 
 Top-level fields:
 
@@ -136,6 +138,7 @@ Top-level fields:
 - Don't re-run the query with trivially reworded text; refine with a different subcommand instead.
 - For broad questions (`confidence: low`, architecture, data-flow), raise the budget: `--token-budget 3000`.
 - Test files are demoted in ranking by default. Include "test" in the query to surface them.
+- Snippets are skeletonized by default to fit more results in the budget. The matched line is always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full bodies inline instead of reading the cited line range.
 
 ## Fallback behavior
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 889af8d..c0cb476 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,16 @@ All notable changes to this project are documented here. The format is based on
 
 ## [Unreleased]
 
+### Added
+- **Snippet skeletonization & content-aware rendering.** `search`/`explain` snippets are now
+  focus skeletons — import/signature/class lines and the query-matching line are kept while
+  function bodies collapse to a `... N lines elided (read A-B)` marker — so more ranked results
+  fit the same token budget. Content-aware (code via tree-sitter, markdown headings,
+  structured-config keys), reversible via `recommended_reads`, and safe (raw fallback on any
+  parse failure or insufficient size reduction). New `skeletonized` / `elided_lines` result fields; new
+  `retrieval.compact_snippets` / `retrieval.compact_min_reduction` config knobs (no reindex needed);
+  disable per-call with `--raw` (CLI) or `raw: true` (MCP `search_code` / `explain_code`).
+
 ## [1.5.0] - 2026-06-24
 
 ### Added — graph visualization upgrade + interop exports
diff --git a/skill/SKILL.md b/skill/SKILL.md
index 6b1e1b9..ee31b7d 100644
--- a/skill/SKILL.md
+++ b/skill/SKILL.md
@@ -106,6 +106,8 @@ The index returns a **ranked retrieval packet** with:
 - `score` — relevance score
 - `reason` — why this result ranked (e.g., "exact symbol match, 4 callers")
 - `snippet` — compact code excerpt (may already answer the question); `null` means budget was spent — read via `recommended_reads` instead
+- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class lines and the line(s) matching your query are kept, while function bodies collapse to a marker like `... 24 lines elided (read 88-134)`. Read that line range (or the result's `line_start`/`line_end`) when you need a full body.
+- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
 
 Top-level fields:
 
@@ -136,6 +138,7 @@ Top-level fields:
 - Don't re-run the query with trivially reworded text; refine with a different subcommand instead.
 - For broad questions (`confidence: low`, architecture, data-flow), raise the budget: `--token-budget 3000`.
 - Test files are demoted in ranking by default. Include "test" in the query to surface them.
+- Snippets are skeletonized by default to fit more results in the budget. The matched line is always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full bodies inline instead of reading the cited line range.
 
 ## Fallback behavior
 
diff --git a/skills/codebase-index/SKILL.md b/skills/codebase-index/SKILL.md
index 6b1e1b9..ee31b7d 100644
--- a/skills/codebase-index/SKILL.md
+++ b/skills/codebase-index/SKILL.md
@@ -106,6 +106,8 @@ The index returns a **ranked retrieval packet** with:
 - `score` — relevance score
 - `reason` — why this result ranked (e.g., "exact symbol match, 4 callers")
 - `snippet` — compact code excerpt (may already answer the question); `null` means budget was spent — read via `recommended_reads` instead
+- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class lines and the line(s) matching your query are kept, while function bodies collapse to a marker like `... 24 lines elided (read 88-134)`. Read that line range (or the result's `line_start`/`line_end`) when you need a full body.
+- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
 
 Top-level fields:
 
@@ -136,6 +138,7 @@ Top-level fields:
 - Don't re-run the query with trivially reworded text; refine with a different subcommand instead.
 - For broad questions (`confidence: low`, architecture, data-flow), raise the budget: `--token-budget 3000`.
 - Test files are demoted in ranking by default. Include "test" in the query to surface them.
+- Snippets are skeletonized by default to fit more results in the budget. The matched line is always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full bodies inline instead of reading the cited line range.
 
 ## Fallback behavior
 
diff --git a/src/codebase_index/skill_template/SKILL.md b/src/codebase_index/skill_template/SKILL.md
index 6b1e1b9..ee31b7d 100644
--- a/src/codebase_index/skill_template/SKILL.md
+++ b/src/codebase_index/skill_template/SKILL.md
@@ -106,6 +106,8 @@ The index returns a **ranked retrieval packet** with:
 - `score` — relevance score
 - `reason` — why this result ranked (e.g., "exact symbol match, 4 callers")
 - `snippet` — compact code excerpt (may already answer the question); `null` means budget was spent — read via `recommended_reads` instead
+- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class lines and the line(s) matching your query are kept, while function bodies collapse to a marker like `... 24 lines elided (read 88-134)`. Read that line range (or the result's `line_start`/`line_end`) when you need a full body.
+- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
 
 Top-level fields:
 
@@ -136,6 +138,7 @@ Top-level fields:
 - Don't re-run the query with trivially reworded text; refine with a different subcommand instead.
 - For broad questions (`confidence: low`, architecture, data-flow), raise the budget: `--token-budget 3000`.
 - Test files are demoted in ranking by default. Include "test" in the query to surface them.
+- Snippets are skeletonized by default to fit more results in the budget. The matched line is always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full bodies inline instead of reading the cited line range.
 
 ## Fallback behavior
 

From d58c231c274ab83e4111be5d5425d3ae9d09e5f8 Mon Sep 17 00:00:00 2001
From: denfry <aseraw115@gmail.com>
Date: Wed, 24 Jun 2026 09:29:58 +0300
Subject: [PATCH 11/11] test: regenerate goldens for skeletonized fields; fix
 ruff/mypy

Goldens gain additive skeletonized/elided_lines fields; budget.py types the
compactor as Optional[Callable[[Candidate], Compacted]]; tidy test imports.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/codebase_index/retrieval/budget.py | 5 +++--
 tests/golden/mcp_search_code.json      | 4 ++++
 tests/golden/search_token.json         | 4 ++++
 tests/test_budget.py                   | 4 +---
 tests/test_skeleton.py                 | 2 +-
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/codebase_index/retrieval/budget.py b/src/codebase_index/retrieval/budget.py
index d832709..a09e28b 100644
--- a/src/codebase_index/retrieval/budget.py
+++ b/src/codebase_index/retrieval/budget.py
@@ -15,6 +15,7 @@
 from typing import Callable, Optional
 
 from ..output.redact import redact_snippet
+from .skeleton import Compacted
 from .types import Candidate
 
 # Snippets shorter than this threshold are treated as previews only; the result
@@ -38,7 +39,7 @@ def apply_budget(
     candidates: list[Candidate],
     *,
     token_budget: int,
-    compactor: Optional[Callable[[Candidate], "object"]] = None,
+    compactor: Optional[Callable[[Candidate], Compacted]] = None,
 ) -> tuple[list[dict], list[dict]]:
     results: list[dict] = []
     recommended: list[dict] = []
@@ -57,7 +58,7 @@ def apply_budget(
         cost = c.token_est
         if compactor is not None and c.content:
             comp = compactor(c)
-            if getattr(comp, "skeletonized", False):
+            if comp.skeletonized:
                 text = comp.text
                 cost = comp.token_est
                 meta["skeletonized"] = True
diff --git a/tests/golden/mcp_search_code.json b/tests/golden/mcp_search_code.json
index d213911..b2e1a8a 100644
--- a/tests/golden/mcp_search_code.json
+++ b/tests/golden/mcp_search_code.json
@@ -20,12 +20,14 @@
   ],
   "results": [
     {
+      "elided_lines": 0,
       "line_end": 6,
       "line_start": 4,
       "path": "src/auth/token.py",
       "rank": 1,
       "reason": "in src/auth/ · 2 callers",
       "score": 2.233,
+      "skeletonized": false,
       "snippet": "def refresh_access_token(refresh_token: str) -> str:",
       "symbols": [
         "refresh_access_token"
@@ -33,12 +35,14 @@
       "token_est": 13
     },
     {
+      "elided_lines": 0,
       "line_end": 11,
       "line_start": 7,
       "path": "src/api/service.py",
       "rank": 2,
       "reason": "fts",
       "score": 0.9375,
+      "skeletonized": false,
       "snippet": "class AdminUser(User):\n    \"\"\"Subclass of User; imported-from edge target for impact tests.\"\"\"\n\n    def renew(self, refresh_token: str) -> str:\n        return refresh_access_token(refresh_token)",
       "symbols": [],
       "token_est": 48
diff --git a/tests/golden/search_token.json b/tests/golden/search_token.json
index a04d5fe..295730f 100644
--- a/tests/golden/search_token.json
+++ b/tests/golden/search_token.json
@@ -20,12 +20,14 @@
   ],
   "results": [
     {
+      "elided_lines": 0,
       "line_end": 6,
       "line_start": 4,
       "path": "src/auth/token.py",
       "rank": 1,
       "reason": "in src/auth/ · 2 callers",
       "score": 2.233,
+      "skeletonized": false,
       "snippet": "def refresh_access_token(refresh_token: str) -> str:",
       "symbols": [
         "refresh_access_token"
@@ -33,12 +35,14 @@
       "token_est": 13
     },
     {
+      "elided_lines": 0,
       "line_end": 11,
       "line_start": 7,
       "path": "src/api/service.py",
       "rank": 2,
       "reason": "fts",
       "score": 0.9375,
+      "skeletonized": false,
       "snippet": "class AdminUser(User):\n    \"\"\"Subclass of User; imported-from edge target for impact tests.\"\"\"\n\n    def renew(self, refresh_token: str) -> str:\n        return refresh_access_token(refresh_token)",
       "symbols": [],
       "token_est": 48
diff --git a/tests/test_budget.py b/tests/test_budget.py
index 52e258e..4c46217 100644
--- a/tests/test_budget.py
+++ b/tests/test_budget.py
@@ -1,4 +1,5 @@
 from codebase_index.retrieval.budget import apply_budget
+from codebase_index.retrieval.skeleton import Compacted
 from codebase_index.retrieval.types import Candidate
 
 
@@ -30,9 +31,6 @@ def test_metadata_always_present_even_when_budget_zero():
     assert results[0]["path"] == "a.py" and results[0]["snippet"] is None
 
 
-from codebase_index.retrieval.skeleton import Compacted
-
-
 def test_compactor_lets_more_results_fit_budget():
     cands = [_c(f"f{i}.py", 1, 50, "x" * 4000, 1000) for i in range(5)]
 
diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py
index 01da537..98d7fff 100644
--- a/tests/test_skeleton.py
+++ b/tests/test_skeleton.py
@@ -24,7 +24,7 @@ def test_render_merges_adjacent_runs_but_keeps_separated_ones():
     assert elided == 3
 
 
-from codebase_index.retrieval.skeleton import Compacted, compact  # noqa: E402
+from codebase_index.retrieval.skeleton import compact  # noqa: E402
 
 
 PY_SAMPLE = (