22 * Per-message size-based context compaction for pi-agent-core's
33 * `transformContext` hook. Runs before every LLM call.
44 *
5- * Philosophy: **history is intent tracking, not payload storage.**
5+ * Philosophy: **history is intent tracking, not payload storage.** The model
6+ * needs the decision trail — which tools, in what order, with what shape —
7+ * not verbatim 9 MB artifact dumps or whole-file view returns from ten turns
8+ * ago. Current file state is always recoverable via ranged `view()`.
69 *
7- * What the model needs from prior turns is the DECISION trail — which tools
8- * it called, in what order, with what shape. What it does NOT need is its own
9- * verbatim 9 MB artifact dump, or the full view() return of a 100 KB file it
10- * already touched. Current state is always recoverable via view(), so there
11- * is no information loss from stubbing prior payloads.
10+ * Evolution:
11+ * - v1 (window): kept last N turns verbatim, stubbed older. Missed the
12+ * dominant failure mode — a 9 MB `<artifact>` text dump sat inside the
13+ * keep-verbatim window and shipped 3.97 M tokens.
14+ * - v2 (windowless): stubbed every block over its cap regardless of
15+ * position. Safe, but over-aggressive after the prompt OVERRIDE block
16+ * eliminated the text-dump vector — the model's own latest str_replace
17+ * new_str got summarized, so picking the next old_str required guessing.
18+ * - v3 (this file): split behavior by block type.
19+ * · `assistant.content[*].text` is always capped (8 KB, all turns).
20+ * This is the regression guard: the one class of block that must
21+ * never be allowed to balloon, because a bad prompt interaction
22+ * can resurrect the `<artifact>` dump.
23+ * · `assistant.content[*].toolCall.input` and
24+ * `toolResult.content[*].text` are capped only outside a small
25+ * recent-turn window. Inside the window they stay verbatim so the
26+ * model reads its own just-written section and the latest view()
27+ * output in full fidelity. Outside the window, large payloads
28+ * collapse to a one-line stub.
1229 *
13- * v1 (window-based) failed in production because the 9 MB payload was the
14- * LATEST assistant text block (the model streaming out the final artifact as
15- * prose right before `done`). It sat inside the "keep verbatim" window, and
16- * the request still shipped 3.97 M tokens.
17- *
18- * v3 (this file) drops the window entirely. Every message is inspected per
19- * block; any block whose serialized size exceeds its per-type cap is stubbed.
20- * Caps are small on purpose — 8 KB fits any reasonable paragraph of reasoning
21- * or normal tool argument, but cannot carry an HTML/JSX artifact, a base64
22- * image, or a whole-file view return. The model's most recent decision is
23- * still visible at full fidelity for small outputs.
24- *
25- * Three block-level caps:
26- * - `assistant.content[*].text` — model prose / streamed artifact text.
27- * - `assistant.content[*].toolCall.input` — args the model sent to a tool.
28- * - `toolResult.content[*].text` — host-side tool return payload.
30+ * Block-level caps:
31+ * - TEXT_BLOCK_LIMIT — assistant prose, ALL turns.
32+ * - TOOL_INPUT_LIMIT — assistant.toolCall.input, older turns only.
33+ * - TOOL_RESULT_LIMIT — toolResult.text, older turns only.
2934 *
3035 * Stub format carries bytes + a short preview so the model can tell what
3136 * got dropped, and (for tool calls) keeps tool NAME + id so pi-ai's shape
3237 * validation remains happy.
3338 *
3439 * Safety net: after per-block stubbing, if the grand total still exceeds
35- * `HARD_CAP_BYTES`, we shrink caps further and re-run. Catches pathological
36- * runs with many just-under-threshold blocks.
40+ * `HARD_CAP_BYTES`, we shrink caps further (including within the window)
41+ * and re-run. Catches pathological runs with many just-under-threshold
42+ * blocks.
3743 */
3844
3945import type { AgentMessage } from '@mariozechner/pi-agent-core' ;
4046import { type CoreLogger , NOOP_LOGGER } from './logger.js' ;
4147
4248const TEXT_BLOCK_LIMIT = 8 * 1024 ;
43- const TOOL_INPUT_LIMIT = 8 * 1024 ;
49+ const TOOL_INPUT_LIMIT = 24 * 1024 ;
4450const TOOL_RESULT_LIMIT = 8 * 1024 ;
4551const HARD_CAP_BYTES = 200_000 ;
4652const AGGRESSIVE_BLOCK_LIMIT = 2 * 1024 ;
4753
/**
 * Size of the "recent" window used by the first (non-aggressive) capping pass.
 *
 * The unit is messages, not conversational turns: `computeWindowStart` counts
 * every `assistant` and every `toolResult` message from the tail as one slot,
 * and skips `user` messages without counting them.
 *
 * Inside the window, tool payloads (toolCall.input and toolResult.text) stay
 * verbatim. Assistant TEXT is still capped inside the window, because
 * TEXT_BLOCK_LIMIT is applied to every assistant message regardless of
 * position. The aggressive fallback pass runs with a window of 0, so nothing
 * is exempt there.
 *
 * 3 covers "current turn is reading the previous turn's str_replace + its
 * toolResult" in the typical one-section-per-turn polish cadence.
 */
const RECENT_WINDOW = 3;
63+
4864function estimateBytes ( messages : AgentMessage [ ] ) : number {
4965 let total = 0 ;
5066 for ( const m of messages ) {
@@ -66,7 +82,11 @@ function stubText(text: string, label: string): string {
6682 return `[${ label } — ${ text . length } B, head: "${ preview ( text ) } "]` ;
6783}
6884
69- function compactAssistant ( m : AgentMessage , textLimit : number , toolLimit : number ) : AgentMessage {
85+ function compactAssistant (
86+ m : AgentMessage ,
87+ textLimit : number ,
88+ toolLimit : number | null ,
89+ ) : AgentMessage {
7090 const original = m as unknown as {
7191 role : 'assistant' ;
7292 content ?: Array < Record < string , unknown > > ;
@@ -81,7 +101,7 @@ function compactAssistant(m: AgentMessage, textLimit: number, toolLimit: number)
81101 changed = true ;
82102 return { ...block , text : stubText ( text , 'prior assistant output dropped' ) } ;
83103 }
84- if ( type === 'toolCall' ) {
104+ if ( type === 'toolCall' && toolLimit !== null ) {
85105 const input = block [ 'input' ] ;
86106 let origBytes = 0 ;
87107 let preview = '' ;
@@ -105,7 +125,8 @@ function compactAssistant(m: AgentMessage, textLimit: number, toolLimit: number)
105125 return { ...( original as object ) , content : nextContent } as unknown as AgentMessage ;
106126}
107127
108- function compactToolResult ( m : AgentMessage , limit : number ) : AgentMessage {
128+ function compactToolResult ( m : AgentMessage , limit : number | null ) : AgentMessage {
129+ if ( limit === null ) return m ;
109130 const original = m as unknown as {
110131 role : 'toolResult' ;
111132 content ?: Array < { type : string ; text ?: string } > ;
@@ -123,15 +144,48 @@ function compactToolResult(m: AgentMessage, limit: number): AgentMessage {
123144 return { ...( original as object ) , content : nextContent } as unknown as AgentMessage ;
124145}
125146
126- function applyCaps (
127- messages : AgentMessage [ ] ,
128- textLimit : number ,
129- toolInputLimit : number ,
130- toolResultLimit : number ,
131- ) : AgentMessage [ ] {
132- return messages . map ( ( m ) => {
133- if ( m . role === 'assistant' ) return compactAssistant ( m , textLimit , toolInputLimit ) ;
134- if ( m . role === 'toolResult' ) return compactToolResult ( m , toolResultLimit ) ;
/**
 * Locates the first index of the "recent" window.
 *
 * Walks the message list from the tail and counts `assistant` and
 * `toolResult` messages. The index of the `windowTurns`-th such message is
 * returned; every message at or after that index is treated as recent by the
 * caller. `user` messages are skipped without consuming a slot, so they never
 * shrink the window.
 *
 * @param messages    Conversation history, oldest first.
 * @param windowTurns Number of assistant/toolResult messages to keep recent.
 * @returns `messages.length` when `windowTurns` is zero or negative (empty
 *          window), `0` when the history holds fewer countable messages than
 *          requested (everything is recent), otherwise the inclusive start
 *          index of the window.
 */
function computeWindowStart(messages: AgentMessage[], windowTurns: number): number {
  // A non-positive window means "nothing is recent".
  if (windowTurns <= 0) {
    return messages.length;
  }

  let counted = 0;
  let cursor = messages.length;
  while (cursor > 0) {
    cursor -= 1;
    const role = messages[cursor]?.role;
    const occupiesSlot = role === 'assistant' || role === 'toolResult';
    if (!occupiesSlot) {
      continue;
    }
    counted += 1;
    if (counted >= windowTurns) {
      return cursor;
    }
  }

  // Ran out of history before filling the window: the whole list is recent.
  return 0;
}
165+
/**
 * Limits consumed by `applyCaps` for a single capping pass.
 *
 * "Recent" means the message index is at or after the value returned by
 * `computeWindowStart(messages, windowTurns)`; "old" is everything before it.
 */
interface CapConfig {
  /** Cap passed to `compactAssistant` as its text limit for every assistant message, recent or old. */
  textLimit: number;
  /** Cap for assistant toolCall input on old messages. */
  toolInputLimitOld: number;
  /** Cap for toolResult content on old messages. */
  toolResultLimitOld: number;
  /** Cap for assistant toolCall input on recent messages; `null` leaves toolCall blocks uncapped. */
  toolInputLimitRecent: number | null;
  /** Cap for toolResult content on recent messages; `null` returns the message unchanged. */
  toolResultLimitRecent: number | null;
  /** Window size forwarded to `computeWindowStart`; zero or less makes every message "old". */
  windowTurns: number;
}
174+
/**
 * Runs one capping pass over the history and returns the resulting array.
 *
 * Each message is dispatched on its role:
 * - `assistant` messages go through `compactAssistant` with `cfg.textLimit`
 *   and a tool-input limit chosen by window position,
 * - `toolResult` messages go through `compactToolResult` with a limit chosen
 *   by window position,
 * - any other role is passed through untouched.
 *
 * @param messages Conversation history, oldest first.
 * @param cfg      Limits and window size for this pass.
 * @returns A new array with the same length and order as `messages`.
 */
function applyCaps(messages: AgentMessage[], cfg: CapConfig): AgentMessage[] {
  const firstRecentIndex = computeWindowStart(messages, cfg.windowTurns);

  return messages.map((message, position) => {
    const insideWindow = position >= firstRecentIndex;

    switch (message.role) {
      case 'assistant':
        return compactAssistant(
          message,
          cfg.textLimit,
          insideWindow ? cfg.toolInputLimitRecent : cfg.toolInputLimitOld,
        );
      case 'toolResult':
        return compactToolResult(
          message,
          insideWindow ? cfg.toolResultLimitRecent : cfg.toolResultLimitOld,
        );
      default:
        return message;
    }
  });
}
@@ -143,25 +197,36 @@ export function buildTransformContext(
143197 if ( messages . length === 0 ) return messages ;
144198
145199 const before = estimateBytes ( messages ) ;
146- const first = applyCaps ( messages , TEXT_BLOCK_LIMIT , TOOL_INPUT_LIMIT , TOOL_RESULT_LIMIT ) ;
200+ const first = applyCaps ( messages , {
201+ textLimit : TEXT_BLOCK_LIMIT ,
202+ toolInputLimitOld : TOOL_INPUT_LIMIT ,
203+ toolResultLimitOld : TOOL_RESULT_LIMIT ,
204+ toolInputLimitRecent : null ,
205+ toolResultLimitRecent : null ,
206+ windowTurns : RECENT_WINDOW ,
207+ } ) ;
147208 const firstSize = estimateBytes ( first ) ;
148209
149210 log . info ( '[context-prune] step=caps' , {
150211 messages : messages . length ,
151212 before,
152213 after : firstSize ,
153214 textLimit : TEXT_BLOCK_LIMIT ,
154- toolLimit : TOOL_INPUT_LIMIT ,
215+ toolInputLimit : TOOL_INPUT_LIMIT ,
216+ toolResultLimit : TOOL_RESULT_LIMIT ,
217+ window : RECENT_WINDOW ,
155218 } ) ;
156219
157220 if ( firstSize <= HARD_CAP_BYTES ) return first ;
158221
159- const aggressive = applyCaps (
160- messages ,
161- AGGRESSIVE_BLOCK_LIMIT ,
162- AGGRESSIVE_BLOCK_LIMIT ,
163- AGGRESSIVE_BLOCK_LIMIT ,
164- ) ;
222+ const aggressive = applyCaps ( messages , {
223+ textLimit : AGGRESSIVE_BLOCK_LIMIT ,
224+ toolInputLimitOld : AGGRESSIVE_BLOCK_LIMIT ,
225+ toolResultLimitOld : AGGRESSIVE_BLOCK_LIMIT ,
226+ toolInputLimitRecent : AGGRESSIVE_BLOCK_LIMIT ,
227+ toolResultLimitRecent : AGGRESSIVE_BLOCK_LIMIT ,
228+ windowTurns : 0 ,
229+ } ) ;
165230 const aggressiveSize = estimateBytes ( aggressive ) ;
166231 log . info ( '[context-prune] step=aggressive' , {
167232 messages : messages . length ,
0 commit comments