|
3 | 3 | * hook. Invoked before every LLM call to keep the message array from growing |
4 | 4 | * unboundedly. |
5 | 5 | * |
6 | | - * The hot path (and the one that has crashed production with a 4M-token |
7 | | - * request) is: |
| 6 | + * Two bulk sources dominate context growth in a long tool-using run: |
| 7 | + * - `toolResult.content` — view returns of the whole file (15-100 KB each), |
| 8 | + * tool result payloads from read_url / done / etc. |
8 | 9 | * - `assistant.toolCall.input` — str_replace old_str / new_str for every |
9 | | - * section edit (2-5 KB each, 30+ edits per run = 100+ KB resent every |
10 | | - * turn). |
11 | | - * - `toolResult.content` — view returns of the whole file (15-100 KB each, |
12 | | - * multiple calls per run). |
| 10 | + * section edit (2-5 KB each, 30+ edits per run = 100+ KB). The model's |
| 11 | + * own output, carried forward across every subsequent LLM call. |
13 | 12 | * |
14 | | - * Strategy: keep the most recent `WINDOW_KEEP` tool-use rounds verbatim so the |
15 | | - * model has full fidelity for its current reasoning. For older rounds, |
16 | | - * replace `toolResult.content` with a one-line stub. We intentionally DO NOT |
17 | | - * rewrite assistant.toolCall.input (tampering with the model's own output |
18 | | - * history confuses reasoning); the savings from stubbing toolResults alone |
19 | | - * are ~60-70% of historical bytes in practice. |
| 13 | + * Conservative v1 only stubbed toolResult content (safer — keeps the model's |
| 14 | + * self-history intact) but the production 4M-token failure showed that is |
| 15 | + * not enough. v2 now compacts BOTH: |
| 16 | + * - toolResult rows older than WINDOW_KEEP rounds → content replaced with |
| 17 | + * a one-line stub that preserves toolCallId pairing. |
| 18 | + * - assistant.toolCall.input older than WINDOW_KEEP rounds → args replaced |
| 19 | + * with `{_summarized: true, _origBytes: N}`. The tool name + id stay |
| 20 | + * intact so pi-ai's tool-use shape validation is happy; the large |
| 21 | + * old_str/new_str payload is discarded. |
20 | 22 | * |
21 | | - * User messages and assistant-text-only messages always pass through unchanged. |
| 23 | + * We keep the tool NAME on compacted toolCalls so the model can still see |
| 24 | + * "earlier in this run I did str_replace on index.html 10 times" when |
| 25 | + * reasoning about what's been done. |
| 26 | + * |
| 27 | + * User messages and assistant-text-only messages always pass through |
| 28 | + * unchanged (no loss of user intent or agent commentary). |
22 | 29 | * |
23 | 30 | * Safety net: if the total estimated size still exceeds `HARD_CAP_BYTES` |
24 | | - * (~300 KB of assistant + toolResult text), tighten the window to the last 4 |
25 | | - * rounds only. |
| 31 | + * after the conservative pass, tighten to WINDOW_KEEP_AGGRESSIVE rounds. |
26 | 32 | */ |
27 | 33 |
|
28 | 34 | import type { AgentMessage } from '@mariozechner/pi-agent-core'; |
29 | 35 |
|
/** Number of most recent tool-use rounds left verbatim by the first (conservative) pass. */
const WINDOW_KEEP = 6;

/** Tighter round window used when the conservative pass still exceeds `HARD_CAP_BYTES`. */
const WINDOW_KEEP_AGGRESSIVE = 3;

/** Estimated-size ceiling (as reported by `estimateBytes`) above which the aggressive window is applied. */
const HARD_CAP_BYTES = 200_000;
33 | 39 |
|
34 | 40 | function estimateBytes(messages: AgentMessage[]): number { |
35 | 41 | let total = 0; |
@@ -90,35 +96,86 @@ function stubToolResult(m: AgentMessage): AgentMessage { |
90 | 96 | } as unknown as AgentMessage; |
91 | 97 | } |
92 | 98 |
|
/**
 * Compact the `toolCall` blocks inside one assistant message.
 *
 * For each `toolCall` block every field except `input` is preserved (so the
 * tool name and id stay visible and toolResult pairing is unaffected); `input`
 * is replaced with `{ _summarized: true, _origBytes: N }`, where N is the
 * length of the JSON-serialized original input (string length, i.e. an
 * estimate rather than an exact byte count).
 *
 * A block is passed through untouched when:
 *  - it is not a `toolCall` block;
 *  - it has no `input` (there is nothing to discard, and adding a stub would
 *    only grow the message);
 *  - its `input` is already a summary stub (so applying this function twice
 *    does not overwrite the recorded `_origBytes`);
 *  - its `input` cannot be serialized by `JSON.stringify`;
 *  - the serialized `input` is not larger than the stub that would replace it
 *    (small args like a file path are cheaper to keep and more informative).
 *
 * The incoming message is never mutated. If no block changes, the very same
 * message object is returned.
 *
 * NOTE(review): args are read from the block's `input` key — confirm this
 * matches the field name pi-ai actually uses on toolCall blocks.
 *
 * @param m - Message to compact; expected to be an assistant message whose
 *   `content` is an array of blocks. Anything else is returned as-is.
 * @returns The original message, or a shallow copy with compacted toolCalls.
 */
function stubAssistantToolCalls(m: AgentMessage): AgentMessage {
  const original = m as unknown as {
    role: 'assistant';
    content?: Array<Record<string, unknown>>;
  };
  if (!Array.isArray(original.content)) return m;

  let changed = false;
  const nextContent = original.content.map((block) => {
    if (block?.['type'] !== 'toolCall') return block;

    const input = block['input'];
    // Nothing to shrink: do not invent an `input` field on the block.
    if (input === undefined || input === null) return block;
    // Already compacted on an earlier pass: keep the original size record.
    if (
      typeof input === 'object' &&
      (input as Record<string, unknown>)['_summarized'] === true
    ) {
      return block;
    }

    let serialized: string | undefined;
    try {
      // May throw (circular refs, BigInt) or yield undefined (functions, symbols).
      serialized = JSON.stringify(input);
    } catch {
      serialized = undefined;
    }
    if (typeof serialized !== 'string') return block;

    const stub = { _summarized: true, _origBytes: serialized.length };
    // Only replace when the stub is actually smaller than what it replaces.
    if (serialized.length <= JSON.stringify(stub).length) return block;

    changed = true;
    return { ...block, input: stub };
  });

  if (!changed) return m;
  return { ...(original as object), content: nextContent } as unknown as AgentMessage;
}
| 130 | + |
/**
 * Produce a copy of `messages` in which everything before the retained
 * window has been compacted.
 *
 * The window starts at the message index that `findToolUseRoundIndices`
 * reports for the `keep`-th round counting from the end. When there are
 * `keep` rounds or fewer, the cutoff is 0 and every message is returned
 * unchanged.
 * (Presumably the indices are ascending positions where each tool-use round
 * begins — confirm against `findToolUseRoundIndices`.)
 *
 * Before the cutoff:
 *  - tool results go through `stubToolResult`;
 *  - assistant messages carrying tool calls go through `stubAssistantToolCalls`;
 *  - anything else (user messages, text-only assistant messages) is kept as-is.
 *
 * @param messages - Full message history; not mutated by this function.
 * @param keep - How many trailing tool-use rounds to leave verbatim.
 * @returns A new array of the same length as `messages`.
 */
function applyWindow(messages: AgentMessage[], keep: number): AgentMessage[] {
  const rounds = findToolUseRoundIndices(messages);

  let cutoff = 0;
  if (rounds.length > keep) {
    cutoff = rounds[rounds.length - keep] ?? 0;
  }

  const compact = (msg: AgentMessage): AgentMessage => {
    if (isToolResult(msg)) return stubToolResult(msg);
    if (isAssistantWithToolCall(msg)) return stubAssistantToolCalls(msg);
    return msg;
  };

  // At or past the cutoff we are inside the window: leave the message verbatim.
  return messages.map((msg, idx) => (idx >= cutoff ? msg : compact(msg)));
}
| 142 | + |
/**
 * Build the context-transform hook that trims the message history.
 *
 * The returned async function:
 *  1. returns an empty history untouched;
 *  2. compacts everything outside the last `WINDOW_KEEP` tool-use rounds and
 *     logs the before/after size estimate;
 *  3. returns that result if its estimated size is within `HARD_CAP_BYTES`;
 *  4. otherwise re-compacts the ORIGINAL messages with the tighter
 *     `WINDOW_KEEP_AGGRESSIVE` window, logs the resulting size, and returns it.
 *
 * The optional `signal` argument of the hook signature is accepted but not
 * consulted.
 *
 * @returns A hook `(messages, signal?) => Promise<AgentMessage[]>`.
 */
export function buildTransformContext(): (
  messages: AgentMessage[],
  signal?: AbortSignal,
) => Promise<AgentMessage[]> {
  // Best-effort telemetry: the line is built lazily and any failure while
  // building or printing it is swallowed so logging can never break the hook.
  const report = (describe: () => string): void => {
    if (typeof console === 'undefined' || typeof console.log !== 'function') return;
    try {
      console.log(describe());
    } catch {
      /* noop */
    }
  };

  return async (messages) => {
    if (messages.length === 0) return messages;

    const trimmed = applyWindow(messages, WINDOW_KEEP);
    const trimmedSize = estimateBytes(trimmed);

    report(
      () =>
        `[context-prune] messages=${messages.length} before=${estimateBytes(messages)}B ` +
        `after=${trimmedSize}B keep=${WINDOW_KEEP}`,
    );

    if (trimmedSize <= HARD_CAP_BYTES) return trimmed;

    // Still over the cap: start again from the untouched history with the
    // smaller window.
    const tightened = applyWindow(messages, WINDOW_KEEP_AGGRESSIVE);
    report(
      () =>
        `[context-prune] aggressive fallback: size=${estimateBytes(tightened)}B ` +
        `keep=${WINDOW_KEEP_AGGRESSIVE}`,
    );
    return tightened;
  };
}
0 commit comments