Skip to content

Commit b614196

Browse files
committed
fix(core): aggressive context-prune — compact toolCall.input + toolResult
The conservative v1 only stubbed `toolResult.content` on older rounds and left `assistant.toolCall.input` verbatim, but production runs still hit the 1M-token wall at ~4M tokens because str_replace `old_str`/`new_str` payloads (2-5 KB each, 30+ edits per run) dominate the historical bytes and are carried in assistant messages. v2 compacts both sources for rounds older than the window: - `toolResult.content` → one-line stub (same as v1) - `toolCall.input` → `{_summarized: true, _origBytes: N}`. Name + id stay so pi-ai's tool-use pairing still validates and the model can still see "I called str_replace N times earlier" when reasoning about progress. Tightened parameters: window 8→6, aggressive 4→3, hard cap 300 KB→200 KB. Added per-turn [context-prune] log so we can see the hook firing + before/after bytes per LLM call. User messages and text-only assistant messages are never touched.
1 parent 16be6cc commit b614196

2 files changed

Lines changed: 57 additions & 38 deletions

File tree

packages/core/src/context-prune.test.ts

Lines changed: 56 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,20 @@ describe('buildTransformContext — sliding-window compaction', () => {
4747
expect(out).toEqual(messages);
4848
});
4949

50-
it('leaves the last 8 tool-use rounds verbatim, stubs older toolResult content', async () => {
50+
it('leaves the last N tool-use rounds verbatim, stubs older toolResult content', async () => {
5151
const transform = buildTransformContext();
5252
const messages: AgentMessage[] = [userMsg('build this')];
53-
// 12 tool-use rounds, each with a bulky toolResult body so older ones
54-
// should be stubbed.
5553
const bulk = 'x'.repeat(2_000);
56-
for (let i = 0; i < 12; i += 1) {
54+
// 10 rounds — default window is 6, so first ~4 should be compacted.
55+
for (let i = 0; i < 10; i += 1) {
5756
messages.push(assistantWithToolCall(`t${i}`, `args-${i}`));
5857
messages.push(toolResult(`t${i}`, `result body ${i} ${bulk}`));
5958
}
6059
const out = await transform(messages);
61-
// First few toolResult rows should be stubbed, last 8 should still be
62-
// original-shaped.
6360
const resultRows = out.filter((m) => m.role === 'toolResult');
64-
expect(resultRows).toHaveLength(12);
65-
const early = resultRows.slice(0, 4);
66-
const recent = resultRows.slice(-8);
61+
expect(resultRows).toHaveLength(10);
62+
const early = resultRows.slice(0, 3);
63+
const recent = resultRows.slice(-6);
6764
for (const row of early) {
6865
const first = (row as { content: Array<{ text?: string }> }).content[0]?.text ?? '';
6966
expect(first.startsWith('[dropped')).toBe(true);
@@ -74,6 +71,33 @@ describe('buildTransformContext — sliding-window compaction', () => {
7471
}
7572
});
7673

74+
it('compacts assistant.toolCall.input on old rounds but preserves name + id', async () => {
75+
const transform = buildTransformContext();
76+
const messages: AgentMessage[] = [userMsg('build')];
77+
const bulk = 'a'.repeat(4_000);
78+
// 10 rounds with big toolCall args — older ones should have args summarized.
79+
for (let i = 0; i < 10; i += 1) {
80+
messages.push(assistantWithToolCall(`call-${i}`, bulk));
81+
messages.push(toolResult(`call-${i}`, 'ok'));
82+
}
83+
const out = await transform(messages);
84+
// Oldest assistant message's toolCall block should have summarized input.
85+
const oldest = out.find(
86+
(m) =>
87+
m.role === 'assistant' &&
88+
Array.isArray((m as { content?: unknown }).content) &&
89+
(m as { content: Array<{ type?: string; id?: string }> }).content.some(
90+
(c) => c?.id === 'call-0',
91+
),
92+
) as { content: Array<{ id?: string; name?: string; input?: unknown }> } | undefined;
93+
expect(oldest).toBeDefined();
94+
const tc = oldest?.content.find((c) => c.id === 'call-0');
95+
expect(tc?.name).toBe('str_replace_based_edit_tool');
96+
const input = tc?.input as { _summarized?: boolean; _origBytes?: number } | undefined;
97+
expect(input?._summarized).toBe(true);
98+
expect(input?._origBytes).toBeGreaterThan(1_000);
99+
});
100+
77101
it('keeps the toolCallId on stubbed toolResult rows (pi-ai shape requirement)', async () => {
78102
const transform = buildTransformContext();
79103
const messages: AgentMessage[] = [userMsg('x')];
@@ -83,8 +107,6 @@ describe('buildTransformContext — sliding-window compaction', () => {
83107
messages.push(toolResult(`call-${i}`, `body ${bulk}`));
84108
}
85109
const out = await transform(messages);
86-
// Oldest round's toolResult must still carry the matching toolCallId so
87-
// the LLM can pair it with the assistant toolCall block.
88110
const first = out.find(
89111
(m) => m.role === 'toolResult' && (m as { toolCallId?: string }).toolCallId === 'call-0',
90112
) as { toolCallId?: string; content: Array<{ text?: string }> } | undefined;
@@ -96,48 +118,46 @@ describe('buildTransformContext — sliding-window compaction', () => {
96118
it('preserves user messages and assistant-text messages unchanged', async () => {
97119
const transform = buildTransformContext();
98120
const bulk = 'z'.repeat(3_000);
99-
const messages: AgentMessage[] = [
100-
userMsg('initial brief, do not mangle'),
101-
assistantText('I will start now.'),
102-
];
121+
const openingUser = userMsg('initial brief, do not mangle');
122+
const openingNote = assistantText('I will start now.');
123+
const messages: AgentMessage[] = [openingUser, openingNote];
103124
for (let i = 0; i < 10; i += 1) {
104125
messages.push(assistantWithToolCall(`c${i}`, 'op'));
105126
messages.push(toolResult(`c${i}`, `r ${bulk}`));
106127
}
107128
messages.push(assistantText('final summary line'));
108129
const out = await transform(messages);
109-
// User message identity preserved.
110-
const firstUser = out.find((m) => m.role === 'user');
111-
expect(firstUser).toBe(messages[0]);
112-
// Non-tool-call assistant text preserved.
113-
const openingNote = out.find(
130+
expect(out.find((m) => m.role === 'user')).toBe(openingUser);
131+
const textOnlyAssistants = out.filter(
114132
(m) =>
115133
m.role === 'assistant' &&
116-
(m as { content: Array<{ type: string; text?: string }> }).content.every(
117-
(c) => c.type === 'text',
118-
),
134+
(m as { content: Array<{ type: string }> }).content.every((c) => c.type === 'text'),
119135
);
120-
expect(openingNote).toBeDefined();
136+
expect(textOnlyAssistants.length).toBeGreaterThanOrEqual(2);
121137
});
122138

123-
it('tightens to the aggressive 4-round window when HARD_CAP_BYTES is exceeded', async () => {
139+
it('tightens to the aggressive window when HARD_CAP_BYTES is exceeded', async () => {
124140
const transform = buildTransformContext();
125141
const messages: AgentMessage[] = [userMsg('go')];
126-
// Stuff assistant content with very large payloads so even after stubbing
127-
// older toolResults the total still exceeds the 300 KB cap.
128142
const hugeArgs = 'p'.repeat(40_000);
129-
for (let i = 0; i < 12; i += 1) {
143+
for (let i = 0; i < 10; i += 1) {
130144
messages.push(assistantWithToolCall(`big-${i}`, hugeArgs));
131145
messages.push(toolResult(`big-${i}`, 'small-response'));
132146
}
133147
const out = await transform(messages);
134-
// In aggressive mode only the last 4 toolResult rows stay verbatim. Even
135-
// though the results are small here, we just verify the stub count rose:
136-
// older-than-last-4 rows should all be stubbed.
137-
const results = out.filter((m) => m.role === 'toolResult');
138-
const stubbed = results.filter((m) =>
139-
((m as { content: Array<{ text?: string }> }).content[0]?.text ?? '').startsWith('[dropped'),
140-
);
141-
expect(stubbed.length).toBeGreaterThanOrEqual(8);
148+
// In aggressive mode only the last 3 rounds stay verbatim. Count
149+
// assistant toolCall blocks with summarized input.
150+
let summarizedCount = 0;
151+
for (const m of out) {
152+
if (m.role !== 'assistant') continue;
153+
const content = (m as { content: Array<{ type?: string; input?: unknown }> }).content;
154+
for (const c of content) {
155+
if (c.type === 'toolCall') {
156+
const input = c.input as { _summarized?: boolean } | undefined;
157+
if (input?._summarized === true) summarizedCount += 1;
158+
}
159+
}
160+
}
161+
expect(summarizedCount).toBeGreaterThanOrEqual(7);
142162
});
143163
});

packages/core/src/context-prune.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,7 @@ function stubAssistantToolCalls(m: AgentMessage): AgentMessage {
130130

131131
function applyWindow(messages: AgentMessage[], keep: number): AgentMessage[] {
132132
const roundIdxs = findToolUseRoundIndices(messages);
133-
const firstKeptRoundIdx =
134-
roundIdxs.length > keep ? (roundIdxs[roundIdxs.length - keep] ?? 0) : 0;
133+
const firstKeptRoundIdx = roundIdxs.length > keep ? (roundIdxs[roundIdxs.length - keep] ?? 0) : 0;
135134
return messages.map((m, i) => {
136135
if (i >= firstKeptRoundIdx) return m; // inside the window — keep verbatim
137136
if (isToolResult(m)) return stubToolResult(m);

0 commit comments

Comments
 (0)