Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
efa8ba9
fix(#106): Phase 1 - eager session stripping + reduced cold-parse pre…
san360 Jun 9, 2026
cd78e31
fix(#106): Phase 2 - stream parse result to parent in per-session chu…
san360 Jun 9, 2026
af771e4
chore: stop tracking docs/proposals and add to .gitignore
san360 Jun 9, 2026
ed402dc
fix(#106): update reload-stability benchmark for chunked IPC protocol
san360 Jun 9, 2026
8b194de
test: skip non-Windows findVsCodeDirs server-path test on win32
san360 Jun 9, 2026
34869a1
test(#106): extract chunking module + synthetic-log generator and OOM…
san360 Jun 9, 2026
28249f2
feat(#106): surface skipped parse history + live telemetry
san360 Jun 9, 2026
5e477d7
refactor: extract worker-host, vscode-request, session-totals, and lo…
san360 Jun 10, 2026
1fb5cec
test: close issue #106 coverage gaps for streaming parse, worker host…
san360 Jun 10, 2026
76b3b9d
chore: reconcile package-lock after rebase onto main
san360 Jun 10, 2026
40096b1
docs: add CHANGELOG entry for issue #106 OOM fix
san360 Jun 10, 2026
96bb4bb
fix: remove duplicate eslint complexity/max-depth rules
san360 Jun 10, 2026
be80457
fix: restore missing @emnapi lockfile entries to sync npm ci
san360 Jun 10, 2026
f474759
fix: restore package-lock.json to match main (no dep changes on this …
san360 Jun 10, 2026
3edd3a0
fix: add 'backpressure' to cspell dictionary
san360 Jun 10, 2026
27a890d
fix(#106): drop misleading 'System free' telemetry tile
san360 Jun 21, 2026
1691c6f
merge: resolve conflicts with main
san360 Jun 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ docs/security-audit.md
test-results/.last-run.json
README.github.md
PROPOSED_FIXES.md
docs/proposals/
local/
test_output.txt
scripts/verify-counts.ts
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Changelog

## Unreleased

- Fix parse-worker out-of-memory on large log sets (#106): parsing now runs in a
forked child process with a capped heap and streams results back in per-session
chunks using ack-window backpressure to avoid native IPC buffer growth
- Add live parse telemetry strip and a dismissible "skipped history" banner to the
loading screen
- Extract streaming JSONL readers, worker-host, and skipped-banner modules with
full unit-test coverage

## 0.1.0 — First Release

- Dashboard with timeline, output, and consumption views
Expand Down
5 changes: 5 additions & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
"minWordLength": 5,
"words": [
"aicoach",
"acked",
"affordances",
"akiaiosfodnn",
"allpending",
"antipatterns",
"aymen",
"backpressure",
"baprs",
"behaviour",
"bglpat",
Expand All @@ -17,6 +19,7 @@
"btcbb",
"burndown",
"bxapp",
"catchable",
"chartjs",
"chatmode",
"chatmodes",
Expand Down Expand Up @@ -89,6 +92,8 @@
"treemap",
"tseslint",
"undercount",
"unacked",
"unflushed",
"unparseable",
"upskilling",
"visualbasic",
Expand Down
3 changes: 3 additions & 0 deletions eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ export default tseslint.config(
unicorn,
},
rules: {
'max-lines': ['warn', { max: 400, skipBlankLines: true, skipComments: true }],
'max-lines-per-function': ['warn', { max: 50, skipBlankLines: true, skipComments: true, IIFEs: true }],
'max-params': ['warn', 4],
"@typescript-eslint/no-unused-vars": [
"error",
{
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@
"analyze:e2e-real-data": "npx tsx scripts/e2e-real-data.ts",
"analyze:catalog-fetch": "node scripts/test-catalog-fetch.mjs",
"smoke-test": "node scripts/smoke-test.mjs",
"gen-synth-logs": "npx tsx scripts/generate-synthetic-logs.ts",
"test:oom-synth": "npx tsx scripts/oom-repro.ts",
"package": "node scripts/package-readme-swap.mjs",
"spellcheck": "cspell \"src/**/*.ts\" \"docs/**/*.md\" \"*.md\"",
"knip": "knip",
Expand Down
29 changes: 26 additions & 3 deletions scripts/benchmark-reload-stability.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,22 @@ function runChildParse(label: string, logsDirs: string[]): Promise<WorkerPayload
const started = Date.now();
let lastLogged = 0;

child.on('message', (msg: { type: string; progress?: { detail?: string }; payload?: WorkerPayload; message?: string }) => {
// Accumulators for the chunked IPC protocol (issue #106, S1).
const sessions: import('../src/core/types').Session[] = [];
const editLocEntries: Array<[string, Array<[string, number]>]> = [];
let workspaces: Array<[string, { id: string; name: string; path: string }]> = [];

child.on('message', (msg: {
type: string;
progress?: { detail?: string };
payload?: {
sessions?: import('../src/core/types').Session[];
editLocEntries?: Array<[string, Array<[string, number]>]>;
workspaces?: Array<[string, { id: string; name: string; path: string }]>;
orphanEditLoc?: Array<[string, Array<[string, number]>]>;
};
message?: string;
}) => {
if (msg.type === 'progress') {
const match = msg.progress?.detail?.match(/^(\d+)\/(\d+):/);
if (!match) return;
Expand All @@ -67,10 +82,18 @@ function runChildParse(label: string, logsDirs: string[]): Promise<WorkerPayload
return;
}

if (msg.type === 'result' && msg.payload) {
if (msg.type === 'chunk' && msg.payload) {
if (msg.payload.sessions) for (const s of msg.payload.sessions) sessions.push(s);
if (msg.payload.editLocEntries) for (const e of msg.payload.editLocEntries) editLocEntries.push(e);
return;
}

if (msg.type === 'done' && msg.payload) {
if (msg.payload.workspaces) workspaces = msg.payload.workspaces;
if (msg.payload.orphanEditLoc) for (const e of msg.payload.orphanEditLoc) editLocEntries.push(e);
console.log(`${label} result after ${((Date.now() - started) / 1000).toFixed(1)}s`);
child.kill();
resolve(msg.payload);
resolve({ result: { workspaces, sessions, editLocIndex: editLocEntries } });
return;
}

Expand Down
158 changes: 158 additions & 0 deletions scripts/generate-synthetic-logs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE in the project root for license information.
*--------------------------------------------------------------------------------------------*/

/*
* Synthetic VS Code session-log generator (issue #106 testing).
*
* Writes fake chatSessions JSON into a throwaway directory that mirrors the on-disk layout the
* parser expects: <root>/<workspaceId>/chatSessions/<sessionId>.json
*
* SAFETY: never writes into the user's workspace or real log dirs. Defaults to an OS temp dir
* (os.tmpdir()), and the directory is the caller's to delete (use cleanup()).
*
* Usable two ways:
* 1. Imported: generateSyntheticLogs({ workspaces, sessionsPerWorkspace, ... })
* 2. CLI: npx tsx scripts/generate-synthetic-logs.ts --target-gb 2 --keep
*/

import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { fileURLToPath } from 'url';

/** Knobs controlling the size and shape of a generated synthetic log tree. */
export interface GenOptions {
/** Destination root. Defaults to a fresh OS temp dir. Never point this at real data. */
root?: string;
/** Number of `synthws-NNNNN` workspace directories created under the root. */
workspaces: number;
/** Number of session JSON files written into each workspace's `chatSessions` directory. */
sessionsPerWorkspace: number;
/** Number of entries in each session file's `requests` array. */
requestsPerSession: number;
/** Approximate combined bytes of message + response text per request. */
bytesPerRequest: number;
}

/** Summary of what a generation run wrote to disk. */
export interface GenResult {
/** Root directory the tree was written under (caller-supplied, or a newly created temp dir). */
root: string;
/** Count of session files written. */
totalSessions: number;
/** Count of request entries across all session files. */
totalRequests: number;
/** Sum of the session JSON string lengths; the per-workspace `workspace.json` files are not counted. */
approxBytesOnDisk: number;
}

/**
 * Fallback sizing applied to any option the caller leaves out.
 * With these values a tree holds 50 x 20 = 1000 session files of 8 requests each.
 */
const DEFAULTS: Omit<GenOptions, 'root'> = {
workspaces: 50,
sessionsPerWorkspace: 20,
requestsPerSession: 8,
bytesPerRequest: 4096,
};

/**
 * Produce exactly `bytes` characters of ASCII filler derived from `seed`.
 *
 * The filler is one seeded sentence repeated and then truncated, which keeps generation
 * cheap and gives the same output for the same (bytes, seed) pair.
 *
 * @param bytes Desired length; zero or negative yields an empty string.
 * @param seed Short tag embedded in the repeated sentence.
 * @returns The filler text.
 */
function makeText(bytes: number, seed: string): string {
    if (!(bytes <= 0)) {
        const sentence = `${seed} the quick brown fox edits ${seed}.ts and runs npm test. `;
        const copiesNeeded = Math.ceil(bytes / sentence.length);
        const padded = sentence.repeat(copiesNeeded);
        return padded.slice(0, bytes);
    }
    return '';
}

/**
 * Build the serialized JSON body of one synthetic chat-session file.
 *
 * Timestamps are derived from the session index (one minute apart per session, one second
 * apart per request), and each request splits `opts.bytesPerRequest` evenly between its
 * message text and its response text.
 *
 * @param wsId Workspace identifier; used as the prefix of session and request ids.
 * @param sessionIdx Zero-based index of the session within its workspace.
 * @param opts Generation options; reads `bytesPerRequest` and `requestsPerSession`.
 * @returns The session object serialized with `JSON.stringify`.
 */
function makeSessionJson(wsId: string, sessionIdx: number, opts: GenOptions): string {
    const sessionId = `${wsId}-s${sessionIdx}`;
    const startedAt = 1_700_000_000_000 + sessionIdx * 60_000;
    // Message and response each get half the per-request budget, never less than one character.
    const textBytes = Math.max(1, Math.floor(opts.bytesPerRequest / 2));

    const buildRequest = (_unused: unknown, j: number) => {
        const modelId = j % 2 !== 0 ? 'claude-3.7' : 'gpt-4o';
        return {
            requestId: `${sessionId}-r${j}`,
            timestamp: startedAt + j * 1000,
            message: { text: makeText(textBytes, `m${j}`) },
            response: [{ value: makeText(textBytes, `a${j}`) }],
            modelId,
            result: { timings: { firstProgress: 200, totalElapsed: 1500 }, metadata: {} },
        };
    };

    const session = {
        creationDate: startedAt,
        lastMessageDate: startedAt + opts.requestsPerSession * 1000,
        sessionId,
        requests: Array.from({ length: opts.requestsPerSession }, buildRequest),
    };
    return JSON.stringify(session);
}

/**
 * Write a synthetic workspace/session tree to disk and report what was produced.
 *
 * Layout per workspace: `<root>/<wsId>/workspace.json` plus
 * `<root>/<wsId>/chatSessions/<wsId>-s<N>.json` for each session.
 *
 * @param partial Overrides for the default sizing; omitted fields fall back to `DEFAULTS`.
 *   When `root` is omitted a fresh directory is created under the OS temp dir.
 * @returns The root used and running totals for sessions, requests and session-file size.
 */
export function generateSyntheticLogs(partial: Partial<GenOptions>): GenResult {
    const opts: GenOptions = { ...DEFAULTS, ...partial };
    const root = opts.root ?? fs.mkdtempSync(path.join(os.tmpdir(), 'aic-synth-'));
    const summary: GenResult = { root, totalSessions: 0, totalRequests: 0, approxBytesOnDisk: 0 };

    let wsIndex = 0;
    while (wsIndex < opts.workspaces) {
        const wsId = `synthws-${wsIndex.toString().padStart(5, '0')}`;
        const wsDir = path.join(root, wsId);
        const chatDir = path.join(wsDir, 'chatSessions');

        // Creating the nested chatSessions dir also creates the workspace dir itself.
        fs.mkdirSync(chatDir, { recursive: true });
        const workspaceMeta = JSON.stringify({ folder: `file:///synthetic/${wsId}` });
        fs.writeFileSync(path.join(wsDir, 'workspace.json'), workspaceMeta);

        let sessionIndex = 0;
        while (sessionIndex < opts.sessionsPerWorkspace) {
            const body = makeSessionJson(wsId, sessionIndex, opts);
            const sessionFile = path.join(chatDir, `${wsId}-s${sessionIndex}.json`);
            fs.writeFileSync(sessionFile, body);

            summary.approxBytesOnDisk += body.length;
            summary.totalRequests += opts.requestsPerSession;
            summary.totalSessions += 1;
            sessionIndex += 1;
        }
        wsIndex += 1;
    }

    return summary;
}

/**
 * Delete a generated tree and everything beneath it.
 * A path that does not exist is ignored rather than treated as an error.
 *
 * @param root Directory to remove.
 */
export function cleanup(root: string): void {
    const removal = { recursive: true, force: true };
    fs.rmSync(root, removal);
}

/**
 * Derive workspace/session counts that roughly hit a target on-disk size in GB.
 *
 * Sessions-per-workspace and requests-per-session stay at their defaults; only the workspace
 * count scales. The estimate counts request text only, so the JSON wrapper around each
 * request is not included in the target.
 *
 * @param targetGb Desired tree size in GiB. Values small enough to round to zero workspaces
 *   (including negatives) are clamped to one workspace.
 * @param bytesPerRequest Text bytes budgeted per request; defaults to `DEFAULTS.bytesPerRequest`.
 * @returns A partial option set suitable for `generateSyntheticLogs`.
 * @throws RangeError If `targetGb` is not finite, or `bytesPerRequest` is not a finite
 *   positive number. Without this check a zero `bytesPerRequest` divides by zero and yields
 *   an infinite workspace count, and NaN yields a plan that generates nothing.
 */
export function planForTargetGb(targetGb: number, bytesPerRequest = DEFAULTS.bytesPerRequest): Partial<GenOptions> {
    if (!Number.isFinite(targetGb)) {
        throw new RangeError(`targetGb must be a finite number, got ${targetGb}`);
    }
    if (!Number.isFinite(bytesPerRequest) || bytesPerRequest <= 0) {
        throw new RangeError(`bytesPerRequest must be a finite positive number, got ${bytesPerRequest}`);
    }

    const targetBytes = targetGb * 1024 * 1024 * 1024;
    const requestsPerSession = DEFAULTS.requestsPerSession;
    const sessionsPerWorkspace = DEFAULTS.sessionsPerWorkspace;
    const bytesPerSession = requestsPerSession * bytesPerRequest;
    const bytesPerWorkspace = sessionsPerWorkspace * bytesPerSession;
    const workspaces = Math.max(1, Math.ceil(targetBytes / bytesPerWorkspace));
    return { workspaces, sessionsPerWorkspace, requestsPerSession, bytesPerRequest };
}

/** Format a byte count as mebibytes with one decimal place, e.g. `1.5 MB`. */
function mb(bytes: number): string {
    const bytesPerMb = 1024 * 1024;
    const megabytes = bytes / bytesPerMb;
    return megabytes.toFixed(1) + ' MB';
}

// CLI entry — only runs when executed directly, not when imported.
// fileURLToPath decodes percent-escapes and handles Windows drive letters, so a script path
// containing spaces or other escaped characters still compares equal to argv[1]. Stripping
// the "file://" prefix by hand left "%20" in place and the CLI silently did nothing.
const entryScript = process.argv[1];
const invokedDirectly = entryScript
    ? path.resolve(entryScript) === path.resolve(fileURLToPath(import.meta.url))
    : false;
if (invokedDirectly) {
    const args = process.argv.slice(2);

    // Read the numeric value following `flag`; fall back to `def` when the flag or its value
    // is absent. A non-numeric or negative value is rejected instead of flowing through as
    // NaN, which would make every generation loop a silent no-op.
    const getNum = (flag: string, def: number): number => {
        const i = args.indexOf(flag);
        const raw = i >= 0 ? args[i + 1] : undefined;
        if (!raw) return def;
        const value = Number(raw);
        if (!Number.isFinite(value) || value < 0) {
            throw new Error(`Invalid value for ${flag}: "${raw}" (expected a non-negative number)`);
        }
        return value;
    };
    const keep = args.includes('--keep');
    const targetGb = args.includes('--target-gb') ? getNum('--target-gb', 1) : undefined;

    // --target-gb takes precedence: when present, the individual sizing flags are ignored.
    const plan: Partial<GenOptions> = targetGb != null
        ? planForTargetGb(targetGb)
        : {
            workspaces: getNum('--workspaces', DEFAULTS.workspaces),
            sessionsPerWorkspace: getNum('--sessions', DEFAULTS.sessionsPerWorkspace),
            requestsPerSession: getNum('--requests', DEFAULTS.requestsPerSession),
            bytesPerRequest: getNum('--bytes', DEFAULTS.bytesPerRequest),
        };

    console.log('Generating synthetic logs with plan:', plan);
    const t0 = Date.now();
    const res = generateSyntheticLogs(plan);
    console.log(`\nGenerated in ${((Date.now() - t0) / 1000).toFixed(1)}s`);
    console.log(` root: ${res.root}`);
    console.log(` sessions: ${res.totalSessions}`);
    console.log(` requests: ${res.totalRequests}`);
    console.log(` on disk: ${mb(res.approxBytesOnDisk)}`);
    if (keep) {
        console.log('\n--keep set: tree retained. Delete it yourself when done:');
        console.log(` rm -rf "${res.root}"`);
    } else {
        cleanup(res.root);
        console.log('\nCleaned up (pass --keep to retain the tree).');
    }
}
105 changes: 105 additions & 0 deletions scripts/oom-repro.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE in the project root for license information.
*--------------------------------------------------------------------------------------------*/

/*
* OOM regression repro (issue #106).
*
* Generates a large synthetic session tree in an OS temp dir, then forks the REAL built parse
* worker (dist/parse-worker.js) exactly like the extension does — isolated process, capped heap
* — and asserts it streams a `done` message instead of crashing (SIGABRT / non-zero exit).
*
* SAFETY:
* - Synthetic data is written only to os.tmpdir(); deleted on exit.
* - The forked worker's HOME/USERPROFILE are redirected to a temp dir so its disk-cache write
* never touches the user's real ~/.copilot-analytics-cache.
*
* This is intentionally NOT part of `npm test` (it allocates GBs). Run it manually:
* npm run build
* npm run test:oom-synth # default ~1.5 GB tree
* OOM_TARGET_GB=3 OOM_HEAP_MB=4096 npm run test:oom-synth
*/

import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { fork } from 'child_process';
import { cleanup, generateSyntheticLogs, planForTargetGb } from './generate-synthetic-logs';

/** Render a byte count in mebibytes, rounded to one decimal place and suffixed with " MB". */
function mb(bytes: number): string {
    const asMegabytes = bytes / 1024 / 1024;
    return `${asMegabytes.toFixed(1)} MB`;
}

/** Inputs needed to fork one parse-worker run. */
interface WorkerRun {
    /** Absolute path of the built worker script. */
    workerPath: string;
    /** Value passed to the child's --max-old-space-size flag. */
    heapMb: number;
    /** Directory substituted for HOME/USERPROFILE in the child's environment. */
    fakeHome: string;
    /** Root of the synthetic log tree the worker is asked to parse. */
    logsRoot: string;
}

/**
 * Read a positive numeric environment variable.
 *
 * @returns The parsed value, or `undefined` when the variable is unset or empty.
 * @throws Error When the variable is set but is not a finite positive number. Letting such a
 *   value through as NaN produces an empty synthetic tree and a meaningless PASS.
 */
function readPositiveEnv(name: string): number | undefined {
    const raw = process.env[name];
    if (raw === undefined || raw === '') return undefined;
    const value = Number(raw);
    if (!Number.isFinite(value) || value <= 0) {
        throw new Error(`Invalid ${name}="${raw}": expected a positive number.`);
    }
    return value;
}

/**
 * Fork the worker, send it the synthetic log root, and wait for its `done` message.
 *
 * @returns Resolves with 0 once `done` arrives.
 * @throws Rejects if the worker reports `error`, exits before `done`, or fails to spawn.
 */
function runWorker(run: WorkerRun): Promise<number> {
    return new Promise<number>((resolve, reject) => {
        const child = fork(run.workerPath, [], {
            execArgv: [`--max-old-space-size=${run.heapMb}`],
            stdio: ['ignore', 'inherit', 'inherit', 'ipc'],
            env: { ...process.env, HOME: run.fakeHome, USERPROFILE: run.fakeHome },
        });

        let chunks = 0;
        let sessions = 0;
        let gotDone = false;
        const started = Date.now();

        // NOTE(review): the changelog for #106 describes ack-window backpressure, but this
        // harness never acknowledges chunks — confirm the worker cannot stall waiting for acks.
        child.on('message', (msg: { type?: string; payload?: { sessions?: unknown[] } }) => {
            if (msg.type === 'chunk') {
                chunks++;
                sessions += msg.payload?.sessions?.length ?? 0;
            } else if (msg.type === 'done') {
                gotDone = true;
                console.log(`\n✅ worker emitted "done" after ${((Date.now() - started) / 1000).toFixed(1)}s: ${chunks} chunks, ${sessions} sessions`);
                child.kill();
                resolve(0);
            } else if (msg.type === 'error') {
                // Stop the worker before the caller deletes the directories it is reading.
                child.kill();
                reject(new Error(`worker error message: ${JSON.stringify(msg)}`));
            }
        });

        child.on('exit', (code, signal) => {
            if (gotDone) return;
            reject(new Error(`worker exited before "done": code=${code} signal=${signal || ''}`));
        });
        child.on('error', reject);

        child.send({ logsDirs: [run.logsRoot] });
    });
}

/**
 * Generate a synthetic log tree, run the built parse worker over it in a forked process with
 * a capped heap, and report PASS/FAIL. Sets `process.exitCode = 1` on failure.
 *
 * Environment: OOM_TARGET_GB (default 1.5), OOM_HEAP_MB (default 4096), and optional
 * OOM_BYTES_PER_REQUEST to shape larger individual session files.
 *
 * @throws Error If an environment value is invalid or the worker has not been built; both are
 *   checked before any temp data is created.
 */
async function main(): Promise<void> {
    const targetGb = readPositiveEnv('OOM_TARGET_GB') ?? 1.5;
    const heapMb = readPositiveEnv('OOM_HEAP_MB') ?? 4096;
    // Optional: shape large individual session files (mimics histories with a few huge sessions,
    // which stress the prefetch buffer / per-workspace transient — issue #106).
    const bytesPerRequest = readPositiveEnv('OOM_BYTES_PER_REQUEST');

    const workerPath = path.join(process.cwd(), 'dist', 'parse-worker.js');
    if (!fs.existsSync(workerPath)) {
        throw new Error(`Worker not built: ${workerPath}. Run "npm run build" first.`);
    }

    // Every temp dir is registered before use so the finally block removes it even when
    // generation itself fails partway (for example, the disk fills up).
    const tempDirs: string[] = [];
    const makeTempDir = (prefix: string): string => {
        const dir = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
        tempDirs.push(dir);
        return dir;
    };

    try {
        // Redirect the forked worker's home so its cache write lands in temp, not the real home dir.
        const fakeHome = makeTempDir('aic-oom-home-');
        const logsRoot = makeTempDir('aic-synth-');

        console.log(`Generating ~${targetGb} GB synthetic tree...`);
        const t0 = Date.now();
        const gen = generateSyntheticLogs({ ...planForTargetGb(targetGb, bytesPerRequest), root: logsRoot });
        console.log(` ${gen.totalSessions} sessions, ${mb(gen.approxBytesOnDisk)} on disk in ${((Date.now() - t0) / 1000).toFixed(1)}s`);
        console.log(` root: ${gen.root}`);
        console.log(`Forking worker with --max-old-space-size=${heapMb}...`);

        const exitCode = await runWorker({ workerPath, heapMb, fakeHome, logsRoot: gen.root });
        console.log(`\nPASS — repro completed without OOM (exit ${exitCode}).`);
    } catch (err) {
        console.error(`\nFAIL — ${err instanceof Error ? err.message : String(err)}`);
        process.exitCode = 1;
    } finally {
        for (const dir of tempDirs) cleanup(dir);
        console.log('Cleaned up temp data.');
    }
}

// Last-resort handler: anything main() throws outside its own try/catch (bad environment
// values, missing worker build) is printed with its stack and turned into a failing exit code.
main().catch((failure) => {
    let report: string;
    if (failure instanceof Error) {
        report = failure.stack ?? failure.message;
    } else {
        report = String(failure);
    }
    console.error(report);
    process.exitCode = 1;
});
Loading
Loading