Skip to content

Commit 7cc02ee

Browse files
committed
fix: kill orphaned worker processes when CLI is killed ungracefully
When pnpm sends SIGKILL to the CLI process tree, SIGINT/SIGTERM handlers never run, leaving trigger-dev-run-worker child processes alive as zombies consuming significant CPU. The existing watchdog (#3191) handles server-side run cancellation but does not kill the OS-level worker processes. This fix: - Adds getAllPids() to TaskRunProcessPool to collect PIDs from both available and busy process maps - Periodically refreshes active-runs.json (every 2s) so workerPids stays current as processes are spawned and returned to the pool - Extends the watchdog's onParentDied() to SIGTERM all tracked worker PIDs, wait 3s, then SIGKILL any survivors before calling disconnect Fixes #2909
1 parent 5693b62 commit 7cc02ee

4 files changed

Lines changed: 91 additions & 6 deletions

File tree

packages/cli-v3/src/dev/devSupervisor.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ class DevSupervisor implements WorkerRuntime {
8080
private activeRunsPath?: string;
8181
private watchdogPidPath?: string;
8282

83+
private activeRunsUpdateInterval?: NodeJS.Timeout;
84+
85+
8386
constructor(public readonly options: WorkerRuntimeOptions) { }
8487

8588
async init(): Promise<void> {
@@ -153,6 +156,10 @@ class DevSupervisor implements WorkerRuntime {
153156

154157
// Spawn detached watchdog to cancel runs if CLI is killed (e.g. pnpm SIGKILL)
155158
this.#spawnWatchdog();
159+
// Keep active-runs.json current with latest worker PIDs
160+
this.activeRunsUpdateInterval = setInterval(() => {
161+
this.#updateActiveRunsFile();
162+
}, 2_000);
156163

157164
//start dequeuing
158165
await this.#dequeueRuns();
@@ -198,6 +205,10 @@ class DevSupervisor implements WorkerRuntime {
198205
});
199206
}
200207
}
208+
209+
if (this.activeRunsUpdateInterval) {
210+
clearInterval(this.activeRunsUpdateInterval);
211+
}
201212
}
202213

203214
#spawnWatchdog() {
@@ -292,6 +303,7 @@ class DevSupervisor implements WorkerRuntime {
292303
const data = {
293304
parentPid: process.pid,
294305
runFriendlyIds: Array.from(this.runControllers.keys()),
306+
workerPids: this.taskRunProcessPool?.getAllPids() ?? [],
295307
};
296308
// Atomic write: write to temp file then rename to avoid corrupt reads
297309
const tmpPath = this.activeRunsPath + ".tmp";

packages/cli-v3/src/dev/devWatchdog.ts

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,10 @@ writeFileSync(pidFilePath, `${PID_FILE_PREFIX}${process.pid}`);
7171
function cleanup() {
7272
try {
7373
unlinkSync(pidFilePath);
74-
} catch {}
74+
} catch { }
7575
try {
7676
unlinkSync(activeRunsPath);
77-
} catch {}
77+
} catch { }
7878
}
7979

8080
function cleanupTmpDir() {
@@ -95,12 +95,39 @@ function isParentAlive(): boolean {
9595
}
9696
}
9797

98-
function readActiveRuns(): string[] {
98+
function readActiveRuns(): { runFriendlyIds: string[]; workerPids: number[] } {
9999
try {
100100
const data = JSON.parse(readFileSync(activeRunsPath, "utf8"));
101-
return data.runFriendlyIds ?? [];
101+
return {
102+
runFriendlyIds: data.runFriendlyIds ?? [],
103+
workerPids: data.workerPids ?? [],
104+
};
102105
} catch {
103-
return [];
106+
return { runFriendlyIds: [], workerPids: [] };
107+
}
108+
}
109+
110+
async function killWorkerProcesses(pids: number[]): Promise<void> {
111+
for (const pid of pids) {
112+
try {
113+
process.kill(pid, "SIGTERM");
114+
} catch {
115+
// Already dead
116+
}
117+
}
118+
119+
if (pids.length === 0) return;
120+
121+
// Give processes a moment to exit cleanly before SIGKILL
122+
await new Promise((resolve) => setTimeout(resolve, 3_000));
123+
124+
for (const pid of pids) {
125+
try {
126+
process.kill(pid, 0); // Check if still alive
127+
process.kill(pid, "SIGKILL");
128+
} catch {
129+
// Already dead — good
130+
}
104131
}
105132
}
106133

@@ -124,7 +151,10 @@ const MAX_DISCONNECT_ATTEMPTS = 5;
124151
const INITIAL_BACKOFF_MS = 500;
125152

126153
async function onParentDied(): Promise<void> {
127-
const runFriendlyIds = readActiveRuns();
154+
const { runFriendlyIds, workerPids } = readActiveRuns();
155+
156+
//kill orphaned worker processes first
157+
await killWorkerProcesses(workerPids);
128158

129159
if (runFriendlyIds.length > 0) {
130160
for (let attempt = 0; attempt < MAX_DISCONNECT_ATTEMPTS; attempt++) {
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import { describe, it, expect } from "vitest";
2+
import { TaskRunProcessPool } from "./taskRunProcessPool.js";
3+
4+
describe("TaskRunProcessPool", () => {
5+
it("getAllPids returns empty array when pool is empty", () => {
6+
const pool = new TaskRunProcessPool({
7+
env: {},
8+
cwd: "/tmp",
9+
enableProcessReuse: false,
10+
});
11+
12+
expect(pool.getAllPids()).toEqual([]);
13+
});
14+
15+
it("getAllPids returns no undefined values", () => {
16+
const pool = new TaskRunProcessPool({
17+
env: {},
18+
cwd: "/tmp",
19+
enableProcessReuse: false,
20+
});
21+
22+
const pids = pool.getAllPids();
23+
expect(pids.every((pid) => typeof pid === "number")).toBe(true);
24+
});
25+
});

packages/cli-v3/src/dev/taskRunProcessPool.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,24 @@ export class TaskRunProcessPool {
271271
}
272272
}
273273

274+
getAllPids(): number[] {
275+
const pids: number[] = [];
276+
277+
for (const processes of this.availableProcessesByVersion.values()) {
278+
for (const process of processes) {
279+
if (process.pid !== undefined) pids.push(process.pid);
280+
}
281+
}
282+
283+
for (const processSet of this.busyProcessesByVersion.values()) {
284+
for (const process of processSet) {
285+
if (process.pid !== undefined) pids.push(process.pid);
286+
}
287+
}
288+
289+
return pids;
290+
}
291+
274292
async shutdown(): Promise<void> {
275293
const totalAvailable = Array.from(this.availableProcessesByVersion.values()).reduce(
276294
(sum, processes) => sum + processes.length,

0 commit comments

Comments
 (0)