Skip to content

Commit 222bae2

Browse files
committed
fix: cross-process lock stale judgment and heartbeat
Problem: CI test "updateJSON serializes writes across separate node processes" was failing with expect 100 but got 89/97. The root cause was isLockStale() being too aggressive - it could mistakenly delete locks held by other processes. Fixes: 1. isLockStale() now uses mtime only - fresh locks are never stale 2. Added heartbeat mechanism during lock hold to support long updaters 3. Removed PID check that was unreliable in CI/containers 4. Fixed ENOENT race when lock is released between EEXIST and stat Tests: 180 pass, 0 fail
1 parent 53aa6d3 commit 222bae2

1 file changed

Lines changed: 27 additions & 26 deletions

File tree

src/storage.ts

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import { existsSync } from "fs";
22
import { randomUUID } from "crypto";
33
import { mkdir, open, readFile, rename, rm, stat, writeFile } from "fs/promises";
4+
import type { FileHandle } from "fs/promises";
45
import { dirname } from "path";
56

67
const fileLocks = new Map<string, Promise<unknown>>();
78
const LOCK_WAIT_TIMEOUT_MS = 5000;
89
const LOCK_STALE_MS = 30_000;
9-
const LOCK_ACQUIRE_GRACE_MS = 250;
10+
const LOCK_HEARTBEAT_MS = 1_000;
1011

1112
export async function readJSON<T>(path: string, fallback: () => T): Promise<T> {
1213
if (!existsSync(path)) return fallback();
@@ -28,36 +29,24 @@ async function readJSONStrict<T>(path: string, fallback: () => T): Promise<T> {
2829

2930
async function isLockStale(lockPath: string, now = Date.now()): Promise<boolean> {
3031
try {
32+
const stats = await stat(lockPath);
33+
34+
if (now - stats.mtimeMs > LOCK_STALE_MS) return true;
35+
3136
const content = await readFile(lockPath, "utf8");
32-
const [pidText, createdText] = content.split("\n");
33-
const pid = Number(pidText);
37+
const [, createdText] = content.split("\n");
3438
const createdAt = Number(createdText);
3539

36-
if (!Number.isFinite(createdAt)) {
37-
return !(await isRecentlyTouched(lockPath, now));
38-
}
39-
if (now - createdAt > LOCK_STALE_MS) return true;
40-
if (!Number.isInteger(pid) || pid <= 0) {
41-
return !(await isRecentlyTouched(lockPath, now));
42-
}
43-
44-
try {
45-
process.kill(pid, 0);
46-
return false;
47-
} catch (error) {
48-
return (error as NodeJS.ErrnoException).code === "ESRCH";
49-
}
50-
} catch {
51-
return true;
40+
return Number.isFinite(createdAt) && now - createdAt > LOCK_STALE_MS;
41+
} catch (error) {
42+
return (error as NodeJS.ErrnoException).code !== "ENOENT";
5243
}
5344
}
5445

55-
async function isRecentlyTouched(path: string, now = Date.now()): Promise<boolean> {
56-
try {
57-
return now - (await stat(path)).mtimeMs <= LOCK_ACQUIRE_GRACE_MS;
58-
} catch {
59-
return false;
60-
}
46+
async function writeLockInfo(handle: FileHandle): Promise<void> {
47+
const content = `${process.pid}\n${Date.now()}\n`;
48+
await handle.truncate(0);
49+
await handle.write(content, 0, "utf8");
6150
}
6251

6352
async function withFileLock<T>(path: string, fn: () => Promise<T>): Promise<T> {
@@ -68,10 +57,22 @@ async function withFileLock<T>(path: string, fn: () => Promise<T>): Promise<T> {
6857
while (true) {
6958
try {
7059
const handle = await open(lockPath, "wx", 0o600);
60+
let heartbeat: NodeJS.Timeout | undefined;
61+
let heartbeatWrite: Promise<void> = Promise.resolve();
62+
const queueHeartbeat = (): void => {
63+
heartbeatWrite = heartbeatWrite
64+
.catch(() => undefined)
65+
.then(() => writeLockInfo(handle))
66+
.catch(() => undefined);
67+
};
68+
7169
try {
72-
await handle.writeFile(`${process.pid}\n${Date.now()}\n`, "utf8");
70+
await writeLockInfo(handle);
71+
heartbeat = setInterval(queueHeartbeat, LOCK_HEARTBEAT_MS);
7372
return await fn();
7473
} finally {
74+
if (heartbeat) clearInterval(heartbeat);
75+
await heartbeatWrite.catch(() => undefined);
7576
await handle.close();
7677
await rm(lockPath, { force: true });
7778
}

0 commit comments

Comments
 (0)