Skip to content

Commit 8dd490b

Browse files
fix(store): keep WAL journal mode during bulk write to prevent DB corruption on crash
cbm_store_begin_bulk() was switching the SQLite journal mode from WAL to MEMORY for write throughput. If the process crashed mid-bulk-write the in-memory rollback journal was lost, leaving the database file in a partially-written, unrecoverable state. WAL mode is inherently crash-safe: uncommitted WAL entries are discarded on the next open. The performance benefit of bulk mode is preserved via synchronous=OFF and an enlarged cache_size, both of which are safe under WAL. Remove the PRAGMA journal_mode = MEMORY from cbm_store_begin_bulk and the matching PRAGMA journal_mode = WAL from cbm_store_end_bulk. Update the header comments to reflect the new invariant. Add tests/test_store_bulk.c with three tests: - bulk_pragma_wal_invariant: asserts journal_mode remains "wal" after cbm_store_begin_bulk via an independent read-only connection - bulk_pragma_end_wal_invariant: asserts journal_mode remains "wal" after cbm_store_end_bulk - bulk_crash_recovery: forks a child that enters bulk mode, opens an explicit transaction, writes data, then calls _exit() without committing; the parent verifies the database opens cleanly and baseline data survives Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0ef1b7b commit 8dd490b

5 files changed

Lines changed: 177 additions & 13 deletions

File tree

Makefile.cbm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ TEST_STORE_SRCS = \
247247
tests/test_store_nodes.c \
248248
tests/test_store_edges.c \
249249
tests/test_store_search.c \
250-
tests/test_store_arch.c
250+
tests/test_store_arch.c \
251+
tests/test_store_bulk.c
251252

252253
TEST_CYPHER_SRCS = \
253254
tests/test_cypher.c

src/store/store.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -427,23 +427,21 @@ int cbm_store_rollback(cbm_store_t *s) {
427427
/* ── Bulk write ─────────────────────────────────────────────────── */
428428

429429
int cbm_store_begin_bulk(cbm_store_t *s) {
430-
int rc = exec_sql(s, "PRAGMA journal_mode = MEMORY;");
431-
if (rc != CBM_STORE_OK) {
432-
return rc;
433-
}
434-
rc = exec_sql(s, "PRAGMA synchronous = OFF;");
430+
/* Stay in WAL mode throughout. Switching to MEMORY journal mode would
431+
* make the database unrecoverable if the process crashes mid-write,
432+
* because the in-memory rollback journal is lost on crash.
433+
* WAL mode is crash-safe: uncommitted WAL entries are simply discarded
434+
* on the next open. Performance is preserved via synchronous=OFF and a
435+
* larger cache, which are safe with WAL. */
436+
int rc = exec_sql(s, "PRAGMA synchronous = OFF;");
435437
if (rc != CBM_STORE_OK) {
436438
return rc;
437439
}
438440
return exec_sql(s, "PRAGMA cache_size = -65536;"); /* 64 MB */
439441
}
440442

441443
int cbm_store_end_bulk(cbm_store_t *s) {
442-
int rc = exec_sql(s, "PRAGMA journal_mode = WAL;");
443-
if (rc != CBM_STORE_OK) {
444-
return rc;
445-
}
446-
rc = exec_sql(s, "PRAGMA synchronous = NORMAL;");
444+
int rc = exec_sql(s, "PRAGMA synchronous = NORMAL;");
447445
if (rc != CBM_STORE_OK) {
448446
return rc;
449447
}

src/store/store.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,11 @@ int cbm_store_rollback(cbm_store_t *s);
212212

213213
/* ── Bulk write optimization ────────────────────────────────────── */
214214

215-
/* Switch to MEMORY journal for maximum write throughput. */
215+
/* Tune pragmas for bulk write throughput (synchronous=OFF, large cache).
216+
* WAL journal mode is preserved throughout for crash safety. */
216217
int cbm_store_begin_bulk(cbm_store_t *s);
217218

218-
/* Restore WAL journal mode after bulk writes. */
219+
/* Restore normal pragmas (synchronous=NORMAL, default cache) after bulk writes. */
219220
int cbm_store_end_bulk(cbm_store_t *s);
220221

221222
/* Drop user indexes for faster bulk inserts. */

tests/test_main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ extern void suite_sqlite_writer(void);
3737
extern void suite_go_lsp(void);
3838
extern void suite_c_lsp(void);
3939
extern void suite_store_arch(void);
40+
extern void suite_store_bulk(void);
4041
extern void suite_httplink(void);
4142
extern void suite_traces(void);
4243
extern void suite_configlink(void);
@@ -69,6 +70,7 @@ int main(void) {
6970
RUN_SUITE(store_nodes);
7071
RUN_SUITE(store_edges);
7172
RUN_SUITE(store_search);
73+
RUN_SUITE(store_bulk);
7274

7375
/* Cypher (M6) */
7476
RUN_SUITE(cypher);

tests/test_store_bulk.c

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/*
2+
* test_store_bulk.c — Crash-safety tests for bulk write mode.
3+
*
4+
* Verifies that cbm_store_begin_bulk / cbm_store_end_bulk never switch away
5+
* from WAL journal mode. Switching to MEMORY journal mode during bulk writes
6+
* makes the database unrecoverable on a crash because the in-memory rollback
7+
* journal is lost. WAL mode is inherently crash-safe: uncommitted WAL entries
8+
* are discarded on the next open.
9+
*
10+
* Tests:
11+
* bulk_pragma_wal_invariant — journal_mode stays "wal" after begin_bulk
12+
* bulk_pragma_end_wal_invariant — journal_mode stays "wal" after end_bulk
13+
* bulk_crash_recovery — DB is readable after simulated crash mid-bulk
14+
*/
15+
#include "test_framework.h"
16+
#include <store/store.h>
17+
#include <sqlite3.h>
18+
#include <stdio.h>
19+
#include <stdlib.h>
20+
#include <string.h>
21+
#include <unistd.h>
22+
#include <sys/wait.h>
23+
24+
/* ── Helpers ──────────────────────────────────────────────────── */
25+
26+
/* Query journal_mode via a separate read-only connection so the result is
27+
* independent of any state held inside the cbm_store_t under test. */
28+
static char *get_journal_mode(const char *db_path) {
29+
sqlite3 *db;
30+
if (sqlite3_open_v2(db_path, &db, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK)
31+
return NULL;
32+
sqlite3_stmt *stmt;
33+
char *mode = NULL;
34+
if (sqlite3_prepare_v2(db, "PRAGMA journal_mode;", -1, &stmt, NULL) == SQLITE_OK) {
35+
if (sqlite3_step(stmt) == SQLITE_ROW)
36+
mode = strdup((const char *)sqlite3_column_text(stmt, 0));
37+
sqlite3_finalize(stmt);
38+
}
39+
sqlite3_close(db);
40+
return mode;
41+
}
42+
43+
static void make_temp_path(char *buf, size_t n) {
44+
snprintf(buf, n, "/tmp/cmm_bulk_test_%d.db", (int)getpid());
45+
}
46+
47+
static void cleanup_db(const char *path) {
48+
remove(path);
49+
char aux[512];
50+
snprintf(aux, sizeof(aux), "%s-wal", path);
51+
remove(aux);
52+
snprintf(aux, sizeof(aux), "%s-shm", path);
53+
remove(aux);
54+
}
55+
56+
/* ── Tests ──────────────────────────────────────────────────────── */
57+
58+
/* begin_bulk must NOT switch journal_mode away from WAL. */
59+
TEST(bulk_pragma_wal_invariant) {
60+
char db_path[256];
61+
make_temp_path(db_path, sizeof(db_path));
62+
cleanup_db(db_path);
63+
64+
cbm_store_t *s = cbm_store_open_path(db_path);
65+
ASSERT_NOT_NULL(s);
66+
67+
char *before = get_journal_mode(db_path);
68+
ASSERT_NOT_NULL(before);
69+
ASSERT_STR_EQ(before, "wal");
70+
free(before);
71+
72+
int rc = cbm_store_begin_bulk(s);
73+
ASSERT_EQ(rc, CBM_STORE_OK);
74+
75+
char *after = get_journal_mode(db_path);
76+
ASSERT_NOT_NULL(after);
77+
ASSERT_STR_EQ(after, "wal"); /* FAILS with bug, PASSES with fix */
78+
free(after);
79+
80+
cbm_store_end_bulk(s);
81+
cbm_store_close(s);
82+
cleanup_db(db_path);
83+
PASS();
84+
}
85+
86+
/* end_bulk must also leave journal_mode as WAL. */
87+
TEST(bulk_pragma_end_wal_invariant) {
88+
char db_path[256];
89+
make_temp_path(db_path, sizeof(db_path));
90+
cleanup_db(db_path);
91+
92+
cbm_store_t *s = cbm_store_open_path(db_path);
93+
ASSERT_NOT_NULL(s);
94+
95+
cbm_store_begin_bulk(s);
96+
cbm_store_end_bulk(s);
97+
98+
char *mode = get_journal_mode(db_path);
99+
ASSERT_NOT_NULL(mode);
100+
ASSERT_STR_EQ(mode, "wal");
101+
free(mode);
102+
103+
cbm_store_close(s);
104+
cleanup_db(db_path);
105+
PASS();
106+
}
107+
108+
/* Simulate a crash mid-bulk-write: fork a child that calls begin_bulk, opens
109+
* an explicit transaction, and then calls _exit() without committing or calling
110+
* end_bulk. The parent verifies the database is still openable and that
111+
* committed baseline data is intact. */
112+
TEST(bulk_crash_recovery) {
113+
char db_path[256];
114+
make_temp_path(db_path, sizeof(db_path));
115+
cleanup_db(db_path);
116+
117+
/* Write committed baseline data. */
118+
cbm_store_t *s = cbm_store_open_path(db_path);
119+
ASSERT_NOT_NULL(s);
120+
int rc = cbm_store_upsert_project(s, "baseline", "/tmp/baseline");
121+
ASSERT_EQ(rc, CBM_STORE_OK);
122+
cbm_store_close(s);
123+
124+
/* Child: enter bulk mode, start a transaction, write, then crash. */
125+
pid_t pid = fork();
126+
if (pid == 0) {
127+
cbm_store_t *cs = cbm_store_open_path(db_path);
128+
if (!cs)
129+
_exit(1);
130+
cbm_store_begin_bulk(cs);
131+
cbm_store_begin(cs); /* explicit open transaction */
132+
cbm_store_upsert_project(cs, "crashed", "/tmp/crashed");
133+
/* Crash: no COMMIT, no end_bulk, no close. */
134+
_exit(0);
135+
}
136+
ASSERT_GT(pid, 0);
137+
int status;
138+
waitpid(pid, &status, 0);
139+
140+
/* Recovery: database must open cleanly. */
141+
cbm_store_t *recovered = cbm_store_open_path(db_path);
142+
ASSERT_NOT_NULL(recovered); /* NULL would indicate corruption */
143+
144+
/* Baseline commit must survive. */
145+
cbm_project_t p = {0};
146+
rc = cbm_store_get_project(recovered, "baseline", &p);
147+
ASSERT_EQ(rc, CBM_STORE_OK);
148+
ASSERT_STR_EQ(p.name, "baseline");
149+
cbm_project_free_fields(&p);
150+
151+
cbm_store_close(recovered);
152+
cleanup_db(db_path);
153+
PASS();
154+
}
155+
156+
/* ── Suite ──────────────────────────────────────────────────────── */
157+
158+
SUITE(store_bulk) {
159+
RUN_TEST(bulk_pragma_wal_invariant);
160+
RUN_TEST(bulk_pragma_end_wal_invariant);
161+
RUN_TEST(bulk_crash_recovery);
162+
}

0 commit comments

Comments
 (0)