Skip to content

Commit 68cc19e

Browse files
committed
Auto-detect and clean corrupt index databases
Add cbm_store_check_integrity(), which validates that:
- the projects table has <= 5 rows (1 project per .db file)
- root_path starts with / or a drive letter (not numeric garbage)

Wire it into resolve_store (MCP queries) and pipeline routing:
- MCP: a corrupt DB is auto-deleted with an ERROR log and the query returns "not indexed"
- Pipeline: a corrupt DB is deleted and the run falls through to a full reindex

Root cause: user-reported corrupt DBs had 48K rows in the projects table, with node IDs in text columns. The exact write path is TBD, but detection plus auto-clean prevents the symptom from persisting.

5 unit tests: clean DB, empty DB, bad path, too many rows, NULL.
1 parent abe319b commit 68cc19e

5 files changed

Lines changed: 171 additions & 13 deletions

File tree

src/mcp/mcp.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,23 @@ static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) {
657657
project_db_path(project, path, sizeof(path));
658658
srv->store = cbm_store_open_path_query(path);
659659
if (srv->store) {
660+
/* Check DB integrity — auto-clean corrupt databases */
661+
if (!cbm_store_check_integrity(srv->store)) {
662+
cbm_log_error("store.auto_clean", "project", project, "path", path, "action",
663+
"deleting corrupt db — re-index required");
664+
cbm_store_close(srv->store);
665+
srv->store = NULL;
666+
/* Delete the corrupt DB + WAL/SHM files */
667+
cbm_unlink(path);
668+
char wal_path[1040];
669+
char shm_path[1040];
670+
snprintf(wal_path, sizeof(wal_path), "%s-wal", path);
671+
snprintf(shm_path, sizeof(shm_path), "%s-shm", path);
672+
cbm_unlink(wal_path);
673+
cbm_unlink(shm_path);
674+
return NULL;
675+
}
676+
660677
/* Verify the project actually exists in this database.
661678
* A .db file may exist but be empty (e.g., after delete_project on
662679
* Linux where unlink defers actual removal). Opening an empty/deleted

src/pipeline/pipeline.c

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -353,19 +353,34 @@ int cbm_pipeline_run(cbm_pipeline_t *p) {
353353
/* DB exists — check if it has file hashes */
354354
cbm_store_t *check_store = cbm_store_open_path(db_path);
355355
if (check_store) {
356-
cbm_file_hash_t *hashes = NULL;
357-
int hash_count = 0;
358-
cbm_store_get_file_hashes(check_store, p->project_name, &hashes, &hash_count);
359-
cbm_store_free_file_hashes(hashes, hash_count);
360-
cbm_store_close(check_store);
361-
362-
if (hash_count > 0) {
363-
cbm_log_info("pipeline.route", "path", "incremental", "stored_hashes",
364-
itoa_buf(hash_count));
365-
rc = cbm_pipeline_run_incremental(p, db_path, files, file_count);
366-
cbm_discover_free(files, file_count);
367-
free(db_path);
368-
return rc;
356+
/* Integrity check — corrupt DB → delete and fall through to full reindex */
357+
if (!cbm_store_check_integrity(check_store)) {
358+
cbm_log_error("pipeline.corrupt_db", "path", db_path, "action",
359+
"deleting — will do full reindex");
360+
cbm_store_close(check_store);
361+
cbm_unlink(db_path);
362+
char wal[1040];
363+
char shm[1040];
364+
snprintf(wal, sizeof(wal), "%s-wal", db_path);
365+
snprintf(shm, sizeof(shm), "%s-shm", db_path);
366+
cbm_unlink(wal);
367+
cbm_unlink(shm);
368+
} else {
369+
cbm_file_hash_t *hashes = NULL;
370+
int hash_count = 0;
371+
cbm_store_get_file_hashes(check_store, p->project_name, &hashes,
372+
&hash_count);
373+
cbm_store_free_file_hashes(hashes, hash_count);
374+
cbm_store_close(check_store);
375+
376+
if (hash_count > 0) {
377+
cbm_log_info("pipeline.route", "path", "incremental", "stored_hashes",
378+
itoa_buf(hash_count));
379+
rc = cbm_pipeline_run_incremental(p, db_path, files, file_count);
380+
cbm_discover_free(files, file_count);
381+
free(db_path);
382+
return rc;
383+
}
369384
}
370385
}
371386
}

src/store/store.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,54 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) {
404404
return s;
405405
}
406406

407+
/* ── Integrity check ───────────────────────────────────────────── */
408+
409+
/*
 * Run cheap sanity checks on the projects table of an open store.
 *
 * Checks performed:
 *   1. The projects table holds at most max_project_rows rows. Each project
 *      gets its own .db file, so exactly 1 row is expected; the limit leaves
 *      some slack for edge cases.
 *   2. No non-empty root_path begins with a character other than '/' or an
 *      ASCII letter (a drive letter). Corrupt DBs often carry numeric strings
 *      such as "826" in root_path.
 *
 * Returns false when s (or its db handle) is NULL, when the row-count query
 * cannot be prepared, or when either check fails. Returns true otherwise.
 * A failed step on either query, or a failed prepare of the root_path query,
 * is treated as inconclusive and does not mark the database as corrupt.
 */
bool cbm_store_check_integrity(cbm_store_t *s) {
    enum { max_project_rows = 5 };

    if (!s || !s->db) {
        return false;
    }

    sqlite3_stmt *stmt = NULL;
    int rc = sqlite3_prepare_v2(s->db, "SELECT count(*) FROM projects;", -1, &stmt, NULL);
    if (rc != SQLITE_OK) {
        return false;
    }

    bool ok = true;
    if (sqlite3_step(stmt) == SQLITE_ROW) {
        int row_count = sqlite3_column_int(stmt, 0);
        if (row_count > max_project_rows) {
            fprintf(stderr, "ERROR store.corrupt table=projects rows=%d (expected 1)\n", row_count);
            ok = false;
        }
    }
    sqlite3_finalize(stmt);
    stmt = NULL;

    if (!ok) {
        return false;
    }

    /* Accept '/' (POSIX) or any ASCII letter, upper or lower case, as the
     * first character. The previous list stopped at 'H' and ignored lowercase,
     * so valid paths on drives I:-Z: (or written as "c:\...") were reported as
     * corrupt. GLOB is case-sensitive, hence both ranges in the class. */
    rc = sqlite3_prepare_v2(s->db,
                            "SELECT root_path FROM projects WHERE root_path != '' "
                            "AND root_path NOT GLOB '[/A-Za-z]*' LIMIT 1;",
                            -1, &stmt, NULL);
    if (rc == SQLITE_OK) {
        if (sqlite3_step(stmt) == SQLITE_ROW) {
            const char *bad_path = (const char *)sqlite3_column_text(stmt, 0);
            fprintf(stderr, "ERROR store.corrupt table=projects bad_root_path=%s\n",
                    bad_path ? bad_path : "(null)");
            ok = false;
        }
        sqlite3_finalize(stmt);
    }

    return ok;
}
454+
407455
cbm_store_t *cbm_store_open(const char *project) {
408456
if (!project) {
409457
return NULL;
@@ -470,6 +518,10 @@ void cbm_store_close(cbm_store_t *s) {
470518
free(s);
471519
}
472520

521+
/* Hand back the sqlite3 connection owned by the store.
 * A NULL store yields NULL instead of being dereferenced. */
sqlite3 *cbm_store_get_db(cbm_store_t *s) {
    if (s == NULL) {
        return NULL;
    }
    return s->db;
}
524+
473525
const char *cbm_store_error(cbm_store_t *s) {
474526
return s ? s->errbuf : "null store";
475527
}

src/store/store.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,12 +198,20 @@ cbm_store_t *cbm_store_open_path(const char *db_path);
198198
* Returns NULL if the file does not exist — never creates a new .db file. */
199199
cbm_store_t *cbm_store_open_path_query(const char *db_path);
200200

201+
/* Check database integrity. Returns true if the DB passes basic sanity checks
202+
* (projects table holds at most 5 rows and every non-empty root_path looks like a path).
203+
* Returns false if corruption is detected — caller should delete and re-index. */
204+
bool cbm_store_check_integrity(cbm_store_t *s);
205+
201206
/* Open database for a named project in the default cache dir. */
202207
cbm_store_t *cbm_store_open(const char *project);
203208

204209
/* Close the store and free all resources. NULL-safe. */
205210
void cbm_store_close(cbm_store_t *s);
206211

212+
/* Get the underlying sqlite3 handle (for testing only). */
213+
struct sqlite3 *cbm_store_get_db(cbm_store_t *s);
214+
207215
/* Get the last error message (static string, valid until next call). */
208216
const char *cbm_store_error(cbm_store_t *s);
209217

tests/test_store_nodes.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77
#include "test_framework.h"
88
#include <store/store.h>
9+
#include <sqlite3.h>
910
#include <string.h>
1011
#include <stdlib.h>
1112
#include <stdio.h>
@@ -895,10 +896,75 @@ TEST(store_find_node_ids_by_qns) {
895896
PASS();
896897
}
897898

899+
/* ── Integrity check tests ──────────────────────────────────────── */
900+
901+
TEST(store_integrity_clean) {
902+
/* A fresh store with correct data should pass integrity check */
903+
cbm_store_t *s = cbm_store_open_memory();
904+
ASSERT_NOT_NULL(s);
905+
cbm_store_upsert_project(s, "test-proj", "/tmp/test");
906+
ASSERT_TRUE(cbm_store_check_integrity(s));
907+
cbm_store_close(s);
908+
PASS();
909+
}
910+
911+
TEST(store_integrity_empty) {
912+
/* An empty store (no project rows) should pass — 0 rows is fine */
913+
cbm_store_t *s = cbm_store_open_memory();
914+
ASSERT_NOT_NULL(s);
915+
ASSERT_TRUE(cbm_store_check_integrity(s));
916+
cbm_store_close(s);
917+
PASS();
918+
}
919+
920+
TEST(store_integrity_corrupt_bad_path) {
921+
/* Simulate corruption: root_path is a numeric string (not a real path).
922+
* This matches the real corruption where node IDs ended up in root_path. */
923+
cbm_store_t *s = cbm_store_open_memory();
924+
ASSERT_NOT_NULL(s);
925+
sqlite3 *db = cbm_store_get_db(s);
926+
sqlite3_exec(db,
927+
"INSERT INTO projects (name, indexed_at, root_path) "
928+
"VALUES ('some-project', '2024-01-01', '826');",
929+
NULL, NULL, NULL);
930+
ASSERT_FALSE(cbm_store_check_integrity(s));
931+
cbm_store_close(s);
932+
PASS();
933+
}
934+
935+
TEST(store_integrity_corrupt_too_many_rows) {
936+
/* Simulate corruption: >5 rows in projects table */
937+
cbm_store_t *s = cbm_store_open_memory();
938+
ASSERT_NOT_NULL(s);
939+
sqlite3 *db = cbm_store_get_db(s);
940+
for (int i = 0; i < 10; i++) {
941+
char sql[256];
942+
snprintf(sql, sizeof(sql),
943+
"INSERT INTO projects (name, indexed_at, root_path) "
944+
"VALUES ('proj-%d', '2024-01-01', '/tmp/%d');",
945+
i, i);
946+
sqlite3_exec(db, sql, NULL, NULL, NULL);
947+
}
948+
ASSERT_FALSE(cbm_store_check_integrity(s));
949+
cbm_store_close(s);
950+
PASS();
951+
}
952+
953+
TEST(store_integrity_null_check) {
954+
/* NULL store should return false (not crash) */
955+
ASSERT_FALSE(cbm_store_check_integrity(NULL));
956+
PASS();
957+
}
958+
898959
SUITE(store_nodes) {
899960
RUN_TEST(store_open_memory);
900961
RUN_TEST(store_close_null);
901962
RUN_TEST(store_open_memory_twice);
963+
RUN_TEST(store_integrity_clean);
964+
RUN_TEST(store_integrity_empty);
965+
RUN_TEST(store_integrity_corrupt_bad_path);
966+
RUN_TEST(store_integrity_corrupt_too_many_rows);
967+
RUN_TEST(store_integrity_null_check);
902968
RUN_TEST(store_project_crud);
903969
RUN_TEST(store_project_update);
904970
RUN_TEST(store_project_delete);

0 commit comments

Comments
 (0)