From bc1ca61a4eaaa4ad06d1e1a3b4efed2cf1c0b27d Mon Sep 17 00:00:00 2001 From: Claude Lin & Lay Date: Wed, 29 Apr 2026 19:45:24 +0900 Subject: [PATCH] chore(fts): remove failed FTS5 corruption recovery migrations 0002-0004 (#135) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit production は本日 D1 time-travel restore (2026-04-23T12:00:00Z) で 0002 適用前の状態に復元済み。本 commit は source 側の掃除で、既に corruption を引き起こしたまま放置されていた失敗 migration を物理削除する。 0002 の `INSERT INTO search_docs_code_fts(search_docs_code_fts) VALUES('rebuild');` が 0001 で導入した AFTER INSERT trigger の `WHERE tokenizer_kind` 分岐を無視し、全 search_docs を code_fts (trigram tokenizer) にも強制 index した。8000 字の自然言語 nat content × 739 row ≈ 600 万 trigram が code_fts に流入し、FTS5 内部 segment が corruption。0003 (DROP+CREATE search_docs_code_fts + 再 'rebuild') と 0004 (trigger 再生成) は同じ 'rebuild' pattern を踏襲した復旧試行で、いずれも production で失敗した。storage 層の corruption が論理的な DROP+CREATE では除去できず、time-travel restore でしか抜けられなかった。 production の `d1_migrations` テーブルには 0002/0003/0004 の dummy 行を "applied" 状態で挿入済み。wrangler 側で再 apply は抑止される。 未来の fresh DB deploy は 0001 のみで初期化されるため、同じ罠を踏まない。 Closes #135 --- migrations/0002_fts5_rebuild.sql | 24 ----------- migrations/0003_fts5_code_recreate.sql | 37 ---------------- migrations/0004_fts5_triggers_regen.sql | 57 ------------------------- 3 files changed, 118 deletions(-) delete mode 100644 migrations/0002_fts5_rebuild.sql delete mode 100644 migrations/0003_fts5_code_recreate.sql delete mode 100644 migrations/0004_fts5_triggers_regen.sql diff --git a/migrations/0002_fts5_rebuild.sql b/migrations/0002_fts5_rebuild.sql deleted file mode 100644 index 177d188..0000000 --- a/migrations/0002_fts5_rebuild.sql +++ /dev/null @@ -1,24 +0,0 @@ --- FTS5 virtual table rebuild — recover from SQLITE_CORRUPT_VTAB --- --- Layer = L4 Operations (sparse retrieval surface recovery) --- --- Context: --- Observed on 2026-04-24: `search` sparse path and FTS upserts both --- failed with `D1_ERROR: database disk image is malformed: SQLITE_CORRUPT --- (extended: SQLITE_CORRUPT_VTAB)`. The content-owner table `search_docs` --- was intact (1109 rows, direct bm25 queries via `wrangler d1 execute` --- succeeded), so only the FTS5 virtual tables were corrupt. --- --- Recovery: --- FTS5's built-in `'rebuild'` command re-populates the virtual table from --- the content-owner table (`search_docs`). The operation is idempotent and --- non-destructive — it reads every row of `search_docs` and re-indexes it --- through each tokenizer. On a healthy or empty FTS table it is effectively --- a no-op, so this migration is safe to re-run. --- --- Scope: --- Covers both FTS5 tables (nat + code). Triggers from 0001 keep subsequent --- upserts in sync automatically. - -INSERT INTO search_docs_nat_fts(search_docs_nat_fts) VALUES('rebuild'); -INSERT INTO search_docs_code_fts(search_docs_code_fts) VALUES('rebuild'); diff --git a/migrations/0003_fts5_code_recreate.sql b/migrations/0003_fts5_code_recreate.sql deleted file mode 100644 index 19921e0..0000000 --- a/migrations/0003_fts5_code_recreate.sql +++ /dev/null @@ -1,37 +0,0 @@ --- D1 FTS5 code_fts virtual table fresh recreate — recurring SQLITE_CORRUPT_VTAB --- --- Layer = L4 Operations (sparse retrieval surface recovery) --- --- Context: --- 2026-04-24: migration 0002 applied FTS5 'rebuild' to both nat_fts and code_fts --- to recover from SQLITE_CORRUPT_VTAB. --- 2026-04-28: corruption recurred on code_fts only (trigram tokenizer side). --- Enriched logs from PR #137 confirmed errorName=Error / --- D1_ERROR: database disk image is malformed: SQLITE_CORRUPT_VTAB on every diff --- upsert across all 5 polled repos. --- --- Recovery (more aggressive than 0002): --- DROP the corrupted virtual table and recreate it from scratch with the --- same definition as 0001, then repopulate via FTS5 'rebuild' which reads --- from the content-owner table (search_docs). --- --- Triggers from 0001 (trg_search_docs_ai/ad/au) reference search_docs_code_fts --- by name; they resume working as soon as the new table exists, so they do --- not need to be redefined. --- --- Scope: --- Affects code_fts only. nat_fts is untouched (no recurring corruption observed there). --- --- Idempotency: --- IF EXISTS / IF NOT EXISTS clauses keep the migration safe to re-run. - -DROP TABLE IF EXISTS search_docs_code_fts; - -CREATE VIRTUAL TABLE IF NOT EXISTS search_docs_code_fts USING fts5 ( - content, - tokenize = 'trigram case_sensitive 0', - content = 'search_docs', - content_rowid = 'rowid' -); - -INSERT INTO search_docs_code_fts(search_docs_code_fts) VALUES('rebuild'); diff --git a/migrations/0004_fts5_triggers_regen.sql b/migrations/0004_fts5_triggers_regen.sql deleted file mode 100644 index d7cb785..0000000 --- a/migrations/0004_fts5_triggers_regen.sql +++ /dev/null @@ -1,57 +0,0 @@ --- Force re-declaration of FTS5 sync triggers (axis 2 attempt for issue #135) --- --- Layer = L4 Operations (sparse retrieval surface recovery) --- --- Context: --- 2026-04-28: migration 0003 dropped+recreated search_docs_code_fts to --- recover from recurring SQLITE_CORRUPT_VTAB. After 0003 was merged AND --- applied via D1 console, production Worker continues to hit --- D1_ERROR: SQLITE_CORRUPT_VTAB on every diff upsert (tokenizer_kind=code). --- nat_fts surface is clean; code_fts surface persists corrupt across --- :30 pollDiffs cron iterations. --- --- Hypothesis: --- The AFTER INSERT/UPDATE/DELETE triggers from 0001 were compiled with --- references that may need re-resolution after the underlying virtual --- table was DROP+CREATEd. Re-declaring the triggers (DROP + CREATE) --- forces re-binding to the new search_docs_code_fts. --- --- Scope: --- Triggers carry no data (declarative), so this is non-destructive. --- Body is byte-for-byte identical to 0001; only the declaration --- timing changes. --- --- Idempotency: --- DROP IF EXISTS keeps the migration safe to re-run. - -DROP TRIGGER IF EXISTS trg_search_docs_ai; -DROP TRIGGER IF EXISTS trg_search_docs_ad; -DROP TRIGGER IF EXISTS trg_search_docs_au; - -CREATE TRIGGER trg_search_docs_ai AFTER INSERT ON search_docs -BEGIN - INSERT INTO search_docs_nat_fts(rowid, content) - SELECT new.rowid, new.content WHERE new.tokenizer_kind = 'nat'; - INSERT INTO search_docs_code_fts(rowid, content) - SELECT new.rowid, new.content WHERE new.tokenizer_kind = 'code'; -END; - -CREATE TRIGGER trg_search_docs_ad AFTER DELETE ON search_docs -BEGIN - INSERT INTO search_docs_nat_fts(search_docs_nat_fts, rowid, content) - VALUES('delete', old.rowid, old.content); - INSERT INTO search_docs_code_fts(search_docs_code_fts, rowid, content) - VALUES('delete', old.rowid, old.content); -END; - -CREATE TRIGGER trg_search_docs_au AFTER UPDATE ON search_docs -BEGIN - INSERT INTO search_docs_nat_fts(search_docs_nat_fts, rowid, content) - VALUES('delete', old.rowid, old.content); - INSERT INTO search_docs_code_fts(search_docs_code_fts, rowid, content) - VALUES('delete', old.rowid, old.content); - INSERT INTO search_docs_nat_fts(rowid, content) - SELECT new.rowid, new.content WHERE new.tokenizer_kind = 'nat'; - INSERT INTO search_docs_code_fts(rowid, content) - SELECT new.rowid, new.content WHERE new.tokenizer_kind = 'code'; -END;