From 2b4e886c4dc4e1ccab7ef4f50843fb4557015207 Mon Sep 17 00:00:00 2001 From: Andre Rabold Date: Mon, 18 May 2026 06:26:03 -0700 Subject: [PATCH 1/5] fix(store): migrate vectors to partition keys --- .../014-rebuild-vector-partition-keys.sql | 40 +++++ src/store/DocumentStore.test.ts | 167 ++++++++++++++++++ src/store/DocumentStore.ts | 124 ++++++++++--- src/store/applyMigrations.test.ts | 100 ++++++++++- 4 files changed, 399 insertions(+), 32 deletions(-) create mode 100644 db/migrations/014-rebuild-vector-partition-keys.sql diff --git a/db/migrations/014-rebuild-vector-partition-keys.sql b/db/migrations/014-rebuild-vector-partition-keys.sql new file mode 100644 index 00000000..eeb6dc10 --- /dev/null +++ b/db/migrations/014-rebuild-vector-partition-keys.sql @@ -0,0 +1,40 @@ +-- Migration: rebuild documents_vec with sqlite-vec partition keys +-- This enables selective KNN queries by library_id and version_id. + +-- Preserve compatible vectors from the existing vec table. +DROP TABLE IF EXISTS temp_documents_vec_partition_migration; + +CREATE TEMPORARY TABLE temp_documents_vec_partition_migration AS +SELECT rowid, library_id, version_id, embedding +FROM documents_vec +WHERE vec_length(embedding) = 1536; + +DROP TABLE documents_vec; + +CREATE VIRTUAL TABLE documents_vec USING vec0( + library_id INTEGER partition key, + version_id INTEGER partition key, + embedding FLOAT[1536] +); + +INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) +SELECT rowid, library_id, version_id, embedding +FROM temp_documents_vec_partition_migration; + +-- Backfill any vectors stored on documents but missing from the vec table. 
+INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) +SELECT + d.id, + v.library_id, + v.id AS version_id, + json_extract(d.embedding, '$') AS embedding +FROM documents d +JOIN pages p ON d.page_id = p.id +JOIN versions v ON p.version_id = v.id +WHERE d.embedding IS NOT NULL + AND vec_length(json_extract(d.embedding, '$')) = 1536 + AND NOT EXISTS ( + SELECT 1 FROM documents_vec existing WHERE existing.rowid = d.id + ); + +DROP TABLE temp_documents_vec_partition_migration; diff --git a/src/store/DocumentStore.test.ts b/src/store/DocumentStore.test.ts index 4492be58..c7500be5 100644 --- a/src/store/DocumentStore.test.ts +++ b/src/store/DocumentStore.test.ts @@ -458,6 +458,92 @@ describe("DocumentStore - With Embeddings", () => { expect(result.score).toBeGreaterThan(0); } }); + + it("should use partition-filtered vector search for hybrid results", async () => { + const originalApiKey = process.env.OPENAI_API_KEY; + try { + process.env.OPENAI_API_KEY = "test-key-for-partition-search"; + await store.shutdown(); + + const cfg = loadConfig(); + const embeddingConfig = EmbeddingConfig.parseEmbeddingConfig( + "openai:text-embedding-3-small", + ); + cfg.app.embeddingModel = embeddingConfig.modelSpec; + store = new DocumentStore(":memory:", cfg); + await store.initialize(); + + await store.addDocuments( + "searchtest", + "1.0.0", + 1, + createScrapeResult( + "JavaScript Programming Guide", + "https://example.com/js-guide", + "JavaScript programming tutorial with code examples and functions", + ["programming", "javascript"], + ), + ); + await store.addDocuments( + "searchtest", + "1.0.0", + 1, + createScrapeResult( + "JavaScript Frameworks", + "https://example.com/js-frameworks", + "Advanced JavaScript frameworks like React and Vue for building applications", + ["programming", "javascript", "frameworks"], + ), + ); + + // @ts-expect-error Accessing private property for testing + const db = store.db; + const ddl = db + .prepare( + "SELECT sql FROM 
sqlite_master WHERE type = 'table' AND name = 'documents_vec'", + ) + .get() as { sql: string }; + expect(ddl.sql).toContain("library_id INTEGER partition key"); + expect(ddl.sql).toContain("version_id INTEGER partition key"); + + const results = await store.findByContent( + "searchtest", + "1.0.0", + "application building", + 10, + ); + // @ts-expect-error Accessing private property for testing + const vector = await store.embeddings.embedQuery("application building"); + + expect(results.length).toBeGreaterThan(0); + + const vectorResults = db + .prepare(` + SELECT dv.rowid, dv.distance + FROM documents_vec dv + WHERE dv.library_id = ( + SELECT id FROM libraries WHERE name = ? + ) + AND dv.version_id = ( + SELECT v.id + FROM versions v + JOIN libraries l ON v.library_id = l.id + WHERE l.name = ? AND v.name = ? + ) + AND dv.embedding MATCH ? + AND dv.k = ? + ORDER BY dv.distance + `) + .all("searchtest", "searchtest", "1.0.0", JSON.stringify(vector), 10); + expect(vectorResults.length).toBeGreaterThan(0); + } finally { + if (originalApiKey === undefined) { + delete process.env.OPENAI_API_KEY; + } else { + process.env.OPENAI_API_KEY = originalApiKey; + } + } + }); }); describe("Embedding Batch Processing", () => { @@ -1777,6 +1863,8 @@ describe("DocumentStore - Embedding Model Change Safety", () => { ) .get() as { sql: string }; expect(ddl.sql).toContain("768"); + expect(ddl.sql).toContain("library_id INTEGER partition key"); + expect(ddl.sql).toContain("version_id INTEGER partition key"); }); it("should update metadata with new model and dimension", async () => { @@ -1804,6 +1892,8 @@ describe("DocumentStore - Embedding Model Change Safety", () => { .get() as { sql: string }; expect(ddl).toBeDefined(); expect(ddl.sql).toContain("1536"); + expect(ddl.sql).toContain("library_id INTEGER partition key"); + expect(ddl.sql).toContain("version_id INTEGER partition key"); // Table should be empty (no backfill) // @ts-expect-error Accessing private property for testing @@ 
-1844,5 +1934,82 @@ describe("DocumentStore - Embedding Model Change Safety", () => { .get() as { cnt: number }; expect(vecAfter.cnt).toBe(vecBefore.cnt); }); + + it("should rebuild old metadata-column vec table and backfill stored embeddings", async () => { + store = await createStore(""); + + // @ts-expect-error Accessing private property for testing + const db = store.db; + db.prepare("INSERT INTO libraries (name) VALUES (?)").run("legacyvec"); + const { id: libraryId } = db + .prepare("SELECT id FROM libraries WHERE name = ?") + .get("legacyvec") as { id: number }; + db.prepare("INSERT INTO versions (library_id, name) VALUES (?, ?)").run( + libraryId, + "1.0.0", + ); + const { id: versionId } = db + .prepare("SELECT id FROM versions WHERE library_id = ? AND name = ?") + .get(libraryId, "1.0.0") as { id: number }; + const pageId = db + .prepare("INSERT INTO pages (version_id, url, title) VALUES (?, ?, ?)") + .run(versionId, "https://example.com/legacy", "Legacy Vec").lastInsertRowid; + const vector = new Array(1536).fill(0).map((_, index) => (index === 0 ? 
1 : 0)); + const docId = db + .prepare( + "INSERT INTO documents (page_id, content, metadata, sort_order, embedding) VALUES (?, ?, ?, ?, ?)", + ) + .run( + pageId, + "legacy vector content", + JSON.stringify({ path: ["legacy"] }), + 0, + JSON.stringify(vector), + ).lastInsertRowid; + + db.exec(` + DROP TABLE documents_vec; + CREATE VIRTUAL TABLE documents_vec USING vec0( + library_id INTEGER NOT NULL, + version_id INTEGER NOT NULL, + embedding FLOAT[1536] + ); + `); + db.prepare( + "INSERT INTO documents_vec (rowid, library_id, version_id, embedding) VALUES (?, ?, ?, ?)", + ).run(BigInt(docId), BigInt(libraryId), BigInt(versionId), JSON.stringify(vector)); + + // @ts-expect-error Accessing private method for testing + store.ensureVectorTable(); + + const ddl = db + .prepare( + "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'documents_vec'", + ) + .get() as { sql: string }; + expect(ddl.sql).toContain("library_id INTEGER partition key"); + expect(ddl.sql).toContain("version_id INTEGER partition key"); + + const vectorRows = db + .prepare("SELECT COUNT(*) as cnt FROM documents_vec WHERE rowid = ?") + .get(docId) as { cnt: number }; + expect(vectorRows.cnt).toBe(1); + + const result = db + .prepare(` + SELECT rowid, distance + FROM documents_vec + WHERE library_id = ? + AND version_id = ? + AND embedding MATCH ? 
+ AND k = 1 + `) + .get(libraryId, versionId, JSON.stringify(vector)) as + | { rowid: number; distance: number } + | undefined; + + expect(result?.rowid).toBe(Number(docId)); + expect(result?.distance).toBeCloseTo(0, 6); + }); }); }); diff --git a/src/store/DocumentStore.ts b/src/store/DocumentStore.ts index f53fd78f..5f334e5b 100644 --- a/src/store/DocumentStore.ts +++ b/src/store/DocumentStore.ts @@ -574,13 +574,7 @@ export class DocumentStore { // Drop and recreate vec table as empty with the new dimension this.db.exec("DROP TABLE IF EXISTS documents_vec"); - this.db.exec(` - CREATE VIRTUAL TABLE documents_vec USING vec0( - library_id INTEGER NOT NULL, - version_id INTEGER NOT NULL, - embedding FLOAT[${newDimension}] - ); - `); + this.createVectorTable(newDimension); // Update metadata to reflect the new configuration this.setEmbeddingMetadata(newModel, newDimension); @@ -901,14 +895,15 @@ export class DocumentStore { /** * Creates or reconciles the documents_vec virtual table with configurable dimension. * Called after migrations and model change detection. The table is initially created - * by migration 003 with a fixed 1536 dimension; this method reconciles it at runtime - * if the configured dimension differs. - * Idempotent: if the table already exists with the same dimension, no-op; if dimension - * changed in config, drops and recreates so any embedding provider (e.g. 1536 or 3584) works. + * by migrations with a fixed 1536 dimension; this method reconciles it at runtime + * if the configured dimension or partition-key schema differs. + * Idempotent: if the table already has the expected dimension and partition keys, + * no-op; otherwise, drops and recreates so any embedding provider works and KNN + * queries can use selective partition filters. * - * Note: No backfill of existing embeddings is performed. Vectors are populated during - * scraping, not at startup. 
Old vectors from a different dimension or model are incompatible - * and are handled by the model change detection system (checkEmbeddingModelChange). + * Compatible existing vectors are preserved, and missing rows are backfilled from + * documents.embedding when available. Old vectors from a different dimension or model + * are handled by the model change detection system (checkEmbeddingModelChange). */ private ensureVectorTable(): void { const dim = this.config.embeddings.vectorDimension; @@ -927,21 +922,96 @@ export class DocumentStore { if (existingSql) { const match = existingSql.sql.match(/embedding\s+FLOAT\s*\[\s*(\d+)\s*]/i); const existingDim = match ? Number(match[1]) : null; - if (existingDim === dim) { + if (existingDim === dim && this.hasVectorPartitionKeys(existingSql.sql)) { return; } - this.db.exec("DROP TABLE documents_vec;"); } + logger.info( + existingSql + ? "🔄 Rebuilding vector index with partition-key schema" + : "🔄 Creating vector index with partition-key schema", + ); + this.rebuildVectorTable(dim, Boolean(existingSql)); + } + + private hasVectorPartitionKeys(sql: string): boolean { + return ( + /library_id\s+INTEGER\s+partition\s+key/i.test(sql) && + /version_id\s+INTEGER\s+partition\s+key/i.test(sql) + ); + } + + private createVectorTable(dimension: number): void { this.db.exec(` CREATE VIRTUAL TABLE documents_vec USING vec0( - library_id INTEGER NOT NULL, - version_id INTEGER NOT NULL, - embedding FLOAT[${dim}] + library_id INTEGER partition key, + version_id INTEGER partition key, + embedding FLOAT[${dimension}] ); `); } + private backfillVectorTable(dimension: number): void { + this.db + .prepare<[number]>(` + INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) + SELECT + d.id, + v.library_id, + v.id, + json_extract(d.embedding, '$') + FROM documents d + JOIN pages p ON d.page_id = p.id + JOIN versions v ON p.version_id = v.id + WHERE d.embedding IS NOT NULL + AND vec_length(json_extract(d.embedding, '$')) = 
? + AND NOT EXISTS ( + SELECT 1 FROM documents_vec existing WHERE existing.rowid = d.id + ) + `) + .run(dimension); + } + + private rebuildVectorTable(dimension: number, preserveExisting: boolean): void { + const transaction = this.db.transaction(() => { + this.db.exec("DROP TABLE IF EXISTS temp_documents_vec_migration"); + + if (preserveExisting) { + this.db + .prepare<[number]>(` + CREATE TEMPORARY TABLE temp_documents_vec_migration AS + SELECT rowid, library_id, version_id, embedding + FROM documents_vec + WHERE vec_length(embedding) = ? + `) + .run(dimension); + this.db.exec("DROP TABLE documents_vec"); + } else { + this.db.exec(` + CREATE TEMPORARY TABLE temp_documents_vec_migration( + rowid INTEGER PRIMARY KEY, + library_id INTEGER NOT NULL, + version_id INTEGER NOT NULL, + embedding BLOB NOT NULL + ) + `); + } + + this.createVectorTable(dimension); + + this.db.exec(` + INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) + SELECT rowid, library_id, version_id, embedding + FROM temp_documents_vec_migration + `); + this.backfillVectorTable(dimension); + this.db.exec("DROP TABLE temp_documents_vec_migration"); + }); + + transaction(); + } + /** * Resolves a library name and version string to version_id. * Creates library and version records if they don't exist. 
@@ -1798,7 +1868,7 @@ export class DocumentStore { return []; } - const { id: versionId } = versionRow; + const { id: versionId, library_id: libraryId } = versionRow; if (this.isVectorSearchEnabled) { // Hybrid search: vector + full-text search with RRF ranking @@ -1812,17 +1882,13 @@ export class DocumentStore { const vectorSearchK = overfetchLimit * this.vectorSearchMultiplier; const stmt = this.db.prepare(` - WITH vec_distances AS ( + WITH vec_distances AS NOT MATERIALIZED ( SELECT dv.rowid as id, dv.distance as vec_distance FROM documents_vec dv - JOIN documents d ON dv.rowid = d.id - JOIN pages p ON d.page_id = p.id - JOIN versions v ON p.version_id = v.id - JOIN libraries l ON v.library_id = l.id - WHERE l.name = ? - AND COALESCE(v.name, '') = COALESCE(?, '') + WHERE dv.library_id = ? + AND dv.version_id = ? AND dv.embedding MATCH ? AND dv.k = ? ORDER BY dv.distance @@ -1861,8 +1927,8 @@ export class DocumentStore { `); const rawResults = stmt.all( - library.toLowerCase(), - normalizedVersion, + libraryId, + versionId, JSON.stringify(embedding), vectorSearchK, versionId, diff --git a/src/store/applyMigrations.test.ts b/src/store/applyMigrations.test.ts index e333f858..8e465830 100644 --- a/src/store/applyMigrations.test.ts +++ b/src/store/applyMigrations.test.ts @@ -30,7 +30,7 @@ describe("Database Migrations", () => { const tableNames = (tables as TableRow[]).map((t) => t.name); expect(tableNames).toContain("documents"); expect(tableNames).toContain("documents_fts"); - // documents_vec is created by migration 003 (with fixed 1536 dimension); + // documents_vec is created by migrations with a fixed 1536 dimension; // DocumentStore.ensureVectorTable() reconciles it at runtime if the configured dimension differs expect(tableNames).toContain("documents_vec"); expect(tableNames).toContain("libraries"); @@ -103,14 +103,16 @@ describe("Database Migrations", () => { .get() as { sql: string } | undefined; expect(ftsTableInfo?.sql).toContain("VIRTUAL TABLE 
documents_fts USING fts5"); - // documents_vec is created by migration 003 (with fixed 1536 dimension) and survives through all - // subsequent migrations. DocumentStore.ensureVectorTable() reconciles it at runtime if needed. + // documents_vec is created by migrations with fixed 1536 dimension and partition keys. + // DocumentStore.ensureVectorTable() reconciles it at runtime if needed. const vecTableInfo = db .prepare( "SELECT sql FROM sqlite_master WHERE type='table' AND name='documents_vec';", ) .get() as { sql: string } | undefined; expect(vecTableInfo).toBeDefined(); + expect(vecTableInfo?.sql).toContain("library_id INTEGER partition key"); + expect(vecTableInfo?.sql).toContain("version_id INTEGER partition key"); }); it("should handle vector search with empty results gracefully", () => { @@ -162,6 +164,98 @@ describe("Database Migrations", () => { expect(searchResults).toEqual([]); }); + it("should preserve and backfill vectors when migrating to partition keys", () => { + expect(() => applyMigrations(db)).not.toThrow(); + + const ddl = db + .prepare( + "SELECT sql FROM sqlite_master WHERE type='table' AND name='documents_vec';", + ) + .get() as { sql: string }; + expect(ddl.sql).toContain("library_id INTEGER partition key"); + expect(ddl.sql).toContain("version_id INTEGER partition key"); + + db.prepare("INSERT INTO libraries (name) VALUES (?)").run("partition-lib"); + const { id: libraryId } = db + .prepare("SELECT id FROM libraries WHERE name = ?") + .get("partition-lib") as { id: number }; + db.prepare("INSERT INTO versions (library_id, name) VALUES (?, ?)").run( + libraryId, + "1.0.0", + ); + const { id: versionId } = db + .prepare("SELECT id FROM versions WHERE library_id = ? 
AND name = ?") + .get(libraryId, "1.0.0") as { id: number }; + + const pageId = db + .prepare("INSERT INTO pages (version_id, url, title) VALUES (?, ?, ?)") + .run(versionId, "https://example.com/partition", "Partitioned").lastInsertRowid as + | number + | bigint; + const vector = new Array(1536).fill(0).map((_, index) => (index === 0 ? 1 : 0)); + const docId = db + .prepare( + "INSERT INTO documents (page_id, content, metadata, sort_order, embedding) VALUES (?, ?, ?, ?, ?)", + ) + .run( + pageId, + "Partitioned vector content", + JSON.stringify({ path: "/partition" }), + 0, + JSON.stringify(vector), + ).lastInsertRowid as number | bigint; + + db.exec(` + CREATE TEMPORARY TABLE temp_existing_vectors AS + SELECT rowid, library_id, version_id, embedding FROM documents_vec; + DROP TABLE documents_vec; + CREATE VIRTUAL TABLE documents_vec USING vec0( + library_id INTEGER NOT NULL, + version_id INTEGER NOT NULL, + embedding FLOAT[1536] + ); + INSERT INTO documents_vec (rowid, library_id, version_id, embedding) + SELECT rowid, library_id, version_id, embedding FROM temp_existing_vectors; + DROP TABLE temp_existing_vectors; + DELETE FROM documents_vec; + `); + + db.prepare("DELETE FROM _schema_migrations WHERE id = ?").run( + "014-rebuild-vector-partition-keys.sql", + ); + + expect(() => applyMigrations(db)).not.toThrow(); + + const migratedDdl = db + .prepare( + "SELECT sql FROM sqlite_master WHERE type='table' AND name='documents_vec';", + ) + .get() as { sql: string }; + expect(migratedDdl.sql).toContain("library_id INTEGER partition key"); + expect(migratedDdl.sql).toContain("version_id INTEGER partition key"); + + const vectorRows = db + .prepare("SELECT COUNT(*) as cnt FROM documents_vec WHERE rowid = ?") + .get(docId) as { cnt: number }; + expect(vectorRows.cnt).toBe(1); + + const result = db + .prepare(` + SELECT rowid, distance + FROM documents_vec + WHERE library_id = ? + AND version_id = ? + AND embedding MATCH ? 
+ AND k = 1 + `) + .get(libraryId, versionId, JSON.stringify(vector)) as + | { rowid: number; distance: number } + | undefined; + + expect(result?.rowid).toBe(Number(docId)); + expect(result?.distance).toBeCloseTo(0, 6); + }); + it("should perform vector search and return similar vectors correctly", () => { // Apply all migrations (documents_vec exists from migration 003 with 1536d) expect(() => applyMigrations(db)).not.toThrow(); From 4b010dee0d6f567d41382019071a7772a993f4fb Mon Sep 17 00:00:00 2001 From: Andre Rabold Date: Mon, 18 May 2026 06:56:06 -0700 Subject: [PATCH 2/5] fix(store): derive vector partition keys during rebuild --- .../014-rebuild-vector-partition-keys.sql | 24 ++++++--- src/store/DocumentStore.test.ts | 24 +++++++-- src/store/DocumentStore.ts | 23 +++++--- src/store/applyMigrations.test.ts | 52 ++++++++++++++----- 4 files changed, 89 insertions(+), 34 deletions(-) diff --git a/db/migrations/014-rebuild-vector-partition-keys.sql b/db/migrations/014-rebuild-vector-partition-keys.sql index eeb6dc10..538468cc 100644 --- a/db/migrations/014-rebuild-vector-partition-keys.sql +++ b/db/migrations/014-rebuild-vector-partition-keys.sql @@ -1,13 +1,21 @@ -- Migration: rebuild documents_vec with sqlite-vec partition keys -- This enables selective KNN queries by library_id and version_id. --- Preserve compatible vectors from the existing vec table. -DROP TABLE IF EXISTS temp_documents_vec_partition_migration; +-- Preserve compatible vectors from the existing vec table. This uses a +-- disk-backed staging table because large vector indexes can exceed memory. 
+DROP TABLE IF EXISTS _documents_vec_partition_migration; -CREATE TEMPORARY TABLE temp_documents_vec_partition_migration AS -SELECT rowid, library_id, version_id, embedding -FROM documents_vec -WHERE vec_length(embedding) = 1536; +CREATE TABLE _documents_vec_partition_migration AS +SELECT + d.id AS rowid, + v.library_id, + v.id AS version_id, + dv.embedding +FROM documents_vec dv +JOIN documents d ON dv.rowid = d.id +JOIN pages p ON d.page_id = p.id +JOIN versions v ON p.version_id = v.id +WHERE vec_length(dv.embedding) = 1536; DROP TABLE documents_vec; @@ -19,7 +27,7 @@ CREATE VIRTUAL TABLE documents_vec USING vec0( INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) SELECT rowid, library_id, version_id, embedding -FROM temp_documents_vec_partition_migration; +FROM _documents_vec_partition_migration; -- Backfill any vectors stored on documents but missing from the vec table. INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) @@ -37,4 +45,4 @@ WHERE d.embedding IS NOT NULL SELECT 1 FROM documents_vec existing WHERE existing.rowid = d.id ); -DROP TABLE temp_documents_vec_partition_migration; +DROP TABLE _documents_vec_partition_migration; diff --git a/src/store/DocumentStore.test.ts b/src/store/DocumentStore.test.ts index c7500be5..6aad2289 100644 --- a/src/store/DocumentStore.test.ts +++ b/src/store/DocumentStore.test.ts @@ -1935,7 +1935,7 @@ describe("DocumentStore - Embedding Model Change Safety", () => { expect(vecAfter.cnt).toBe(vecBefore.cnt); }); - it("should rebuild old metadata-column vec table and backfill stored embeddings", async () => { + it("should rebuild old metadata-column vec table with current partition keys", async () => { store = await createStore(""); // @ts-expect-error Accessing private property for testing @@ -1951,20 +1951,26 @@ describe("DocumentStore - Embedding Model Change Safety", () => { const { id: versionId } = db .prepare("SELECT id FROM versions WHERE library_id = ? 
AND name = ?") .get(libraryId, "1.0.0") as { id: number }; + db.prepare("INSERT INTO versions (library_id, name) VALUES (?, ?)").run( + libraryId, + "2.0.0", + ); + const { id: staleVersionId } = db + .prepare("SELECT id FROM versions WHERE library_id = ? AND name = ?") + .get(libraryId, "2.0.0") as { id: number }; const pageId = db .prepare("INSERT INTO pages (version_id, url, title) VALUES (?, ?, ?)") .run(versionId, "https://example.com/legacy", "Legacy Vec").lastInsertRowid; const vector = new Array(1536).fill(0).map((_, index) => (index === 0 ? 1 : 0)); const docId = db .prepare( - "INSERT INTO documents (page_id, content, metadata, sort_order, embedding) VALUES (?, ?, ?, ?, ?)", + "INSERT INTO documents (page_id, content, metadata, sort_order) VALUES (?, ?, ?, ?)", ) .run( pageId, "legacy vector content", JSON.stringify({ path: ["legacy"] }), 0, - JSON.stringify(vector), ).lastInsertRowid; db.exec(` @@ -1977,7 +1983,12 @@ describe("DocumentStore - Embedding Model Change Safety", () => { `); db.prepare( "INSERT INTO documents_vec (rowid, library_id, version_id, embedding) VALUES (?, ?, ?, ?)", - ).run(BigInt(docId), BigInt(libraryId), BigInt(versionId), JSON.stringify(vector)); + ).run( + BigInt(docId), + BigInt(libraryId), + BigInt(staleVersionId), + JSON.stringify(vector), + ); // @ts-expect-error Accessing private method for testing store.ensureVectorTable(); @@ -1994,6 +2005,11 @@ describe("DocumentStore - Embedding Model Change Safety", () => { .prepare("SELECT COUNT(*) as cnt FROM documents_vec WHERE rowid = ?") .get(docId) as { cnt: number }; expect(vectorRows.cnt).toBe(1); + const partitionKeys = db + .prepare("SELECT library_id, version_id FROM documents_vec WHERE rowid = ?") + .get(docId) as { library_id: number; version_id: number }; + expect(partitionKeys.library_id).toBe(libraryId); + expect(partitionKeys.version_id).toBe(versionId); const result = db .prepare(` diff --git a/src/store/DocumentStore.ts b/src/store/DocumentStore.ts index 
5f334e5b..f53da335 100644 --- a/src/store/DocumentStore.ts +++ b/src/store/DocumentStore.ts @@ -975,21 +975,28 @@ export class DocumentStore { private rebuildVectorTable(dimension: number, preserveExisting: boolean): void { const transaction = this.db.transaction(() => { - this.db.exec("DROP TABLE IF EXISTS temp_documents_vec_migration"); + this.db.exec("DROP TABLE IF EXISTS _documents_vec_migration"); if (preserveExisting) { this.db .prepare<[number]>(` - CREATE TEMPORARY TABLE temp_documents_vec_migration AS - SELECT rowid, library_id, version_id, embedding - FROM documents_vec - WHERE vec_length(embedding) = ? + CREATE TABLE _documents_vec_migration AS + SELECT + d.id AS rowid, + v.library_id, + v.id AS version_id, + dv.embedding + FROM documents_vec dv + JOIN documents d ON dv.rowid = d.id + JOIN pages p ON d.page_id = p.id + JOIN versions v ON p.version_id = v.id + WHERE vec_length(dv.embedding) = ? `) .run(dimension); this.db.exec("DROP TABLE documents_vec"); } else { this.db.exec(` - CREATE TEMPORARY TABLE temp_documents_vec_migration( + CREATE TABLE _documents_vec_migration( rowid INTEGER PRIMARY KEY, library_id INTEGER NOT NULL, version_id INTEGER NOT NULL, @@ -1003,10 +1010,10 @@ export class DocumentStore { this.db.exec(` INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) SELECT rowid, library_id, version_id, embedding - FROM temp_documents_vec_migration + FROM _documents_vec_migration `); this.backfillVectorTable(dimension); - this.db.exec("DROP TABLE temp_documents_vec_migration"); + this.db.exec("DROP TABLE _documents_vec_migration"); }); transaction(); diff --git a/src/store/applyMigrations.test.ts b/src/store/applyMigrations.test.ts index 8e465830..7261d72c 100644 --- a/src/store/applyMigrations.test.ts +++ b/src/store/applyMigrations.test.ts @@ -164,8 +164,8 @@ describe("Database Migrations", () => { expect(searchResults).toEqual([]); }); - it("should preserve and backfill vectors when migrating to partition keys", () 
=> { - expect(() => applyMigrations(db)).not.toThrow(); + it("should preserve and backfill vectors when migrating to partition keys", async () => { + await expect(applyMigrations(db)).resolves.toBeUndefined(); const ddl = db .prepare( @@ -192,21 +192,32 @@ describe("Database Migrations", () => { .run(versionId, "https://example.com/partition", "Partitioned").lastInsertRowid as | number | bigint; - const vector = new Array(1536).fill(0).map((_, index) => (index === 0 ? 1 : 0)); - const docId = db + const preservedVector = new Array(1536) + .fill(0) + .map((_, index) => (index === 0 ? 1 : 0)); + const backfillVector = new Array(1536) + .fill(0) + .map((_, index) => (index === 1 ? 1 : 0)); + const preservedDocId = db + .prepare( + "INSERT INTO documents (page_id, content, metadata, sort_order) VALUES (?, ?, ?, ?)", + ) + .run(pageId, "Preserved vector content", JSON.stringify({ path: "/partition" }), 0) + .lastInsertRowid as number | bigint; + const backfilledDocId = db .prepare( "INSERT INTO documents (page_id, content, metadata, sort_order, embedding) VALUES (?, ?, ?, ?, ?)", ) .run( pageId, - "Partitioned vector content", + "Backfilled vector content", JSON.stringify({ path: "/partition" }), - 0, - JSON.stringify(vector), + 1, + JSON.stringify(backfillVector), ).lastInsertRowid as number | bigint; db.exec(` - CREATE TEMPORARY TABLE temp_existing_vectors AS + CREATE TABLE _test_existing_vectors AS SELECT rowid, library_id, version_id, embedding FROM documents_vec; DROP TABLE documents_vec; CREATE VIRTUAL TABLE documents_vec USING vec0( @@ -215,16 +226,24 @@ describe("Database Migrations", () => { embedding FLOAT[1536] ); INSERT INTO documents_vec (rowid, library_id, version_id, embedding) - SELECT rowid, library_id, version_id, embedding FROM temp_existing_vectors; - DROP TABLE temp_existing_vectors; + SELECT rowid, library_id, version_id, embedding FROM _test_existing_vectors; + DROP TABLE _test_existing_vectors; DELETE FROM documents_vec; `); + db.prepare( + 
"INSERT INTO documents_vec (rowid, library_id, version_id, embedding) VALUES (?, ?, ?, ?)", + ).run( + BigInt(preservedDocId), + BigInt(libraryId), + BigInt(versionId), + JSON.stringify(preservedVector), + ); db.prepare("DELETE FROM _schema_migrations WHERE id = ?").run( "014-rebuild-vector-partition-keys.sql", ); - expect(() => applyMigrations(db)).not.toThrow(); + await expect(applyMigrations(db)).resolves.toBeUndefined(); const migratedDdl = db .prepare( @@ -236,9 +255,14 @@ describe("Database Migrations", () => { const vectorRows = db .prepare("SELECT COUNT(*) as cnt FROM documents_vec WHERE rowid = ?") - .get(docId) as { cnt: number }; + .get(preservedDocId) as { cnt: number }; expect(vectorRows.cnt).toBe(1); + const backfilledVectorRows = db + .prepare("SELECT COUNT(*) as cnt FROM documents_vec WHERE rowid = ?") + .get(backfilledDocId) as { cnt: number }; + expect(backfilledVectorRows.cnt).toBe(1); + const result = db .prepare(` SELECT rowid, distance @@ -248,11 +272,11 @@ describe("Database Migrations", () => { AND embedding MATCH ? 
AND k = 1 `) - .get(libraryId, versionId, JSON.stringify(vector)) as + .get(libraryId, versionId, JSON.stringify(preservedVector)) as | { rowid: number; distance: number } | undefined; - expect(result?.rowid).toBe(Number(docId)); + expect(result?.rowid).toBe(Number(preservedDocId)); expect(result?.distance).toBeCloseTo(0, 6); }); From 96b5ce2c0f6a305aa0bb332336752c6e60d73632 Mon Sep 17 00:00:00 2001 From: Andre Rabold Date: Sat, 6 Jun 2026 08:56:45 -0700 Subject: [PATCH 3/5] fix(store): harden migration workflow Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../014-rebuild-vector-partition-keys.sql | 5 + docs/concepts/data-storage.md | 20 ++ .../harden-migration-workflow/.openspec.yaml | 2 + .../harden-migration-workflow/design.md | 113 +++++++++ .../harden-migration-workflow/proposal.md | 29 +++ .../specs/database-migrations/spec.md | 83 ++++++ .../harden-migration-workflow/tasks.md | 34 +++ src/store/applyMigrations.test.ts | 238 +++++++++++++++++- src/store/applyMigrations.ts | 116 ++++++++- 9 files changed, 622 insertions(+), 18 deletions(-) create mode 100644 openspec/changes/harden-migration-workflow/.openspec.yaml create mode 100644 openspec/changes/harden-migration-workflow/design.md create mode 100644 openspec/changes/harden-migration-workflow/proposal.md create mode 100644 openspec/changes/harden-migration-workflow/specs/database-migrations/spec.md create mode 100644 openspec/changes/harden-migration-workflow/tasks.md diff --git a/db/migrations/014-rebuild-vector-partition-keys.sql b/db/migrations/014-rebuild-vector-partition-keys.sql index 538468cc..97d2c5dd 100644 --- a/db/migrations/014-rebuild-vector-partition-keys.sql +++ b/db/migrations/014-rebuild-vector-partition-keys.sql @@ -1,6 +1,7 @@ -- Migration: rebuild documents_vec with sqlite-vec partition keys -- This enables selective KNN queries by library_id and version_id. 
+-- @migration-step preserve existing vectors -- Preserve compatible vectors from the existing vec table. This uses a -- disk-backed staging table because large vector indexes can exceed memory. DROP TABLE IF EXISTS _documents_vec_partition_migration; @@ -17,6 +18,7 @@ JOIN pages p ON d.page_id = p.id JOIN versions v ON p.version_id = v.id WHERE vec_length(dv.embedding) = 1536; +-- @migration-step rebuild vector table DROP TABLE documents_vec; CREATE VIRTUAL TABLE documents_vec USING vec0( @@ -25,10 +27,12 @@ CREATE VIRTUAL TABLE documents_vec USING vec0( embedding FLOAT[1536] ); +-- @migration-step restore existing vectors INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) SELECT rowid, library_id, version_id, embedding FROM _documents_vec_partition_migration; +-- @migration-step backfill missing vectors -- Backfill any vectors stored on documents but missing from the vec table. INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) SELECT @@ -45,4 +49,5 @@ WHERE d.embedding IS NOT NULL SELECT 1 FROM documents_vec existing WHERE existing.rowid = d.id ); +-- @migration-step cleanup staging data DROP TABLE _documents_vec_partition_migration; diff --git a/docs/concepts/data-storage.md b/docs/concepts/data-storage.md index 42d249e9..d800c0b2 100644 --- a/docs/concepts/data-storage.md +++ b/docs/concepts/data-storage.md @@ -315,6 +315,26 @@ Database transactions ensure consistency: - Batch operations for performance - Automatic rollback on errors +### Migration Safety + +Schema migrations run inside an IMMEDIATE transaction with rollback-capable +SQLite journaling enabled. The migration runner does not use `journal_mode = +OFF` during migration execution because destructive migrations may need to drop +and recreate tables or virtual tables, and rollback must preserve the +pre-migration database if a later step fails. 
+ +The runner still applies rollback-safe tuning for large migrations, including +cache, memory mapping, temporary storage, and `synchronous = NORMAL` settings. +After migrations finish, it configures production settings such as WAL mode, +bounded WAL checkpointing, busy timeout, foreign keys, and normal synchronous +durability. + +Large or destructive migrations should be validated against a copy of important +local databases before running them against the live store. Use SQLite's backup +API or an application-level export/copy workflow so the copy is consistent, then +run the new version against that copy and verify expected table counts and +search behavior before migrating high-value data. + ### Concurrent Access Safe concurrent database access: diff --git a/openspec/changes/harden-migration-workflow/.openspec.yaml b/openspec/changes/harden-migration-workflow/.openspec.yaml new file mode 100644 index 00000000..b4c82a0a --- /dev/null +++ b/openspec/changes/harden-migration-workflow/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-06 diff --git a/openspec/changes/harden-migration-workflow/design.md b/openspec/changes/harden-migration-workflow/design.md new file mode 100644 index 00000000..8f979342 --- /dev/null +++ b/openspec/changes/harden-migration-workflow/design.md @@ -0,0 +1,113 @@ +## Context + +The migration runner previously executed pending SQL migrations inside an IMMEDIATE transaction, but it also applied `journal_mode = OFF` and `synchronous = OFF` before running them. Those settings were introduced to speed up large vector-table migrations and reduce runtime pressure, then production settings such as WAL mode and `synchronous = NORMAL` were applied afterward. + +That workflow is risky for destructive migrations because rollback cannot be trusted when journaling is disabled. 
PR #416 rebuilds `documents_vec` to use sqlite-vec partition keys, and a failure after dropping the old virtual table could lose the vector index even though `_schema_migrations` does not mark the migration complete. + +The project already documents transaction rollback as a data consistency guarantee, and historical issues call out migration atomicity, rollback, and backup expectations. + +## Goals / Non-Goals + +**Goals:** + +- Keep migrations recoverable by default, including migrations that drop and recreate tables or virtual tables. +- Preserve safe performance tuning that does not disable rollback. +- Provide visible progress for long migrations without changing structured command output contracts. +- Make destructive migration failure behavior testable. +- Document and test a backup/copy workflow for high-value local databases. + +**Non-Goals:** + +- Build a full migration framework with TypeScript migration files. +- Add automatic live-database backup before every migration. +- Solve all disk-space constraints for very large SQLite databases. +- Change public CLI, MCP, or web APIs. + +## Decisions + +### Keep rollback-capable journaling during migration execution + +The migration runner will not set `journal_mode = OFF` before applying migrations. It will continue to use an IMMEDIATE transaction and may apply rollback-safe pragmas such as `synchronous = NORMAL`, `mmap_size`, `cache_size`, and `temp_store`. + +Alternatives considered: + +- Keep `journal_mode = OFF` for speed: rejected because destructive migrations can become unrecoverable on failure. +- Toggle `journal_mode = OFF` only for explicitly marked migrations: rejected for now because the safety benefit depends on perfect classification and future migration authors could mislabel destructive changes. 
+- Create a backup automatically before every migration: rejected for initial implementation because database copies can be expensive and surprising; documented copy-based testing is the safer first step. + +### Retain production WAL configuration after migration + +After migrations complete, the runner will continue applying production settings: WAL mode, bounded autocheckpointing, busy timeout, foreign keys, and `synchronous = NORMAL`. This preserves the existing concurrency and durability intent while separating runtime settings from migration execution safety. + +Alternatives considered: + +- Leave whatever journal mode the database had before startup: rejected because existing behavior intentionally normalizes production SQLite settings. +- Disable WAL entirely to avoid WAL growth: rejected because post-migration autocheckpointing already bounds WAL growth and WAL improves concurrent reads. + +### Add migration progress checkpoints + +Migration progress will be emitted by the migration runner as diagnostics, not by SQL itself. The runner will display the migration number, total pending migration count, migration filename, one dot per completed block, and total elapsed time. The output may be a single line or multiple lines depending on what fits existing logging conventions best. + +For SQL migrations, blocks are delimited only by full-line marker comments: + +```sql +-- @migration-step preserve existing vectors +... +-- @migration-step rebuild vector table +... +``` + +The runner will not split SQL by semicolon. Each block between markers is passed whole to `db.exec()`. SQL before the first marker is allowed as an implicit first block. If no markers exist, the whole migration runs as one implicit block and emits one completion dot. This keeps existing migrations compatible and avoids pretending to know progress within a long single SQLite statement. + +Example output: + +```text +Applying migration 5/14 014-rebuild-vector-partition-keys.sql: ..... 
done in 42.8s +``` + +Multi-line output is also acceptable when it is clearer for long migrations: + +```text +Applying migration 5/14 014-rebuild-vector-partition-keys.sql + preserve vectors. rebuild table. restore vectors. backfill. cleanup. +Completed in 42.8s +``` + +Alternatives considered: + +- Print fixed timer-based dots while `db.exec()` runs: rejected because it suggests progress even if SQLite is blocked or stuck. +- Split every SQL file by semicolon: rejected because SQL parsing is fragile and can break triggers or string literals. +- Require all migrations to be TypeScript: rejected as too large a change. + +### Require failure-path tests for destructive migrations + +Any migration that drops, renames, or rebuilds a table or virtual table must include a test that forces a failure after the destructive point and verifies the original data remains available, the migration marker is not written, and retry behavior remains possible. + +Alternatives considered: + +- Only test successful migration results: rejected because it misses the exact data-loss class this change addresses. +- Rely on SQLite transaction tests generically: rejected because sqlite-vec virtual tables and PRAGMA choices can have different behavior than ordinary tables. + +## Risks / Trade-offs + +- [Longer migration runtime] → Keep rollback-safe performance pragmas and test against copied large databases. +- [Higher temporary disk usage] → Document backup/copy testing and log clear migration start/completion messages so users understand what is happening. +- [Progress dots may be sparse for long single statements] → Prefer marker-level checkpoints and include total elapsed time. +- [Existing migrations may lack markers] → Treat markers as incremental; unmarked migrations still log start and completion. +- [Manual SQL marker parsing could be brittle] → Only split on full-line marker comments, allow an implicit first block, and execute each block exactly as written. 
+ +## Migration Plan + +1. Update `applyMigrations()` to remove unsafe `journal_mode = OFF` and `synchronous = OFF` migration pragmas while retaining safe cache/temp pragmas. +2. Add migration progress diagnostics with start/completion messages and marker-based checkpoints. +3. Add marker comments to migration 014 for major phases: preserve, rebuild, restore, backfill, cleanup. +4. Add failure-path tests for migration 014 and progress-output tests for marker handling. +5. Document that important databases should be copied or backed up before running large destructive migrations. +6. Validate on a copied local database containing real vector rows before merging. + +Rollback strategy: if the implementation causes migration regressions, revert the runner changes. Databases migrated successfully remain compatible because the schema changes are unchanged; the workflow only changes execution safety and diagnostics. + +## Open Questions + +- Should the runner expose a CLI flag to suppress progress diagnostics for scripts beyond the existing quiet/logging behavior? +- Should migration marker comments become required for all future destructive migrations, or only recommended after migration 014? diff --git a/openspec/changes/harden-migration-workflow/proposal.md b/openspec/changes/harden-migration-workflow/proposal.md new file mode 100644 index 00000000..c4f13bce --- /dev/null +++ b/openspec/changes/harden-migration-workflow/proposal.md @@ -0,0 +1,29 @@ +## Why + +Database migrations can rebuild large SQLite virtual tables, including `documents_vec`, and must remain recoverable if any step fails. PR #416 exposed that the current migration runner prioritizes speed by disabling journaling, which conflicts with expected rollback behavior and increases data-loss risk during destructive schema changes. + +## What Changes + +- Define a safe migration workflow for destructive and large-dataset migrations. 
+- Preserve rollback safety by default for schema migrations, especially migrations that drop and recreate tables. +- Keep non-destructive performance tuning where it does not undermine recoverability. +- Add visible migration progress logging that reports meaningful migration phases, driven by step markers in the migration SQL and producing output such as `Applying migration 5/14 014-rebuild-vector-partition-keys.sql: ..... done in 42.8s`. +- Require migration failure tests for destructive migrations so data preservation and migration-marker behavior are verified. +- Document the operational expectation that users test large migrations on a backup or copied database before running against important local data. + +## Capabilities + +### New Capabilities + +- `database-migrations`: Defines migration safety, recoverability, progress reporting, and validation requirements for SQLite schema/data migrations. + +### Modified Capabilities + +None. + +## Impact + +- Affects `src/store/applyMigrations.ts`, migration SQL files under `db/migrations/`, and migration tests. +- Does not change public CLI, MCP, or web APIs. +- May increase runtime and temporary disk usage for destructive migrations because rollback-safe journaling remains enabled. +- Improves recoverability for failed migrations and gives users clearer progress feedback during long-running database changes. diff --git a/openspec/changes/harden-migration-workflow/specs/database-migrations/spec.md b/openspec/changes/harden-migration-workflow/specs/database-migrations/spec.md new file mode 100644 index 00000000..a43c4e4e --- /dev/null +++ b/openspec/changes/harden-migration-workflow/specs/database-migrations/spec.md @@ -0,0 +1,83 @@ +## ADDED Requirements + +### Requirement: Rollback-safe migration execution + +The system SHALL execute database migrations with rollback-capable SQLite journaling enabled. The migration runner MUST NOT set `journal_mode = OFF` while applying migrations. 
+ +#### Scenario: Migration fails after destructive DDL + +- **WHEN** a pending migration drops or rebuilds a table and a later statement in that migration fails +- **THEN** the migration transaction MUST roll back so the pre-migration table and data remain available +- **AND** the failed migration MUST NOT be recorded in `_schema_migrations` + +#### Scenario: Migration pragmas preserve recoverability + +- **WHEN** the migration runner prepares SQLite settings before applying pending migrations +- **THEN** it MAY apply cache, mmap, temporary-storage, and synchronous settings that preserve rollback behavior +- **AND** it MUST NOT disable journaling for migration execution + +### Requirement: Production SQLite settings after migrations + +The system SHALL configure production SQLite settings after migration execution completes, including WAL mode, bounded WAL checkpointing, busy timeout, foreign key enforcement, and `synchronous = NORMAL`. + +#### Scenario: Post-migration settings are applied + +- **WHEN** migrations complete successfully or the schema is already up to date +- **THEN** the database connection MUST be configured for WAL mode, bounded autocheckpointing, busy timeout, foreign keys, and normal synchronous durability + +### Requirement: Visible migration progress + +The system SHALL emit diagnostic progress for each pending migration. Progress MUST include the migration index and total pending migration count, migration identifier, a visible marker for each completed execution block, total elapsed time, and a completion or failure outcome. 
+ +#### Scenario: Migration with explicit checkpoints + +- **WHEN** a migration file defines checkpoint markers for multiple migration phases +- **THEN** the runner MUST split the migration only on full-line checkpoint marker comments +- **AND** it MUST execute each marker-delimited SQL block as a whole without splitting by semicolon +- **AND** it MUST display one progress marker for each completed block +- **AND** the progress output MUST identify the migration being run + +#### Scenario: Migration has SQL before the first checkpoint + +- **WHEN** a migration file contains SQL before the first checkpoint marker +- **THEN** the runner MUST execute that SQL as an implicit first block +- **AND** it MUST preserve the SQL order relative to later checkpoint blocks + +#### Scenario: Migration without explicit checkpoints + +- **WHEN** a migration file has no checkpoint markers +- **THEN** the runner MUST still display migration start and completion diagnostics +- **AND** it MUST execute the whole migration as one implicit block +- **AND** it MUST report total elapsed time when the migration completes or fails + +#### Scenario: Migration fails during a checkpoint + +- **WHEN** a migration fails while running a checkpoint +- **THEN** the runner MUST emit a failure diagnostic for the migration +- **AND** it MUST NOT emit a success marker for the failed checkpoint +- **AND** it MUST report elapsed time up to the failure + +### Requirement: Destructive migration validation + +The system SHALL require tests for destructive migrations that verify both successful data preservation and failed-migration recoverability. 
+ +#### Scenario: Destructive migration succeeds + +- **WHEN** a migration drops, renames, or rebuilds a table or virtual table +- **THEN** tests MUST verify that compatible pre-migration data is preserved after the migration succeeds + +#### Scenario: Destructive migration fails + +- **WHEN** a destructive migration test injects or creates a failure after the destructive operation would have occurred +- **THEN** tests MUST verify that pre-migration data remains available +- **AND** the migration marker MUST remain unapplied + +### Requirement: Backup guidance for high-value databases + +The system SHALL document a backup or copied-database validation workflow for large or high-value local databases before destructive migrations are applied. + +#### Scenario: User prepares for a destructive migration + +- **WHEN** documentation describes a destructive or large database migration +- **THEN** it MUST instruct users to back up or copy the database before running the migration against important local data +- **AND** it MUST explain how to validate the migration on the copy when practical diff --git a/openspec/changes/harden-migration-workflow/tasks.md b/openspec/changes/harden-migration-workflow/tasks.md new file mode 100644 index 00000000..e005bf00 --- /dev/null +++ b/openspec/changes/harden-migration-workflow/tasks.md @@ -0,0 +1,34 @@ +## 1. Migration Runner Safety + +- [x] 1.1 Update `applyMigrations()` so migration execution never sets `journal_mode = OFF`. +- [x] 1.2 Keep rollback-safe migration pragmas for cache, mmap, temp storage, and `synchronous = NORMAL`. +- [x] 1.3 Preserve post-migration production settings for WAL mode, autocheckpointing, busy timeout, foreign keys, and `synchronous = NORMAL`. +- [x] 1.4 Ensure failed migrations keep `_schema_migrations` unchanged and surface the original failure through `StoreError`. + +## 2. Progress Reporting + +- [x] 2.1 Add support for full-line SQL checkpoint markers such as `-- @migration-step