diff --git a/db/migrations/002-normalize-library-table.sql b/db/migrations/002-normalize-library-table.sql index 6db0394b..092bf95e 100644 --- a/db/migrations/002-normalize-library-table.sql +++ b/db/migrations/002-normalize-library-table.sql @@ -1,5 +1,6 @@ -- Migration: Normalize schema by introducing libraries and versions tables +-- @migration-step create normalized tables -- 1. Create libraries table CREATE TABLE IF NOT EXISTS libraries ( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -16,10 +17,12 @@ CREATE TABLE IF NOT EXISTS versions ( UNIQUE(library_id, name) -- Allows one NULL version per library ); +-- @migration-step add document foreign keys -- 3. Add foreign key columns to documents ALTER TABLE documents ADD COLUMN library_id INTEGER REFERENCES libraries(id); ALTER TABLE documents ADD COLUMN version_id INTEGER REFERENCES versions(id); +-- @migration-step populate libraries and versions -- 4. Populate libraries table from existing documents INSERT OR IGNORE INTO libraries (name) SELECT DISTINCT library FROM documents; @@ -32,6 +35,7 @@ SELECT DISTINCT FROM documents d JOIN libraries l ON l.name = d.library; +-- @migration-step backfill document references -- 6. Update documents with foreign key references UPDATE documents SET library_id = (SELECT id FROM libraries WHERE libraries.name = documents.library), @@ -42,6 +46,7 @@ SET library_id = (SELECT id FROM libraries WHERE libraries.name = documents.libr AND COALESCE(v.name, '') = COALESCE(documents.version, '') ); +-- @migration-step create normalization indexes -- 7. 
Add indexes for performance CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id); CREATE INDEX IF NOT EXISTS idx_documents_version_id ON documents(version_id); diff --git a/db/migrations/003-normalize-vector-table.sql b/db/migrations/003-normalize-vector-table.sql index 6284665c..10106461 100644 --- a/db/migrations/003-normalize-vector-table.sql +++ b/db/migrations/003-normalize-vector-table.sql @@ -1,9 +1,11 @@ -- Migration: Normalize documents_vec table to use library_id and version_id -- Optimized for large datasets (1GB+) +-- @migration-step prepare vector join index -- 1. Ensure optimal indexes for the migration JOIN CREATE INDEX IF NOT EXISTS idx_documents_id_lib_ver ON documents(id, library_id, version_id); +-- @migration-step preserve vectors with normalized keys -- 2. Create temporary table to store vector data with foreign key IDs CREATE TEMPORARY TABLE temp_vector_migration AS SELECT @@ -14,6 +16,7 @@ SELECT FROM documents_vec dv JOIN documents d ON dv.rowid = d.id; +-- @migration-step rebuild vector table -- 3. Drop the old virtual table DROP TABLE documents_vec; @@ -24,10 +27,12 @@ CREATE VIRTUAL TABLE documents_vec USING vec0( embedding FLOAT[1536] ); +-- @migration-step restore normalized vectors -- 5. Restore vector data using foreign key IDs INSERT INTO documents_vec (rowid, library_id, version_id, embedding) SELECT rowid, library_id, version_id, embedding FROM temp_vector_migration; +-- @migration-step cleanup vector staging data -- 6. 
Clean up temporary table DROP TABLE temp_vector_migration; diff --git a/db/migrations/004-complete-normalization.sql b/db/migrations/004-complete-normalization.sql index 187ab9bd..1d684471 100644 --- a/db/migrations/004-complete-normalization.sql +++ b/db/migrations/004-complete-normalization.sql @@ -2,6 +2,7 @@ -- This migration finalizes the schema normalization process -- Note: Must recreate table because obsolete columns are part of UNIQUE constraint +-- @migration-step create normalized documents table -- 1. Create new documents table with only foreign key references CREATE TABLE documents_new ( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -15,22 +16,26 @@ CREATE TABLE documents_new ( UNIQUE(url, library_id, version_id, sort_order) ); +-- @migration-step copy normalized documents -- 2. Copy data from old table (excluding obsolete library and version columns) INSERT INTO documents_new (id, library_id, version_id, url, content, metadata, sort_order, indexed_at) SELECT id, library_id, version_id, url, content, metadata, sort_order, indexed_at FROM documents; +-- @migration-step replace documents table -- 3. Drop the old documents table DROP TABLE documents; -- 4. Rename the new table to documents ALTER TABLE documents_new RENAME TO documents; +-- @migration-step recreate document indexes -- 5. Recreate indexes that were lost when dropping the table CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id); CREATE INDEX IF NOT EXISTS idx_documents_version_id ON documents(version_id); CREATE INDEX IF NOT EXISTS idx_documents_lib_ver_id ON documents(library_id, version_id); +-- @migration-step recreate fts schema -- 6. 
Recreate FTS5 virtual table (gets dropped when main table is dropped) -- Using external content approach - FTS index is maintained entirely through triggers CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( @@ -41,6 +46,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( tokenize='porter unicode61' ); +-- @migration-step recreate fts triggers -- 7. Recreate FTS triggers to maintain the index -- Note: Triggers work directly with documents table, no JOIN needed for FTS content CREATE TRIGGER IF NOT EXISTS documents_fts_after_delete AFTER DELETE ON documents BEGIN @@ -60,6 +66,7 @@ CREATE TRIGGER IF NOT EXISTS documents_fts_after_insert AFTER INSERT ON document VALUES(new.id, new.content, json_extract(new.metadata, '$.title'), new.url, json_extract(new.metadata, '$.path')); END; +-- @migration-step rebuild fts index -- 8. Rebuild FTS index from existing documents data -- Manually populate the FTS index since we're using external content approach INSERT INTO documents_fts(rowid, content, title, url, path) diff --git a/db/migrations/007-dedupe-unversioned-versions.sql b/db/migrations/007-dedupe-unversioned-versions.sql index abc6fd7c..9b7c311b 100644 --- a/db/migrations/007-dedupe-unversioned-versions.sql +++ b/db/migrations/007-dedupe-unversioned-versions.sql @@ -14,6 +14,7 @@ -- across multiple subsequent statements. All TEMP objects are connection-scoped -- and vanish automatically; safe for repeated runs (we DROP IF EXISTS first). 
+-- @migration-step collect null-name versions DROP TABLE IF EXISTS temp_null_versions; CREATE TEMP TABLE temp_null_versions AS SELECT v.id, v.library_id, @@ -21,6 +22,7 @@ SELECT v.id, v.library_id, FROM versions v WHERE v.name IS NULL; +-- @migration-step choose canonical versions -- Build canonical mapping per library (one row per library_id) DROP TABLE IF EXISTS temp_canonical_versions; CREATE TEMP TABLE temp_canonical_versions AS @@ -40,6 +42,7 @@ SELECT nv.library_id, FROM temp_null_versions nv GROUP BY nv.library_id; +-- @migration-step repoint documents to canonical versions -- Repoint documents from non-canonical NULL-name versions UPDATE documents SET version_id = ( @@ -49,12 +52,14 @@ SET version_id = ( WHERE version_id IN (SELECT id FROM versions WHERE name IS NULL) AND version_id NOT IN (SELECT keep_id FROM temp_canonical_versions); +-- @migration-step remove surplus versions -- 3: Delete surplus NULL-name rows now unreferenced DELETE FROM versions WHERE name IS NULL AND id NOT IN (SELECT keep_id FROM temp_canonical_versions) AND (SELECT COUNT(*) FROM documents d WHERE d.version_id = versions.id) = 0; +-- @migration-step normalize remaining version names -- 4: Normalize remaining NULL names to '' UPDATE versions SET name = '' WHERE name IS NULL; diff --git a/db/migrations/009-add-pages-table.sql b/db/migrations/009-add-pages-table.sql index 6c099091..6a628461 100644 --- a/db/migrations/009-add-pages-table.sql +++ b/db/migrations/009-add-pages-table.sql @@ -2,6 +2,7 @@ -- This migration introduces a pages table to store page-level metadata once per URL -- and links document chunks to their parent pages via page_id foreign key +-- @migration-step create pages table -- 1. Create pages table to store unique page-level metadata CREATE TABLE IF NOT EXISTS pages ( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -16,11 +17,13 @@ CREATE TABLE IF NOT EXISTS pages ( UNIQUE(version_id, url) ); +-- @migration-step create page indexes -- 2. 
Add indexes for efficient querying CREATE INDEX IF NOT EXISTS idx_pages_version_id ON pages(version_id); CREATE INDEX IF NOT EXISTS idx_pages_url ON pages(url); CREATE INDEX IF NOT EXISTS idx_pages_etag ON pages(etag); +-- @migration-step create page-based documents table -- 3. Create new documents table with page_id foreign key CREATE TABLE documents_new ( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -32,10 +35,12 @@ CREATE TABLE documents_new ( created_at DATETIME DEFAULT CURRENT_TIMESTAMP ); +-- @migration-step create new document indexes -- 4. Create indexes for the new documents table CREATE INDEX IF NOT EXISTS idx_documents_page_id ON documents_new(page_id); CREATE INDEX IF NOT EXISTS idx_documents_sort_order ON documents_new(page_id, sort_order); +-- @migration-step populate pages -- 5. Migrate data from old documents table to new structure -- First, populate pages table with unique page data from existing documents -- Group by version_id and url to ensure uniqueness, using MAX() to handle any duplicates @@ -49,6 +54,7 @@ SELECT FROM documents GROUP BY version_id, url; +-- @migration-step migrate document chunks -- 6. Migrate document chunks to new table structure -- Preserve all existing metadata except page-level fields (url, title, library, version) -- that are now stored in pages and versions tables @@ -72,12 +78,14 @@ SELECT FROM documents d JOIN pages p ON d.version_id = p.version_id AND d.url = p.url; +-- @migration-step replace documents table -- 7. Drop the old documents table DROP TABLE documents; -- 8. Rename the new table to documents ALTER TABLE documents_new RENAME TO documents; +-- @migration-step replace fts schema -- 9. Recreate FTS5 virtual table to work with new structure -- Drop existing FTS table and triggers DROP TRIGGER IF EXISTS documents_fts_after_delete; @@ -94,6 +102,7 @@ CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( tokenize='porter unicode61' ); +-- @migration-step recreate fts and page triggers -- 10. 
Create new FTS triggers that join with pages table CREATE TRIGGER IF NOT EXISTS documents_fts_after_delete AFTER DELETE ON documents BEGIN DELETE FROM documents_fts WHERE rowid = old.id; @@ -117,6 +126,7 @@ CREATE TRIGGER IF NOT EXISTS pages_updated_at_trigger AFTER UPDATE ON pages BEGI UPDATE pages SET updated_at = CURRENT_TIMESTAMP WHERE id = new.id; END; +-- @migration-step rebuild fts index -- 12. Rebuild FTS index from migrated data INSERT INTO documents_fts(rowid, content, title, url, path) SELECT d.id, d.content, p.title, p.url, json_extract(d.metadata, '$.path') diff --git a/db/migrations/014-rebuild-vector-partition-keys.sql b/db/migrations/014-rebuild-vector-partition-keys.sql new file mode 100644 index 00000000..ae548974 --- /dev/null +++ b/db/migrations/014-rebuild-vector-partition-keys.sql @@ -0,0 +1,56 @@ +-- Migration: rebuild documents_vec with sqlite-vec partition keys +-- This enables selective KNN queries by library_id and version_id. + +-- @migration-step preserve existing vectors +-- Preserve compatible vectors from the existing vec table. The migration +-- runner replaces __DOCUMENTS_VEC_DIMENSION__ with the current documents_vec +-- dimension so databases already reconciled to a custom embedding dimension +-- keep their existing vector size. This uses a disk-backed staging table +-- because large vector indexes can exceed memory. 
+DROP TABLE IF EXISTS _documents_vec_partition_migration; + +CREATE TABLE _documents_vec_partition_migration AS +SELECT + d.id AS rowid, + v.library_id, + v.id AS version_id, + dv.embedding +FROM documents_vec dv +JOIN documents d ON dv.rowid = d.id +JOIN pages p ON d.page_id = p.id +JOIN versions v ON p.version_id = v.id +WHERE vec_length(dv.embedding) = __DOCUMENTS_VEC_DIMENSION__; + +-- @migration-step rebuild vector table +DROP TABLE documents_vec; + +CREATE VIRTUAL TABLE documents_vec USING vec0( + library_id INTEGER partition key, + version_id INTEGER partition key, + embedding FLOAT[__DOCUMENTS_VEC_DIMENSION__] +); + +-- @migration-step restore existing vectors +INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) +SELECT rowid, library_id, version_id, embedding +FROM _documents_vec_partition_migration; + +-- @migration-step backfill missing vectors +-- Backfill vectors stored as JSON in documents.embedding that match the target dimension but have no row in the rebuilt vec table (the NOT EXISTS guard skips rows already restored above). +INSERT OR REPLACE INTO documents_vec (rowid, library_id, version_id, embedding) +SELECT + d.id, + v.library_id, + v.id AS version_id, + json_extract(d.embedding, '$') AS embedding +FROM documents d +JOIN pages p ON d.page_id = p.id +JOIN versions v ON p.version_id = v.id +WHERE d.embedding IS NOT NULL + AND vec_length(json_extract(d.embedding, '$')) = __DOCUMENTS_VEC_DIMENSION__ + AND NOT EXISTS ( + SELECT 1 FROM documents_vec existing WHERE existing.rowid = d.id + ); + +-- @migration-step cleanup staging data +DROP TABLE _documents_vec_partition_migration; diff --git a/docs/concepts/data-storage.md b/docs/concepts/data-storage.md index 42d249e9..d800c0b2 100644 --- a/docs/concepts/data-storage.md +++ b/docs/concepts/data-storage.md @@ -315,6 +315,26 @@ Database transactions ensure consistency: - Batch operations for performance - Automatic rollback on errors +### Migration Safety + +Schema migrations run inside an IMMEDIATE transaction with rollback-capable +SQLite journaling enabled. 
The migration runner does not use `journal_mode = +OFF` during migration execution because destructive migrations may need to drop +and recreate tables or virtual tables, and rollback must preserve the +pre-migration database if a later step fails. + +The runner still applies rollback-safe tuning for large migrations, including +cache, memory mapping, temporary storage, and `synchronous = NORMAL` settings. +After migrations finish, it configures production settings such as WAL mode, +bounded WAL checkpointing, busy timeout, foreign keys, and normal synchronous +durability. + +Large or destructive migrations should be validated against a copy of important +local databases before running them against the live store. Use SQLite's backup +API or an application-level export/copy workflow so the copy is consistent, then +run the new version against that copy and verify expected table counts and +search behavior before migrating high-value data. + ### Concurrent Access Safe concurrent database access: diff --git a/openspec/changes/harden-migration-workflow/.openspec.yaml b/openspec/changes/harden-migration-workflow/.openspec.yaml new file mode 100644 index 00000000..b4c82a0a --- /dev/null +++ b/openspec/changes/harden-migration-workflow/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-06 diff --git a/openspec/changes/harden-migration-workflow/design.md b/openspec/changes/harden-migration-workflow/design.md new file mode 100644 index 00000000..8f979342 --- /dev/null +++ b/openspec/changes/harden-migration-workflow/design.md @@ -0,0 +1,113 @@ +## Context + +The migration runner previously executed pending SQL migrations inside an IMMEDIATE transaction, but it also applied `journal_mode = OFF` and `synchronous = OFF` before running them. Those settings were introduced to speed up large vector-table migrations and reduce runtime pressure, then production settings such as WAL mode and `synchronous = NORMAL` were applied afterward. 
+ +That workflow is risky for destructive migrations because rollback cannot be trusted when journaling is disabled. PR #416 rebuilds `documents_vec` to use sqlite-vec partition keys, and a failure after dropping the old virtual table could lose the vector index even though `_schema_migrations` does not mark the migration complete. + +The project already documents transaction rollback as a data consistency guarantee, and historical issues call out migration atomicity, rollback, and backup expectations. + +## Goals / Non-Goals + +**Goals:** + +- Keep migrations recoverable by default, including migrations that drop and recreate tables or virtual tables. +- Preserve safe performance tuning that does not disable rollback. +- Provide visible progress for long migrations without changing structured command output contracts. +- Make destructive migration failure behavior testable. +- Document and test a backup/copy workflow for high-value local databases. + +**Non-Goals:** + +- Build a full migration framework with TypeScript migration files. +- Add automatic live-database backup before every migration. +- Solve all disk-space constraints for very large SQLite databases. +- Change public CLI, MCP, or web APIs. + +## Decisions + +### Keep rollback-capable journaling during migration execution + +The migration runner will not set `journal_mode = OFF` before applying migrations. It will continue to use an IMMEDIATE transaction and may apply rollback-safe pragmas such as `synchronous = NORMAL`, `mmap_size`, `cache_size`, and `temp_store`. + +Alternatives considered: + +- Keep `journal_mode = OFF` for speed: rejected because destructive migrations can become unrecoverable on failure. +- Toggle `journal_mode = OFF` only for explicitly marked migrations: rejected for now because the safety benefit depends on perfect classification and future migration authors could mislabel destructive changes. 
+- Create a backup automatically before every migration: rejected for initial implementation because database copies can be expensive and surprising; documented copy-based testing is the safer first step. + +### Retain production WAL configuration after migration + +After migrations complete, the runner will continue applying production settings: WAL mode, bounded autocheckpointing, busy timeout, foreign keys, and `synchronous = NORMAL`. This preserves the existing concurrency and durability intent while separating runtime settings from migration execution safety. + +Alternatives considered: + +- Leave whatever journal mode the database had before startup: rejected because existing behavior intentionally normalizes production SQLite settings. +- Disable WAL entirely to avoid WAL growth: rejected because post-migration autocheckpointing already bounds WAL growth and WAL improves concurrent reads. + +### Add migration progress checkpoints + +Migration progress will be emitted by the migration runner as diagnostics, not by SQL itself. The runner will display the migration number, total pending migration count, migration filename, one dot per completed block, and total elapsed time. The output may be a single line or multiple lines depending on what fits existing logging conventions best. + +For SQL migrations, blocks are delimited only by full-line marker comments: + +```sql +-- @migration-step preserve existing vectors +... +-- @migration-step rebuild vector table +... +``` + +The runner will not split SQL by semicolon. Each block between markers is passed whole to `db.exec()`. SQL before the first marker is allowed as an implicit first block. If no markers exist, the whole migration runs as one implicit block and emits one completion dot. This keeps existing migrations compatible and avoids pretending to know progress within a long single SQLite statement. + +Example output: + +```text +Applying migration 5/14 014-rebuild-vector-partition-keys.sql: ..... 
done in 42.8s +``` + +Multi-line output is also acceptable when it is clearer for long migrations: + +```text +Applying migration 5/14 014-rebuild-vector-partition-keys.sql + preserve vectors. rebuild table. restore vectors. backfill. cleanup. +Completed in 42.8s +``` + +Alternatives considered: + +- Print fixed timer-based dots while `db.exec()` runs: rejected because it suggests progress even if SQLite is blocked or stuck. +- Split every SQL file by semicolon: rejected because SQL parsing is fragile and can break triggers or string literals. +- Require all migrations to be TypeScript: rejected as too large a change. + +### Require failure-path tests for destructive migrations + +Any migration that drops, renames, or rebuilds a table or virtual table must include a test that forces a failure after the destructive point and verifies the original data remains available, the migration marker is not written, and retry behavior remains possible. + +Alternatives considered: + +- Only test successful migration results: rejected because it misses the exact data-loss class this change addresses. +- Rely on SQLite transaction tests generically: rejected because sqlite-vec virtual tables and PRAGMA choices can have different behavior than ordinary tables. + +## Risks / Trade-offs + +- [Longer migration runtime] → Keep rollback-safe performance pragmas and test against copied large databases. +- [Higher temporary disk usage] → Document backup/copy testing and log clear migration start/completion messages so users understand what is happening. +- [Progress dots may be sparse for long single statements] → Prefer marker-level checkpoints and include total elapsed time. +- [Existing migrations may lack markers] → Treat markers as incremental; unmarked migrations still log start and completion. +- [Manual SQL marker parsing could be brittle] → Only split on full-line marker comments, allow an implicit first block, and execute each block exactly as written. 
+ +## Migration Plan + +1. Update `applyMigrations()` to remove unsafe `journal_mode = OFF` and `synchronous = OFF` migration pragmas while retaining safe cache/temp pragmas. +2. Add migration progress diagnostics with start/completion messages and marker-based checkpoints. +3. Add marker comments to migration 014 for major phases (preserve, rebuild, restore, backfill, cleanup) and to the existing multi-step migrations 002, 003, 004, 007, and 009. +4. Add failure-path tests for migration 014 and progress-output tests for marker handling. +5. Document that important databases should be copied or backed up before running large destructive migrations. +6. Validate on a copied local database containing real vector rows before merging. + +Rollback strategy: if the implementation causes migration regressions, revert the runner changes. Databases migrated successfully remain compatible because the resulting schema is unchanged; this change only affects execution safety and diagnostics. + +## Open Questions + +- Should the runner expose a CLI flag to suppress progress diagnostics for scripts beyond the existing quiet/logging behavior? +- Should migration marker comments become required for all future destructive migrations, or only recommended after migration 014? diff --git a/openspec/changes/harden-migration-workflow/proposal.md b/openspec/changes/harden-migration-workflow/proposal.md new file mode 100644 index 00000000..c4f13bce --- /dev/null +++ b/openspec/changes/harden-migration-workflow/proposal.md @@ -0,0 +1,29 @@ +## Why + +Database migrations can rebuild large SQLite virtual tables, including `documents_vec`, and must remain recoverable if any step fails. PR #416 exposed that the current migration runner prioritizes speed by disabling journaling, which conflicts with expected rollback behavior and increases data-loss risk during destructive schema changes. + +## What Changes + +- Define a safe migration workflow for destructive and large-dataset migrations. 
+- Preserve rollback safety by default for schema migrations, especially migrations that drop and recreate tables. +- Keep non-destructive performance tuning where it does not undermine recoverability. +- Add visible migration progress logging that reports meaningful migration phases: `-- @migration-step` marker comments in the SQL delimit the phases, and the runner prints output such as `Applying migration 5/14 014-rebuild-vector-partition-keys.sql: ..... done in 42.8s`. +- Require migration failure tests for destructive migrations so data preservation and migration-marker behavior are verified. +- Document the operational expectation that users test large migrations on a backup or copied database before running against important local data. + +## Capabilities + +### New Capabilities + +- `database-migrations`: Defines migration safety, recoverability, progress reporting, and validation requirements for SQLite schema/data migrations. + +### Modified Capabilities + +None. + +## Impact + +- Affects `src/store/applyMigrations.ts`, migration SQL files under `db/migrations/`, and migration tests. +- Does not change public CLI, MCP, or web APIs. +- May increase runtime and temporary disk usage for destructive migrations because rollback-safe journaling remains enabled. +- Improves recoverability for failed migrations and gives users clearer progress feedback during long-running database changes. diff --git a/openspec/changes/harden-migration-workflow/specs/database-migrations/spec.md b/openspec/changes/harden-migration-workflow/specs/database-migrations/spec.md new file mode 100644 index 00000000..a43c4e4e --- /dev/null +++ b/openspec/changes/harden-migration-workflow/specs/database-migrations/spec.md @@ -0,0 +1,83 @@ +## ADDED Requirements + +### Requirement: Rollback-safe migration execution + +The system SHALL execute database migrations with rollback-capable SQLite journaling enabled. The migration runner MUST NOT set `journal_mode = OFF` while applying migrations. 
+ +#### Scenario: Migration fails after destructive DDL + +- **WHEN** a pending migration drops or rebuilds a table and a later statement in that migration fails +- **THEN** the migration transaction MUST roll back so the pre-migration table and data remain available +- **AND** the failed migration MUST NOT be recorded in `_schema_migrations` + +#### Scenario: Migration pragmas preserve recoverability + +- **WHEN** the migration runner prepares SQLite settings before applying pending migrations +- **THEN** it MAY apply cache, mmap, temporary-storage, and synchronous settings that preserve rollback behavior +- **AND** it MUST NOT disable journaling for migration execution + +### Requirement: Production SQLite settings after migrations + +The system SHALL configure production SQLite settings after migration execution completes, including WAL mode, bounded WAL checkpointing, busy timeout, foreign key enforcement, and `synchronous = NORMAL`. + +#### Scenario: Post-migration settings are applied + +- **WHEN** migrations complete successfully or the schema is already up to date +- **THEN** the database connection MUST be configured for WAL mode, bounded autocheckpointing, busy timeout, foreign keys, and normal synchronous durability + +### Requirement: Visible migration progress + +The system SHALL emit diagnostic progress for each pending migration. Progress MUST include the migration index and total pending migration count, migration identifier, a visible marker for each completed execution block, total elapsed time, and a completion or failure outcome. 
+ +#### Scenario: Migration with explicit checkpoints + +- **WHEN** a migration file defines checkpoint markers for multiple migration phases +- **THEN** the runner MUST split the migration only on full-line checkpoint marker comments +- **AND** it MUST execute each marker-delimited SQL block as a whole without splitting by semicolon +- **AND** it MUST display one progress marker for each completed block +- **AND** the progress output MUST identify the migration being run + +#### Scenario: Migration has SQL before the first checkpoint + +- **WHEN** a migration file contains SQL before the first checkpoint marker +- **THEN** the runner MUST execute that SQL as an implicit first block +- **AND** it MUST preserve the SQL order relative to later checkpoint blocks + +#### Scenario: Migration without explicit checkpoints + +- **WHEN** a migration file has no checkpoint markers +- **THEN** the runner MUST still display migration start and completion diagnostics +- **AND** it MUST execute the whole migration as one implicit block +- **AND** it MUST report total elapsed time when the migration completes or fails + +#### Scenario: Migration fails during a checkpoint + +- **WHEN** a migration fails while running a checkpoint +- **THEN** the runner MUST emit a failure diagnostic for the migration +- **AND** it MUST NOT emit a success marker for the failed checkpoint +- **AND** it MUST report elapsed time up to the failure + +### Requirement: Destructive migration validation + +The system SHALL require tests for destructive migrations that verify both successful data preservation and failed-migration recoverability. 
+ +#### Scenario: Destructive migration succeeds + +- **WHEN** a migration drops, renames, or rebuilds a table or virtual table +- **THEN** tests MUST verify that compatible pre-migration data is preserved after the migration succeeds + +#### Scenario: Destructive migration fails + +- **WHEN** a destructive migration test injects or creates a failure after the destructive operation would have occurred +- **THEN** tests MUST verify that pre-migration data remains available +- **AND** the migration marker MUST remain unapplied + +### Requirement: Backup guidance for high-value databases + +The system SHALL document a backup or copied-database validation workflow for large or high-value local databases before destructive migrations are applied. + +#### Scenario: User prepares for a destructive migration + +- **WHEN** documentation describes a destructive or large database migration +- **THEN** it MUST instruct users to back up or copy the database before running the migration against important local data +- **AND** it MUST explain how to validate the migration on the copy when practical diff --git a/openspec/changes/harden-migration-workflow/tasks.md b/openspec/changes/harden-migration-workflow/tasks.md new file mode 100644 index 00000000..e005bf00 --- /dev/null +++ b/openspec/changes/harden-migration-workflow/tasks.md @@ -0,0 +1,34 @@ +## 1. Migration Runner Safety + +- [x] 1.1 Update `applyMigrations()` so migration execution never sets `journal_mode = OFF`. +- [x] 1.2 Keep rollback-safe migration pragmas for cache, mmap, temp storage, and `synchronous = NORMAL`. +- [x] 1.3 Preserve post-migration production settings for WAL mode, autocheckpointing, busy timeout, foreign keys, and `synchronous = NORMAL`. +- [x] 1.4 Ensure failed migrations keep `_schema_migrations` unchanged and surface the original failure through `StoreError`. + +## 2. Progress Reporting + +- [x] 2.1 Add support for full-line SQL checkpoint markers such as `-- @migration-step