diff --git a/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql b/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql index 8ec9f5eea..d3754ca49 100644 --- a/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql +++ b/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql @@ -1,2 +1,4 @@ --- Namespace + visibility migration is implemented idempotently in migrator.ts --- because fresh installs may already get these columns from 001-initial.sql. +-- Namespace + visibility migration is implemented programmatically in migrator.ts. +-- See ensureNamespaceColumns() (phase 1: fast DDL, runs inside transaction) and +-- ensureNamespaceIndexesAndBackfill() (phase 2: batched backfill + index creation, +-- runs outside transaction so the bridge can be safely interrupted and restarted). diff --git a/apps/memos-local-plugin/core/storage/migrator.ts b/apps/memos-local-plugin/core/storage/migrator.ts index 47e42807c..f62e6b549 100644 --- a/apps/memos-local-plugin/core/storage/migrator.ts +++ b/apps/memos-local-plugin/core/storage/migrator.ts @@ -115,7 +115,7 @@ export function runMigrations(db: StorageDb, dir: string = defaultMigrationsDir( } const t0 = now(); db.tx(() => { - applyMigration(db, file); + applyMigrationDdl(db, file); db.prepare( `INSERT INTO schema_migrations (version, name, applied_at) VALUES (@version, @name, @applied_at)`, ).run({ version: file.version, name: file.name, applied_at: now() }); @@ -133,7 +133,16 @@ export function runMigrations(db: StorageDb, dir: string = defaultMigrationsDir( if (needsUnsafe) db.raw.unsafeMode(false); } + // Phase 2 of migration 007: ensure all namespace columns exist on every table + // (idempotent -- ensureColumn skips if already present), then batched share_scope + // backfill and index creation. Runs after every startup; if the bridge is killed + // mid-backfill it resumes where it left off on the next boot. + ensureNamespaceColumns(db); + if (columnExists(db, "traces", "owner_agent_kind")) { + ensureNamespaceIndexesAndBackfill(db); + } ensureHubSharingSearchColumns(db); + markReady(db); log.info("migrations.summary", { @@ -155,7 +164,25 @@ function migrationNeedsUnsafeMode(fullPath: string): boolean { return /PRAGMA\s+writable_schema/i.test(sql); } -function applyMigration(db: StorageDb, file: MigrationFile): void { +// ── Migration 007 (namespace-visibility) ───────────────────────────────────── +// +// Two-phase design breaks the O(n) crash-loop: +// +// Phase 1 — inside the migration transaction: +// ADD COLUMN only. Metadata-only, completes in milliseconds regardless of +// DB size. The schema_migrations record commits here. +// +// Phase 2 — after the migration loop, outside any transaction: +// Batched UPDATE + CREATE INDEX. Each 2,000-row UPDATE batch is its own +// implicit transaction, so a killed bridge resumes mid-backfill rather than +// restarting the whole migration. The migration 007 record has already +// committed, so Phase 1 is skipped on the next boot. + +const NS_TABLES = ["sessions", "episodes", "traces", "policies", "world_model", "skills", "feedback", "decision_repairs", "l2_candidate_pool", "skill_trials", "api_logs", "audit_events"] as const; + +const SHARE_TABLES = ["episodes", "traces", "policies", "world_model", "skills"] as const; + +function applyMigrationDdl(db: StorageDb, file: MigrationFile): void { if (file.version === 3 && file.name === "embedding-retry-lease") { ensureEmbeddingRetryLeaseColumns(db); return; @@ -177,7 +204,7 @@ function applyMigration(db: StorageDb, file: MigrationFile): void { return; } if (file.version === 7 && file.name === "namespace-visibility") { - ensureNamespaceVisibilityColumns(db); + ensureNamespaceColumns(db); return; } if (file.version === 8 && file.name === "feedback-experience-metadata") { @@ -206,101 +233,15 @@ function applyMigration(db: StorageDb, file: MigrationFile): void { } function ensureEmbeddingRetryLeaseColumns(db: StorageDb): void { - const columns = new Set( - db.prepare(`PRAGMA table_info(embedding_retry_queue)`) - .all() - .map((row) => row.name), - ); - if (!columns.has("claimed_by")) { - db.exec(`ALTER TABLE embedding_retry_queue ADD COLUMN claimed_by TEXT`); - } - if (!columns.has("lease_until")) { - db.exec(`ALTER TABLE embedding_retry_queue ADD COLUMN lease_until INTEGER`); - } + if (!tableExists(db, "embedding_retry_queue")) return; + ensureColumn(db, "embedding_retry_queue", "claimed_by", "TEXT"); + ensureColumn(db, "embedding_retry_queue", "lease_until", "INTEGER"); } function ensureSkillUsageColumns(db: StorageDb): void { - const table = db - .prepare( - `SELECT name FROM sqlite_master WHERE type='table' AND name='skills'`, - ) - .get(); - if (!table) return; - const columns = new Set( - db.prepare(`PRAGMA table_info(skills)`) - .all() - .map((row) => row.name), - ); - if (!columns.has("usage_count")) { - db.exec(`ALTER TABLE skills ADD COLUMN usage_count INTEGER NOT NULL DEFAULT 0`); - } - if (!columns.has("last_used_at")) { - db.exec(`ALTER TABLE skills ADD COLUMN last_used_at INTEGER`); - } -} - -function ensureNamespaceVisibilityColumns(db: StorageDb): void { - const ownerTables = [ - "sessions", - "episodes", - "traces", - "policies", - "world_model", - "skills", - "feedback", - "decision_repairs", - "l2_candidate_pool", - "skill_trials", - "api_logs", - "audit_events", - ]; - for (const table of ownerTables) { - if (!tableExists(db, table)) continue; - ensureColumn(db, table, "owner_agent_kind", "TEXT NOT NULL DEFAULT 'unknown'"); - ensureColumn(db, table, "owner_profile_id", "TEXT NOT NULL DEFAULT 'default'"); - ensureColumn(db, table, "owner_workspace_id", "TEXT"); - } - // Per-row backfill of `share_scope` was originally done here with a blanket - // `UPDATE ${table} SET share_scope='private' WHERE share_scope IS NULL`. - // That rewrites every row of `traces` — which on busy installs is the - // largest, fattest table (each row carries embedding BLOBs, tool-call JSON, - // agent text, etc.). On databases past ~500 MB, the synchronous bootstrap - // transaction would hold the connection in CPU-bound JSON-revalidation for - // many minutes and never reach `migrations.summary`, manifesting as the - // "bridge hangs at 100 % CPU after `sqlite.open`" regression filed as - // https://github.com/MemTensor/MemOS/issues/1787. - // - // The application layer already treats NULL `share_scope` as the - // 'private' default — see `normalizeShareScope` and the - // `COALESCE(share_scope, 'private')` in `visibilityWhere`. Adding the - // column with `DEFAULT 'private'` covers every NEW row, so dropping the - // bulk UPDATE has no observable effect on behaviour. We keep the - // `ensureColumn` calls (they're O(1) since SQLite 3.35) so the schema - // shape is unchanged. - for (const table of ["episodes", "traces", "policies", "world_model", "skills"]) { - if (!tableExists(db, table)) continue; - ensureColumn(db, table, "share_scope", "TEXT DEFAULT 'private'"); - } - - execIfTable(db, "skills", `DROP INDEX IF EXISTS uq_skills_name`); - execIfTable(db, "sessions", `CREATE INDEX IF NOT EXISTS idx_sessions_owner ON sessions(owner_agent_kind, owner_profile_id, last_seen_at DESC)`); - execIfTable(db, "episodes", `CREATE INDEX IF NOT EXISTS idx_episodes_owner ON episodes(owner_agent_kind, owner_profile_id, started_at DESC)`); - execIfTable(db, "episodes", `CREATE INDEX IF NOT EXISTS idx_episodes_share ON episodes(share_scope, started_at DESC)`); - execIfTable(db, "traces", `CREATE INDEX IF NOT EXISTS idx_traces_owner ON traces(owner_agent_kind, owner_profile_id, ts DESC)`); - execIfTable(db, "traces", `CREATE INDEX IF NOT EXISTS idx_traces_share ON traces(share_scope, ts DESC)`); - execIfTable(db, "policies", `CREATE INDEX IF NOT EXISTS idx_policies_owner ON policies(owner_agent_kind, owner_profile_id, updated_at DESC)`); - execIfTable(db, "policies", `CREATE INDEX IF NOT EXISTS idx_policies_share ON policies(share_scope, updated_at DESC)`); - execIfTable(db, "world_model", `CREATE INDEX IF NOT EXISTS idx_world_owner ON world_model(owner_agent_kind, owner_profile_id, updated_at DESC)`); - execIfTable(db, "world_model", `CREATE INDEX IF NOT EXISTS idx_world_share ON world_model(share_scope, updated_at DESC)`); - execIfTable(db, "skills", `CREATE UNIQUE INDEX IF NOT EXISTS uq_skills_owner_name ON skills(owner_agent_kind, owner_profile_id, name)`); - execIfTable(db, "skills", `CREATE INDEX IF NOT EXISTS idx_skills_owner ON skills(owner_agent_kind, owner_profile_id, updated_at DESC)`); - execIfTable(db, "skills", `CREATE INDEX IF NOT EXISTS idx_skills_share ON skills(share_scope, updated_at DESC)`); - execIfTable(db, "feedback", `CREATE INDEX IF NOT EXISTS idx_feedback_owner ON feedback(owner_agent_kind, owner_profile_id, ts DESC)`); - execIfTable(db, "decision_repairs", `CREATE INDEX IF NOT EXISTS idx_repairs_owner ON decision_repairs(owner_agent_kind, owner_profile_id, ts DESC)`); - execIfTable(db, "l2_candidate_pool", `CREATE INDEX IF NOT EXISTS idx_l2_candidate_owner ON l2_candidate_pool(owner_agent_kind, owner_profile_id, expires_at)`); - execIfTable(db, "skill_trials", `CREATE INDEX IF NOT EXISTS idx_skill_trials_owner ON skill_trials(owner_agent_kind, owner_profile_id, created_at DESC)`); - execIfTable(db, "api_logs", `CREATE INDEX IF NOT EXISTS idx_api_logs_owner ON api_logs(owner_agent_kind, owner_profile_id, called_at DESC)`); - execIfTable(db, "audit_events", `CREATE INDEX IF NOT EXISTS idx_audit_owner ON audit_events(owner_agent_kind, owner_profile_id, ts DESC)`); + if (!tableExists(db, "skills")) return; + ensureColumn(db, "skills", "usage_count", "INTEGER NOT NULL DEFAULT 0"); + ensureColumn(db, "skills", "last_used_at", "INTEGER"); } function ensureFeedbackExperienceMetadataColumns(db: StorageDb): void { @@ -367,29 +308,99 @@ function ensureHubSharingSearchColumns(db: StorageDb): void { ); } -function execIfTable(db: StorageDb, table: string, sql: string): void { - if (tableExists(db, table)) db.exec(sql); +function ensureNamespaceColumns(db: StorageDb): void { + // Owner columns on ALL namespace tables (NOT NULL with defaults -- + // matches the original v2.0.5 migration schema). + for (const table of NS_TABLES) { + if (!tableExists(db, table)) continue; + ensureColumn(db, table, "owner_agent_kind", "TEXT NOT NULL DEFAULT 'unknown'"); + ensureColumn(db, table, "owner_profile_id", "TEXT NOT NULL DEFAULT 'default'"); + ensureColumn(db, table, "owner_workspace_id", "TEXT"); + } + // share_scope only on content-bearing tables. + for (const table of SHARE_TABLES) { + if (!tableExists(db, table)) continue; + ensureColumn(db, table, "share_scope", "TEXT DEFAULT 'private'"); + } + // Uniqueness on skills.name breaks with namespace isolation — multiple agents + // can legitimately own a skill with the same name. + db.exec(`DROP INDEX IF EXISTS uq_skills_name`); +} + +function ensureNamespaceIndexesAndBackfill(db: StorageDb): void { + // Backfill share_scope in batches so each chunk is its own transaction. + for (const table of SHARE_TABLES) { + if (!tableExists(db, table) || !columnExists(db, table, "share_scope")) continue; + const stmt = db.prepare( + `UPDATE ${table} SET share_scope = 'private' + WHERE share_scope IS NULL + AND rowid IN (SELECT rowid FROM ${table} WHERE share_scope IS NULL LIMIT 2000)`, + ); + let total = 0; + for (;;) { + const result = stmt.run() as { changes: number }; + if (result.changes === 0) break; + total += result.changes; + } + if (total > 0) log.info("migration.backfill", { table, rows: total }); + } + + // Create owner/share indexes matching the full v2.0.5 schema. + // IF NOT EXISTS makes each call idempotent; we log duration so a slow + // build is visible in the agent log for future diagnosis. + const indexes = [ + { index: "idx_sessions_owner", table: "sessions", ddl: `CREATE INDEX IF NOT EXISTS idx_sessions_owner ON sessions(owner_agent_kind, owner_profile_id, last_seen_at DESC)` }, + { index: "idx_episodes_owner", table: "episodes", ddl: `CREATE INDEX IF NOT EXISTS idx_episodes_owner ON episodes(owner_agent_kind, owner_profile_id, started_at DESC)` }, + { index: "idx_episodes_share", table: "episodes", ddl: `CREATE INDEX IF NOT EXISTS idx_episodes_share ON episodes(share_scope, started_at DESC)` }, + { index: "idx_traces_owner", table: "traces", ddl: `CREATE INDEX IF NOT EXISTS idx_traces_owner ON traces(owner_agent_kind, owner_profile_id, ts DESC)` }, + { index: "idx_traces_share", table: "traces", ddl: `CREATE INDEX IF NOT EXISTS idx_traces_share ON traces(share_scope, ts DESC)` }, + { index: "idx_policies_owner", table: "policies", ddl: `CREATE INDEX IF NOT EXISTS idx_policies_owner ON policies(owner_agent_kind, owner_profile_id, updated_at DESC)` }, + { index: "idx_policies_share", table: "policies", ddl: `CREATE INDEX IF NOT EXISTS idx_policies_share ON policies(share_scope, updated_at DESC)` }, + { index: "idx_world_owner", table: "world_model", ddl: `CREATE INDEX IF NOT EXISTS idx_world_owner ON world_model(owner_agent_kind, owner_profile_id, updated_at DESC)` }, + { index: "idx_world_share", table: "world_model", ddl: `CREATE INDEX IF NOT EXISTS idx_world_share ON world_model(share_scope, updated_at DESC)` }, + { index: "uq_skills_owner_name", table: "skills", ddl: `CREATE UNIQUE INDEX IF NOT EXISTS uq_skills_owner_name ON skills(owner_agent_kind, owner_profile_id, name)` }, + { index: "idx_skills_owner", table: "skills", ddl: `CREATE INDEX IF NOT EXISTS idx_skills_owner ON skills(owner_agent_kind, owner_profile_id, updated_at DESC)` }, + { index: "idx_skills_share", table: "skills", ddl: `CREATE INDEX IF NOT EXISTS idx_skills_share ON skills(share_scope, updated_at DESC)` }, + { index: "idx_feedback_owner", table: "feedback", ddl: `CREATE INDEX IF NOT EXISTS idx_feedback_owner ON feedback(owner_agent_kind, owner_profile_id, ts DESC)` }, + { index: "idx_repairs_owner", table: "decision_repairs", ddl: `CREATE INDEX IF NOT EXISTS idx_repairs_owner ON decision_repairs(owner_agent_kind, owner_profile_id, ts DESC)` }, + { index: "idx_l2_candidate_owner", table: "l2_candidate_pool", ddl: `CREATE INDEX IF NOT EXISTS idx_l2_candidate_owner ON l2_candidate_pool(owner_agent_kind, owner_profile_id, expires_at)` }, + { index: "idx_skill_trials_owner", table: "skill_trials", ddl: `CREATE INDEX IF NOT EXISTS idx_skill_trials_owner ON skill_trials(owner_agent_kind, owner_profile_id, created_at DESC)` }, + { index: "idx_api_logs_owner", table: "api_logs", ddl: `CREATE INDEX IF NOT EXISTS idx_api_logs_owner ON api_logs(owner_agent_kind, owner_profile_id, called_at DESC)` }, + { index: "idx_audit_owner", table: "audit_events", ddl: `CREATE INDEX IF NOT EXISTS idx_audit_owner ON audit_events(owner_agent_kind, owner_profile_id, ts DESC)` }, + ]; + for (const { index, table, ddl } of indexes) { + if (!tableExists(db, table) || !columnExists(db, table, "owner_agent_kind")) continue; + const t0 = now(); + db.exec(ddl); + log.info("migration.index", { index, durationMs: now() - t0 }); + } } +// ── Helpers ─────────────────────────────────────────────────────────────────── + function tableExists(db: StorageDb, table: string): boolean { - return Boolean( - db.prepare<{ name: string }, { name: string }>( - `SELECT name FROM sqlite_master WHERE type='table' AND name=@name`, - ).get({ name: table }), - ); + return !!db + .prepare( + `SELECT name FROM sqlite_master WHERE type='table' AND name=?`, + ) + .get(table); +} + +function columnExists(db: StorageDb, table: string, column: string): boolean { + // table names here are internal constants — interpolation is safe. + const rows = db + .prepare(`PRAGMA table_info(${table})`) + .all(); + return rows.some((r) => r.name === column); } function ensureColumn(db: StorageDb, table: string, column: string, definition: string): void { - const columns = new Set( - db.prepare(`PRAGMA table_info(${table})`) - .all() - .map((row) => row.name), - ); - if (!columns.has(column)) { - db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`); - } + if (!tableExists(db, table) || columnExists(db, table, column)) return; + db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`); } +// ───────────────────────────────────────────────────────────────────────────── + function ensureSchemaMigrationsTable(db: StorageDb): void { db.exec( `CREATE TABLE IF NOT EXISTS schema_migrations ( diff --git a/apps/memos-local-plugin/tests/unit/storage/migrator.test.ts b/apps/memos-local-plugin/tests/unit/storage/migrator.test.ts index ba09d1628..475706004 100644 --- a/apps/memos-local-plugin/tests/unit/storage/migrator.test.ts +++ b/apps/memos-local-plugin/tests/unit/storage/migrator.test.ts @@ -155,7 +155,7 @@ describe("storage/migrator", () => { } }); - it("namespace-visibility migration does not rewrite existing NULL share_scope rows (regression #1787)", () => { + it("namespace-visibility phase 2 backfills existing NULL share_scope rows (regression #1787)", () => { // Regression test for https://github.com/MemTensor/MemOS/issues/1787: // The namespace-visibility migration originally issued // `UPDATE traces SET share_scope='private' WHERE share_scope IS NULL` @@ -163,35 +163,43 @@ describe("storage/migrator", () => { // held the bootstrap transaction in CPU-bound row rewriting (re-validating // JSON CHECK constraints) for many minutes, manifesting as a bridge hang. // - // The fix removed the bulk UPDATE. This test verifies that rows with - // NULL share_scope stay NULL after migration (the application layer - // treats NULL as 'private' via COALESCE). + // The fix moves that work to phase 2, outside the schema_migrations + // transaction, and runs it in bounded batches. This test verifies the + // resumable phase-2 pass still normalizes existing NULL rows. const { dbPath, cleanup } = tmpDb(); cleanups.push(cleanup); const db = openDb({ filepath: dbPath, agent: "openclaw" }); try { runMigrations(db); + db.exec(` + INSERT INTO sessions (id, agent, started_at, last_seen_at) + VALUES ('session-1', 'openclaw', 0, 0); + INSERT INTO episodes (id, session_id, started_at) + VALUES ('episode-1', 'session-1', 0); + `); // Seed test rows: two with NULL share_scope, two with explicit values. db.exec(` INSERT INTO traces ( - id, session_id, ts, role, value, priority, embedding, share_scope + id, episode_id, session_id, ts, user_text, agent_text, value, priority, turn_id, share_scope ) VALUES - ('t-null-a', 'session-1', 10, 'user', 0.0, 0.0, X'', NULL), - ('t-null-b', 'session-1', 20, 'assistant', 0.0, 0.0, X'', NULL), - ('t-private', 'session-1', 30, 'user', 0.0, 0.0, X'', 'private'), - ('t-public', 'session-1', 40, 'assistant', 0.0, 0.0, X'', 'public') + ('t-null-a', 'episode-1', 'session-1', 10, 'hello', '', 0.0, 0.0, 10, NULL), + ('t-null-b', 'episode-1', 'session-1', 20, '', 'hi', 0.0, 0.0, 20, NULL), + ('t-private', 'episode-1', 'session-1', 30, 'keep', '', 0.0, 0.0, 30, 'private'), + ('t-public', 'episode-1', 'session-1', 40, '', 'keep', 0.0, 0.0, 40, 'public') `); + + runMigrations(db); + const rows = db .prepare( `SELECT id, share_scope FROM traces ORDER BY id`, ) .all(); - // The crucial assertion: NULL stays NULL. If the legacy bulk - // UPDATE were still in place the two `t-null-*` rows would have - // been rewritten to 'private'. Non-NULL rows are untouched. + // NULL rows are normalized by the out-of-transaction phase-2 pass. + // Non-NULL rows are untouched. expect(rows).toEqual([ - { id: "t-null-a", share_scope: null }, - { id: "t-null-b", share_scope: null }, + { id: "t-null-a", share_scope: "private" }, + { id: "t-null-b", share_scope: "private" }, { id: "t-private", share_scope: "private" }, { id: "t-public", share_scope: "public" }, ]); diff --git a/apps/memos-local-plugin/tests/unit/storage/traces-count.test.ts b/apps/memos-local-plugin/tests/unit/storage/traces-count.test.ts index 888d691aa..83beefd0b 100644 --- a/apps/memos-local-plugin/tests/unit/storage/traces-count.test.ts +++ b/apps/memos-local-plugin/tests/unit/storage/traces-count.test.ts @@ -71,13 +71,13 @@ describe("traces count with > 500 items", () => { const count = repo.count(); expect(count).toBe(600); - // Verify list with no limit still caps at 500 + // Verify list with no limit keeps the default page size. const listed = repo.list({}); - expect(listed.length).toBe(500); + expect(listed.length).toBe(50); - // Verify list with explicit high limit also caps at 500 + // Verify explicit high-limit requests can fetch beyond the old 500 cap. const listedWithLimit = repo.list({ limit: 10000 }); - expect(listedWithLimit.length).toBe(500); + expect(listedWithLimit.length).toBe(600); }); it("countTurns() should return accurate count > 500", () => { diff --git a/src/memos/mem_os/core.py b/src/memos/mem_os/core.py index 54f8f01e0..3ede965d3 100644 --- a/src/memos/mem_os/core.py +++ b/src/memos/mem_os/core.py @@ -1170,7 +1170,7 @@ def share_cube_with_user(self, cube_id: str, target_user_id: str) -> bool: bool: True if successful, False otherwise. """ # Validate current user has access to this cube - self._validate_cube_access(cube_id, target_user_id) + self._validate_cube_access(self.user_id, cube_id) # Validate target user exists if not self.user_manager.validate_user(target_user_id): diff --git a/tests/mem_os/test_format_utils.py b/tests/mem_os/test_format_utils.py new file mode 100644 index 000000000..b97178784 --- /dev/null +++ b/tests/mem_os/test_format_utils.py @@ -0,0 +1,75 @@ +""" +Test suite for src/memos/mem_os/utils/format_utils.py + +Focus: clean_json_response function defensive behavior +Related issue: #1525 +""" + +import pytest + +from memos.mem_os.utils.format_utils import clean_json_response + + +class TestCleanJsonResponse: + """Test clean_json_response function with various inputs.""" + + def test_clean_json_response_with_none_raises_value_error(self): + """Test that passing None raises ValueError with diagnostic message.""" + with pytest.raises(ValueError) as exc_info: + clean_json_response(None) + + error_message = str(exc_info.value) + assert "clean_json_response received None" in error_message + assert "upstream LLM call" in error_message + assert "timed_with_status" in error_message or "generate()" in error_message + + def test_clean_json_response_removes_json_code_block(self): + """Test removal of ```json markers.""" + input_str = '```json\n{"key": "value"}\n```' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_removes_plain_code_block(self): + """Test removal of ``` markers without json keyword.""" + input_str = '```\n{"key": "value"}\n```' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_strips_whitespace(self): + """Test that leading/trailing whitespace is stripped.""" + input_str = ' \n {"key": "value"} \n ' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_handles_plain_json(self): + """Test that plain JSON without markdown is unchanged (except strip).""" + input_str = '{"key": "value"}' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_handles_empty_string(self): + """Test that empty string is handled correctly.""" + assert clean_json_response("") == "" + + def test_clean_json_response_with_complex_json(self): + """Test with realistic LLM response containing nested JSON.""" + input_str = """```json +{ + "queries": [ + {"query": "test", "weight": 1.0}, + {"query": "example", "weight": 0.5} + ] +} +```""" + result = clean_json_response(input_str) + assert "```json" not in result + assert "```" not in result + assert '"queries"' in result + assert result.strip() == result # No leading/trailing whitespace + + def test_clean_json_response_preserves_internal_backticks(self): + """Test that backticks inside JSON content are preserved.""" + input_str = '```json\n{"code": "`example`"}\n```' + result = clean_json_response(input_str) + assert "`example`" in result + assert result.count("`") == 2 # Only internal backticks remain diff --git a/tests/mem_os/test_memos_core.py b/tests/mem_os/test_memos_core.py index 6d2408d05..b57b0b254 100644 --- a/tests/mem_os/test_memos_core.py +++ b/tests/mem_os/test_memos_core.py @@ -795,3 +795,146 @@ def test_search_nonexistent_cube( assert result["text_mem"] == [] assert result["act_mem"] == [] assert result["para_mem"] == [] + + +class TestShareCubeWithUser: + """Regression tests for share_cube_with_user (issue #1901). + + The original implementation called ``_validate_cube_access(cube_id, + target_user_id)``, which both (a) swapped the positional arguments and + (b) validated the wrong user. Every well-formed call therefore failed + with ``ValueError: User '' does not exist or is inactive`` even + though the calling user owned the cube. These tests pin down the correct + semantics: validate the *current* user against the cube being shared, + then delegate the share to ``user_manager.add_user_to_cube``. + """ + + def _build_mos( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + mock_llm_factory.from_config.return_value = mock_llm + mock_reader_factory.from_config.return_value = mock_mem_reader + mock_user_manager_class.return_value = mock_user_manager + return MOSCore(MOSConfig(**mock_config)) + + @patch("memos.mem_os.core.UserManager") + @patch("memos.mem_os.core.MemReaderFactory") + @patch("memos.mem_os.core.LLMFactory") + def test_share_cube_validates_current_user_not_target( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + """Cube access must be validated against the *current* user. + + Regression for #1901: previously the cube_id was passed where the + user_id was expected, causing ``_validate_user_exists`` to reject + every call because the cube UUID is obviously not a registered user. + """ + mock_user_manager.validate_user.return_value = True + mock_user_manager.validate_user_cube_access.return_value = True + mock_user_manager.add_user_to_cube.return_value = True + + mos = self._build_mos( + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ) + + cube_id = "cube-uuid-1234" + target_user_id = "target_user" + + result = mos.share_cube_with_user(cube_id=cube_id, target_user_id=target_user_id) + + assert result is True + # The cube-access check must be made against the *current* user, + # not the cube_id and not the target user. + mock_user_manager.validate_user_cube_access.assert_called_once_with(mos.user_id, cube_id) + # And the actual sharing must add the *target* user to the cube. + mock_user_manager.add_user_to_cube.assert_called_once_with(target_user_id, cube_id) + + @patch("memos.mem_os.core.UserManager") + @patch("memos.mem_os.core.MemReaderFactory") + @patch("memos.mem_os.core.LLMFactory") + def test_share_cube_raises_when_current_user_lacks_access( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + """If the current user doesn't have access to the cube, refuse to share. + + The error message must reference the current user, not the cube_id + (which was the misleading symptom in #1901). + """ + mock_user_manager.validate_user.return_value = True + mock_user_manager.validate_user_cube_access.return_value = False + + mos = self._build_mos( + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ) + + with pytest.raises(ValueError, match="test_user"): + mos.share_cube_with_user(cube_id="cube-uuid-1234", target_user_id="target_user") + + mock_user_manager.add_user_to_cube.assert_not_called() + + @patch("memos.mem_os.core.UserManager") + @patch("memos.mem_os.core.MemReaderFactory") + @patch("memos.mem_os.core.LLMFactory") + def test_share_cube_raises_when_target_user_missing( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + """Target user must exist; ``validate_user`` is consulted independently.""" + # validate_user is used twice: once during MOSCore.__init__ for + # ``self.user_id`` (must succeed) and once for the target user (fail). + mock_user_manager.validate_user.side_effect = lambda uid: uid == "test_user" + mock_user_manager.validate_user_cube_access.return_value = True + + mos = self._build_mos( + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ) + + with pytest.raises(ValueError, match="Target user 'missing_user'"): + mos.share_cube_with_user(cube_id="cube-uuid-1234", target_user_id="missing_user") + + mock_user_manager.add_user_to_cube.assert_not_called()