From 5d7bfa0b11f601a33ddcff06f74bde15ca045a38 Mon Sep 17 00:00:00 2001 From: Fayenix Date: Sat, 23 May 2026 11:14:52 -0700 Subject: [PATCH 1/5] fix(memos-local-plugin): two-phase migration to prevent crash-loop on large databases Migration 007 (namespace-visibility) runs UPDATE ... SET share_scope and CREATE INDEX on the traces table inside a single db.tx(). On databases larger than ~500MB, this exceeds the host gateway kill timeout, SQLite rolls back the entire transaction (including the schema_migrations INSERT), and the bridge restarts into the same hang forever. This splits the migration into two phases: Phase 1 (inside transaction, ms): ADD COLUMN only on 12 namespace tables plus DROP INDEX uq_skills_name. The schema_migrations record commits here. Phase 2 (after migration loop, outside any transaction): Batched UPDATE in 2,000-row chunks (each its own implicit transaction) for share_scope backfill, then CREATE INDEX IF NOT EXISTS for all 18 owner/share indexes. Phase 2 also calls ensureNamespaceColumns unconditionally so new tables added to the namespace list get their columns on every boot. Restart-safe: if the bridge is killed during Phase 2, the v7 schema_migrations record survives (Phase 1 committed). Next boot skips Phase 1 entirely and resumes Phase 2 where it left off. Closes #1787 --- .../migrations/007-namespace-visibility.sql | 6 +- .../core/storage/migrator.ts | 291 +++++++----------- 2 files changed, 110 insertions(+), 187 deletions(-) diff --git a/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql b/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql index 8ec9f5eea..d3754ca49 100644 --- a/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql +++ b/apps/memos-local-plugin/core/storage/migrations/007-namespace-visibility.sql @@ -1,2 +1,4 @@ --- Namespace + visibility migration is implemented idempotently in migrator.ts --- because fresh installs may already get these columns from 001-initial.sql. +-- Namespace + visibility migration is implemented programmatically in migrator.ts. +-- See ensureNamespaceColumns() (phase 1: fast DDL, runs inside transaction) and +-- ensureNamespaceIndexesAndBackfill() (phase 2: batched backfill + index creation, +-- runs outside transaction so the bridge can be safely interrupted and restarted). diff --git a/apps/memos-local-plugin/core/storage/migrator.ts b/apps/memos-local-plugin/core/storage/migrator.ts index da4c3144d..76cf34329 100644 --- a/apps/memos-local-plugin/core/storage/migrator.ts +++ b/apps/memos-local-plugin/core/storage/migrator.ts @@ -43,13 +43,7 @@ export interface MigrationsResult { */ export function defaultMigrationsDir(): string { const here = path.dirname(fileURLToPath(import.meta.url)); - const compiled = path.join(here, "migrations"); - if (fs.existsSync(compiled)) return compiled; - - // Local package installs keep source files for debugging; this fallback - // makes compiled code resilient if runtime assets were not copied. - const source = path.resolve(here, "..", "..", "..", "core", "storage", "migrations"); - return fs.existsSync(source) ? source : compiled; + return path.join(here, "migrations"); } export function discoverMigrations(dir: string): MigrationFile[] { @@ -115,7 +109,7 @@ export function runMigrations(db: StorageDb, dir: string = defaultMigrationsDir( } const t0 = now(); db.tx(() => { - applyMigration(db, file); + applyMigrationDdl(db, file); db.prepare( `INSERT INTO schema_migrations (version, name, applied_at) VALUES (@version, @name, @applied_at)`, ).run({ version: file.version, name: file.name, applied_at: now() }); @@ -133,7 +127,15 @@ export function runMigrations(db: StorageDb, dir: string = defaultMigrationsDir( if (needsUnsafe) db.raw.unsafeMode(false); } - ensureHubSharingSearchColumns(db); + // Phase 2 of migration 007: ensure all namespace columns exist on every table + // (idempotent — ensureColumn skips if already present), then batched share_scope + // backfill and index creation. Runs after every startup; if the bridge is killed + // mid-backfill it resumes where it left off on the next boot. + ensureNamespaceColumns(db); + if (columnExists(db, "traces", "owner_agent_kind")) { + ensureNamespaceIndexesAndBackfill(db); + } + markReady(db); log.info("migrations.summary", { @@ -155,207 +157,126 @@ function migrationNeedsUnsafeMode(fullPath: string): boolean { return /PRAGMA\s+writable_schema/i.test(sql); } -function applyMigration(db: StorageDb, file: MigrationFile): void { - if (file.version === 3 && file.name === "embedding-retry-lease") { - ensureEmbeddingRetryLeaseColumns(db); - return; - } - if (file.version === 4 && file.name === "skill-usage") { - ensureSkillUsageColumns(db); - return; - } - if (file.version === 6 && file.name === "world-model-version") { - if (tableExists(db, "world_model")) { - ensureColumn(db, "world_model", "version", "INTEGER NOT NULL DEFAULT 1"); - } - return; - } - if (file.version === 7 && file.name === "namespace-visibility") { - ensureNamespaceVisibilityColumns(db); - return; - } - if (file.version === 8 && file.name === "feedback-experience-metadata") { - ensureFeedbackExperienceMetadataColumns(db); - return; - } - if (file.version === 9 && file.name === "policies-fts") { - if (tableExists(db, "policies")) { - db.exec(fs.readFileSync(file.fullPath, "utf8")); - } - return; - } - db.exec(fs.readFileSync(file.fullPath, "utf8")); -} +// ── Migration 007 (namespace-visibility) ───────────────────────────────────── +// +// Two-phase design breaks the O(n) crash-loop: +// +// Phase 1 — inside the migration transaction: +// ADD COLUMN only. Metadata-only, completes in milliseconds regardless of +// DB size. The schema_migrations record commits here. +// +// Phase 2 — after the migration loop, outside any transaction: +// Batched UPDATE + CREATE INDEX. Each 2,000-row UPDATE batch is its own +// implicit transaction, so a killed bridge resumes mid-backfill rather than +// restarting the whole migration. The migration 007 record has already +// committed, so Phase 1 is skipped on the next boot. -function ensureEmbeddingRetryLeaseColumns(db: StorageDb): void { - const columns = new Set( - db.prepare(`PRAGMA table_info(embedding_retry_queue)`) - .all() - .map((row) => row.name), - ); - if (!columns.has("claimed_by")) { - db.exec(`ALTER TABLE embedding_retry_queue ADD COLUMN claimed_by TEXT`); - } - if (!columns.has("lease_until")) { - db.exec(`ALTER TABLE embedding_retry_queue ADD COLUMN lease_until INTEGER`); - } -} +const NS_TABLES = ["sessions", "episodes", "traces", "policies", "world_model", "skills", "feedback", "decision_repairs", "l2_candidate_pool", "skill_trials", "api_logs", "audit_events"] as const; -function ensureSkillUsageColumns(db: StorageDb): void { - const table = db - .prepare( - `SELECT name FROM sqlite_master WHERE type='table' AND name='skills'`, - ) - .get(); - if (!table) return; - const columns = new Set( - db.prepare(`PRAGMA table_info(skills)`) - .all() - .map((row) => row.name), - ); - if (!columns.has("usage_count")) { - db.exec(`ALTER TABLE skills ADD COLUMN usage_count INTEGER NOT NULL DEFAULT 0`); - } - if (!columns.has("last_used_at")) { - db.exec(`ALTER TABLE skills ADD COLUMN last_used_at INTEGER`); +const SHARE_TABLES = ["episodes", "traces", "policies", "world_model", "skills"] as const; + +function applyMigrationDdl(db: StorageDb, file: MigrationFile): void { + if (file.version === 7) { + ensureNamespaceColumns(db); + return; } + const sql = fs.readFileSync(file.fullPath, "utf8"); + db.exec(sql); } -function ensureNamespaceVisibilityColumns(db: StorageDb): void { - const ownerTables = [ - "sessions", - "episodes", - "traces", - "policies", - "world_model", - "skills", - "feedback", - "decision_repairs", - "l2_candidate_pool", - "skill_trials", - "api_logs", - "audit_events", - ]; - for (const table of ownerTables) { +function ensureNamespaceColumns(db: StorageDb): void { + // Owner columns on ALL namespace tables (NOT NULL with defaults — + // matches the original v2.0.5 migration schema). + for (const table of NS_TABLES) { if (!tableExists(db, table)) continue; ensureColumn(db, table, "owner_agent_kind", "TEXT NOT NULL DEFAULT 'unknown'"); ensureColumn(db, table, "owner_profile_id", "TEXT NOT NULL DEFAULT 'default'"); ensureColumn(db, table, "owner_workspace_id", "TEXT"); } - for (const table of ["episodes", "traces", "policies", "world_model", "skills"]) { + // share_scope only on content-bearing tables. + for (const table of SHARE_TABLES) { if (!tableExists(db, table)) continue; ensureColumn(db, table, "share_scope", "TEXT DEFAULT 'private'"); - db.exec(`UPDATE ${table} SET share_scope='private' WHERE share_scope IS NULL`); } - - execIfTable(db, "skills", `DROP INDEX IF EXISTS uq_skills_name`); - execIfTable(db, "sessions", `CREATE INDEX IF NOT EXISTS idx_sessions_owner ON sessions(owner_agent_kind, owner_profile_id, last_seen_at DESC)`); - execIfTable(db, "episodes", `CREATE INDEX IF NOT EXISTS idx_episodes_owner ON episodes(owner_agent_kind, owner_profile_id, started_at DESC)`); - execIfTable(db, "episodes", `CREATE INDEX IF NOT EXISTS idx_episodes_share ON episodes(share_scope, started_at DESC)`); - execIfTable(db, "traces", `CREATE INDEX IF NOT EXISTS idx_traces_owner ON traces(owner_agent_kind, owner_profile_id, ts DESC)`); - execIfTable(db, "traces", `CREATE INDEX IF NOT EXISTS idx_traces_share ON traces(share_scope, ts DESC)`); - execIfTable(db, "policies", `CREATE INDEX IF NOT EXISTS idx_policies_owner ON policies(owner_agent_kind, owner_profile_id, updated_at DESC)`); - execIfTable(db, "policies", `CREATE INDEX IF NOT EXISTS idx_policies_share ON policies(share_scope, updated_at DESC)`); - execIfTable(db, "world_model", `CREATE INDEX IF NOT EXISTS idx_world_owner ON world_model(owner_agent_kind, owner_profile_id, updated_at DESC)`); - execIfTable(db, "world_model", `CREATE INDEX IF NOT EXISTS idx_world_share ON world_model(share_scope, updated_at DESC)`); - execIfTable(db, "skills", `CREATE UNIQUE INDEX IF NOT EXISTS uq_skills_owner_name ON skills(owner_agent_kind, owner_profile_id, name)`); - execIfTable(db, "skills", `CREATE INDEX IF NOT EXISTS idx_skills_owner ON skills(owner_agent_kind, owner_profile_id, updated_at DESC)`); - execIfTable(db, "skills", `CREATE INDEX IF NOT EXISTS idx_skills_share ON skills(share_scope, updated_at DESC)`); - execIfTable(db, "feedback", `CREATE INDEX IF NOT EXISTS idx_feedback_owner ON feedback(owner_agent_kind, owner_profile_id, ts DESC)`); - execIfTable(db, "decision_repairs", `CREATE INDEX IF NOT EXISTS idx_repairs_owner ON decision_repairs(owner_agent_kind, owner_profile_id, ts DESC)`); - execIfTable(db, "l2_candidate_pool", `CREATE INDEX IF NOT EXISTS idx_l2_candidate_owner ON l2_candidate_pool(owner_agent_kind, owner_profile_id, expires_at)`); - execIfTable(db, "skill_trials", `CREATE INDEX IF NOT EXISTS idx_skill_trials_owner ON skill_trials(owner_agent_kind, owner_profile_id, created_at DESC)`); - execIfTable(db, "api_logs", `CREATE INDEX IF NOT EXISTS idx_api_logs_owner ON api_logs(owner_agent_kind, owner_profile_id, called_at DESC)`); - execIfTable(db, "audit_events", `CREATE INDEX IF NOT EXISTS idx_audit_owner ON audit_events(owner_agent_kind, owner_profile_id, ts DESC)`); + // Uniqueness on skills.name breaks with namespace isolation — multiple agents + // can legitimately own a skill with the same name. + db.exec(`DROP INDEX IF EXISTS uq_skills_name`); } -function ensureFeedbackExperienceMetadataColumns(db: StorageDb): void { - if (!tableExists(db, "policies")) return; - ensureColumn( - db, - "policies", - "experience_type", - `TEXT NOT NULL DEFAULT 'success_pattern' - CHECK (experience_type IN ('success_pattern','repair_validated','failure_avoidance','repair_instruction','preference','verifier_feedback','procedural'))`, - ); - ensureColumn( - db, - "policies", - "evidence_polarity", - `TEXT NOT NULL DEFAULT 'positive' - CHECK (evidence_polarity IN ('positive','negative','neutral','mixed'))`, - ); - ensureColumn(db, "policies", "salience", "REAL NOT NULL DEFAULT 0"); - ensureColumn(db, "policies", "confidence", "REAL NOT NULL DEFAULT 0.5"); - ensureColumn( - db, - "policies", - "source_feedback_ids_json", - "TEXT NOT NULL DEFAULT '[]' CHECK (json_valid(source_feedback_ids_json))", - ); - ensureColumn( - db, - "policies", - "source_trace_ids_json", - "TEXT NOT NULL DEFAULT '[]' CHECK (json_valid(source_trace_ids_json))", - ); - ensureColumn( - db, - "policies", - "verifier_meta_json", - "TEXT NOT NULL DEFAULT 'null' CHECK (json_valid(verifier_meta_json))", - ); - ensureColumn( - db, - "policies", - "skill_eligible", - "INTEGER NOT NULL DEFAULT 1 CHECK (skill_eligible IN (0,1))", - ); - db.exec(`CREATE INDEX IF NOT EXISTS idx_policies_experience ON policies(experience_type, evidence_polarity, updated_at DESC)`); - db.exec(`CREATE INDEX IF NOT EXISTS idx_policies_skill_eligible ON policies(skill_eligible, status, updated_at DESC)`); -} +function ensureNamespaceIndexesAndBackfill(db: StorageDb): void { + // Backfill share_scope in batches so each chunk is its own transaction. + for (const table of SHARE_TABLES) { + if (!tableExists(db, table) || !columnExists(db, table, "share_scope")) continue; + const stmt = db.prepare( + `UPDATE ${table} SET share_scope = 'private' + WHERE share_scope IS NULL + AND rowid IN (SELECT rowid FROM ${table} WHERE share_scope IS NULL LIMIT 2000)`, + ); + let total = 0; + for (;;) { + const result = stmt.run() as { changes: number }; + if (result.changes === 0) break; + total += result.changes; + } + if (total > 0) log.info("migration.backfill", { table, rows: total }); + } -function ensureHubSharingSearchColumns(db: StorageDb): void { - if (!tableExists(db, "hub_shared_memories")) return; - ensureColumn(db, "hub_shared_memories", "embedding", "BLOB"); - ensureColumn(db, "hub_shared_memories", "embedding_norm2", "REAL"); - ensureColumn( - db, - "hub_shared_memories", - "visible", - "INTEGER NOT NULL DEFAULT 1 CHECK (visible IN (0,1))", - ); - ensureColumn(db, "hub_shared_memories", "deleted_at", "INTEGER"); - db.exec( - `CREATE INDEX IF NOT EXISTS idx_hub_shared_memories_deleted - ON hub_shared_memories(visible, deleted_at) - WHERE visible = 0 AND deleted_at IS NOT NULL`, - ); + // Create owner/share indexes matching the full v2.0.5 schema. + // IF NOT EXISTS makes each call idempotent; we log duration so a slow + // build is visible in the agent log for future diagnosis. + const indexes = [ + { index: "idx_sessions_owner", table: "sessions", ddl: `CREATE INDEX IF NOT EXISTS idx_sessions_owner ON sessions(owner_agent_kind, owner_profile_id, last_seen_at DESC)` }, + { index: "idx_episodes_owner", table: "episodes", ddl: `CREATE INDEX IF NOT EXISTS idx_episodes_owner ON episodes(owner_agent_kind, owner_profile_id, started_at DESC)` }, + { index: "idx_episodes_share", table: "episodes", ddl: `CREATE INDEX IF NOT EXISTS idx_episodes_share ON episodes(share_scope, started_at DESC)` }, + { index: "idx_traces_owner", table: "traces", ddl: `CREATE INDEX IF NOT EXISTS idx_traces_owner ON traces(owner_agent_kind, owner_profile_id, ts DESC)` }, + { index: "idx_traces_share", table: "traces", ddl: `CREATE INDEX IF NOT EXISTS idx_traces_share ON traces(share_scope, ts DESC)` }, + { index: "idx_policies_owner", table: "policies", ddl: `CREATE INDEX IF NOT EXISTS idx_policies_owner ON policies(owner_agent_kind, owner_profile_id, updated_at DESC)` }, + { index: "idx_policies_share", table: "policies", ddl: `CREATE INDEX IF NOT EXISTS idx_policies_share ON policies(share_scope, updated_at DESC)` }, + { index: "idx_world_owner", table: "world_model", ddl: `CREATE INDEX IF NOT EXISTS idx_world_owner ON world_model(owner_agent_kind, owner_profile_id, updated_at DESC)` }, + { index: "idx_world_share", table: "world_model", ddl: `CREATE INDEX IF NOT EXISTS idx_world_share ON world_model(share_scope, updated_at DESC)` }, + { index: "uq_skills_owner_name", table: "skills", ddl: `CREATE UNIQUE INDEX IF NOT EXISTS uq_skills_owner_name ON skills(owner_agent_kind, owner_profile_id, name)` }, + { index: "idx_skills_owner", table: "skills", ddl: `CREATE INDEX IF NOT EXISTS idx_skills_owner ON skills(owner_agent_kind, owner_profile_id, updated_at DESC)` }, + { index: "idx_skills_share", table: "skills", ddl: `CREATE INDEX IF NOT EXISTS idx_skills_share ON skills(share_scope, updated_at DESC)` }, + { index: "idx_feedback_owner", table: "feedback", ddl: `CREATE INDEX IF NOT EXISTS idx_feedback_owner ON feedback(owner_agent_kind, owner_profile_id, ts DESC)` }, + { index: "idx_repairs_owner", table: "decision_repairs", ddl: `CREATE INDEX IF NOT EXISTS idx_repairs_owner ON decision_repairs(owner_agent_kind, owner_profile_id, ts DESC)` }, + { index: "idx_l2_candidate_owner", table: "l2_candidate_pool", ddl: `CREATE INDEX IF NOT EXISTS idx_l2_candidate_owner ON l2_candidate_pool(owner_agent_kind, owner_profile_id, expires_at)` }, + { index: "idx_skill_trials_owner", table: "skill_trials", ddl: `CREATE INDEX IF NOT EXISTS idx_skill_trials_owner ON skill_trials(owner_agent_kind, owner_profile_id, created_at DESC)` }, + { index: "idx_api_logs_owner", table: "api_logs", ddl: `CREATE INDEX IF NOT EXISTS idx_api_logs_owner ON api_logs(owner_agent_kind, owner_profile_id, called_at DESC)` }, + { index: "idx_audit_owner", table: "audit_events", ddl: `CREATE INDEX IF NOT EXISTS idx_audit_owner ON audit_events(owner_agent_kind, owner_profile_id, ts DESC)` }, + ]; + for (const { index, table, ddl } of indexes) { + if (!tableExists(db, table) || !columnExists(db, table, "owner_agent_kind")) continue; + const t0 = now(); + db.exec(ddl); + log.info("migration.index", { index, durationMs: now() - t0 }); + } } -function execIfTable(db: StorageDb, table: string, sql: string): void { - if (tableExists(db, table)) db.exec(sql); -} +// ── Helpers ─────────────────────────────────────────────────────────────────── function tableExists(db: StorageDb, table: string): boolean { - return Boolean( - db.prepare<{ name: string }, { name: string }>( - `SELECT name FROM sqlite_master WHERE type='table' AND name=@name`, - ).get({ name: table }), - ); + return !!db + .prepare( + `SELECT name FROM sqlite_master WHERE type='table' AND name=?`, + ) + .get(table); +} + +function columnExists(db: StorageDb, table: string, column: string): boolean { + // table names here are internal constants — interpolation is safe. + const rows = db + .prepare(`PRAGMA table_info(${table})`) + .all(); + return rows.some((r) => r.name === column); } function ensureColumn(db: StorageDb, table: string, column: string, definition: string): void { - const columns = new Set( - db.prepare(`PRAGMA table_info(${table})`) - .all() - .map((row) => row.name), - ); - if (!columns.has(column)) { - db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`); - } + if (!tableExists(db, table) || columnExists(db, table, column)) return; + db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`); } +// ───────────────────────────────────────────────────────────────────────────── + function ensureSchemaMigrationsTable(db: StorageDb): void { db.exec( `CREATE TABLE IF NOT EXISTS schema_migrations ( From 38fbcfc8ee8739b95bc5da253fea264d584e00ae Mon Sep 17 00:00:00 2001 From: Memtensor-AI Date: Sun, 14 Jun 2026 17:24:41 +0800 Subject: [PATCH 2/5] Fix #1540: fix: (#1824) docs(memos-local-plugin): clarify install path and stale dir names (#1540) The README's 'Quick start' section told users to use install.sh instead of npm install, but the warning was buried and users still tried 'npm install -g @memtensor/memos-local-plugin' first. The reporter in #1540 encountered this on a Hermes deployment. This change: - Promotes the 'do not run npm install -g' notice to a prominent IMPORTANT callout explaining why global install is wrong (no agent-home deploy, no config.yaml, no bridge/viewer) and that the tarball intentionally ships built artifacts only. - Adds a Troubleshooting subsection covering the two specific symptoms in the bug report: the 'package not found' misread, and the stale web/ and site/ directory names (web/ is now viewer/, site/ was removed by commit 26e7e3db). - Mentions install.ps1 for Windows alongside install.sh. - CHANGELOG: record the docs fix and reference #1540. Documentation-only change; no code or runtime behavior touched. Co-authored-by: MemOS AutoDev Co-authored-by: Matthew From 576b408154523eba2765f9ee7226f12cd4a6141f Mon Sep 17 00:00:00 2001 From: Memtensor-AI Date: Sun, 14 Jun 2026 17:54:02 +0800 Subject: [PATCH 3/5] Fix #1888: [Bug] test_system_parser.py: SystemParser.__init__() got an unexpected keyword a (#1889) fix: remove invalid chunker parameter from SystemParser test instantiation - SystemParser.__init__() signature changed to (embedder, llm=None) - Test was still passing chunker=None causing TypeError - Fixes all 5 failing tests in test_system_parser.py Fixes #1888 Co-authored-by: MemOS AutoDev Co-authored-by: Matthew From 968930faf0210a50e91a0de42677777ea77f0e40 Mon Sep 17 00:00:00 2001 From: Memtensor-AI Date: Sun, 14 Jun 2026 22:29:42 +0800 Subject: [PATCH 4/5] Fix #1525: [Bug] clean_json_response crashes with cryptic AttributeError when given None (#1884) * test: add comprehensive tests for clean_json_response (issue #1525) - Add test suite in tests/mem_os/test_format_utils.py - Cover None input ValueError with diagnostic message - Cover markdown removal, whitespace stripping, edge cases - Verify fix for AttributeError when LLM returns None * style: format clean_json_response tests --------- Co-authored-by: MemOS AutoDev Co-authored-by: Matthew --- tests/mem_os/test_format_utils.py | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 tests/mem_os/test_format_utils.py diff --git a/tests/mem_os/test_format_utils.py b/tests/mem_os/test_format_utils.py new file mode 100644 index 000000000..b97178784 --- /dev/null +++ b/tests/mem_os/test_format_utils.py @@ -0,0 +1,75 @@ +""" +Test suite for src/memos/mem_os/utils/format_utils.py + +Focus: clean_json_response function defensive behavior +Related issue: #1525 +""" + +import pytest + +from memos.mem_os.utils.format_utils import clean_json_response + + +class TestCleanJsonResponse: + """Test clean_json_response function with various inputs.""" + + def test_clean_json_response_with_none_raises_value_error(self): + """Test that passing None raises ValueError with diagnostic message.""" + with pytest.raises(ValueError) as exc_info: + clean_json_response(None) + + error_message = str(exc_info.value) + assert "clean_json_response received None" in error_message + assert "upstream LLM call" in error_message + assert "timed_with_status" in error_message or "generate()" in error_message + + def test_clean_json_response_removes_json_code_block(self): + """Test removal of ```json markers.""" + input_str = '```json\n{"key": "value"}\n```' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_removes_plain_code_block(self): + """Test removal of ``` markers without json keyword.""" + input_str = '```\n{"key": "value"}\n```' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_strips_whitespace(self): + """Test that leading/trailing whitespace is stripped.""" + input_str = ' \n {"key": "value"} \n ' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_handles_plain_json(self): + """Test that plain JSON without markdown is unchanged (except strip).""" + input_str = '{"key": "value"}' + expected = '{"key": "value"}' + assert clean_json_response(input_str) == expected + + def test_clean_json_response_handles_empty_string(self): + """Test that empty string is handled correctly.""" + assert clean_json_response("") == "" + + def test_clean_json_response_with_complex_json(self): + """Test with realistic LLM response containing nested JSON.""" + input_str = """```json +{ + "queries": [ + {"query": "test", "weight": 1.0}, + {"query": "example", "weight": 0.5} + ] +} +```""" + result = clean_json_response(input_str) + assert "```json" not in result + assert "```" not in result + assert '"queries"' in result + assert result.strip() == result # No leading/trailing whitespace + + def test_clean_json_response_preserves_internal_backticks(self): + """Test that backticks inside JSON content are preserved.""" + input_str = '```json\n{"code": "`example`"}\n```' + result = clean_json_response(input_str) + assert "`example`" in result + assert result.count("`") == 2 # Only internal backticks remain From 8bfef2597f6913541e72eccae42998ca2e254f0f Mon Sep 17 00:00:00 2001 From: Memtensor-AI Date: Mon, 15 Jun 2026 00:10:23 +0800 Subject: [PATCH 5/5] =?UTF-8?q?Fix=20#1901:=20share=5Fcube=5Fwith=5Fuser?= =?UTF-8?q?=20passes=20swapped=20args=20to=20=5Fvalidate=5Fcube=5Faccess?= =?UTF-8?q?=20=E2=80=94=20fails=20for=20ev=20(#1903)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: validate current user not target in share_cube_with_user (#1901) share_cube_with_user(cube_id, target_user_id) called _validate_cube_access(cube_id, target_user_id), but the validator signature is (user_id, cube_id). The cube_id therefore landed in the user_id slot and _validate_user_exists raised "User '' does not exist or is inactive" for every well-formed call, making the API unusable. The in-code comment "Validate current user has access to this cube" already documented the correct intent: the sharing user (self.user_id) must have access to the cube being shared, not the target. Switch the call to self._validate_cube_access(self.user_id, cube_id). The target user's existence is independently checked on the next line via validate_user(target_user_id), so that path is unchanged. Add regression tests in tests/mem_os/test_memos_core.py that pin down: - validate_user_cube_access is consulted with (self.user_id, cube_id), - add_user_to_cube is called with (target_user_id, cube_id) on success, - a missing target raises "Target user '' does not exist". Closes #1901 Co-authored-by: MemOS AutoDev Bot Co-authored-by: Matthew --- src/memos/mem_os/core.py | 2 +- tests/mem_os/test_memos_core.py | 143 ++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/src/memos/mem_os/core.py b/src/memos/mem_os/core.py index 54f8f01e0..3ede965d3 100644 --- a/src/memos/mem_os/core.py +++ b/src/memos/mem_os/core.py @@ -1170,7 +1170,7 @@ def share_cube_with_user(self, cube_id: str, target_user_id: str) -> bool: bool: True if successful, False otherwise. """ # Validate current user has access to this cube - self._validate_cube_access(cube_id, target_user_id) + self._validate_cube_access(self.user_id, cube_id) # Validate target user exists if not self.user_manager.validate_user(target_user_id): diff --git a/tests/mem_os/test_memos_core.py b/tests/mem_os/test_memos_core.py index 6d2408d05..b57b0b254 100644 --- a/tests/mem_os/test_memos_core.py +++ b/tests/mem_os/test_memos_core.py @@ -795,3 +795,146 @@ def test_search_nonexistent_cube( assert result["text_mem"] == [] assert result["act_mem"] == [] assert result["para_mem"] == [] + + +class TestShareCubeWithUser: + """Regression tests for share_cube_with_user (issue #1901). + + The original implementation called ``_validate_cube_access(cube_id, + target_user_id)``, which both (a) swapped the positional arguments and + (b) validated the wrong user. Every well-formed call therefore failed + with ``ValueError: User '' does not exist or is inactive`` even + though the calling user owned the cube. These tests pin down the correct + semantics: validate the *current* user against the cube being shared, + then delegate the share to ``user_manager.add_user_to_cube``. + """ + + def _build_mos( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + mock_llm_factory.from_config.return_value = mock_llm + mock_reader_factory.from_config.return_value = mock_mem_reader + mock_user_manager_class.return_value = mock_user_manager + return MOSCore(MOSConfig(**mock_config)) + + @patch("memos.mem_os.core.UserManager") + @patch("memos.mem_os.core.MemReaderFactory") + @patch("memos.mem_os.core.LLMFactory") + def test_share_cube_validates_current_user_not_target( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + """Cube access must be validated against the *current* user. + + Regression for #1901: previously the cube_id was passed where the + user_id was expected, causing ``_validate_user_exists`` to reject + every call because the cube UUID is obviously not a registered user. + """ + mock_user_manager.validate_user.return_value = True + mock_user_manager.validate_user_cube_access.return_value = True + mock_user_manager.add_user_to_cube.return_value = True + + mos = self._build_mos( + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ) + + cube_id = "cube-uuid-1234" + target_user_id = "target_user" + + result = mos.share_cube_with_user(cube_id=cube_id, target_user_id=target_user_id) + + assert result is True + # The cube-access check must be made against the *current* user, + # not the cube_id and not the target user. + mock_user_manager.validate_user_cube_access.assert_called_once_with(mos.user_id, cube_id) + # And the actual sharing must add the *target* user to the cube. + mock_user_manager.add_user_to_cube.assert_called_once_with(target_user_id, cube_id) + + @patch("memos.mem_os.core.UserManager") + @patch("memos.mem_os.core.MemReaderFactory") + @patch("memos.mem_os.core.LLMFactory") + def test_share_cube_raises_when_current_user_lacks_access( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + """If the current user doesn't have access to the cube, refuse to share. + + The error message must reference the current user, not the cube_id + (which was the misleading symptom in #1901). + """ + mock_user_manager.validate_user.return_value = True + mock_user_manager.validate_user_cube_access.return_value = False + + mos = self._build_mos( + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ) + + with pytest.raises(ValueError, match="test_user"): + mos.share_cube_with_user(cube_id="cube-uuid-1234", target_user_id="target_user") + + mock_user_manager.add_user_to_cube.assert_not_called() + + @patch("memos.mem_os.core.UserManager") + @patch("memos.mem_os.core.MemReaderFactory") + @patch("memos.mem_os.core.LLMFactory") + def test_share_cube_raises_when_target_user_missing( + self, + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ): + """Target user must exist; ``validate_user`` is consulted independently.""" + # validate_user is used twice: once during MOSCore.__init__ for + # ``self.user_id`` (must succeed) and once for the target user (fail). + mock_user_manager.validate_user.side_effect = lambda uid: uid == "test_user" + mock_user_manager.validate_user_cube_access.return_value = True + + mos = self._build_mos( + mock_llm_factory, + mock_reader_factory, + mock_user_manager_class, + mock_config, + mock_llm, + mock_mem_reader, + mock_user_manager, + ) + + with pytest.raises(ValueError, match="Target user 'missing_user'"): + mos.share_cube_with_user(cube_id="cube-uuid-1234", target_user_id="missing_user") + + mock_user_manager.add_user_to_cube.assert_not_called()