fix: unify workspace memory quality gate

sdwolf4103 · sdwolf4103 · commit ffb04772518b · 2026-04-28T13:21:15.000+08:00
diff --git a/src/extractors.ts b/src/extractors.ts
@@ -1,6 +1,7 @@
 import { createHash } from "crypto";
 import type { ActiveFile, LongTermMemoryEntry, LongTermType, OpenError } from "./types.ts";
 import { LONG_TERM_LIMITS } from "./types.ts";
+import { assessMemoryQuality } from "./memory-quality.ts";
 
 function id(prefix: string): string {
   return `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
@@ -51,7 +52,7 @@ export function extractExplicitMemories(text: string): LongTermMemoryEntry[] {
     // 韓文（長詞優先）：기억해줘/메모해줘 must come before 기억해/메모해
     /(?:^|\n)\s*(?:기억해줘|기억해|잊지 마|잊지마|메모해줘|메모해)[:：,，]?\s*(.+)$/gim,
     // 英文：remember this/that - 必須在行首，避免 "to remember" 非指令匹配
-    /(?:^|\n)\s*(?:please\s+)?remember\s+(?:this|that)?[:：,，]?\s*(.+)$/gim,
+    /(?:^|\n)\s*(?:please\s+)?remember(?:\s+(?:this|that))?[:：,，]?\s*(.+)$/gim,
     // save/add to memory
     /(?:^|\n)\s*(?:please\s+)?(?:save|add)\s+(?:this|that)?\s*(?:to|in)\s+memory[:：,，]?\s*(.+)$/gim,
     // commit to memory
@@ -199,7 +200,7 @@ function normalizeCandidateBody(body: string): { text: string; hadTrigger: boole
     /(?:请|請)?(?:帮我|幫我)?(?:记住|記住|记得|記得|记下来|記下來)(?:这一点|這一點|这点|這點|这个|這個)?[:：,，]?\s*(.+)$/im,
     /(?:覚えておいて|覚えて|忘れないで|メモして)[:：,，]?\s*(.+)$/im,
     /(?:기억해줘|기억해|잊지 마|잊지마|메모해줘|메모해)[:：,，]?\s*(.+)$/im,
-    /(?:please\s+)?remember\s+(?:this|that)?[:：,，]?\s*(.+)$/im,
+    /(?:please\s+)?remember(?:\s+(?:this|that))?[:：,，]?\s*(.+)$/im,
     /(?:please\s+)?(?:save|add)\s+(?:this|that)?\s*(?:to|in)\s+memory[:：,，]?\s*(.+)$/im,
     /(?:please\s+)?commit\s+(?:this|that)?\s*to memory[:：,，]?\s*(.+)$/im,
   ];
@@ -273,35 +274,12 @@ function shouldAcceptWorkspaceMemoryCandidate(
   const pathCount = (text.match(/\/[\w.-]+(\/[\w.-]+)+/g) || []).length;
   if (pathCount > 2) return false;
 
-  // Session-specific progress snapshots for project type
-  if (entry.type === "project") {
-    if (isProjectSnapshotViolation(text)) return false;
-  }
+  const quality = assessMemoryQuality({ type: entry.type, text, source: "compaction" });
+  if (!quality.accepted) return false;
 
   return true;
 }
 
-function isProjectSnapshotViolation(text: string): boolean {
-  // Test/suite counts
-  if (/\d+\s+tests?\s+pass(?:ed)?/i.test(text)) return true;
-  if (/\d+\s+suites?\s+(?:pass|fail)/i.test(text)) return true;
-
-  // File counts with snapshot/process context only, not static limits
-  if (/\d+\s*(?:個|个)?\s*(?:files?|文件)/i.test(text)) {
-    const hasSnapshotContext = /同步|synced|uploaded|downloaded|completed|generated|created|modified|processed|完成/i.test(text);
-    const hasLimitContext = /limit|max|maximum|min|minimum|supports?|allowed|per\s+(?:batch|request|upload)/i.test(text);
-    if (hasSnapshotContext && !hasLimitContext) return true;
-  }
-
-  // Phase/Wave/Sprint/Milestone/Task progress
-  if (/(?:phases?|waves?|sprints?|milestones?|tasks?)\s*\d+(?:\s*[-–]\s*\d+)?/i.test(text)) {
-    if (/completed|done|finished|完成/i.test(text)) return true;
-  }
-  if (/(?:已完成|完成).{0,30}(?:phases?|waves?|sprints?|milestones?|tasks?)/i.test(text)) return true;
-
-  return false;
-}
-
 /**
  * Extract candidate block from summary using multiple formats.
  * Supports: Plain text label, Markdown section, legacy XML.
diff --git a/src/memory-quality.ts b/src/memory-quality.ts
@@ -0,0 +1,96 @@
+import type { LongTermMemoryEntry, LongTermSource } from "./types.ts";
+
+/** Input to the quality gate: the `type` and `text` of a memory entry plus an optional `source`. */
+export type MemoryQualityInput =
+  Pick<LongTermMemoryEntry, "type" | "text"> & { source?: LongTermSource; };
+
+/**
+ * Outcome of a quality assessment: the verdict and the rejection reason codes.
+ */
+export type MemoryQualityResult = { accepted: boolean; reasons: string[] };
+
+/** Applies every quality rule to the trimmed text; accepted only when no rule fires (`source` is not consulted). */
+export function assessMemoryQuality(entry: MemoryQualityInput): MemoryQualityResult {
+  const body = entry.text.trim();
+  const reasons = Object.entries({
+    empty: body.length === 0,
+    progress_snapshot: isProgressSnapshotViolation(body),
+    raw_error: isRawErrorViolation(body),
+    commit_or_ci_snapshot: isCommitOrCiViolation(body),
+    path_heavy: isPathHeavyViolation(body),
+    temporary_status: isTemporaryStatusViolation(body),
+    bad_feedback: entry.type === "feedback" && isFeedbackQualityViolation(body),
+    bad_decision: entry.type === "decision" && isDecisionQualityViolation(body),
+  }).filter(([, fired]) => fired).map(([code]) => code);
+  return { accepted: reasons.length === 0, reasons };
+}
+
+/**
+ * Detects session progress snapshots: test/suite counts, file counts in a
+ * snapshot (non-limit) context, completion verbs paired with work-item nouns,
+ * numbered phases/waves/tasks reported as done, and "currently"/"next step" wording.
+ */
+export function isProgressSnapshotViolation(text: string): boolean {
+  if (/\d+\s+tests?\s+pass(?:ed)?/i.test(text) || /\d+\s+suites?\s+(?:pass|fail)/i.test(text)) return true;
+  // A file count is a snapshot only when sync/processing wording appears without limit wording.
+  const hasFileCount = /\d+\s*(?:個|个)?\s*(?:files?|文件)/i.test(text);
+  const snapshotWording = /同步|synced|uploaded|downloaded|completed|generated|created|modified|processed|完成/i.test(text);
+  const limitWording = /limit|max|maximum|min|minimum|supports?|allowed|per\s+(?:batch|request|upload)/i.test(text);
+  if (hasFileCount && snapshotWording && !limitWording) return true;
+  const completionVerb = /\b(?:completed|done|finished|implemented|added|updated|fixed|reviewed|passed|modified)\b/i.test(text);
+  const workItemNoun = /\b(?:wave|phase|task|plan|pr|commit|ci|test|suite|implementation|session|change|fix|review|file)\b/i.test(text);
+  if (completionVerb && workItemNoun) return true;
+  if (/(?:已完成|完成|修復|实现|實作).{0,40}(?:wave|phase|task|plan|PR|測試|测试|實作|实现|修復)/iu.test(text)) return true;
+  const numberedStage = /(?:phases?|waves?|sprints?|milestones?|tasks?)\s*\d+(?:\s*[-–]\s*\d+)?/i.test(text);
+  if (numberedStage && /completed|done|finished|完成|已完成/i.test(text)) return true;
+  if (/(?:已完成|完成).{0,30}(?:phases?|waves?|sprints?|milestones?|tasks?)/i.test(text)) return true;
+  return /\b(?:currently|right now|latest change|previous session|last wave|next step)\b/i.test(text);
+}
+
+/**
+ * Flags feedback-typed text that does not read as a stable user preference or
+ * standing instruction. Text passes only when it matches one of the preference
+ * patterns below (English or Chinese); anything else is a violation.
+ */
+export function isFeedbackQualityViolation(text: string): boolean {
+  const stablePreference = /\b(?:user|the user)\s+(?:prefers|wants|asked|expects|requires|likes|dislikes)\b/i.test(text)
+    || /\b(?:prefer|preference|going forward|from now on|always|never)\b/i.test(text)
+    || /(?:使用者|用戶|用户).{0,12}(?:偏好|希望|要求|想要)/u.test(text)
+    || /(?:以後|以后|請|请).{0,20}(?:使用|回答|保持|避免)/u.test(text);
+  // No separate "internal note" verb check: without a preference match the verdict is a violation either way.
+  return !stablePreference;
+}
+
+/** Flags decision text that lacks a rule keyword, or that reads as completed work or a past-session note. */
+export function isDecisionQualityViolation(text: string): boolean {
+  const hasRuleKeyword = /\b(?:use|keep|prefer|avoid|do not|don't|must|should|never|always|require|choose|reject)\b/i.test(text)
+    || /(?:使用|保持|避免|不要|必須|必须|應該|应该|選擇|选择)/u.test(text);
+  const completedWork = /\b(?:implemented|added|updated|fixed|completed|reviewed)\b/i.test(text);
+  const pastSessionNote = /\b(?:was|were|has been|had been)\b/i.test(text) && /\b(?:previous|last|latest|this session|this wave|already)\b/i.test(text);
+  return !hasRuleKeyword || completedWork || pastSessionNote;
+}
+
+/** True when text begins with an error-name prefix such as "TypeError:" or contains an "at fn (file:line:col)" stack frame. */
+function isRawErrorViolation(text: string): boolean {
+  const startsWithErrorName = /^\s*(Error|TypeError|ReferenceError|SyntaxError|Exception):/i.test(text);
+  return startsWithErrorName || /at \S+ \([^)]+:\d+:\d+\)/.test(text);
+}
+
+/** Flags text containing a commit-hash-like token or CI run/status wording. */
+function isCommitOrCiViolation(text: string): boolean {
+  // A hash candidate must mix digits and a-f letters, so plain numbers (byte sizes, ports) and words like "defaced" pass.
+  if (/\b(?=[0-9a-f]*\d)(?=[0-9a-f]*[a-f])[0-9a-f]{7,40}\b/.test(text)) return true;
+  if (/\bCI\b.*\b(?:passed|failed|run|compatibility|flaky)\b/i.test(text) || /\b(?:passed|failed|run|compatibility|flaky)\b.*\bCI\b/i.test(text)) return true;
+  return /\bcompatibility\s+run\s+\d+/i.test(text);
+}
+
+/** True when the text holds more than two slash-led path fragments of at least two segments ("/seg/seg"). */
+function isPathHeavyViolation(text: string): boolean {
+  return (text.match(/\/[\w.-]+(?:\/[\w.-]+)+/g) || []).length > 2;
+}
+
+/** True when text opens with a status word (currently, now, pending, in progress, todo, wip) or mentions an in-flight step. */
+function isTemporaryStatusViolation(text: string): boolean {
+  const opensWithStatusWord = /^(currently|now|pending|in progress|todo|wip)\b/i.test(text);
+  return opensWithStatusWord || /\b(?:run npm test|tests? are running|next reply|before continuing)\b/i.test(text);
+}
diff --git a/tests/extractors.test.ts b/tests/extractors.test.ts
@@ -223,7 +223,7 @@ test("parseWorkspaceMemoryCandidates accepts bracketless candidate format", () =
 Memory candidates:
 - project Backend health improvements organized into phased milestones
 - reference Scrypt 參數必須是 N=16384, r=8, p=1
-- feedback 端口 9473 可能被舊進程佔用，需殺掉後重啟
+- feedback User prefers Traditional Chinese memory summaries
 - decision Use output.prompt to replace the default compaction template
 `;
 
@@ -451,14 +451,14 @@ test("parseWorkspaceMemoryCandidates allows benign ignore/instruction wording",
 Memory candidates:
 - [project] Use .gitignore to ignore generated files.
 - [reference] Instruction parser supports Markdown sections and bracketed memory types.
-- [decision] Prompt context uses a frozen workspace snapshot plus hot session state.
+- [decision] Use a frozen workspace snapshot plus hot session state for prompt context.
 `;
   const items = parseWorkspaceMemoryCandidates(summary);
 
   assert.equal(items.length, 3);
   assert.equal(items[0].text, "Use .gitignore to ignore generated files.");
   assert.equal(items[1].text, "Instruction parser supports Markdown sections and bracketed memory types.");
-  assert.equal(items[2].text, "Prompt context uses a frozen workspace snapshot plus hot session state.");
+  assert.equal(items[2].text, "Use a frozen workspace snapshot plus hot session state for prompt context.");
 });
 
 test("parseWorkspaceMemoryCandidates rejects direct system prompt override attempts", () => {
diff --git a/tests/fixtures/memory-quality-current-28.ts b/tests/fixtures/memory-quality-current-28.ts
@@ -0,0 +1,74 @@
+import type { LongTermMemoryEntry } from "../../src/types.ts";
+
+const now = "2026-04-28T00:00:00.000Z";
+
+/**
+ * Builds an active fixture memory entry stamped with the shared `now` time.
+ *
+ * `source` defaults to "compaction"; confidence is 1 for "explicit" entries
+ * and 0.75 for every other source.
+ */
+function mem(
+  id: string,
+  type: LongTermMemoryEntry["type"],
+  text: string,
+  source: LongTermMemoryEntry["source"] = "compaction",
+): LongTermMemoryEntry {
+  const confidence = source === "explicit" ? 1 : 0.75;
+  const stamps = { createdAt: now, updatedAt: now };
+  // Every fixture entry is created with status "active".
+  return { id, type, text, source, confidence, status: "active", ...stamps };
+}
+
+export const reviewerCurrent28Fixture: LongTermMemoryEntry[] = [
+  // High-value durable entries (stable preferences, rules, project facts). Expected to be accepted.
+  mem("good_feedback_language", "feedback", "User prefers architecture reviews in Traditional Chinese", "explicit"),
+  mem("good_feedback_direct", "feedback", "User wants direct architecture feedback with concrete file paths", "explicit"),
+  mem("good_feedback_no_manual_cleanup", "feedback", "User prefers automatic memory cleanup over manual cleanup instructions", "explicit"),
+  mem("good_decision_no_extra_api", "decision", "Do not add extra LLM API calls for memory consolidation"),
+  mem("good_decision_no_semantic_merge", "decision", "Memory dedupe must use exact canonical keys and generic URL/path identity only"),
+  mem("good_decision_no_render_tracking", "decision", "Do not use rendered-memory access tracking as evidence"),
+  mem("good_reference_frozen", "reference", "Workspace memory is rendered as a frozen system[1] snapshot; pending memories remain in hot session state until compaction"),
+  mem("good_project_plugin", "project", "The project is an OpenCode plugin using TypeScript and local JSON stores"),
+  mem("good_reference_accounting", "reference", "Promotion accounting reports promoted, absorbed, superseded, and rejected outcomes"),
+
+  // Pseudo feedback/decision/progress snapshots (completion notes, commits, CI runs, raw errors). Expected to be superseded/rejected.
+  mem("bad_feedback_wave_done", "feedback", "Wave 1 completed successfully and all tests passed"),
+  mem("bad_feedback_plan_done", "feedback", "Plan 1 critical stability fixes were implemented"),
+  mem("bad_feedback_session_note", "feedback", "The assistant reviewed the code reviewer feedback and updated the plan"),
+  mem("bad_feedback_impl_note", "feedback", "Implemented owner-aware pending journal cleanup in plugin.ts"),
+  mem("bad_decision_commit", "decision", "Commit 53aa6d3 completed consolidation accounting"),
+  mem("bad_decision_tests", "decision", "180 tests pass and 0 tests fail after the latest change"),
+  mem("bad_decision_pr_status", "decision", "PR1 is done and PR2 is ready to start"),
+  mem("bad_project_files", "project", "Modified src/plugin.ts src/workspace-memory.ts src/pending-journal.ts during the last wave"),
+  mem("bad_project_wave", "project", "Wave 3 finished after cache bounds and Bearer redaction were added"),
+  mem("bad_reference_commit", "reference", "Commit a762e86 contains the owner scope fix"),
+  mem("bad_reference_ci", "reference", "CI compatibility run 25033906652 passed"),
+  mem("bad_reference_error", "reference", "TypeError: Cannot read properties of undefined"),
+  mem("bad_project_current", "project", "Currently running npm test before continuing"),
+
+  // Borderline implementation facts. Expected to be rejected because they are not written as future rules.
+  mem("bad_decision_impl_detail", "decision", "dedupeLongTermEntriesWithAccounting was updated in the previous session"),
+  mem("bad_feedback_internal", "feedback", "The migration writes to disk when redaction changes content"),
+  mem("bad_reference_tmp", "reference", "storage.test.ts had a flaky cross-process test in CI"),
+
+  // Durable future-facing rules. Expected to be accepted.
+  mem("good_decision_quality", "decision", "Reject completion and progress statements before storing compaction memory candidates"),
+  mem("good_decision_quality_shared", "decision", "Use one shared memory quality gate for extraction and migration"),
+  mem("good_reference_quality_migration", "reference", "Quality cleanup migration supersedes low-quality compaction memories and does not touch explicit memories"),
+];
+
+/**
+ * Ids of the fixture entries the quality gate is expected to accept.
+ * Any fixture id missing from this set is expected to be rejected.
+ */
+export const expectedAcceptedFixtureIds = new Set([
+  // Durable user feedback.
+  "good_feedback_language", "good_feedback_direct", "good_feedback_no_manual_cleanup",
+  // Durable decisions.
+  "good_decision_no_extra_api", "good_decision_no_semantic_merge", "good_decision_no_render_tracking",
+  // Durable reference and project facts.
+  "good_reference_frozen", "good_project_plugin", "good_reference_accounting",
+  // Future-facing quality-gate rules.
+  "good_decision_quality", "good_decision_quality_shared", "good_reference_quality_migration",
+]);
diff --git a/tests/memory-quality-eval.test.ts b/tests/memory-quality-eval.test.ts
@@ -1,6 +1,8 @@
 import test from "node:test";
 import assert from "node:assert/strict";
-import { parseWorkspaceMemoryCandidates } from "../src/extractors.ts";
+import { extractExplicitMemories, parseWorkspaceMemoryCandidates } from "../src/extractors.ts";
+import { assessMemoryQuality } from "../src/memory-quality.ts";
+import { expectedAcceptedFixtureIds, reviewerCurrent28Fixture } from "./fixtures/memory-quality-current-28.ts";
 
 const acceptedCases = [
   {
@@ -64,6 +66,18 @@ const rejectedCases = [
     name: "temporary pending task",
     line: "- [decision] currently: run npm test before the next reply",
   },
+  {
+    name: "misclassified feedback completion snapshot",
+    line: "- [feedback] Wave 1 completed successfully and all tests passed",
+  },
+  {
+    name: "misclassified decision implementation note",
+    line: "- [decision] Implemented owner-aware cleanup in plugin.ts",
+  },
+  {
+    name: "session internal review note",
+    line: "- [feedback] The assistant reviewed the code reviewer feedback and updated the plan",
+  },
 ] as const;
 
 for (const item of acceptedCases) {
@@ -91,3 +105,40 @@ ${item.line}
     assert.equal(entries.length, 0);
   });
 }
+
+// Every fixture entry must be accepted exactly when its id is in the expected-accepted set.
+test("reviewer current-28 fixture keeps durable memories and rejects pseudo memories", () => {
+  for (const entry of reviewerCurrent28Fixture) {
+    const { accepted, reasons } = assessMemoryQuality(entry);
+    const shouldAccept = expectedAcceptedFixtureIds.has(entry.id);
+    // The failure message names the entry and the reason codes that fired.
+    const detail = `${entry.id}: ${entry.text} -> ${reasons.join(",")}`;
+    assert.equal(accepted, shouldAccept, detail);
+  }
+});
+
+// The same progress text must be rejected with "progress_snapshot" for every memory type.
+test("progress snapshot rejection is type independent", () =>
+  (["feedback", "project", "decision", "reference"] as const).forEach((type) => {
+    const { accepted, reasons } = assessMemoryQuality({ type, text: "Wave 2 completed successfully", source: "compaction" });
+    assert.equal(accepted, false, `${type} progress snapshots must reject`);
+    assert.ok(reasons.includes("progress_snapshot"));
+  }));
+
+test("feedback must be stable user preference or instruction", () => { // feedback-specific rule of the quality gate
+  assert.equal(assessMemoryQuality({ type: "feedback", text: "User prefers concise architecture reviews", source: "compaction" }).accepted, true); // "User prefers ..." wording is accepted
+  assert.equal(assessMemoryQuality({ type: "feedback", text: "Implemented owner-aware cleanup in plugin.ts", source: "compaction" }).accepted, false); // an implementation note is rejected
+});
+
+test("decision must be future-facing rule, not completed implementation note", () => { // decision-specific rule of the quality gate
+  assert.equal(assessMemoryQuality({ type: "decision", text: "Do not add semantic merge to memory dedupe", source: "compaction" }).accepted, true); // a "Do not ..." rule is accepted
+  assert.equal(assessMemoryQuality({ type: "decision", text: "Use the cache boundary that was chosen in ADR-2 for future memory rendering", source: "compaction" }).accepted, true); // a past-tense clause ("was chosen") alone does not reject a "Use ..." rule
+  assert.equal(assessMemoryQuality({ type: "decision", text: "Added semantic merge tests in the previous wave", source: "compaction" }).accepted, false); // a completed-work note is rejected
+});
+
+test("explicit memories bypass extraction quality gate", () => { // explicit "remember:" requests must still yield an entry
+  const entries = extractExplicitMemories("remember: Wave 1 completed successfully and all tests passed"); // text that the progress-snapshot rule would reject
+  assert.equal(entries.length, 1); // exactly one entry is extracted
+  assert.equal(entries[0].source, "explicit"); // and it is tagged as explicit
+  assert.match(entries[0].text, /Wave 1 completed/); // the snapshot-like wording is preserved in the entry text
+});