Skip to content

Commit ffb0477

Browse files
committed
fix: unify workspace memory quality gate
1 parent a1b9bf4 commit ffb0477

5 files changed

Lines changed: 230 additions & 31 deletions

File tree

src/extractors.ts

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { createHash } from "crypto";
22
import type { ActiveFile, LongTermMemoryEntry, LongTermType, OpenError } from "./types.ts";
33
import { LONG_TERM_LIMITS } from "./types.ts";
4+
import { assessMemoryQuality } from "./memory-quality.ts";
45

56
function id(prefix: string): string {
67
return `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
@@ -51,7 +52,7 @@ export function extractExplicitMemories(text: string): LongTermMemoryEntry[] {
5152
// 韓文(長詞優先):기억해줘/메모해줘 must come before 기억해/메모해
5253
/(?:^|\n)\s*(?:|| |||)[:,]?\s*(.+)$/gim,
5354
// 英文:remember this/that - 必須在行首,避免 "to remember" 非指令匹配
54-
/(?:^|\n)\s*(?:please\s+)?remember\s+(?:this|that)?[:,]?\s*(.+)$/gim,
55+
/(?:^|\n)\s*(?:please\s+)?remember(?:\s+(?:this|that))?[:,]?\s*(.+)$/gim,
5556
// save/add to memory
5657
/(?:^|\n)\s*(?:please\s+)?(?:save|add)\s+(?:this|that)?\s*(?:to|in)\s+memory[:,]?\s*(.+)$/gim,
5758
// commit to memory
@@ -199,7 +200,7 @@ function normalizeCandidateBody(body: string): { text: string; hadTrigger: boole
199200
/(?:|)?(?:|)?(?:|||||)(?:|||||)?[:,]?\s*(.+)$/im,
200201
/(?:|||)[:,]?\s*(.+)$/im,
201202
/(?:|| |||)[:,]?\s*(.+)$/im,
202-
/(?:please\s+)?remember\s+(?:this|that)?[:,]?\s*(.+)$/im,
203+
/(?:please\s+)?remember(?:\s+(?:this|that))?[:,]?\s*(.+)$/im,
203204
/(?:please\s+)?(?:save|add)\s+(?:this|that)?\s*(?:to|in)\s+memory[:,]?\s*(.+)$/im,
204205
/(?:please\s+)?commit\s+(?:this|that)?\s*to memory[:,]?\s*(.+)$/im,
205206
];
@@ -273,35 +274,12 @@ function shouldAcceptWorkspaceMemoryCandidate(
273274
const pathCount = (text.match(/\/[\w.-]+(\/[\w.-]+)+/g) || []).length;
274275
if (pathCount > 2) return false;
275276

276-
// Session-specific progress snapshots for project type
277-
if (entry.type === "project") {
278-
if (isProjectSnapshotViolation(text)) return false;
279-
}
277+
const quality = assessMemoryQuality({ type: entry.type, text, source: "compaction" });
278+
if (!quality.accepted) return false;
280279

281280
return true;
282281
}
283282

284-
function isProjectSnapshotViolation(text: string): boolean {
285-
// Test/suite counts
286-
if (/\d+\s+tests?\s+pass(?:ed)?/i.test(text)) return true;
287-
if (/\d+\s+suites?\s+(?:pass|fail)/i.test(text)) return true;
288-
289-
// File counts with snapshot/process context only, not static limits
290-
if (/\d+\s*(?:|)?\s*(?:files?|)/i.test(text)) {
291-
const hasSnapshotContext = /|synced|uploaded|downloaded|completed|generated|created|modified|processed|/i.test(text);
292-
const hasLimitContext = /limit|max|maximum|min|minimum|supports?|allowed|per\s+(?:batch|request|upload)/i.test(text);
293-
if (hasSnapshotContext && !hasLimitContext) return true;
294-
}
295-
296-
// Phase/Wave/Sprint/Milestone/Task progress
297-
if (/(?:phases?|waves?|sprints?|milestones?|tasks?)\s*\d+(?:\s*[-]\s*\d+)?/i.test(text)) {
298-
if (/completed|done|finished|/i.test(text)) return true;
299-
}
300-
if (/(?:|).{0,30}(?:phases?|waves?|sprints?|milestones?|tasks?)/i.test(text)) return true;
301-
302-
return false;
303-
}
304-
305283
/**
306284
* Extract candidate block from summary using multiple formats.
307285
* Supports: Plain text label, Markdown section, legacy XML.

src/memory-quality.ts

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import type { LongTermMemoryEntry, LongTermSource } from "./types.ts";
2+
3+
export type MemoryQualityInput = Pick<LongTermMemoryEntry, "type" | "text"> & {
4+
source?: LongTermSource;
5+
};
6+
7+
export type MemoryQualityResult = {
8+
accepted: boolean;
9+
reasons: string[];
10+
};
11+
12+
export function assessMemoryQuality(entry: MemoryQualityInput): MemoryQualityResult {
13+
const reasons: string[] = [];
14+
const text = entry.text.trim();
15+
16+
if (text.length === 0) reasons.push("empty");
17+
if (isProgressSnapshotViolation(text)) reasons.push("progress_snapshot");
18+
if (isRawErrorViolation(text)) reasons.push("raw_error");
19+
if (isCommitOrCiViolation(text)) reasons.push("commit_or_ci_snapshot");
20+
if (isPathHeavyViolation(text)) reasons.push("path_heavy");
21+
if (isTemporaryStatusViolation(text)) reasons.push("temporary_status");
22+
if (entry.type === "feedback" && isFeedbackQualityViolation(text)) reasons.push("bad_feedback");
23+
if (entry.type === "decision" && isDecisionQualityViolation(text)) reasons.push("bad_decision");
24+
25+
return { accepted: reasons.length === 0, reasons };
26+
}
27+
28+
export function isProgressSnapshotViolation(text: string): boolean {
29+
if (/\d+\s+tests?\s+pass(?:ed)?/i.test(text)) return true;
30+
if (/\d+\s+suites?\s+(?:pass|fail)/i.test(text)) return true;
31+
32+
if (/\d+\s*(?:|)?\s*(?:files?|)/i.test(text)) {
33+
const hasSnapshotContext = /|synced|uploaded|downloaded|completed|generated|created|modified|processed|/i.test(text);
34+
const hasLimitContext = /limit|max|maximum|min|minimum|supports?|allowed|per\s+(?:batch|request|upload)/i.test(text);
35+
if (hasSnapshotContext && !hasLimitContext) return true;
36+
}
37+
38+
if (/\b(?:completed|done|finished|implemented|added|updated|fixed|reviewed|passed|modified)\b/i.test(text)) {
39+
if (/\b(?:wave|phase|task|plan|pr|commit|ci|test|suite|implementation|session|change|fix|review|file)\b/i.test(text)) return true;
40+
}
41+
if (/(?:||||).{0,40}(?:wave|phase|task|plan|PR|||||)/iu.test(text)) return true;
42+
if (/(?:phases?|waves?|sprints?|milestones?|tasks?)\s*\d+(?:\s*[-]\s*\d+)?/i.test(text)) {
43+
if (/completed|done|finished||/i.test(text)) return true;
44+
}
45+
if (/(?:|).{0,30}(?:phases?|waves?|sprints?|milestones?|tasks?)/i.test(text)) return true;
46+
if (/\b(?:currently|right now|latest change|previous session|last wave|next step)\b/i.test(text)) return true;
47+
return false;
48+
}
49+
50+
export function isFeedbackQualityViolation(text: string): boolean {
51+
const stablePreference = /\b(?:user|the user)\s+(?:prefers|wants|asked|expects|requires|likes|dislikes)\b/i.test(text)
52+
|| /\b(?:prefer|preference|going forward|from now on|always|never)\b/i.test(text)
53+
|| /(?:使||).{0,12}(?:|||)/u.test(text)
54+
|| /(?:|||).{0,20}(?:使|||)/u.test(text);
55+
56+
if (stablePreference) return false;
57+
58+
const internalNote = /\b(?:implemented|updated|fixed|reviewed|added|changed|modified|created|writes|wrote)\b/i.test(text);
59+
if (internalNote) return true;
60+
61+
return true;
62+
}
63+
64+
export function isDecisionQualityViolation(text: string): boolean {
65+
const futureRule = /\b(?:use|keep|prefer|avoid|do not|don't|must|should|never|always|require|choose|reject)\b/i.test(text)
66+
|| /(?:使|||||||||)/u.test(text);
67+
if (!futureRule) return true;
68+
if (/\b(?:implemented|added|updated|fixed|completed|reviewed)\b/i.test(text)) return true;
69+
if (/\b(?:was|were|has been|had been)\b/i.test(text) && /\b(?:previous|last|latest|this session|this wave|already)\b/i.test(text)) return true;
70+
return false;
71+
}
72+
73+
function isRawErrorViolation(text: string): boolean {
74+
if (/^\s*(Error|TypeError|ReferenceError|SyntaxError|Exception):/i.test(text)) return true;
75+
if (/at \S+ \([^)]+:\d+:\d+\)/.test(text)) return true;
76+
return false;
77+
}
78+
79+
function isCommitOrCiViolation(text: string): boolean {
80+
if (/\b[0-9a-f]{7,40}\b/.test(text)) return true;
81+
if (/\bCI\b.*\b(?:passed|failed|run|compatibility|flaky)\b/i.test(text)) return true;
82+
if (/\b(?:passed|failed|run|compatibility|flaky)\b.*\bCI\b/i.test(text)) return true;
83+
if (/\bcompatibility\s+run\s+\d+/i.test(text)) return true;
84+
return false;
85+
}
86+
87+
function isPathHeavyViolation(text: string): boolean {
88+
const pathCount = (text.match(/\/[\w.-]+(?:\/[\w.-]+)+/g) || []).length;
89+
return pathCount > 2;
90+
}
91+
92+
function isTemporaryStatusViolation(text: string): boolean {
93+
if (/^(currently|now|pending|in progress|todo|wip)\b/i.test(text)) return true;
94+
if (/\b(?:run npm test|tests? are running|next reply|before continuing)\b/i.test(text)) return true;
95+
return false;
96+
}

tests/extractors.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ test("parseWorkspaceMemoryCandidates accepts bracketless candidate format", () =
223223
Memory candidates:
224224
- project Backend health improvements organized into phased milestones
225225
- reference Scrypt 參數必須是 N=16384, r=8, p=1
226-
- feedback 端口 9473 可能被舊進程佔用,需殺掉後重啟
226+
- feedback User prefers Traditional Chinese memory summaries
227227
- decision Use output.prompt to replace the default compaction template
228228
`;
229229

@@ -451,14 +451,14 @@ test("parseWorkspaceMemoryCandidates allows benign ignore/instruction wording",
451451
Memory candidates:
452452
- [project] Use .gitignore to ignore generated files.
453453
- [reference] Instruction parser supports Markdown sections and bracketed memory types.
454-
- [decision] Prompt context uses a frozen workspace snapshot plus hot session state.
454+
- [decision] Use a frozen workspace snapshot plus hot session state for prompt context.
455455
`;
456456
const items = parseWorkspaceMemoryCandidates(summary);
457457

458458
assert.equal(items.length, 3);
459459
assert.equal(items[0].text, "Use .gitignore to ignore generated files.");
460460
assert.equal(items[1].text, "Instruction parser supports Markdown sections and bracketed memory types.");
461-
assert.equal(items[2].text, "Prompt context uses a frozen workspace snapshot plus hot session state.");
461+
assert.equal(items[2].text, "Use a frozen workspace snapshot plus hot session state for prompt context.");
462462
});
463463

464464
test("parseWorkspaceMemoryCandidates rejects direct system prompt override attempts", () => {
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import type { LongTermMemoryEntry } from "../../src/types.ts";
2+
3+
const now = "2026-04-28T00:00:00.000Z";
4+
5+
function mem(
6+
id: string,
7+
type: LongTermMemoryEntry["type"],
8+
text: string,
9+
source: LongTermMemoryEntry["source"] = "compaction",
10+
): LongTermMemoryEntry {
11+
return {
12+
id,
13+
type,
14+
text,
15+
source,
16+
confidence: source === "explicit" ? 1 : 0.75,
17+
status: "active",
18+
createdAt: now,
19+
updatedAt: now,
20+
};
21+
}
22+
23+
export const reviewerCurrent28Fixture: LongTermMemoryEntry[] = [
24+
// High-value durable entries. These should survive.
25+
mem("good_feedback_language", "feedback", "User prefers architecture reviews in Traditional Chinese", "explicit"),
26+
mem("good_feedback_direct", "feedback", "User wants direct architecture feedback with concrete file paths", "explicit"),
27+
mem("good_feedback_no_manual_cleanup", "feedback", "User prefers automatic memory cleanup over manual cleanup instructions", "explicit"),
28+
mem("good_decision_no_extra_api", "decision", "Do not add extra LLM API calls for memory consolidation"),
29+
mem("good_decision_no_semantic_merge", "decision", "Memory dedupe must use exact canonical keys and generic URL/path identity only"),
30+
mem("good_decision_no_render_tracking", "decision", "Do not use rendered-memory access tracking as evidence"),
31+
mem("good_reference_frozen", "reference", "Workspace memory is rendered as a frozen system[1] snapshot; pending memories remain in hot session state until compaction"),
32+
mem("good_project_plugin", "project", "The project is an OpenCode plugin using TypeScript and local JSON stores"),
33+
mem("good_reference_accounting", "reference", "Promotion accounting reports promoted, absorbed, superseded, and rejected outcomes"),
34+
35+
// Pseudo feedback/decision/progress snapshots. These should be superseded/rejected.
36+
mem("bad_feedback_wave_done", "feedback", "Wave 1 completed successfully and all tests passed"),
37+
mem("bad_feedback_plan_done", "feedback", "Plan 1 critical stability fixes were implemented"),
38+
mem("bad_feedback_session_note", "feedback", "The assistant reviewed the code reviewer feedback and updated the plan"),
39+
mem("bad_feedback_impl_note", "feedback", "Implemented owner-aware pending journal cleanup in plugin.ts"),
40+
mem("bad_decision_commit", "decision", "Commit 53aa6d3 completed consolidation accounting"),
41+
mem("bad_decision_tests", "decision", "180 tests pass and 0 tests fail after the latest change"),
42+
mem("bad_decision_pr_status", "decision", "PR1 is done and PR2 is ready to start"),
43+
mem("bad_project_files", "project", "Modified src/plugin.ts src/workspace-memory.ts src/pending-journal.ts during the last wave"),
44+
mem("bad_project_wave", "project", "Wave 3 finished after cache bounds and Bearer redaction were added"),
45+
mem("bad_reference_commit", "reference", "Commit a762e86 contains the owner scope fix"),
46+
mem("bad_reference_ci", "reference", "CI compatibility run 25033906652 passed"),
47+
mem("bad_reference_error", "reference", "TypeError: Cannot read properties of undefined"),
48+
mem("bad_project_current", "project", "Currently running npm test before continuing"),
49+
50+
// Borderline implementation facts. Reject unless they are written as future rules.
51+
mem("bad_decision_impl_detail", "decision", "dedupeLongTermEntriesWithAccounting was updated in the previous session"),
52+
mem("bad_feedback_internal", "feedback", "The migration writes to disk when redaction changes content"),
53+
mem("bad_reference_tmp", "reference", "storage.test.ts had a flaky cross-process test in CI"),
54+
55+
// Durable future-facing rules. These should survive.
56+
mem("good_decision_quality", "decision", "Reject completion and progress statements before storing compaction memory candidates"),
57+
mem("good_decision_quality_shared", "decision", "Use one shared memory quality gate for extraction and migration"),
58+
mem("good_reference_quality_migration", "reference", "Quality cleanup migration supersedes low-quality compaction memories and does not touch explicit memories"),
59+
];
60+
61+
export const expectedAcceptedFixtureIds = new Set([
62+
"good_feedback_language",
63+
"good_feedback_direct",
64+
"good_feedback_no_manual_cleanup",
65+
"good_decision_no_extra_api",
66+
"good_decision_no_semantic_merge",
67+
"good_decision_no_render_tracking",
68+
"good_reference_frozen",
69+
"good_project_plugin",
70+
"good_reference_accounting",
71+
"good_decision_quality",
72+
"good_decision_quality_shared",
73+
"good_reference_quality_migration",
74+
]);

tests/memory-quality-eval.test.ts

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import test from "node:test";
22
import assert from "node:assert/strict";
3-
import { parseWorkspaceMemoryCandidates } from "../src/extractors.ts";
3+
import { extractExplicitMemories, parseWorkspaceMemoryCandidates } from "../src/extractors.ts";
4+
import { assessMemoryQuality } from "../src/memory-quality.ts";
5+
import { expectedAcceptedFixtureIds, reviewerCurrent28Fixture } from "./fixtures/memory-quality-current-28.ts";
46

57
const acceptedCases = [
68
{
@@ -64,6 +66,18 @@ const rejectedCases = [
6466
name: "temporary pending task",
6567
line: "- [decision] currently: run npm test before the next reply",
6668
},
69+
{
70+
name: "misclassified feedback completion snapshot",
71+
line: "- [feedback] Wave 1 completed successfully and all tests passed",
72+
},
73+
{
74+
name: "misclassified decision implementation note",
75+
line: "- [decision] Implemented owner-aware cleanup in plugin.ts",
76+
},
77+
{
78+
name: "session internal review note",
79+
line: "- [feedback] The assistant reviewed the code reviewer feedback and updated the plan",
80+
},
6781
] as const;
6882

6983
for (const item of acceptedCases) {
@@ -91,3 +105,40 @@ ${item.line}
91105
assert.equal(entries.length, 0);
92106
});
93107
}
108+
109+
test("reviewer current-28 fixture keeps durable memories and rejects pseudo memories", () => {
110+
for (const entry of reviewerCurrent28Fixture) {
111+
const result = assessMemoryQuality(entry);
112+
assert.equal(
113+
result.accepted,
114+
expectedAcceptedFixtureIds.has(entry.id),
115+
`${entry.id}: ${entry.text} -> ${result.reasons.join(",")}`,
116+
);
117+
}
118+
});
119+
120+
test("progress snapshot rejection is type independent", () => {
121+
for (const type of ["feedback", "project", "decision", "reference"] as const) {
122+
const result = assessMemoryQuality({ type, text: "Wave 2 completed successfully", source: "compaction" });
123+
assert.equal(result.accepted, false, `${type} progress snapshots must reject`);
124+
assert.ok(result.reasons.includes("progress_snapshot"));
125+
}
126+
});
127+
128+
test("feedback must be stable user preference or instruction", () => {
129+
assert.equal(assessMemoryQuality({ type: "feedback", text: "User prefers concise architecture reviews", source: "compaction" }).accepted, true);
130+
assert.equal(assessMemoryQuality({ type: "feedback", text: "Implemented owner-aware cleanup in plugin.ts", source: "compaction" }).accepted, false);
131+
});
132+
133+
test("decision must be future-facing rule, not completed implementation note", () => {
134+
assert.equal(assessMemoryQuality({ type: "decision", text: "Do not add semantic merge to memory dedupe", source: "compaction" }).accepted, true);
135+
assert.equal(assessMemoryQuality({ type: "decision", text: "Use the cache boundary that was chosen in ADR-2 for future memory rendering", source: "compaction" }).accepted, true);
136+
assert.equal(assessMemoryQuality({ type: "decision", text: "Added semantic merge tests in the previous wave", source: "compaction" }).accepted, false);
137+
});
138+
139+
test("explicit memories bypass extraction quality gate", () => {
140+
const entries = extractExplicitMemories("remember: Wave 1 completed successfully and all tests passed");
141+
assert.equal(entries.length, 1);
142+
assert.equal(entries[0].source, "explicit");
143+
assert.match(entries[0].text, /Wave 1 completed/);
144+
});

0 commit comments

Comments
 (0)