Skip to content

Commit 77d60ab

Browse files
committed
refactor: make memory dedupe repo-agnostic
1 parent 560f63f commit 77d60ab

3 files changed

Lines changed: 200 additions & 107 deletions

File tree

src/workspace-memory.ts

Lines changed: 67 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -313,72 +313,80 @@ export function workspaceMemoryExactKey(entry: Pick<LongTermMemoryEntry, "type"
313313
return `${entry.type}:${canonicalMemoryText(entry.text)}`;
314314
}
315315

316-
/** Extract entity/destination keys for project and reference dedup */
317-
function extractEntityKey(text: string): string | null {
318-
const normalized = canonicalMemoryText(text);
319-
// Check known key phrases (bilingual-friendly)
320-
// opencode + agenthub plugin system
321-
if (/opencode.*agenthub/i.test(normalized)) {
322-
return "opencode-agenthub plugin system";
316+
function normalizeUrlIdentity(raw: string): string | null {
317+
const cleaned = raw.replace(/[),.;:!?]+$/g, "");
318+
try {
319+
const url = new URL(cleaned);
320+
if (url.protocol !== "http:" && url.protocol !== "https:") return null;
321+
url.protocol = url.protocol.toLowerCase();
322+
url.hostname = url.hostname.toLowerCase();
323+
url.hash = "";
324+
if (url.pathname.length > 1) {
325+
url.pathname = url.pathname.replace(/\/+$/g, "");
326+
}
327+
return `url:${url.toString()}`;
328+
} catch {
329+
return null;
323330
}
324-
// For generic config references, fall back to canonical text dedup — no entity key
325-
return null;
326331
}
327332

328-
/** Extract decision topic key for supersession detection */
329-
function decisionTopicKey(text: string): string | null {
330-
const normalized = text.toLowerCase();
331-
// Parser format versions
332-
if (/parser.*formats?|supports?\s*\d+\s*format/i.test(normalized)) {
333-
return "parser-supported-formats";
334-
}
335-
// Compaction template replacement
336-
if (/compaction.*template|output\.prompt|template.*replace/i.test(normalized)) {
337-
return "compaction-template-replacement";
338-
}
339-
// Plugin loading
340-
if (/plugin.*load|npm.*cache|plugin.*config/i.test(normalized)) {
341-
return "plugin-loading-config";
342-
}
343-
// Output format changes (purple/italic, YAML frontmatter, etc)
344-
if (/purple.*italic|markup|markdown.*render|frontmatter/i.test(normalized)) {
345-
return "output-format-rendering";
346-
}
347-
return null;
333+
function normalizePathIdentity(raw: string): string | null {
334+
const unwrapped = raw
335+
.trim()
336+
.replace(/^[`"']+|[`"']+$/g, "")
337+
.replace(/[),.;:!?]+$/g, "")
338+
.replace(/\\+/g, "/");
339+
340+
if (!unwrapped) return null;
341+
const collapsed = unwrapped.startsWith("/")
342+
? `/${unwrapped.slice(1).replace(/\/+$/g, "/").replace(/\/+/g, "/")}`
343+
: unwrapped.replace(/\/+/g, "/");
344+
const withoutTrailingSlash = collapsed.length > 1 ? collapsed.replace(/\/+$/g, "") : collapsed;
345+
return `path:${withoutTrailingSlash}`;
348346
}
349347

350-
/** Extract feedback topic key for supersession detection */
351-
function feedbackTopicKey(text: string): string | null {
352-
const normalized = text.toLowerCase();
353-
// Purple/italic rendering issue
354-
if (/purple.*italic/i.test(normalized)) {
355-
return "purple-italic-rendering";
356-
}
357-
// Browser login/server errors (500 internal_error)
358-
if (/login.*500|500.*internal|internal_error|server.*error/i.test(normalized)) {
359-
return "server-error";
360-
}
361-
// Port occupied / environment issues
362-
if (/port.*occup|9473|||/i.test(normalized)) {
363-
return "port-occupied-environment";
348+
function isConcretePathIdentity(pathIdentity: string): boolean {
349+
const path = pathIdentity.slice("path:".length);
350+
if (!path || path === "." || path === "..") return false;
351+
352+
if (path.startsWith("/")) return true;
353+
if (/^\.\.?\//.test(path)) return true;
354+
if (/^\.[A-Za-z0-9_.-]+\//.test(path)) return true;
355+
if (/^[A-Za-z0-9_.-]+\//.test(path)) return true;
356+
return /\.(?:json|jsonc|ts|tsx|js|jsx|mjs|cjs|md|yaml|yml|toml|lock|config)$/i.test(path);
357+
}
358+
359+
function normalizeConcretePathIdentity(raw: string): string | null {
360+
const pathIdentity = normalizePathIdentity(raw);
361+
if (!pathIdentity) return null;
362+
return isConcretePathIdentity(pathIdentity) ? pathIdentity : null;
363+
}
364+
365+
function extractConcreteIdentityKey(text: string): string | null {
366+
const urlMatch = text.match(/https?:\/\/[^\s`"'<>]+/i);
367+
if (urlMatch) {
368+
const urlIdentity = normalizeUrlIdentity(urlMatch[0]);
369+
if (urlIdentity) return urlIdentity;
364370
}
365-
// Theme preferences
366-
if (/theme|dark.*light|prefer.*theme/i.test(normalized)) {
367-
return "theme-preference";
371+
372+
const wrappedPathPattern = /[`"']([^`"']+)[`"']/g;
373+
for (const match of text.matchAll(wrappedPathPattern)) {
374+
const pathIdentity = normalizeConcretePathIdentity(match[1]);
375+
if (pathIdentity) return pathIdentity;
368376
}
369-
return null;
377+
378+
const pathMatch = text.match(/(?:\/[^\s`"'<>]+|(?:\.{1,2}[\\/]|[A-Za-z0-9_.-]+[\\/])[^\s`"'<>]+|[A-Za-z0-9_.-]+\.(?:json|jsonc|ts|tsx|js|jsx|mjs|cjs|md|yaml|yml|toml|lock|config))(?:\b|$)/);
379+
if (!pathMatch) return null;
380+
381+
return normalizeConcretePathIdentity(pathMatch[0]);
370382
}
371383

372384
export function workspaceMemoryIdentityKey(entry: Pick<LongTermMemoryEntry, "type" | "text">): string {
373385
if (entry.type === "project" || entry.type === "reference") {
374-
return `${entry.type}:${extractEntityKey(entry.text) ?? canonicalMemoryText(entry.text)}`;
375-
}
376-
377-
if (entry.type === "feedback") {
378-
return `${entry.type}:${feedbackTopicKey(entry.text) ?? canonicalMemoryText(entry.text)}`;
386+
return `${entry.type}:${extractConcreteIdentityKey(entry.text) ?? canonicalMemoryText(entry.text)}`;
379387
}
380388

381-
return `decision:${decisionTopicKey(entry.text) ?? canonicalMemoryText(entry.text)}`;
389+
return workspaceMemoryExactKey(entry);
382390
}
383391

384392
function consolidationEvent(
@@ -479,42 +487,33 @@ export function dedupeLongTermEntriesWithAccounting(entries: LongTermMemoryEntry
479487
const absorbed: MemoryConsolidationEvent[] = [];
480488
const superseded: MemoryConsolidationEvent[] = [];
481489

482-
// For project/reference/feedback: detect entity keys FIRST, then dedupe by entity OR canonical
490+
// For project/reference/feedback: dedupe by concrete identity or exact canonical text.
483491
const projectRefEntries = entries.filter(e => e.type === "project" || e.type === "reference" || e.type === "feedback");
484492

485-
// Build entity key dedup for project/reference/feedback
493+
// Build identity key dedup for project/reference/feedback.
486494
const entityDeduped = new Map<string, LongTermMemoryEntry>();
487495
for (const entry of projectRefEntries) {
488496
const key = workspaceMemoryIdentityKey(entry);
489-
const hasTopicIdentity = key !== workspaceMemoryExactKey(entry);
490497

491498
const existing = entityDeduped.get(key);
492499
if (!existing) {
493500
entityDeduped.set(key, entry);
494501
} else {
495-
// Feedback topic conflicts use supersession mode (newer beats longer)
496-
const mode = entry.type === "feedback" && hasTopicIdentity ? "supersession" as const : "entity" as const;
497-
const retained = chooseBetterMemory(entry, existing, mode);
502+
const retained = chooseBetterMemory(entry, existing, "entity");
498503
const dropped = retained === entry ? existing : entry;
499504
const reason = workspaceMemoryExactKey(entry) === workspaceMemoryExactKey(existing)
500505
? "absorbed_exact" as const
501-
: mode === "supersession"
502-
? "superseded_existing" as const
503-
: "absorbed_identity" as const;
506+
: "absorbed_identity" as const;
504507

505-
if (reason === "superseded_existing") {
506-
superseded.push(consolidationEvent(dropped, reason, retained));
507-
} else {
508-
absorbed.push(consolidationEvent(dropped, reason, retained));
509-
}
508+
absorbed.push(consolidationEvent(dropped, reason, retained));
510509

511510
if (retained === entry) {
512511
entityDeduped.set(key, entry);
513512
}
514513
}
515514
}
516515

517-
// For decisions: detect topic keys for supersession, or use canonical
516+
// For decisions: exact canonical duplicates only.
518517
const decisionEntries = entries.filter(e => e.type === "decision");
519518
const decisionDeduped = new Map<string, LongTermMemoryEntry>();
520519
for (const entry of decisionEntries) {

tests/promotion-accounting.test.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ test("accountPendingPromotions ignores superseded exact keys when detecting exis
9999
assert.deepEqual([...result.clearableKeys], [memoryKey(pending[0])]);
100100
});
101101

102-
test("accountPendingPromotions marks same-topic decision represented after normalization as absorbed", () => {
102+
test("accountPendingPromotions does not absorb same-topic decision without exact match", () => {
103103
const existing = mem("existing", "Parser supports 2 candidate formats.", {
104104
type: "decision",
105105
source: "compaction",
@@ -120,8 +120,8 @@ test("accountPendingPromotions marks same-topic decision represented after norma
120120
const result = accountPendingPromotions({ pending, before, after });
121121

122122
assert.equal(result.promotedKeys.size, 0);
123-
assert.deepEqual([...result.absorbedKeys], [memoryKey(pending[0])]);
124-
assert.equal(result.rejectedKeys.size, 0);
123+
assert.equal(result.absorbedKeys.size, 0);
124+
assert.deepEqual([...result.rejectedKeys], [memoryKey(pending[0])]);
125125
});
126126

127127
test("accountPendingPromotions keeps pending memory rejected when no equivalent survived", () => {

0 commit comments

Comments
 (0)