From a31d2d42e40e12111cf6a5c1aebb1a664b0602c5 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Wed, 24 Jun 2026 02:00:44 +0300 Subject: [PATCH] feat(design-audit): make reference-grounded redesign job-first, not aesthetic-first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The engine grounded every page in an exemplar's visual DNA and judged on visual craft, so it regressed functional pages into generic brochures (docs lost its table of contents + density for marketing cards; an aggregator dropped 30 items to 9; a dashboard shed services into spacious cards). - generate/prompt.ts: persona art director -> product designer; hard rules in priority order — task-first, preserve affordances (never delete nav/ToC), preserve density where it is the value, right-size (don't reskin a tool into a landing page), exemplar is craft-only not a template. Plus a data-driven FUNCTIONAL CONTRACT derived from the page's own DNA (nav count, density, archetype) — density required only when the page is measured dense. - judge/prompt.ts: score task fitness + functional preservation before visual craft; a polished direction that strips nav or density loses. Validated by re-running the regressed pages: docs keeps its ToC + nav + dense code; HN keeps all 30 stories; the dashboard stays a dense service grid. 
--- .changeset/job-first-redesign-engine.md | 11 ++ src/design/audit/reference/generate/prompt.ts | 103 +++++++++++++----- src/design/audit/reference/judge/prompt.ts | 8 +- tests/design-audit-reference-generate.test.ts | 64 ++++++++++- tests/design-audit-reference-judge.test.ts | 19 +++- 5 files changed, 165 insertions(+), 40 deletions(-) create mode 100644 .changeset/job-first-redesign-engine.md diff --git a/.changeset/job-first-redesign-engine.md b/.changeset/job-first-redesign-engine.md new file mode 100644 index 0000000..22c69ba --- /dev/null +++ b/.changeset/job-first-redesign-engine.md @@ -0,0 +1,11 @@ +--- +'@tangle-network/browser-agent-driver': minor +--- + +design-audit (reference-grounded): make the redesign engine job-first instead of aesthetic-first. The old engine grounded every page in a world-class exemplar's visual DNA and judged on visual craft, so it regressed functional pages into generic brochures — a docs page lost its table-of-contents and dense reference content for two marketing cards and a hero; an aggregator dropped from 30 items to 9; a status dashboard shed services into spacious cards. The fix: + +- **Generator** (`reference/generate/prompt.ts`): persona reframed from art director to product designer. New hard rules in priority order — task-first (design for the page's users and the job in its intent) → preserve functional affordances (never delete navigation/ToC/search to look cleaner) → preserve density where it is the value (docs/dashboards/feeds keep their item count) → right-size the intervention (never turn one kind of page into another) → the exemplar is a source of visual craft only, never a structural template. 
+- **Functional contract**: a per-page preservation block derived from the page's own measured DNA (navigation-affordance count, layout density, archetype) so "keep what works" is concrete and data-driven, not exhortation — and density is required only when the page is actually measured dense, so a genuinely sparse page is never forced to stay dense. +- **Ranker/judge** (`reference/judge/prompt.ts`): scores task fitness and functional preservation BEFORE visual craft; a polished direction that removes navigation or reduces density loses. "Fit to the reference" counts only as visual craft. + +Validated by re-running the regressed pages: docs now keeps its ToC + prev/next nav + dense code examples; HN keeps all 30 stories + nav; the status dashboard stays a dense service grid with real values. No provider coupling; flag-gated reference engine only. diff --git a/src/design/audit/reference/generate/prompt.ts b/src/design/audit/reference/generate/prompt.ts index b0af3af..8f61757 100644 --- a/src/design/audit/reference/generate/prompt.ts +++ b/src/design/audit/reference/generate/prompt.ts @@ -43,35 +43,48 @@ const truncate = (s: string, max: number): string => * half of every generation call is byte-identical run-to-run. */ const GENERATION_SYSTEM_PROMPT = [ - 'You are a world-class art director. You translate ONE concrete, named', - 'world-class reference into a single coherent, buildable redesign direction for', - 'the page under audit — not a menu of options, one opinionated direction.', + 'You are a senior product designer improving a REAL page so its actual users', + 'accomplish their tasks faster and more easily. A named world-class reference', + 'is provided ONLY as a source of visual craft — type, colour, spacing rhythm,', + 'and motion. It is NOT a template: never copy its structure, layout archetype,', + 'or content into the page. 
Judge your own work by the page\'s JOB, not by how', + 'decorative it looks.', '', - 'Hard rules:', - '- Ground every decision in the named REFERENCE EXEMPLAR below. Echo its id in', - ' `groundedInExemplarIds`. NEVER invent, guess, or cite an exemplar id you', - ' were not given.', - '- Redesign the page\'s OWN content. You may restyle, regroup, and re-rank what', - ' the page actually contains, but NEVER fabricate content it does not have — no', - ' invented metrics, counts, dates, statuses, activity feeds, table rows, or', - ' whole sections of made-up data. The exemplar dictates HOW the page looks', - ' (type, colour, motion, spacing, hierarchy), never WHAT content it contains.', - '- Match the page\'s real content volume. If the page is sparse, keep the', - ' redesign proportionally restrained — generous whitespace around a few', - ' well-set elements is a valid world-class result. Do not manufacture content', - ' to fill the exemplar\'s denser layout.', - '- Specify concrete values: hex colors, px type sizes, ms durations, and real', - ' revised copy strings — never placeholders like "TODO" or "lorem ipsum".', - ' Revised copy must restate the page\'s actual message, not assert new facts.', - '- The ASCII layout must be a real box-drawing diagram of the proposed page', - ' structure, top to bottom.', - '- Preserve or improve the page\'s measured accessibility (contrast, a11y).', - ' Never propose a change that would regress it.', - '- If the REFERENCE EXEMPLAR\'s DNA reports scroll-driven motion (scroll', - ' reveals, sticky pinning, or parallax) and the page does not, propose concrete', - ' scroll interactions in `motionSpec.cues`, naming the specific reveal, pin, or', - ' parallax grounded in that exemplar. NEVER invent scroll motion the exemplar', - ' does not show.', + 'Hard rules, in PRIORITY ORDER (a lower rule never justifies breaking a higher one):', + '- TASK FIRST. Design for the page\'s primary users and the job in its INTENT', + ' below. 
Every change must make that job easier. Aesthetics serve the task —', + ' never sacrifice the task to look more like the reference.', + '- PRESERVE FUNCTIONAL AFFORDANCES. Keep every way users find and move through', + ' the page: navigation, table of contents, menus, breadcrumbs, search,', + ' filters, pagination, tabs. Offer equivalent-or-better wayfinding — NEVER', + ' delete navigation to look cleaner.', + '- PRESERVE DENSITY WHERE IT IS THE VALUE. For information-dense or reference', + ' pages (docs, dashboards, status/console, data tables, lists, feeds,', + ' aggregators) density and scannability ARE the craft. Keep at least as many', + ' items / rows / sections visible as the original; do not trade content for', + ' whitespace, hero banners, or marketing-style cards. The reference informs', + ' polish WITHIN that density, never a reduction of it.', + '- RIGHT-SIZE THE INTERVENTION. If the page already serves its job well, the', + ' best redesign is surgical: refine type, colour, spacing, and hierarchy while', + ' keeping the structure and affordances. NEVER turn one kind of page into', + ' another — a docs page or a dashboard must not become a landing page.', + '- Ground the VISUAL CRAFT in the named REFERENCE EXEMPLAR and echo its id in', + ' `groundedInExemplarIds`. NEVER invent, guess, or cite an exemplar id you were', + ' not given. Borrow its craft, never its content or structure.', + '- Use only the page\'s OWN content. NEVER fabricate content it does not have —', + ' no invented metrics, counts, dates, statuses, activity feeds, or sections.', + ' You are given the page\'s design DNA + intent, NOT its full text or data, so', + ' do not assert specific values (numbers, percentages, dates, names) you were', + ' not given; refer to such content by its role. 
If the page is genuinely', + ' sparse, keep the redesign proportionally restrained rather than manufacturing', + ' content to fill a denser layout.', + '- Specify concrete DESIGN values: hex colours, px type sizes, ms durations.', + ' Revised copy may restyle the page\'s existing wording but must never invent', + ' facts or use placeholders like "TODO" or "lorem ipsum".', + '- The ASCII layout must be a real box-drawing diagram of the proposed structure,', + ' top to bottom, and must show the preserved navigation / affordances.', + '- Preserve or improve the page\'s measured accessibility (contrast, a11y); never', + ' propose a change that would regress it.', '- Output STRICT JSON only: a single object matching the OUTPUT CONTRACT, with', ' no surrounding prose and no markdown code fences.', ].join('\n') @@ -92,7 +105,8 @@ function renderExemplarBlock(hit: RetrievalResult, maxRefChars: number): string const e = hit.exemplar const reasons = hit.reasons.length > 0 ? hit.reasons.join('; ') : 'nearest aesthetic neighbour' return [ - 'REFERENCE EXEMPLAR (ground every choice in this one page)', + 'REFERENCE EXEMPLAR (borrow its VISUAL CRAFT only — type, colour, spacing,', + 'motion. Do NOT copy its structure, information architecture, or content.)', `id: ${e.id} source: ${e.source} type: ${e.pageType}`, `url: ${e.url}`, `job-to-be-done: ${e.jobToBeDone}`, @@ -102,6 +116,36 @@ function renderExemplarBlock(hit: RetrievalResult, maxRefChars: number): string ].join('\n') } +/** + * The per-page FUNCTIONAL CONTRACT — what the redesign must preserve, derived + * from the page's own measured DNA (not a hardcoded page-type table). Navigation + * affordances and information density are user value on functional pages; the + * generator must keep them. Density gating is data-driven off the measured + * `layout.density`, so a genuinely sparse page is never forced to stay dense. 
+ */ +function renderFunctionalContract(ctx: GenerationContext): string { + const dna = ctx.dna + const nav = dna.components?.nav ?? 0 + const density = dna.layout?.density ?? 'balanced' + const archetype = dna.layout?.archetype ?? 'unknown' + const lines = [ + 'FUNCTIONAL CONTRACT (preserve — the redesign must not regress these):', + `page type: ${ctx.classification.type}; measured layout density: ${density}; archetype: ${archetype}; navigation affordances detected: ${nav}.`, + ] + if (nav > 0) + lines.push( + `- Keep all ${nav} navigation / wayfinding affordance(s) (menus, table of contents, breadcrumbs, search, tabs). Provide equivalent-or-better navigation; do not drop any.`, + ) + if (density === 'dense') + lines.push( + '- This page is DENSE: its information density is the value. Keep at least as many items / rows / sections visible as today. Do not trade content for whitespace or hero sections.', + ) + lines.push( + '- Improve craft within this structure. Do not convert this page into a different kind of page.', + ) + return lines.join('\n') +} + function renderConstraints(ctx: GenerationContext): string | null { const m = ctx.measurements if (!m) return null @@ -146,6 +190,7 @@ export function buildDirectionPrompt( const sections: string[] = [ renderPageBlock(ctx, maxRefChars), + renderFunctionalContract(ctx), renderExemplarBlock(exemplar, maxRefChars), ] diff --git a/src/design/audit/reference/judge/prompt.ts b/src/design/audit/reference/judge/prompt.ts index 742e0fb..1d5e0bf 100644 --- a/src/design/audit/reference/judge/prompt.ts +++ b/src/design/audit/reference/judge/prompt.ts @@ -81,8 +81,9 @@ function joinSections(sections: string[]): string { function pairwiseSystem(): string { return [ - 'You are a world-class art director comparing two redesign directions for the same page against a named reference.', - 'Pick the direction with stronger taste, craft, information hierarchy, and fit to the reference.', + 'You are a senior product designer choosing 
which redesign direction serves the page\'s real users better at their tasks.', + 'Judge in PRIORITY ORDER: (1) TASK FITNESS — does it make the primary user\'s job easier and faster? (2) FUNCTIONAL PRESERVATION — a direction that removes navigation or wayfinding, reduces information density, or shows fewer items/sections than the page needs is WORSE for users, however polished it looks. (3) VISUAL CRAFT — type, colour, spacing rhythm, hierarchy, polish.', + '"Fit to the reference" counts only as visual craft; never reward a direction for importing the reference\'s structure or content.', CONTENT_FIDELITY, ANTI_POSITION_BIAS, RESPONSE_CONTRACT, @@ -95,7 +96,8 @@ function qualitySystem(dimension?: Dimension): string { ? ` Focus ONLY on the "${dimension}" dimension: ${DIMENSION_GUIDANCE[dimension]}.` : '' return [ - `You are a world-class design critic judging which of two designs is closer to world-class quality.${focus}`, + `You are a senior product designer and design critic judging which of two designs is closer to world-class quality FOR ITS USERS' TASKS.${focus}`, + 'Reward fitness for the page\'s job — task efficiency, preserved navigation, and appropriate information density — not decoration. A design that looks cleaner by removing wayfinding or content the users need is worse, not better.', ANTI_POSITION_BIAS, RESPONSE_CONTRACT, 'Use "tie" only when the two are genuinely on par.', diff --git a/tests/design-audit-reference-generate.test.ts b/tests/design-audit-reference-generate.test.ts index ccc349d..982982d 100644 --- a/tests/design-audit-reference-generate.test.ts +++ b/tests/design-audit-reference-generate.test.ts @@ -187,19 +187,71 @@ describe('buildDirectionPrompt', () => { // Regression: a sparse page grounded against a dense exemplar must not be told // to fabricate content to fill the layout (the example.com failure — invented // "Recent Activity" feeds, fake metrics/dates). 
Fidelity to the page's real - // content is a hard rule; the exemplar governs look, not content. + // content is a hard rule; the exemplar is craft only, never content. it('forbids fabricating content the page does not have (content fidelity)', () => { const hit = makeHit('ex-a') const { system } = buildDirectionPrompt(ctx, hit) const sys = system.toLowerCase() expect(sys).toContain('never fabricate content') expect(sys).toContain("page's own content") - // sparse pages must stay restrained, not be padded to the exemplar's density + // sparse pages stay restrained rather than being padded to the exemplar's density expect(sys).toContain('proportionally restrained') - expect(sys).toContain('do not manufacture content') - // the exemplar governs look, never content - expect(sys).toContain('how the page looks') - expect(sys).toContain('what content it contains') + expect(sys).toContain('rather than manufacturing') + // the exemplar is a source of craft, not content/structure + expect(sys).toContain('borrow its craft, never its content or structure') + // do not assert specific values the model was not given + expect(sys).toContain('do not assert specific values') + }) + + // The job-first reframe: the prompt must lead from the user's task and forbid + // stripping navigation or density to look prettier — the regression that turned + // the python docs page into a marketing brochure (lost ToC, lost density). 
+ it('leads from task fitness and forbids regressing function for aesthetics', () => { + const hit = makeHit('ex-a') + const { system } = buildDirectionPrompt(ctx, hit) + const sys = system.toLowerCase() + // persona is product designer (task outcomes), not art director (decoration) + expect(sys).toContain('product designer') + expect(sys).not.toContain('art director') + // task first, in priority order + expect(sys).toContain('task first') + expect(sys).toContain('priority order') + // never delete navigation to look cleaner + expect(sys).toContain('preserve functional affordances') + expect(sys).toContain('delete navigation') + // density is value on functional pages; right-size rather than reskin + expect(sys).toContain('preserve density where it is the value') + expect(sys).toContain('must not become a landing page') + }) + + // The per-page functional contract is DATA-DRIVEN off measured DNA: it lists the + // page's nav affordances to preserve, and only asserts "DENSE" when the page is + // actually measured dense (a sparse page is never forced to stay dense). 
+ it('injects a data-driven functional contract that preserves nav + real density', () => { + const denseCtx: GenerationContext = { + ...ctx, + dna: makeDNA({ + layout: { columns: 12, gridBaseUnit: 8, whitespaceRatio: 0.2, density: 'dense', archetype: 'nav-content' }, + components: { buttons: 4, inputs: 2, cards: 8, nav: 3 }, + }), + } + const { user } = buildDirectionPrompt(denseCtx, makeHit('ex-a')) + expect(user).toContain('FUNCTIONAL CONTRACT') + expect(user).toContain('Keep all 3 navigation') // 3 nav affordances detected + expect(user).toContain('This page is DENSE') // density === 'dense' + + // a sparse page with no nav still gets the contract header, but NEITHER the dense directive nor the nav-preservation line + const sparseCtx: GenerationContext = { + ...ctx, + dna: makeDNA({ + layout: { columns: 1, gridBaseUnit: 8, whitespaceRatio: 0.8, density: 'sparse', archetype: 'hero' }, + components: { buttons: 1, inputs: 0, cards: 0, nav: 0 }, + }), + } + const { user: sparseUser } = buildDirectionPrompt(sparseCtx, makeHit('ex-a')) + expect(sparseUser).toContain('FUNCTIONAL CONTRACT') + expect(sparseUser).not.toContain('This page is DENSE') + expect(sparseUser).not.toContain('navigation / wayfinding affordance') // nav === 0 }) }) diff --git a/tests/design-audit-reference-judge.test.ts b/tests/design-audit-reference-judge.test.ts index da3b0c9..2a4e94a 100644 --- a/tests/design-audit-reference-judge.test.ts +++ b/tests/design-audit-reference-judge.test.ts @@ -93,7 +93,22 @@ describe('judge prompts', () => { const p2 = buildPairwisePrompt(input, 'AB') expect(p1).toEqual(p2) expect(p1.system).toContain('randomized order that carries NO information') - expect(p1.system).toContain('art director') + expect(p1.system).toContain('product designer') + }) + + // The judge must score task fitness + functional preservation BEFORE visual + // craft — so a polished direction that strips navigation or density loses. This + // de-biases the ranker away from the AI-pretty-but-less-functional aesthetic. 
+ it('ranks task fitness + functional preservation above visual craft', () => { + const sys = buildPairwisePrompt(input, 'AB').system + expect(sys).toContain('PRIORITY ORDER') + expect(sys).toContain('TASK FITNESS') + expect(sys).toContain('FUNCTIONAL PRESERVATION') + expect(sys).toContain('reduces information density') + expect(sys).toContain('is WORSE for users, however polished it looks') + // visual craft is explicitly the lowest priority, and reference-fit is craft-only + expect(sys).toContain('VISUAL CRAFT') + expect(sys).toContain('never reward a direction for importing the reference') }) it('injects the reference only when present', () => { @@ -417,7 +432,7 @@ describe('createTextJudge', () => { }) expect(out.winnerSlot).toBe('B') expect(out.dimension).toBeUndefined() - expect(calls[0].system).toContain('art director') + expect(calls[0].system).toContain('product designer') }) })