From f60bfe22c626bad224dbcee51fc76750be2b1b11 Mon Sep 17 00:00:00 2001 From: Anton Stefer <59652072+antonstefer@users.noreply.github.com> Date: Wed, 29 Apr 2026 17:34:15 +0200 Subject: [PATCH 01/25] feat(logic-grid-ai): add AI translation API for puzzle clues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Translate `Clue[]` to a target locale via a two-stage AI flow: the translator produces localized clues with the constraint JSON shown as ground truth, then a validator round-trips each translation back to a constraint type and checks polarity, direction, numeric/unit preservation, and proper-noun preservation. Failures are fed back to the translator on retry (up to 3 attempts), mirroring the existing generateTheme / rewriteClues pattern. Intended for ahead-of-time puzzle pipelines that produce localized corpora once and serve them statically — quality is the constraint, not latency. Constraints are passed through verbatim, so puzzles remain solvable from the original constraints regardless of the translated text. Validator client is configurable via TranslateOptions.validator. README documents that single-model validation has correlated blind spots and the recommended path is a separate client backed by a different model. When both client and validator are omitted, the validator defaults to a separate Anthropic client at temperature: 0 for deterministic verdicts. Adds optional `temperature` to AnthropicClientOptions (default 0.8, preserves existing behavior). 
--- packages/logic-grid-ai/README.md | 63 +++ packages/logic-grid-ai/src/client.test.ts | 26 + packages/logic-grid-ai/src/client.ts | 12 +- packages/logic-grid-ai/src/index.ts | 5 + .../src/translate-validation.test.ts | 367 +++++++++++++ .../logic-grid-ai/src/translate-validation.ts | 284 +++++++++++ packages/logic-grid-ai/src/translate.test.ts | 480 ++++++++++++++++++ packages/logic-grid-ai/src/translate.ts | 180 +++++++ packages/logic-grid-ai/src/types.ts | 52 ++ 9 files changed, 1468 insertions(+), 1 deletion(-) create mode 100644 packages/logic-grid-ai/src/translate-validation.test.ts create mode 100644 packages/logic-grid-ai/src/translate-validation.ts create mode 100644 packages/logic-grid-ai/src/translate.test.ts create mode 100644 packages/logic-grid-ai/src/translate.ts diff --git a/packages/logic-grid-ai/README.md b/packages/logic-grid-ai/README.md index 457ec22..38ab54b 100644 --- a/packages/logic-grid-ai/README.md +++ b/packages/logic-grid-ai/README.md @@ -158,6 +158,69 @@ import { validateRewrittenClues } from "logic-grid-ai"; const errors = validateRewrittenClues({ clues: ["..."] }, puzzle.clues.length); ``` +### `translate(options)` + +Translate puzzle clues to a target locale using AI. Intended for **ahead-of-time (AOT)** puzzle pipelines that produce localized corpora once and serve them statically — quality is the constraint, not latency. The package engine stays English-only; this is a post-processing layer. + +```typescript +import { translate } from "logic-grid-ai"; +import { generate } from "logic-grid"; + +const puzzle = generate({ size: 4, categories: 4, seed: 42 }); +const localized = await translate({ + clues: puzzle.clues, + locale: "German", // also accepts BCP-47 like "de-DE" +}); +// Returns Clue[] with the original constraints preserved and `text` +// rendered in German. +``` + +The function runs a two-stage AI flow: + +1. **Translator** produces one localized clue per source clue in a single batched call. 
The constraint JSON is shown alongside each English clue as ground truth — if the source `text` is ambiguous or has drifted (e.g. via `rewriteClues`), the constraint defines the meaning. +2. **Validator** round-trips each translation back to a constraint type and checks polarity, direction, numeric/unit preservation, and proper-noun preservation. Failures are fed back to the translator on retry (up to 3 attempts). + +```typescript +const localized = await translate({ + clues: puzzle.clues, + locale: "ja-JP", + client: createAnthropicClient(undefined, { model: "claude-sonnet-4-6" }), + validator: createAnthropicClient(undefined, { + model: "claude-opus-4-5", + temperature: 0, + }), +}); +``` + +> **Validator best practice.** Single-model validation has correlated blind spots — the validator's mistakes overlap with the translator's. For production AOT pipelines, pass a `validator` client backed by a _different model_ than the translator. When only `client` is supplied, that same client is reused as the validator, so the correlated-blind-spot caveat applies. When both `client` and `validator` are omitted, the package creates two default Anthropic clients with `validator` at `temperature: 0` for the most repeatable verdicts (temperature 0 minimizes sampling variation but does not guarantee identical output on every call). + +If validation fails on every attempt, `translate` throws a `TranslationError` carrying structured `errors` with stable codes (`constraint_type_mismatch`, `direction_flip`, `numeric_changed`, `proper_noun_dropped`, plus the structural codes `wrong_clue_count`, `non_string_clue`, `empty_translation`, `long_translation`, `duplicate_translation`): + +```typescript +import { translate, TranslationError } from "logic-grid-ai"; + +try { + const localized = await translate({ clues, locale: "German" }); +} catch (err) { + if (err instanceof TranslationError) { + if (err.errors.some((e) => e.code === "direction_flip")) { + // Translator flipped the subject/object on a `before` or `left_of` clue. 
+ } + } + throw err; +} +``` + +Constraints are passed through verbatim — translation only changes the `text` field, so the puzzle remains solvable from the original constraints regardless of how the localized text reads. + +### `createAnthropicClient(apiKey?, options?)` temperature option + +`AnthropicClientOptions` accepts an optional `temperature` (default `0.8`). Use `0` for the most repeatable responses (sampling variation is minimized, though not strictly eliminated) — typically the right default for validator clients in `translate()`: + +```typescript +const validator = createAnthropicClient(undefined, { temperature: 0 }); +``` + ## How It Works 1. A detailed prompt describes the puzzle structure, category contract, and ordering semantics diff --git a/packages/logic-grid-ai/src/client.test.ts b/packages/logic-grid-ai/src/client.test.ts index 8671b1d..b553f17 100644 --- a/packages/logic-grid-ai/src/client.test.ts +++ b/packages/logic-grid-ai/src/client.test.ts @@ -83,4 +83,30 @@ describe("createAnthropicClient", () => { expect.objectContaining({ model: "claude-haiku-4-5" }), ); }); + + it("uses default temperature 0.8 when none provided", async () => { + mockCreate.mockResolvedValueOnce({ + content: [{ type: "tool_use", id: "call_4", name: "respond", input: {} }], + }); + + const client = createAnthropicClient(); + await client.completeJSON("test", { type: "object" }); + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ temperature: 0.8 }), + ); + }); + + it("uses overridden temperature when passed via options", async () => { + mockCreate.mockResolvedValueOnce({ + content: [{ type: "tool_use", id: "call_5", name: "respond", input: {} }], + }); + + const client = createAnthropicClient(undefined, { temperature: 0 }); + await client.completeJSON("test", { type: "object" }); + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ temperature: 0 }), + ); + }); }); diff --git a/packages/logic-grid-ai/src/client.ts b/packages/logic-grid-ai/src/client.ts index 1e7d4cb..5305f6f 100644 --- 
a/packages/logic-grid-ai/src/client.ts +++ b/packages/logic-grid-ai/src/client.ts @@ -4,10 +4,19 @@ import type { AIClient, JSONSchema } from "./types"; /** Default model used when no `model` option is provided. */ export const DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-6"; +/** Default sampling temperature used when no `temperature` option is provided. */ +export const DEFAULT_ANTHROPIC_TEMPERATURE = 0.8; + /** Optional knobs for the default Anthropic-backed client. */ export interface AnthropicClientOptions { /** Override the model. Defaults to {@link DEFAULT_ANTHROPIC_MODEL}. */ model?: string; + /** + * Override the sampling temperature. Defaults to + * {@link DEFAULT_ANTHROPIC_TEMPERATURE}. Use 0 for deterministic verdicts + * (e.g. validator clients in `translate`). + */ + temperature?: number; } /** @@ -28,13 +37,14 @@ export function createAnthropicClient( ): AIClient { const client = new Anthropic({ apiKey }); const model = options.model ?? DEFAULT_ANTHROPIC_MODEL; + const temperature = options.temperature ?? 
DEFAULT_ANTHROPIC_TEMPERATURE; return { async completeJSON(prompt: string, schema: JSONSchema): Promise { const response = await client.messages.create({ model, max_tokens: 4096, - temperature: 0.8, + temperature, messages: [{ role: "user", content: prompt }], tools: [ { diff --git a/packages/logic-grid-ai/src/index.ts b/packages/logic-grid-ai/src/index.ts index 3e507f1..22bcdad 100644 --- a/packages/logic-grid-ai/src/index.ts +++ b/packages/logic-grid-ai/src/index.ts @@ -1,8 +1,10 @@ export { generateTheme, ThemeGenerationError } from "./theme"; export { rewriteClues, RewriteCluesError } from "./rewrite"; +export { translate, TranslationError } from "./translate"; export { createAnthropicClient, DEFAULT_ANTHROPIC_MODEL, + DEFAULT_ANTHROPIC_TEMPERATURE, type AnthropicClientOptions, } from "./client"; export { validateThemeResult } from "./validation"; @@ -12,10 +14,13 @@ export type { ThemeResult, RewriteCluesOptions, RewriteCluesResult, + TranslateOptions, AIClient, JSONSchema, ThemeValidationCode, ThemeValidationError, RewriteCluesValidationCode, RewriteCluesValidationError, + TranslationValidationCode, + TranslationValidationError, } from "./types"; diff --git a/packages/logic-grid-ai/src/translate-validation.test.ts b/packages/logic-grid-ai/src/translate-validation.test.ts new file mode 100644 index 0000000..2c9a2e7 --- /dev/null +++ b/packages/logic-grid-ai/src/translate-validation.test.ts @@ -0,0 +1,367 @@ +import { describe, it, expect } from "vitest"; +import { + checkTranslationStructure, + validateTranslation, +} from "./translate-validation"; +import { hasCode } from "./test-utils"; +import type { AIClient } from "./types"; +import type { Clue } from "logic-grid"; + +const SAMPLE_CLUES: Clue[] = [ + { + constraint: { type: "same_position", a: "Alice", b: "Coffee" }, + text: "Alice drinks coffee.", + }, + { + constraint: { type: "before", a: "Alice", b: "Bob", axis: "Year" }, + text: "Alice started before Bob.", + }, + { + constraint: { + type: 
"not_between", + outer1: "A", + middle: "B", + outer2: "C", + axis: "Year", + }, + text: "B is not between A and C.", + }, +]; + +interface ClueVerdict { + index: number; + constraintType: string; + directionOk: boolean; + numericOk: boolean; + properNounsOk: boolean; +} + +function allOk(clues: Clue[]): { clues: ClueVerdict[] } { + return { + clues: clues.map((c, i) => ({ + index: i + 1, + constraintType: c.constraint.type, + directionOk: true, + numericOk: true, + properNounsOk: true, + })), + }; +} + +function mockValidator(verdicts: { clues: ClueVerdict[] }): AIClient { + return { + completeJSON: () => Promise.resolve(verdicts as T), + }; +} + +describe("checkTranslationStructure", () => { + it("accepts valid output", () => { + const result = { clues: ["one", "two", "three"] }; + expect(checkTranslationStructure(result, 3)).toEqual([]); + }); + + it("rejects wrong clue count", () => { + const errors = checkTranslationStructure({ clues: ["one", "two"] }, 3); + expect(hasCode(errors, "wrong_clue_count")).toBe(true); + expect( + errors.find((e) => e.code === "wrong_clue_count")?.message, + ).toContain("Expected 3 clues, got 2"); + }); + + it("rejects empty translation", () => { + const errors = checkTranslationStructure( + { clues: ["", "two", "three"] }, + 3, + ); + expect(hasCode(errors, "empty_translation")).toBe(true); + expect(errors.find((e) => e.code === "empty_translation")?.clueIndex).toBe( + 1, + ); + }); + + it("rejects whitespace-only translation", () => { + const errors = checkTranslationStructure( + { clues: ["one", " ", "three"] }, + 3, + ); + expect(hasCode(errors, "empty_translation")).toBe(true); + expect(errors.find((e) => e.code === "empty_translation")?.clueIndex).toBe( + 2, + ); + }); + + it("rejects translation exceeding max length", () => { + const errors = checkTranslationStructure( + { clues: ["one", "A".repeat(501), "three"] }, + 3, + ); + expect(hasCode(errors, "long_translation")).toBe(true); + expect(errors.find((e) => e.code === 
"long_translation")?.clueIndex).toBe( + 2, + ); + }); + + it("rejects duplicate translation (case-insensitive)", () => { + const errors = checkTranslationStructure( + { clues: ["Alice trinkt Kaffee.", "two", "alice trinkt kaffee."] }, + 3, + ); + expect(hasCode(errors, "duplicate_translation")).toBe(true); + expect( + errors.find((e) => e.code === "duplicate_translation")?.clueIndex, + ).toBe(3); + }); + + it("rejects non-string item", () => { + const errors = checkTranslationStructure( + { clues: ["one", 42, "three"] }, + 3, + ); + expect(hasCode(errors, "non_string_clue")).toBe(true); + expect(errors.find((e) => e.code === "non_string_clue")?.clueIndex).toBe(2); + }); + + it("omits clueIndex on count-level errors", () => { + const errors = checkTranslationStructure({ clues: ["one"] }, 3); + const e = errors.find((x) => x.code === "wrong_clue_count"); + expect(e).toBeDefined(); + expect("clueIndex" in (e as object)).toBe(false); + }); +}); + +describe("validateTranslation", () => { + it("returns empty array when validator reports all-OK", async () => { + const errors = await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "German", + mockValidator(allOk(SAMPLE_CLUES)), + ); + expect(errors).toEqual([]); + }); + + it("returns empty array on empty input without calling validator", async () => { + let called = false; + const validator: AIClient = { + completeJSON: () => { + called = true; + return Promise.resolve({ clues: [] } as T); + }, + }; + + const errors = await validateTranslation([], [], "German", validator); + + expect(errors).toEqual([]); + expect(called).toBe(false); + }); + + it("emits constraint_type_mismatch when verdict type differs from source", async () => { + const verdicts = allOk(SAMPLE_CLUES); + verdicts.clues[0].constraintType = "wrong_type"; + + const errors = await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "German", + mockValidator(verdicts), + ); + + expect(hasCode(errors, "constraint_type_mismatch")).toBe(true); + 
expect( + errors.find((e) => e.code === "constraint_type_mismatch")?.clueIndex, + ).toBe(1); + }); + + it("emits direction_flip only for asymmetric constraints", async () => { + const verdicts = allOk(SAMPLE_CLUES); + // Flip on same_position (symmetric, should be ignored) and before (asymmetric) + verdicts.clues[0].directionOk = false; // same_position — ignored + verdicts.clues[1].directionOk = false; // before — emitted + + const errors = await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "German", + mockValidator(verdicts), + ); + + const flipErrors = errors.filter((e) => e.code === "direction_flip"); + expect(flipErrors).toHaveLength(1); + expect(flipErrors[0].clueIndex).toBe(2); + }); + + it("emits direction_flip on left_of as well as before", async () => { + const leftOfClue: Clue = { + constraint: { type: "left_of", a: "X", b: "Y", axis: "Year" }, + text: "X is directly before Y.", + }; + const verdicts = { + clues: [ + { + index: 1, + constraintType: "left_of", + directionOk: false, + numericOk: true, + properNounsOk: true, + }, + ], + }; + + const errors = await validateTranslation( + [leftOfClue], + ["..."], + "German", + mockValidator(verdicts), + ); + + expect(hasCode(errors, "direction_flip")).toBe(true); + }); + + it("emits numeric_changed when numericOk is false", async () => { + const verdicts = allOk(SAMPLE_CLUES); + verdicts.clues[2].numericOk = false; + + const errors = await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "German", + mockValidator(verdicts), + ); + + expect(hasCode(errors, "numeric_changed")).toBe(true); + expect(errors.find((e) => e.code === "numeric_changed")?.clueIndex).toBe(3); + }); + + it("emits proper_noun_dropped when properNounsOk is false", async () => { + const verdicts = allOk(SAMPLE_CLUES); + verdicts.clues[0].properNounsOk = false; + + const errors = await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "German", + mockValidator(verdicts), + ); + + expect(hasCode(errors, 
"proper_noun_dropped")).toBe(true); + expect( + errors.find((e) => e.code === "proper_noun_dropped")?.clueIndex, + ).toBe(1); + }); + + it("aggregates multiple errors per clue", async () => { + const verdicts = allOk(SAMPLE_CLUES); + verdicts.clues[1].constraintType = "wrong"; + verdicts.clues[1].directionOk = false; + verdicts.clues[1].numericOk = false; + verdicts.clues[1].properNounsOk = false; + + const errors = await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "German", + mockValidator(verdicts), + ); + + const clue2Errors = errors.filter((e) => e.clueIndex === 2); + expect(clue2Errors).toHaveLength(4); + }); + + it("includes locale name in the validator prompt", async () => { + let capturedPrompt = ""; + const validator: AIClient = { + completeJSON: (prompt: string) => { + capturedPrompt = prompt; + return Promise.resolve(allOk(SAMPLE_CLUES) as T); + }, + }; + + await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "Japanese", + validator, + ); + + expect(capturedPrompt).toContain("Japanese"); + expect(capturedPrompt).toContain("reviewing a translation"); + }); + + it("includes both source and translation in validator prompt", async () => { + let capturedPrompt = ""; + const validator: AIClient = { + completeJSON: (prompt: string) => { + capturedPrompt = prompt; + return Promise.resolve(allOk(SAMPLE_CLUES) as T); + }, + }; + + await validateTranslation( + SAMPLE_CLUES, + ["Alice trinkt Kaffee.", "b", "c"], + "German", + validator, + ); + + expect(capturedPrompt).toContain("Alice drinks coffee."); + expect(capturedPrompt).toContain("Alice trinkt Kaffee."); + expect(capturedPrompt).toContain('"type":"same_position"'); + }); + + it("calls validator exactly once per batch", async () => { + let callCount = 0; + const validator: AIClient = { + completeJSON: () => { + callCount++; + return Promise.resolve(allOk(SAMPLE_CLUES) as T); + }, + }; + + await validateTranslation( + SAMPLE_CLUES, + ["a", "b", "c"], + "German", + validator, + ); + + 
expect(callCount).toBe(1); + }); + + it("does not flag direction on symmetric constraints when directionOk is false", async () => { + const symClues: Clue[] = [ + { + constraint: { type: "next_to", a: "X", b: "Y", axis: "Year" }, + text: "X is next to Y.", + }, + { + constraint: { + type: "exact_distance", + a: "X", + b: "Y", + distance: 2, + axis: "Year", + }, + text: "X is exactly 2 from Y.", + }, + ]; + + const verdicts = { + clues: symClues.map((c, i) => ({ + index: i + 1, + constraintType: c.constraint.type, + directionOk: false, // validator's verdict on symmetric — should be ignored + numericOk: true, + properNounsOk: true, + })), + }; + + const errors = await validateTranslation( + symClues, + ["a", "b"], + "German", + mockValidator(verdicts), + ); + + expect(errors.filter((e) => e.code === "direction_flip")).toHaveLength(0); + }); +}); diff --git a/packages/logic-grid-ai/src/translate-validation.ts b/packages/logic-grid-ai/src/translate-validation.ts new file mode 100644 index 0000000..7ba1b22 --- /dev/null +++ b/packages/logic-grid-ai/src/translate-validation.ts @@ -0,0 +1,284 @@ +import type { Clue, ConstraintType } from "logic-grid"; +import type { + AIClient, + JSONSchema, + TranslationValidationCode, + TranslationValidationError, +} from "./types"; + +/** + * AI-driven semantic validator for translated clues. + * + * NOT exported from the package. Internal to the {@link translate} retry loop. + * + * The validator round-trips each translation back to a constraint type and + * checks four properties per clue: + * 1. Constraint type round-trip (with polarity baked in: `not_between` is a + * distinct value from `between`). + * 2. Direction (only for `before` / `left_of`): does the translation's + * subject/object order match the source constraint's `a`/`b` fields? + * 3. Numeric and unit preservation. + * 4. Proper-noun preservation. 
+ * + * All checks are evaluated by a single AI call against a structured schema — + * the verdicts are typed booleans + an enum, not free-text reasoning. Failures + * are mapped to {@link TranslationValidationError} with stable codes. + * + * Caller is responsible for picking a validator client distinct from the + * translator (or accepting correlated blind spots if the same client is used). + */ + +const CONSTRAINT_TYPES: ConstraintType[] = [ + "same_position", + "not_same_position", + "next_to", + "not_next_to", + "left_of", + "before", + "between", + "not_between", + "exact_distance", +]; + +const ASYMMETRIC: Set = new Set(["before", "left_of"]); + +interface ClueVerdict { + index: number; + constraintType: string; + directionOk: boolean; + numericOk: boolean; + properNounsOk: boolean; +} + +interface ValidatorResult { + clues: ClueVerdict[]; +} + +function err( + code: TranslationValidationCode, + message: string, + clueIndex?: number, +): TranslationValidationError { + return clueIndex !== undefined + ? { code, message, clueIndex } + : { code, message }; +} + +/** + * Cheap, deterministic structural check on the raw translator output. + * Run before the AI validator to reject obvious failures without burning + * an LLM call. Mirrors {@link validateRewrittenClues}'s shape. 
+ */ +export function checkTranslationStructure( + result: { clues: unknown[] }, + expectedCount: number, +): TranslationValidationError[] { + const errors: TranslationValidationError[] = []; + + if (result.clues.length !== expectedCount) { + errors.push( + err( + "wrong_clue_count", + `Expected ${expectedCount} clues, got ${result.clues.length}.`, + ), + ); + } + + const seen = new Set(); + + for (let i = 0; i < result.clues.length; i++) { + const text = result.clues[i]; + const pos = i + 1; + + if (typeof text !== "string") { + errors.push(err("non_string_clue", `Clue ${pos} is not a string.`, pos)); + continue; + } + + if (!text || text.trim() === "") { + errors.push(err("empty_translation", `Clue ${pos} is empty.`, pos)); + continue; + } + + if (text.length > 500) { + errors.push( + err( + "long_translation", + `Clue ${pos} is too long (${text.length} chars, max 500).`, + pos, + ), + ); + } + + const lower = text.toLowerCase(); + if (seen.has(lower)) { + errors.push( + err( + "duplicate_translation", + `Clue ${pos} is a duplicate of an earlier clue.`, + pos, + ), + ); + } + seen.add(lower); + } + + return errors; +} + +function buildSchema(clueCount: number): JSONSchema { + return { + type: "object", + properties: { + clues: { + type: "array", + items: { + type: "object", + properties: { + index: { + type: "number", + description: "1-indexed clue position", + }, + constraintType: { + type: "string", + enum: CONSTRAINT_TYPES, + description: + "The constraint type the translated sentence expresses. Polarity is part of the type — return 'not_between' (not 'between') when the translation expresses negation.", + }, + directionOk: { + type: "boolean", + description: + "For `before` and `left_of`: is the translation's subject the same as the source constraint's `a` field? 
For symmetric constraints, always true.", + }, + numericOk: { + type: "boolean", + description: + "All numbers and units from the source constraint are preserved exactly in the translated text.", + }, + properNounsOk: { + type: "boolean", + description: + "All proper nouns and category-value names from the source are preserved verbatim.", + }, + }, + required: [ + "index", + "constraintType", + "directionOk", + "numericOk", + "properNounsOk", + ], + }, + minItems: clueCount, + maxItems: clueCount, + }, + }, + required: ["clues"], + }; +} + +function buildPrompt( + sourceClues: Clue[], + translated: string[], + locale: string, +): string { + let prompt = `You are reviewing a translation of logic-puzzle clues from English to ${locale}. + +For each clue, parse the ${locale} sentence back to a constraint and verify: + +1. constraintType: which of these does the ${locale} sentence express? + ${CONSTRAINT_TYPES.join(" | ")} + Polarity is part of the type — \`not_between\` is distinct from \`between\`, + \`not_next_to\` is distinct from \`next_to\`, \`not_same_position\` is + distinct from \`same_position\`. If the negation is dropped, return the + POSITIVE type so the mismatch is visible. + +2. directionOk (only meaningful for \`before\` and \`left_of\`): is the subject + of the ${locale} sentence the same entity as the source constraint's \`a\` + field? If the translation says "B is before A" when the source says + \`before(a=A, b=B)\`, that's a flip — return false. For symmetric + constraints (same_position, not_same_position, next_to, not_next_to, + between, not_between, exact_distance), always return true. + +3. numericOk: are all numbers and units from the source constraint preserved + exactly in the ${locale} text? + +4. properNounsOk: are all proper nouns and category-value names from the + source preserved verbatim (Alice stays Alice; "Black River fund" stays + "Black River fund")? 
+ +Be calibrated — accept fluent translations that preserve meaning even if +phrased differently. Only flag GENUINE semantic drift, not stylistic +variation. + +## Source / translation pairs`; + + for (let i = 0; i < sourceClues.length; i++) { + prompt += `\n\n${i + 1}. EN: "${sourceClues[i].text}"\n Constraint: ${JSON.stringify(sourceClues[i].constraint)}\n ${locale}: "${translated[i]}"`; + } + + return prompt; +} + +export async function validateTranslation( + sourceClues: Clue[], + translated: string[], + locale: string, + validator: AIClient, +): Promise { + if (sourceClues.length === 0) return []; + + const schema = buildSchema(sourceClues.length); + const prompt = buildPrompt(sourceClues, translated, locale); + const result = await validator.completeJSON(prompt, schema); + + const errors: TranslationValidationError[] = []; + + for (let i = 0; i < sourceClues.length; i++) { + const verdict = result.clues[i]; + const source = sourceClues[i]; + const pos = i + 1; + + if (verdict.constraintType !== source.constraint.type) { + errors.push( + err( + "constraint_type_mismatch", + `Clue ${pos}: translation expresses '${verdict.constraintType}' but source constraint is '${source.constraint.type}'.`, + pos, + ), + ); + } + + if (ASYMMETRIC.has(source.constraint.type) && !verdict.directionOk) { + errors.push( + err( + "direction_flip", + `Clue ${pos}: subject/object order is reversed for ${source.constraint.type}.`, + pos, + ), + ); + } + + if (!verdict.numericOk) { + errors.push( + err( + "numeric_changed", + `Clue ${pos}: numbers or units differ from the source constraint.`, + pos, + ), + ); + } + + if (!verdict.properNounsOk) { + errors.push( + err( + "proper_noun_dropped", + `Clue ${pos}: a proper noun or value name was changed.`, + pos, + ), + ); + } + } + + return errors; +} diff --git a/packages/logic-grid-ai/src/translate.test.ts b/packages/logic-grid-ai/src/translate.test.ts new file mode 100644 index 0000000..61137ce --- /dev/null +++ 
b/packages/logic-grid-ai/src/translate.test.ts @@ -0,0 +1,480 @@ +import { describe, it, expect, vi } from "vitest"; +import { generate, deduce } from "logic-grid"; +import { translate, TranslationError } from "./translate"; +import type { AIClient } from "./types"; +import type { Clue } from "logic-grid"; +import * as clientModule from "./client"; + +const SAMPLE_CLUES: Clue[] = [ + { + constraint: { type: "same_position", a: "Alice", b: "Coffee" }, + text: "Alice drinks coffee.", + }, + { + constraint: { type: "next_to", a: "Cat", b: "Red", axis: "House" }, + text: "The cat lives next to the red house.", + }, + { + constraint: { type: "before", a: "Alice", b: "Bob", axis: "Year" }, + text: "Alice started before Bob.", + }, +]; + +const VALID_TRANSLATIONS = [ + "Alice trinkt Kaffee.", + "Die Katze wohnt neben dem roten Haus.", + "Alice hat vor Bob angefangen.", +]; + +interface ClueVerdict { + index: number; + constraintType: string; + directionOk: boolean; + numericOk: boolean; + properNounsOk: boolean; +} + +function allOkVerdict(clues: Clue[]): { clues: ClueVerdict[] } { + return { + clues: clues.map((c, i) => ({ + index: i + 1, + constraintType: c.constraint.type, + directionOk: true, + numericOk: true, + properNounsOk: true, + })), + }; +} + +/** + * Single client, two roles: distinguishes translator from validator calls by prompt + * substring. Returns whichever payload the caller supplied for that role. 
+ */ +function mockSingleClient( + translatorResult: unknown, + validatorResult: unknown, +): AIClient { + return { + completeJSON: (prompt: string): Promise => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve(validatorResult as T); + } + return Promise.resolve(translatorResult as T); + }, + }; +} + +describe("translate", () => { + it("returns translated clues from a mock client", async () => { + const result = await translate({ + clues: SAMPLE_CLUES, + locale: "German", + client: mockSingleClient( + { clues: VALID_TRANSLATIONS }, + allOkVerdict(SAMPLE_CLUES), + ), + }); + + expect(result).toHaveLength(3); + expect(result[0].text).toBe(VALID_TRANSLATIONS[0]); + expect(result[1].text).toBe(VALID_TRANSLATIONS[1]); + expect(result[2].text).toBe(VALID_TRANSLATIONS[2]); + }); + + it("preserves original constraints in translated clues", async () => { + const result = await translate({ + clues: SAMPLE_CLUES, + locale: "German", + client: mockSingleClient( + { clues: VALID_TRANSLATIONS }, + allOkVerdict(SAMPLE_CLUES), + ), + }); + + for (let i = 0; i < SAMPLE_CLUES.length; i++) { + expect(result[i].constraint).toBe(SAMPLE_CLUES[i].constraint); + } + }); + + it("uses default Anthropic clients when none provided", async () => { + const spy = vi + .spyOn(clientModule, "createAnthropicClient") + .mockImplementation(() => + mockSingleClient( + { clues: VALID_TRANSLATIONS }, + allOkVerdict(SAMPLE_CLUES), + ), + ); + + const result = await translate({ + clues: SAMPLE_CLUES, + locale: "German", + }); + + // One call for translator (no client), one for validator (temperature: 0). 
+ expect(spy).toHaveBeenCalledTimes(2); + expect(spy).toHaveBeenCalledWith(undefined, { temperature: 0 }); + expect(result).toHaveLength(3); + spy.mockRestore(); + }); + + it("includes locale name in the translator prompt", async () => { + const prompts: string[] = []; + const client: AIClient = { + completeJSON: (prompt: string) => { + prompts.push(prompt); + if (prompt.includes("reviewing a translation")) { + return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + } + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + await translate({ clues: SAMPLE_CLUES, locale: "Japanese", client }); + + expect(prompts[0]).toContain("Japanese"); + }); + + it("includes constraint JSON in the translator prompt", async () => { + let translatorPrompt = ""; + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + } + translatorPrompt = prompt; + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + + expect(translatorPrompt).toContain('"type":"same_position"'); + expect(translatorPrompt).toContain('"type":"next_to"'); + expect(translatorPrompt).toContain('"type":"before"'); + }); + + it("uses separate client and validator when both are provided", async () => { + const translatorCalls: string[] = []; + const validatorCalls: string[] = []; + + const client: AIClient = { + completeJSON: (prompt: string) => { + translatorCalls.push(prompt); + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + const validator: AIClient = { + completeJSON: (prompt: string) => { + validatorCalls.push(prompt); + return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + }, + }; + + await translate({ + clues: SAMPLE_CLUES, + locale: "German", + client, + validator, + }); + + expect(translatorCalls).toHaveLength(1); + 
expect(validatorCalls).toHaveLength(1); + expect(translatorCalls[0]).toContain("translating logic-puzzle clues"); + expect(validatorCalls[0]).toContain("reviewing a translation"); + }); + + it("falls back validator to client when validator is omitted", async () => { + const calls: string[] = []; + const client: AIClient = { + completeJSON: (prompt: string) => { + calls.push(prompt); + if (prompt.includes("reviewing a translation")) { + return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + } + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + + expect(calls).toHaveLength(2); + expect(calls[0]).toContain("translating logic-puzzle clues"); + expect(calls[1]).toContain("reviewing a translation"); + }); + + it("retries on structural failure", async () => { + let translatorCalls = 0; + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + } + translatorCalls++; + if (translatorCalls < 3) { + return Promise.resolve({ + clues: ["only one"], + } as T); + } + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + const result = await translate({ + clues: SAMPLE_CLUES, + locale: "German", + client, + }); + + expect(translatorCalls).toBe(3); + expect(result[0].text).toBe(VALID_TRANSLATIONS[0]); + }); + + it("retries on semantic failure (constraint type mismatch)", async () => { + let translatorCalls = 0; + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + if (translatorCalls < 2) { + // First attempt: validator says constraint type drifted + return Promise.resolve({ + clues: SAMPLE_CLUES.map((_, i) => ({ + index: i + 1, + constraintType: i === 1 ? 
"next_to" : "near", // drift on non-clue-2 entries + directionOk: true, + numericOk: true, + properNounsOk: true, + })), + } as T); + } + return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + } + translatorCalls++; + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + const result = await translate({ + clues: SAMPLE_CLUES, + locale: "German", + client, + }); + + expect(translatorCalls).toBe(2); + expect(result[0].text).toBe(VALID_TRANSLATIONS[0]); + }); + + it("detects direction-flip on `before` clues", async () => { + let caught: unknown; + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve({ + clues: SAMPLE_CLUES.map((c, i) => ({ + index: i + 1, + constraintType: c.constraint.type, + directionOk: c.constraint.type !== "before", // flip on `before` clue + numericOk: true, + properNounsOk: true, + })), + } as T); + } + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + try { + await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + } catch (e) { + caught = e; + } + + expect(caught).toBeInstanceOf(TranslationError); + const err = caught as TranslationError; + expect(err.errors.some((e) => e.code === "direction_flip")).toBe(true); + }); + + it("detects polarity drop (not_between -> between)", async () => { + const polarityClues: Clue[] = [ + { + constraint: { + type: "not_between", + outer1: "A", + middle: "B", + outer2: "C", + axis: "Year", + }, + text: "B is not between A and C.", + }, + ]; + + let caught: unknown; + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve({ + clues: [ + { + index: 1, + constraintType: "between", // negation dropped + directionOk: true, + numericOk: true, + properNounsOk: true, + }, + ], + } as T); + } + return Promise.resolve({ clues: ["B ist zwischen A und C."] } as T); + }, + }; + + try { + 
await translate({ clues: polarityClues, locale: "German", client }); + } catch (e) { + caught = e; + } + + expect(caught).toBeInstanceOf(TranslationError); + const err = caught as TranslationError; + expect(err.errors.some((e) => e.code === "constraint_type_mismatch")).toBe( + true, + ); + }); + + it("throws TranslationError with structured errors after max retries", async () => { + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve({ + clues: SAMPLE_CLUES.map((_, i) => ({ + index: i + 1, + constraintType: "wrong_type", + directionOk: true, + numericOk: true, + properNounsOk: true, + })), + } as T); + } + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + let caught: unknown; + try { + await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + } catch (e) { + caught = e; + } + + expect(caught).toBeInstanceOf(TranslationError); + const err = caught as TranslationError; + expect(err.message).toContain("Translation to German failed after 3"); + expect(err.errors.length).toBeGreaterThan(0); + expect(err.errors[0].code).toBe("constraint_type_mismatch"); + }); + + it("propagates client errors", async () => { + const client: AIClient = { + completeJSON: () => Promise.reject(new Error("Network error")), + }; + + await expect( + translate({ clues: SAMPLE_CLUES, locale: "German", client }), + ).rejects.toThrow("Network error"); + }); + + it("returns empty array for empty clues input", async () => { + let called = false; + const client: AIClient = { + completeJSON: () => { + called = true; + return Promise.resolve({ clues: [] } as T); + }, + }; + + const result = await translate({ clues: [], locale: "German", client }); + + expect(result).toEqual([]); + expect(called).toBe(false); + }); + + it("throws on empty locale", async () => { + await expect( + translate({ clues: SAMPLE_CLUES, locale: "" }), + ).rejects.toThrow("locale must be a non-empty 
string"); + }); + + it("throws on whitespace-only locale", async () => { + await expect( + translate({ clues: SAMPLE_CLUES, locale: " " }), + ).rejects.toThrow("locale must be a non-empty string"); + }); + + it("feeds validation errors back into retry prompt", async () => { + const translatorPrompts: string[] = []; + let translatorCalls = 0; + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + if (translatorCalls < 2) { + return Promise.resolve({ + clues: SAMPLE_CLUES.map((c, i) => ({ + index: i + 1, + constraintType: c.constraint.type, + directionOk: true, + numericOk: i !== 0, // numeric drift on clue 1 + properNounsOk: true, + })), + } as T); + } + return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + } + translatorPrompts.push(prompt); + translatorCalls++; + return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + }, + }; + + await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + + expect(translatorPrompts.length).toBeGreaterThanOrEqual(2); + expect(translatorPrompts[1]).toContain("Previous attempt had errors"); + expect(translatorPrompts[1]).toContain("numbers or units differ"); + }); + + it("result integrates with generate() and deduce()", async () => { + const puzzle = generate({ size: 4, categories: 4, seed: 42 }); + + const translations = puzzle.clues.map( + (_, i) => `Klue auf Deutsch Nummer ${i + 1}.`, + ); + + const client: AIClient = { + completeJSON: (prompt: string) => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve(allOkVerdict(puzzle.clues) as T); + } + return Promise.resolve({ clues: translations } as T); + }, + }; + + const result = await translate({ + clues: puzzle.clues, + locale: "German", + client, + }); + + expect(result).toHaveLength(puzzle.clues.length); + for (let i = 0; i < result.length; i++) { + expect(result[i].constraint).toBe(puzzle.clues[i].constraint); + expect(result[i].text).toBe(translations[i]); + } + + 
const translatedPuzzle = { ...puzzle, clues: result }; + const deduction = deduce( + translatedPuzzle.constraints, + translatedPuzzle.grid, + ); + expect(deduction.complete).toBe(true); + }); +}); diff --git a/packages/logic-grid-ai/src/translate.ts b/packages/logic-grid-ai/src/translate.ts new file mode 100644 index 0000000..f103cfe --- /dev/null +++ b/packages/logic-grid-ai/src/translate.ts @@ -0,0 +1,180 @@ +import type { + TranslateOptions, + AIClient, + JSONSchema, + TranslationValidationError, +} from "./types"; +import type { Clue } from "logic-grid"; +import { createAnthropicClient } from "./client"; +import { + checkTranslationStructure, + validateTranslation, +} from "./translate-validation"; + +const MAX_RETRIES = 3; + +/** + * Thrown by {@link translate} when AI output fails validation on every retry. + * `errors` contains the structured validation errors from the final attempt. + */ +export class TranslationError extends Error { + readonly errors: TranslationValidationError[]; + + constructor(message: string, errors: TranslationValidationError[]) { + super(message); + this.name = "TranslationError"; + this.errors = errors; + } +} + +interface TranslateRawResult { + clues: string[]; +} + +function buildSchema(clueCount: number): JSONSchema { + return { + type: "object", + properties: { + clues: { + type: "array", + items: { type: "string", minLength: 1 }, + minItems: clueCount, + maxItems: clueCount, + description: + "Translated clue texts, one per source clue, in the same order", + }, + }, + required: ["clues"], + }; +} + +function buildPrompt( + options: TranslateOptions, + previousErrors?: string[], +): string { + const { clues, locale } = options; + + let prompt = `You are translating logic-puzzle clues from English to ${locale}. + +GROUND TRUTH: For each clue, the JSON constraint defines the meaning. The +English text is a stylistic reference — if it disagrees with the constraint, +follow the constraint. + +## Translation rules + +1. 
Preserve the EXACT semantic relationship for each clue: + - "next_to" / "right next to" means strict rank-adjacency. Use the + adjacency word in ${locale}, not a "near" or "close to" word. + - "left_of" means immediately preceding. Distinct from "before". + - "before" means somewhere earlier in order. Distinct from "left_of". + - "exactly N apart" preserves the numeric value and unit exactly. + - Negative constraints (\`not_*\`) MUST preserve the negation. +2. Preserve directional asymmetry. For \`before\` and \`left_of\`, the + subject is \`a\` and the object is \`b\` — do not swap them. +3. Preserve all proper nouns and category-value names verbatim + (Alice stays Alice; "Black River fund" stays "Black River fund"). +4. Preserve numeric values and units exactly. +5. Output one clue per source clue, in the same order. + +## Source clues`; + + for (let i = 0; i < clues.length; i++) { + prompt += `\n\n${i + 1}. Original: "${clues[i].text}"\n Constraint: ${JSON.stringify(clues[i].constraint)}`; + } + + if (previousErrors && previousErrors.length > 0) { + prompt += `\n\n## Previous attempt had errors — please fix:\n${previousErrors.map((e) => `- ${e}`).join("\n")}`; + } + + return prompt; +} + +/** + * Translate puzzle clues to a target locale using AI. + * + * The package engine is English-only by design. This function is a + * post-processing layer for ahead-of-time (AOT) puzzle pipelines that need + * localized output: generate puzzles in English, then translate the rendered + * clues here. The underlying constraints are passed through verbatim — only + * the surface text changes. + * + * Two-stage AI flow: + * 1. The translator produces a localized clue per source clue, in one + * batched call. The constraint JSON is shown alongside each English + * clue as ground truth. + * 2. A validator (separately configurable client) round-trips each + * translation back to a constraint type and checks polarity, direction, + * numerics, and proper-noun preservation. 
+ * + * Validation failures are fed back to the translator on retry, mirroring + * {@link rewriteClues} and {@link generateTheme}. Up to 3 attempts. + * + * Single-model validation has correlated blind spots — for best rigor pass + * a `validator` client backed by a different model than `client`. + * + * Note: the package retries on *validation* failures only. Transport-level + * retries (429s, 5xx, network errors) are handled inside the Anthropic SDK + * with exponential backoff and don't consume one of the 3 attempts. + * + * @throws {TranslationError} If translation fails validation after all + * retry attempts. Inspect `error.errors` for the structured failures. + * @throws {Error} If `locale` is empty. + */ +export async function translate(options: TranslateOptions): Promise { + const { clues, locale } = options; + + if (!locale || locale.trim() === "") { + throw new Error("locale must be a non-empty string"); + } + + if (clues.length === 0) return []; + + const translator: AIClient = options.client ?? createAnthropicClient(); + const validator: AIClient = + options.validator ?? + options.client ?? + createAnthropicClient(undefined, { temperature: 0 }); + + const schema = buildSchema(clues.length); + + let lastErrors: TranslationValidationError[] | undefined; + + for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { + const prompt = buildPrompt( + options, + lastErrors?.map((e) => e.message), + ); + const raw = await translator.completeJSON( + prompt, + schema, + ); + + const structural = checkTranslationStructure(raw, clues.length); + if (structural.length > 0) { + lastErrors = structural; + continue; + } + + const semantic = await validateTranslation( + clues, + raw.clues, + locale, + validator, + ); + if (semantic.length === 0) { + return raw.clues.map((text, i) => ({ + constraint: clues[i].constraint, + text, + })); + } + + lastErrors = semantic; + } + + throw new TranslationError( + `Translation to ${locale} failed after ${MAX_RETRIES} attempts. 
Last errors:\n${lastErrors! + .map((e) => e.message) + .join("\n")}`, + lastErrors!, + ); +} diff --git a/packages/logic-grid-ai/src/types.ts b/packages/logic-grid-ai/src/types.ts index a2f3510..afa85ae 100644 --- a/packages/logic-grid-ai/src/types.ts +++ b/packages/logic-grid-ai/src/types.ts @@ -107,3 +107,55 @@ export interface RewriteCluesValidationError { /** 1-indexed clue position when the error is scoped to a single clue. */ clueIndex?: number; } + +/** Options for AI-powered clue translation. */ +export interface TranslateOptions { + /** + * Source clues. The `constraint` field is the ground truth that the + * validator compares against; `text` is shown to the translator as a + * stylistic hint but may have already drifted (e.g. via {@link rewriteClues}). + */ + clues: Clue[]; + /** + * Target locale. Free-form string passed verbatim into the prompt — both + * BCP-47 codes ("de-DE", "ja-JP") and plain language names ("German", + * "Japanese") work. Empty string is rejected. + */ + locale: string; + /** Translator client. Defaults to Anthropic SDK using ANTHROPIC_API_KEY. */ + client?: AIClient; + /** + * Validator client. Strongly recommended to pass a client backed by a + * different model than the translator — single-model validation has + * correlated blind spots. Defaults to `client` if omitted; if both are + * omitted, a separate Anthropic client with `temperature: 0` is created + * for deterministic verdicts. + */ + validator?: AIClient; +} + +/** + * Structured validation error for AI-translated clues. + * + * Codes split into two tiers: + * - Structural (cheap, deterministic): wrong count, non-string, empty, too long, duplicate. + * - Semantic (AI-driven): constraint type drift incl. polarity, direction flip on + * asymmetric comparators, numeric / unit drift, proper-noun drop. 
+ */ +export type TranslationValidationCode = + | "wrong_clue_count" + | "non_string_clue" + | "empty_translation" + | "long_translation" + | "duplicate_translation" + | "constraint_type_mismatch" + | "direction_flip" + | "numeric_changed" + | "proper_noun_dropped"; + +export interface TranslationValidationError { + code: TranslationValidationCode; + message: string; + /** 1-indexed clue position when the error is scoped to a single clue. */ + clueIndex?: number; +} From 46348650d691ac41d250bfbe3d7c255de5376783 Mon Sep 17 00:00:00 2001 From: Anton Stefer <59652072+antonstefer@users.noreply.github.com> Date: Wed, 29 Apr 2026 17:34:37 +0200 Subject: [PATCH 02/25] feat(demo): wire AI translation into the demo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add POST /api/translate endpoint mirroring /api/rewrite-clues — input validation, MissingEnvError → 503 with code: missing_api_key, generic 500 fallback. Add a translateClues(locale) method on the puzzle state that fetches the endpoint and replaces puzzle.clues in place. Surface a small locale input + Translate button in +page.svelte, disabled while loading or when the locale field is empty. Endpoint tests dispatch translator vs validator calls by prompt substring against the shared completeJSON mock, since the demo wires a single getAnthropicClient for both roles. 
--- packages/demo/src/lib/puzzle-state.svelte.ts | 42 ++++ packages/demo/src/routes/+page.svelte | 39 ++++ .../demo/src/routes/api/translate/+server.ts | 58 ++++++ .../src/routes/api/translate/server.test.ts | 185 ++++++++++++++++++ 4 files changed, 324 insertions(+) create mode 100644 packages/demo/src/routes/api/translate/+server.ts create mode 100644 packages/demo/src/routes/api/translate/server.test.ts diff --git a/packages/demo/src/lib/puzzle-state.svelte.ts b/packages/demo/src/lib/puzzle-state.svelte.ts index 3c89090..b4473bc 100644 --- a/packages/demo/src/lib/puzzle-state.svelte.ts +++ b/packages/demo/src/lib/puzzle-state.svelte.ts @@ -428,6 +428,47 @@ export function createPuzzleState() { message = null; } + function translateClues(locale: string) { + if (!puzzle) throw new Error("No active puzzle"); + loading = true; + loadingMessage = "Translating clues…"; + message = null; + + setTimeout(() => { + void (async () => { + try { + const current = puzzle; + if (!current) return; + const res = await fetch("/api/translate", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ clues: current.clues, locale }), + }); + if (!res.ok) { + let errorMsg = "Translation failed"; + try { + const body = (await res.json()) as { error: string }; + if (body.error) errorMsg = body.error; + } catch { + // non-JSON response (e.g. HTML error page) + } + throw new Error(errorMsg); + } + const body = (await res.json()) as { clues: typeof current.clues }; + puzzle = { ...current, clues: body.clues }; + } catch (e) { + message = { + text: e instanceof Error ? 
e.message : String(e), + type: "error", + }; + } finally { + loading = false; + loadingMessage = "Generating…"; + } + })(); + }, 0); + } + return { get puzzle() { return puzzle; @@ -456,5 +497,6 @@ export function createPuzzleState() { nudge, hint, revealCell, + translateClues, }; } diff --git a/packages/demo/src/routes/+page.svelte b/packages/demo/src/routes/+page.svelte index 0f71c76..de7550d 100644 --- a/packages/demo/src/routes/+page.svelte +++ b/packages/demo/src/routes/+page.svelte @@ -189,6 +189,13 @@ let theme = $state(""); let clueStyle = $state(""); let preset = $state("none"); + let translateLocale = $state(""); + + function handleTranslate() { + const locale = translateLocale.trim(); + if (!locale) return; + puzzleState.translateClues(locale); + } function handleNewPuzzle() { const p = presets[preset]; @@ -331,6 +338,22 @@ +
+ + +
+ {#if puzzleState.message}
{ + let clues: unknown, locale: unknown; + try { + ({ clues, locale } = await request.json()); + } catch { + return json({ error: "Invalid JSON" }, { status: 400 }); + } + + if ( + !Array.isArray(clues) || + clues.length === 0 || + !clues.every( + (c: unknown) => + typeof c === "object" && + c !== null && + "text" in c && + typeof (c as Record).text === "string" && + "constraint" in c && + typeof (c as Record).constraint === "object", + ) + ) { + return json({ error: "Invalid clues" }, { status: 400 }); + } + if (typeof locale !== "string" || !locale.trim() || locale.length > 100) { + return json({ error: "Invalid locale" }, { status: 400 }); + } + + try { + const client = getAnthropicClient(); + const result = await translate({ + clues: clues as Clue[], + locale, + client, + }); + return json({ clues: result }); + } catch (e) { + if (e instanceof MissingEnvError) { + console.error(`${e.variable} is not configured`); + return json( + { + error: + "AI translation is unavailable: the server is missing required configuration.", + code: "missing_api_key", + }, + { status: 503 }, + ); + } + console.error("Translation failed:", e); + return json({ error: "Translation failed" }, { status: 500 }); + } +}; diff --git a/packages/demo/src/routes/api/translate/server.test.ts b/packages/demo/src/routes/api/translate/server.test.ts new file mode 100644 index 0000000..336754a --- /dev/null +++ b/packages/demo/src/routes/api/translate/server.test.ts @@ -0,0 +1,185 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { POST } from "./+server"; +import { createAnthropicClient } from "logic-grid-ai"; +import { _resetAnthropicClientCache } from "$lib/server/anthropic"; + +const { envProxy, completeJSON } = vi.hoisted(() => ({ + envProxy: {} as { ANTHROPIC_API_KEY?: string }, + completeJSON: vi.fn(), +})); + +vi.mock("$env/dynamic/private", () => ({ + env: envProxy, +})); + +vi.mock("logic-grid-ai", async (importOriginal) => { + const orig = await 
importOriginal(); + return { + ...orig, + createAnthropicClient: vi.fn(() => ({ completeJSON })), + }; +}); + +type Handler = (event: { request: Request }) => Promise; +const post = POST as unknown as Handler; + +beforeEach(() => { + delete envProxy.ANTHROPIC_API_KEY; + completeJSON.mockReset(); + _resetAnthropicClientCache(); + vi.spyOn(console, "error").mockImplementation(() => {}); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +function postBody(body: unknown): Request { + return new Request("http://test/api/translate", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); +} + +const SAMPLE_CLUES = [ + { + constraint: { type: "same_position", a: "Alice", b: "Cat" }, + text: "Alice owns the cat.", + }, + { + constraint: { type: "next_to", a: "Bob", b: "Dog", axis: "House" }, + text: "Bob lives next to the dog owner.", + }, +]; + +interface ClueVerdict { + index: number; + constraintType: string; + directionOk: boolean; + numericOk: boolean; + properNounsOk: boolean; +} + +function allOkVerdict(): { clues: ClueVerdict[] } { + return { + clues: SAMPLE_CLUES.map((c, i) => ({ + index: i + 1, + constraintType: c.constraint.type, + directionOk: true, + numericOk: true, + properNounsOk: true, + })), + }; +} + +/** + * Wire the shared completeJSON mock to dispatch translator vs validator calls + * based on prompt substring. Demo's getAnthropicClient supplies one client for + * both roles, so we differentiate at the prompt level. 
+ */ +function dispatchByPrompt( + translatorPayload: unknown, + validatorPayload: unknown, +): void { + completeJSON.mockImplementation((prompt: string) => { + if (prompt.includes("reviewing a translation")) { + return Promise.resolve(validatorPayload); + } + return Promise.resolve(translatorPayload); + }); +} + +describe("POST /api/translate", () => { + it("returns 503 with code missing_api_key when ANTHROPIC_API_KEY is missing", async () => { + const res = await post({ + request: postBody({ clues: SAMPLE_CLUES, locale: "German" }), + }); + + expect(res.status).toBe(503); + const body = (await res.json()) as { error: string; code: string }; + expect(body.code).toBe("missing_api_key"); + expect(body.error).not.toContain("ANTHROPIC_API_KEY"); + expect(body.error.toLowerCase()).toContain("unavailable"); + }); + + it("returns 200 with translated clues on success", async () => { + envProxy.ANTHROPIC_API_KEY = "sk-test"; + const translations = { + clues: ["Alice besitzt die Katze.", "Bob wohnt neben dem Hundebesitzer."], + }; + dispatchByPrompt(translations, allOkVerdict()); + + const res = await post({ + request: postBody({ clues: SAMPLE_CLUES, locale: "German" }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { clues: { text: string }[] }; + expect(body.clues).toHaveLength(2); + expect(body.clues[0].text).toBe("Alice besitzt die Katze."); + expect(body.clues[1].text).toBe("Bob wohnt neben dem Hundebesitzer."); + // The env key actually flowed through to the Anthropic client factory. 
+ expect(vi.mocked(createAnthropicClient)).toHaveBeenCalledWith("sk-test"); + }); + + it("returns 400 on invalid JSON", async () => { + const req = new Request("http://test/api/translate", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: "not json", + }); + const res = await post({ request: req }); + expect(res.status).toBe(400); + }); + + it("returns 400 on empty clue list", async () => { + const res = await post({ + request: postBody({ clues: [], locale: "German" }), + }); + expect(res.status).toBe(400); + }); + + it("returns 400 on missing locale", async () => { + const res = await post({ request: postBody({ clues: SAMPLE_CLUES }) }); + expect(res.status).toBe(400); + }); + + it("returns 400 on empty locale string", async () => { + const res = await post({ + request: postBody({ clues: SAMPLE_CLUES, locale: " " }), + }); + expect(res.status).toBe(400); + }); + + it("returns 400 on overlong locale string", async () => { + const res = await post({ + request: postBody({ clues: SAMPLE_CLUES, locale: "x".repeat(101) }), + }); + expect(res.status).toBe(400); + }); + + it("returns 400 on malformed clue items", async () => { + const res = await post({ + request: postBody({ + clues: [{ text: "no constraint" }], + locale: "German", + }), + }); + expect(res.status).toBe(400); + }); + + it("returns generic 500 when translation throws a non-MissingEnvError", async () => { + envProxy.ANTHROPIC_API_KEY = "sk-test"; + completeJSON.mockRejectedValue(new Error("upstream blew up")); + + const res = await post({ + request: postBody({ clues: SAMPLE_CLUES, locale: "German" }), + }); + + expect(res.status).toBe(500); + const body = (await res.json()) as { error: string }; + expect(body.error).toBe("Translation failed"); + expect(body.error).not.toContain("upstream"); + }); +}); From 8db86470fdb8389ebc7cda53d3bd1e781237b651 Mon Sep 17 00:00:00 2001 From: Anton Stefer <59652072+antonstefer@users.noreply.github.com> Date: Thu, 30 Apr 2026 09:22:09 +0200 
Subject: [PATCH 03/25] feat(logic-grid-ai): translate category names and value labels alongside clues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `translate` now takes the whole `Puzzle` instead of a `Clue[]`, and returns a `TranslatedPuzzle` carrying three maps: localized clue text (as before), `categoryNames` keyed by canonical category name, and `valueLabels` keyed by canonical value. The original `puzzle.constraints` and `puzzle.grid` are passed through unchanged so the engine continues to operate on canonical English keys; renderers compose the maps over the canonical grid for display. The translator prompt asks the model to produce all three surfaces in one batched call. Proper nouns and numeric/literal values map to themselves verbatim (Alice → Alice, 1972 → 1972); descriptive words translate, with grammatical inflection in clue text expected. Structural pre-checks now also enforce that every canonical category and every canonical value has a non-empty entry in the maps. New error codes: `missing_category_name`, `empty_category_name`, `missing_value_label`, `empty_value_label`. Semantic checks (constraint type round-trip, direction, numeric, proper-noun preservation) remain on the clue surface where most of the risk lives. Adds `TranslatedPuzzle` to the public types. The `temperature` knob on `AnthropicClientOptions` and the validator/translator-fallback shape from the previous commit are reused unchanged. 
--- packages/logic-grid-ai/README.md | 43 +- packages/logic-grid-ai/src/index.ts | 1 + .../src/translate-validation.test.ts | 384 +++++++++++------ .../logic-grid-ai/src/translate-validation.ts | 143 +++++-- packages/logic-grid-ai/src/translate.test.ts | 399 ++++++++++-------- packages/logic-grid-ai/src/translate.ts | 119 ++++-- packages/logic-grid-ai/src/types.ts | 52 ++- 7 files changed, 739 insertions(+), 402 deletions(-) diff --git a/packages/logic-grid-ai/README.md b/packages/logic-grid-ai/README.md index 38ab54b..8d910f9 100644 --- a/packages/logic-grid-ai/README.md +++ b/packages/logic-grid-ai/README.md @@ -160,7 +160,7 @@ const errors = validateRewrittenClues({ clues: ["..."] }, puzzle.clues.length); ### `translate(options)` -Translate puzzle clues to a target locale using AI. Intended for **ahead-of-time (AOT)** puzzle pipelines that produce localized corpora once and serve them statically — quality is the constraint, not latency. The package engine stays English-only; this is a post-processing layer. +Translate every visible string of a logic-grid puzzle to a target locale using AI: clue text, category names, and category value labels. Intended for **ahead-of-time (AOT)** puzzle pipelines that produce localized corpora once and serve them statically — quality is the constraint, not latency. The package engine stays English-only; this is a post-processing layer that returns localization maps the renderer composes with the canonical puzzle. ```typescript import { translate } from "logic-grid-ai"; @@ -168,21 +168,26 @@ import { generate } from "logic-grid"; const puzzle = generate({ size: 4, categories: 4, seed: 42 }); const localized = await translate({ - clues: puzzle.clues, + puzzle, locale: "German", // also accepts BCP-47 like "de-DE" }); -// Returns Clue[] with the original constraints preserved and `text` -// rendered in German. +// localized = { +// clues: [{ constraint, text: "Bob wohnt genau 2 Häuser vom gelben Haus entfernt." 
}, ...], +// categoryNames: { "House": "Haus", "Color": "Farbe", ... }, +// valueLabels: { "Yellow": "Gelb", "Cat": "Katze", "Alice": "Alice", ... }, +// } ``` +The original `puzzle.constraints` and `puzzle.grid` are passed through unchanged — the engine continues to operate on canonical English keys. Renderers compose `categoryNames` / `valueLabels` over the canonical grid to display localized headers, falling back to the canonical names for keys without an entry. + The function runs a two-stage AI flow: -1. **Translator** produces one localized clue per source clue in a single batched call. The constraint JSON is shown alongside each English clue as ground truth — if the source `text` is ambiguous or has drifted (e.g. via `rewriteClues`), the constraint defines the meaning. -2. **Validator** round-trips each translation back to a constraint type and checks polarity, direction, numeric/unit preservation, and proper-noun preservation. Failures are fed back to the translator on retry (up to 3 attempts). +1. **Translator** produces all three surfaces (localized clue text, category names, value labels) in a single batched call. The constraint JSON is shown alongside each English clue as ground truth — if the source clue text is ambiguous or has drifted (e.g. via `rewriteClues`), the constraint defines the meaning. +2. **Validator** round-trips each translated clue back to a constraint type and checks polarity, direction, numeric/unit preservation, and proper-noun preservation in the clue text. Failures are fed back to the translator on retry (up to 3 attempts). Completeness of `categoryNames` and `valueLabels` is enforced structurally. 
```typescript const localized = await translate({ - clues: puzzle.clues, + puzzle, locale: "ja-JP", client: createAnthropicClient(undefined, { model: "claude-sonnet-4-6" }), validator: createAnthropicClient(undefined, { @@ -194,13 +199,31 @@ const localized = await translate({ > **Validator best practice.** Single-model validation has correlated blind spots — the validator's mistakes overlap with the translator's. For production AOT pipelines, pass a `validator` client backed by a _different model_ than the translator. When both `client` and `validator` are omitted, the package creates two default Anthropic clients with `validator` at `temperature: 0` for deterministic verdicts. -If validation fails on every attempt, `translate` throws a `TranslationError` carrying structured `errors` with stable codes (`constraint_type_mismatch`, `direction_flip`, `numeric_changed`, `proper_noun_dropped`, plus the structural codes `wrong_clue_count`, `non_string_clue`, `empty_translation`, `long_translation`, `duplicate_translation`): +> **Proper nouns stay verbatim.** People names, place names, brand names, and numeric/unit literals (`1972`, `8%`, `7am`) map to themselves in `valueLabels` and remain unchanged in clue text. Descriptive words (colors, animals, common-noun categories) translate, with grammatical inflection in clue text expected (`yellow` → bare label `gelb`, inflected forms `gelben` / `gelbe` are correct in clue context). 
+ +If validation fails on every attempt, `translate` throws a `TranslationError` carrying structured `errors` with stable codes: + +| Code | Surface | Meaning | +| -------------------------- | -------------- | --------------------------------------------------------------------- | +| `wrong_clue_count` | clues | AI returned a different number of clues than the source | +| `non_string_clue` | clues | A clue entry is not a string | +| `empty_translation` | clues | A clue is empty or whitespace-only | +| `long_translation` | clues | A clue exceeds the per-clue length budget | +| `duplicate_translation` | clues | Two clues are identical (case-insensitive) | +| `missing_category_name` | categoryNames | A canonical category from the source has no entry in `categoryNames` | +| `empty_category_name` | categoryNames | A `categoryNames` entry is empty or non-string | +| `missing_value_label` | valueLabels | A canonical value from the source has no entry in `valueLabels` | +| `empty_value_label` | valueLabels | A `valueLabels` entry is empty or non-string | +| `constraint_type_mismatch` | clue semantics | Validator round-trip parsed the translation as a different constraint | +| `direction_flip` | clue semantics | `before` / `left_of` subject/object reversed | +| `numeric_changed` | clue semantics | Numbers or units in a clue differ from the source | +| `proper_noun_dropped` | clue semantics | A proper noun in a clue was changed | ```typescript import { translate, TranslationError } from "logic-grid-ai"; try { - const localized = await translate({ clues, locale: "German" }); + const localized = await translate({ puzzle, locale: "German" }); } catch (err) { if (err instanceof TranslationError) { if (err.errors.some((e) => e.code === "direction_flip")) { @@ -211,8 +234,6 @@ try { } ``` -Constraints are passed through verbatim — translation only changes the `text` field, so the puzzle remains solvable from the original constraints regardless of how the localized text reads. 
- ### `createAnthropicClient(apiKey?, options?)` temperature option `AnthropicClientOptions` accepts an optional `temperature` (default `0.8`). Use `0` for deterministic responses — typically the right default for validator clients in `translate()`: diff --git a/packages/logic-grid-ai/src/index.ts b/packages/logic-grid-ai/src/index.ts index 22bcdad..3df734b 100644 --- a/packages/logic-grid-ai/src/index.ts +++ b/packages/logic-grid-ai/src/index.ts @@ -15,6 +15,7 @@ export type { RewriteCluesOptions, RewriteCluesResult, TranslateOptions, + TranslatedPuzzle, AIClient, JSONSchema, ThemeValidationCode, diff --git a/packages/logic-grid-ai/src/translate-validation.test.ts b/packages/logic-grid-ai/src/translate-validation.test.ts index 2c9a2e7..8c88586 100644 --- a/packages/logic-grid-ai/src/translate-validation.test.ts +++ b/packages/logic-grid-ai/src/translate-validation.test.ts @@ -5,28 +5,116 @@ import { } from "./translate-validation"; import { hasCode } from "./test-utils"; import type { AIClient } from "./types"; -import type { Clue } from "logic-grid"; +import type { Puzzle } from "logic-grid"; -const SAMPLE_CLUES: Clue[] = [ - { - constraint: { type: "same_position", a: "Alice", b: "Coffee" }, - text: "Alice drinks coffee.", - }, - { - constraint: { type: "before", a: "Alice", b: "Bob", axis: "Year" }, - text: "Alice started before Bob.", +const SAMPLE_PUZZLE: Puzzle = { + grid: { + size: 3, + categories: [ + { + name: "House", + values: ["1", "2", "3"], + noun: "house", + verb: ["lives in the", "does not live in the"], + ordered: true, + orderingPhrases: { + unit: ["house", "houses"], + comparators: { + before: ["lives left of", "lives right of"], + left_of: ["lives directly left of", "lives directly right of"], + next_to: "lives next to", + not_next_to: "does not live next to", + between: "lives between", + not_between: "does not live between", + exact_distance: "lives exactly", + }, + }, + }, + { + name: "Name", + values: ["Alice", "Bob", "Carol"], + noun: "", 
+ subjectPriority: 2, + }, + { + name: "Color", + values: ["Red", "Blue", "Green"], + noun: "house", + valueSuffix: "house", + lowercase: true, + positionAdjective: ["is", "is not"], + subjectPriority: -1, + }, + ], }, - { - constraint: { + constraints: [ + { type: "same_position", a: "Alice", b: "Red" }, + { type: "before", a: "Carol", b: "Bob", axis: "House" }, + { type: "not_between", - outer1: "A", - middle: "B", - outer2: "C", - axis: "Year", + outer1: "Alice", + middle: "Bob", + outer2: "Carol", + axis: "House", }, - text: "B is not between A and C.", - }, -]; + ], + clues: [ + { + constraint: { type: "same_position", a: "Alice", b: "Red" }, + text: "Alice lives in the red house.", + }, + { + constraint: { type: "before", a: "Carol", b: "Bob", axis: "House" }, + text: "Carol lives left of Bob.", + }, + { + constraint: { + type: "not_between", + outer1: "Alice", + middle: "Bob", + outer2: "Carol", + axis: "House", + }, + text: "Bob does not live between Alice and Carol.", + }, + ], + solution: [ + { "1": 0, "2": 1, "3": 2 }, + { Alice: 0, Bob: 2, Carol: 1 }, + { Red: 0, Blue: 2, Green: 1 }, + ], + difficulty: "easy", +}; + +const VALID_VALUE_LABELS = { + "1": "1", + "2": "2", + "3": "3", + Alice: "Alice", + Bob: "Bob", + Carol: "Carol", + Red: "Rot", + Blue: "Blau", + Green: "Grün", +}; + +const VALID_CATEGORY_NAMES = { + House: "Haus", + Name: "Name", + Color: "Farbe", +}; + +function validRaw(): { + clues: unknown[]; + categoryNames: Record; + valueLabels: Record; +} { + return { + clues: ["a", "b", "c"], + categoryNames: { ...VALID_CATEGORY_NAMES }, + valueLabels: { ...VALID_VALUE_LABELS }, + }; +} interface ClueVerdict { index: number; @@ -36,9 +124,9 @@ interface ClueVerdict { properNounsOk: boolean; } -function allOk(clues: Clue[]): { clues: ClueVerdict[] } { +function allOk(): { clues: ClueVerdict[] } { return { - clues: clues.map((c, i) => ({ + clues: SAMPLE_PUZZLE.clues.map((c, i) => ({ index: i + 1, constraintType: c.constraint.type, directionOk: 
true, @@ -56,34 +144,33 @@ function mockValidator(verdicts: { clues: ClueVerdict[] }): AIClient { describe("checkTranslationStructure", () => { it("accepts valid output", () => { - const result = { clues: ["one", "two", "three"] }; - expect(checkTranslationStructure(result, 3)).toEqual([]); + expect(checkTranslationStructure(validRaw(), SAMPLE_PUZZLE)).toEqual([]); }); it("rejects wrong clue count", () => { - const errors = checkTranslationStructure({ clues: ["one", "two"] }, 3); + const raw = validRaw(); + raw.clues = ["one", "two"]; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); expect(hasCode(errors, "wrong_clue_count")).toBe(true); expect( errors.find((e) => e.code === "wrong_clue_count")?.message, ).toContain("Expected 3 clues, got 2"); }); - it("rejects empty translation", () => { - const errors = checkTranslationStructure( - { clues: ["", "two", "three"] }, - 3, - ); + it("rejects empty clue text", () => { + const raw = validRaw(); + raw.clues = ["", "two", "three"]; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); expect(hasCode(errors, "empty_translation")).toBe(true); expect(errors.find((e) => e.code === "empty_translation")?.clueIndex).toBe( 1, ); }); - it("rejects whitespace-only translation", () => { - const errors = checkTranslationStructure( - { clues: ["one", " ", "three"] }, - 3, - ); + it("rejects whitespace-only clue text", () => { + const raw = validRaw(); + raw.clues = ["one", " ", "three"]; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); expect(hasCode(errors, "empty_translation")).toBe(true); expect(errors.find((e) => e.code === "empty_translation")?.clueIndex).toBe( 2, @@ -91,10 +178,9 @@ describe("checkTranslationStructure", () => { }); it("rejects translation exceeding max length", () => { - const errors = checkTranslationStructure( - { clues: ["one", "A".repeat(501), "three"] }, - 3, - ); + const raw = validRaw(); + raw.clues = ["one", "A".repeat(501), "three"]; + const errors = 
checkTranslationStructure(raw, SAMPLE_PUZZLE); expect(hasCode(errors, "long_translation")).toBe(true); expect(errors.find((e) => e.code === "long_translation")?.clueIndex).toBe( 2, @@ -102,27 +188,86 @@ describe("checkTranslationStructure", () => { }); it("rejects duplicate translation (case-insensitive)", () => { - const errors = checkTranslationStructure( - { clues: ["Alice trinkt Kaffee.", "two", "alice trinkt kaffee."] }, - 3, - ); + const raw = validRaw(); + raw.clues = ["Alice trinkt Kaffee.", "two", "alice trinkt kaffee."]; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); expect(hasCode(errors, "duplicate_translation")).toBe(true); expect( errors.find((e) => e.code === "duplicate_translation")?.clueIndex, ).toBe(3); }); - it("rejects non-string item", () => { - const errors = checkTranslationStructure( - { clues: ["one", 42, "three"] }, - 3, - ); + it("rejects non-string clue item", () => { + const raw = validRaw(); + raw.clues = ["one", 42, "three"]; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); expect(hasCode(errors, "non_string_clue")).toBe(true); expect(errors.find((e) => e.code === "non_string_clue")?.clueIndex).toBe(2); }); + it("rejects missing categoryNames key", () => { + const raw = validRaw(); + delete raw.categoryNames.Color; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); + expect(hasCode(errors, "missing_category_name")).toBe(true); + expect(errors.find((e) => e.code === "missing_category_name")?.key).toBe( + "Color", + ); + }); + + it("rejects empty categoryNames value", () => { + const raw = validRaw(); + raw.categoryNames.Color = ""; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); + expect(hasCode(errors, "empty_category_name")).toBe(true); + expect(errors.find((e) => e.code === "empty_category_name")?.key).toBe( + "Color", + ); + }); + + it("rejects whitespace-only categoryNames value", () => { + const raw = validRaw(); + raw.categoryNames.Color = " "; + const errors = 
checkTranslationStructure(raw, SAMPLE_PUZZLE); + expect(hasCode(errors, "empty_category_name")).toBe(true); + }); + + it("rejects non-string categoryNames value", () => { + const raw = validRaw(); + raw.categoryNames.Color = 42; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); + expect(hasCode(errors, "empty_category_name")).toBe(true); + }); + + it("rejects missing valueLabels key", () => { + const raw = validRaw(); + delete raw.valueLabels.Carol; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); + expect(hasCode(errors, "missing_value_label")).toBe(true); + expect(errors.find((e) => e.code === "missing_value_label")?.key).toBe( + "Carol", + ); + }); + + it("rejects empty valueLabels value", () => { + const raw = validRaw(); + raw.valueLabels.Red = ""; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); + expect(hasCode(errors, "empty_value_label")).toBe(true); + expect(errors.find((e) => e.code === "empty_value_label")?.key).toBe("Red"); + }); + + it("rejects non-string valueLabels value", () => { + const raw = validRaw(); + raw.valueLabels.Red = 42; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); + expect(hasCode(errors, "empty_value_label")).toBe(true); + }); + it("omits clueIndex on count-level errors", () => { - const errors = checkTranslationStructure({ clues: ["one"] }, 3); + const raw = validRaw(); + raw.clues = ["only one"]; + const errors = checkTranslationStructure(raw, SAMPLE_PUZZLE); const e = errors.find((x) => x.code === "wrong_clue_count"); expect(e).toBeDefined(); expect("clueIndex" in (e as object)).toBe(false); @@ -132,15 +277,16 @@ describe("checkTranslationStructure", () => { describe("validateTranslation", () => { it("returns empty array when validator reports all-OK", async () => { const errors = await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["a", "b", "c"] }, "German", - mockValidator(allOk(SAMPLE_CLUES)), + mockValidator(allOk()), ); 
expect(errors).toEqual([]); }); - it("returns empty array on empty input without calling validator", async () => { + it("returns empty array on empty clues without calling validator", async () => { + const emptyPuzzle: Puzzle = { ...SAMPLE_PUZZLE, clues: [] }; let called = false; const validator: AIClient = { completeJSON: () => { @@ -149,19 +295,24 @@ describe("validateTranslation", () => { }, }; - const errors = await validateTranslation([], [], "German", validator); + const errors = await validateTranslation( + emptyPuzzle, + { clues: [] }, + "German", + validator, + ); expect(errors).toEqual([]); expect(called).toBe(false); }); it("emits constraint_type_mismatch when verdict type differs from source", async () => { - const verdicts = allOk(SAMPLE_CLUES); + const verdicts = allOk(); verdicts.clues[0].constraintType = "wrong_type"; const errors = await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["a", "b", "c"] }, "German", mockValidator(verdicts), ); @@ -173,14 +324,14 @@ describe("validateTranslation", () => { }); it("emits direction_flip only for asymmetric constraints", async () => { - const verdicts = allOk(SAMPLE_CLUES); - // Flip on same_position (symmetric, should be ignored) and before (asymmetric) + const verdicts = allOk(); + // Flip on same_position (symmetric, ignored) and before (asymmetric, emitted) verdicts.clues[0].directionOk = false; // same_position — ignored verdicts.clues[1].directionOk = false; // before — emitted const errors = await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["a", "b", "c"] }, "German", mockValidator(verdicts), ); @@ -191,9 +342,15 @@ describe("validateTranslation", () => { }); it("emits direction_flip on left_of as well as before", async () => { - const leftOfClue: Clue = { - constraint: { type: "left_of", a: "X", b: "Y", axis: "Year" }, - text: "X is directly before Y.", + const leftOfPuzzle: Puzzle = { + ...SAMPLE_PUZZLE, + constraints: [{ 
type: "left_of", a: "X", b: "Y", axis: "House" }], + clues: [ + { + constraint: { type: "left_of", a: "X", b: "Y", axis: "House" }, + text: "X is directly before Y.", + }, + ], }; const verdicts = { clues: [ @@ -208,8 +365,8 @@ describe("validateTranslation", () => { }; const errors = await validateTranslation( - [leftOfClue], - ["..."], + leftOfPuzzle, + { clues: ["..."] }, "German", mockValidator(verdicts), ); @@ -218,12 +375,12 @@ describe("validateTranslation", () => { }); it("emits numeric_changed when numericOk is false", async () => { - const verdicts = allOk(SAMPLE_CLUES); + const verdicts = allOk(); verdicts.clues[2].numericOk = false; const errors = await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["a", "b", "c"] }, "German", mockValidator(verdicts), ); @@ -233,12 +390,12 @@ describe("validateTranslation", () => { }); it("emits proper_noun_dropped when properNounsOk is false", async () => { - const verdicts = allOk(SAMPLE_CLUES); + const verdicts = allOk(); verdicts.clues[0].properNounsOk = false; const errors = await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["a", "b", "c"] }, "German", mockValidator(verdicts), ); @@ -250,15 +407,15 @@ describe("validateTranslation", () => { }); it("aggregates multiple errors per clue", async () => { - const verdicts = allOk(SAMPLE_CLUES); + const verdicts = allOk(); verdicts.clues[1].constraintType = "wrong"; verdicts.clues[1].directionOk = false; verdicts.clues[1].numericOk = false; verdicts.clues[1].properNounsOk = false; const errors = await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["a", "b", "c"] }, "German", mockValidator(verdicts), ); @@ -267,43 +424,25 @@ describe("validateTranslation", () => { expect(clue2Errors).toHaveLength(4); }); - it("includes locale name in the validator prompt", async () => { + it("includes locale and source/translation pairs in the validator prompt", async () => { 
let capturedPrompt = ""; const validator: AIClient = { completeJSON: (prompt: string) => { capturedPrompt = prompt; - return Promise.resolve(allOk(SAMPLE_CLUES) as T); + return Promise.resolve(allOk() as T); }, }; await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["Alice trinkt Kaffee.", "b", "c"] }, "Japanese", validator, ); expect(capturedPrompt).toContain("Japanese"); - expect(capturedPrompt).toContain("reviewing a translation"); - }); - - it("includes both source and translation in validator prompt", async () => { - let capturedPrompt = ""; - const validator: AIClient = { - completeJSON: (prompt: string) => { - capturedPrompt = prompt; - return Promise.resolve(allOk(SAMPLE_CLUES) as T); - }, - }; - - await validateTranslation( - SAMPLE_CLUES, - ["Alice trinkt Kaffee.", "b", "c"], - "German", - validator, - ); - - expect(capturedPrompt).toContain("Alice drinks coffee."); + expect(capturedPrompt).toContain("reviewing translated clues"); + expect(capturedPrompt).toContain("Alice lives in the red house."); expect(capturedPrompt).toContain("Alice trinkt Kaffee."); expect(capturedPrompt).toContain('"type":"same_position"'); }); @@ -313,13 +452,13 @@ describe("validateTranslation", () => { const validator: AIClient = { completeJSON: () => { callCount++; - return Promise.resolve(allOk(SAMPLE_CLUES) as T); + return Promise.resolve(allOk() as T); }, }; await validateTranslation( - SAMPLE_CLUES, - ["a", "b", "c"], + SAMPLE_PUZZLE, + { clues: ["a", "b", "c"] }, "German", validator, ); @@ -328,36 +467,43 @@ describe("validateTranslation", () => { }); it("does not flag direction on symmetric constraints when directionOk is false", async () => { - const symClues: Clue[] = [ - { - constraint: { type: "next_to", a: "X", b: "Y", axis: "Year" }, - text: "X is next to Y.", - }, - { - constraint: { - type: "exact_distance", - a: "X", - b: "Y", - distance: 2, - axis: "Year", + const symPuzzle: Puzzle = { + ...SAMPLE_PUZZLE, + constraints: [ + 
{ type: "next_to", a: "X", b: "Y", axis: "House" }, + { type: "exact_distance", a: "X", b: "Y", distance: 2, axis: "House" }, + ], + clues: [ + { + constraint: { type: "next_to", a: "X", b: "Y", axis: "House" }, + text: "X is next to Y.", }, - text: "X is exactly 2 from Y.", - }, - ]; + { + constraint: { + type: "exact_distance", + a: "X", + b: "Y", + distance: 2, + axis: "House", + }, + text: "X is exactly 2 from Y.", + }, + ], + }; const verdicts = { - clues: symClues.map((c, i) => ({ + clues: symPuzzle.clues.map((c, i) => ({ index: i + 1, constraintType: c.constraint.type, - directionOk: false, // validator's verdict on symmetric — should be ignored + directionOk: false, // verdict is false on symmetric — should be ignored numericOk: true, properNounsOk: true, })), }; const errors = await validateTranslation( - symClues, - ["a", "b"], + symPuzzle, + { clues: ["a", "b"] }, "German", mockValidator(verdicts), ); diff --git a/packages/logic-grid-ai/src/translate-validation.ts b/packages/logic-grid-ai/src/translate-validation.ts index 7ba1b22..f6df785 100644 --- a/packages/logic-grid-ai/src/translate-validation.ts +++ b/packages/logic-grid-ai/src/translate-validation.ts @@ -1,4 +1,4 @@ -import type { Clue, ConstraintType } from "logic-grid"; +import type { Clue, ConstraintType, Puzzle } from "logic-grid"; import type { AIClient, JSONSchema, @@ -7,25 +7,23 @@ import type { } from "./types"; /** - * AI-driven semantic validator for translated clues. + * AI-driven semantic validator for translated puzzles, plus a sync + * structural pre-check. * * NOT exported from the package. Internal to the {@link translate} retry loop. * - * The validator round-trips each translation back to a constraint type and - * checks four properties per clue: + * The semantic validator round-trips each translated clue back to a + * constraint type and checks four properties per clue: * 1. Constraint type round-trip (with polarity baked in: `not_between` is a * distinct value from `between`). 
* 2. Direction (only for `before` / `left_of`): does the translation's * subject/object order match the source constraint's `a`/`b` fields? - * 3. Numeric and unit preservation. - * 4. Proper-noun preservation. + * 3. Numeric and unit preservation in the clue text. + * 4. Proper-noun preservation in the clue text. * - * All checks are evaluated by a single AI call against a structured schema — - * the verdicts are typed booleans + an enum, not free-text reasoning. Failures - * are mapped to {@link TranslationValidationError} with stable codes. - * - * Caller is responsible for picking a validator client distinct from the - * translator (or accepting correlated blind spots if the same client is used). + * The structural pre-check covers clue counts, empties, duplicates, and + * the completeness of `categoryNames` / `valueLabels` (every canonical key + * from the source puzzle must appear with a non-empty translation). */ const CONSTRAINT_TYPES: ConstraintType[] = [ @@ -54,49 +52,64 @@ interface ValidatorResult { clues: ClueVerdict[]; } +interface RawTranslation { + clues: unknown[]; + categoryNames: Record; + valueLabels: Record; +} + function err( code: TranslationValidationCode, message: string, - clueIndex?: number, + opts: { clueIndex?: number; key?: string } = {}, ): TranslationValidationError { - return clueIndex !== undefined - ? { code, message, clueIndex } - : { code, message }; + const e: TranslationValidationError = { code, message }; + if (opts.clueIndex !== undefined) e.clueIndex = opts.clueIndex; + if (opts.key !== undefined) e.key = opts.key; + return e; } /** * Cheap, deterministic structural check on the raw translator output. * Run before the AI validator to reject obvious failures without burning - * an LLM call. Mirrors {@link validateRewrittenClues}'s shape. + * an LLM call. 
*/ export function checkTranslationStructure( - result: { clues: unknown[] }, - expectedCount: number, + raw: RawTranslation, + puzzle: Puzzle, ): TranslationValidationError[] { const errors: TranslationValidationError[] = []; + const expectedClueCount = puzzle.clues.length; - if (result.clues.length !== expectedCount) { + // --- Clues --- + if (raw.clues.length !== expectedClueCount) { errors.push( err( "wrong_clue_count", - `Expected ${expectedCount} clues, got ${result.clues.length}.`, + `Expected ${expectedClueCount} clues, got ${raw.clues.length}.`, ), ); } const seen = new Set(); - for (let i = 0; i < result.clues.length; i++) { - const text = result.clues[i]; + for (let i = 0; i < raw.clues.length; i++) { + const text = raw.clues[i]; const pos = i + 1; if (typeof text !== "string") { - errors.push(err("non_string_clue", `Clue ${pos} is not a string.`, pos)); + errors.push( + err("non_string_clue", `Clue ${pos} is not a string.`, { + clueIndex: pos, + }), + ); continue; } if (!text || text.trim() === "") { - errors.push(err("empty_translation", `Clue ${pos} is empty.`, pos)); + errors.push( + err("empty_translation", `Clue ${pos} is empty.`, { clueIndex: pos }), + ); continue; } @@ -105,7 +118,7 @@ export function checkTranslationStructure( err( "long_translation", `Clue ${pos} is too long (${text.length} chars, max 500).`, - pos, + { clueIndex: pos }, ), ); } @@ -116,13 +129,63 @@ export function checkTranslationStructure( err( "duplicate_translation", `Clue ${pos} is a duplicate of an earlier clue.`, - pos, + { clueIndex: pos }, ), ); } seen.add(lower); } + // --- Category names --- + for (const cat of puzzle.grid.categories) { + const localized = raw.categoryNames[cat.name]; + if (localized === undefined) { + errors.push( + err( + "missing_category_name", + `Category "${cat.name}" has no localized name in categoryNames.`, + { key: cat.name }, + ), + ); + continue; + } + if (typeof localized !== "string" || localized.trim() === "") { + errors.push( + err( + 
"empty_category_name", + `Localized name for category "${cat.name}" is empty.`, + { key: cat.name }, + ), + ); + } + } + + // --- Value labels --- + for (const cat of puzzle.grid.categories) { + for (const value of cat.values) { + const localized = raw.valueLabels[value]; + if (localized === undefined) { + errors.push( + err( + "missing_value_label", + `Value "${value}" has no localized label in valueLabels.`, + { key: value }, + ), + ); + continue; + } + if (typeof localized !== "string" || localized.trim() === "") { + errors.push( + err( + "empty_value_label", + `Localized label for value "${value}" is empty.`, + { key: value }, + ), + ); + } + } + } + return errors; } @@ -158,7 +221,7 @@ function buildSchema(clueCount: number): JSONSchema { properNounsOk: { type: "boolean", description: - "All proper nouns and category-value names from the source are preserved verbatim.", + "All proper nouns and category-value names from the source are preserved verbatim in the clue text (inflection of descriptive words is fine).", }, }, required: [ @@ -182,7 +245,7 @@ function buildPrompt( translated: string[], locale: string, ): string { - let prompt = `You are reviewing a translation of logic-puzzle clues from English to ${locale}. + let prompt = `You are reviewing translated clues for a logic-grid puzzle (English → ${locale}). For each clue, parse the ${locale} sentence back to a constraint and verify: @@ -203,9 +266,10 @@ For each clue, parse the ${locale} sentence back to a constraint and verify: 3. numericOk: are all numbers and units from the source constraint preserved exactly in the ${locale} text? -4. properNounsOk: are all proper nouns and category-value names from the - source preserved verbatim (Alice stays Alice; "Black River fund" stays - "Black River fund")? +4. properNounsOk: are all proper nouns from the source preserved verbatim + in the ${locale} clue text? Names of people, places, brands, ships, and + numeric/literal values must NOT be translated. 
Inflection of descriptive + words (colors, animals, common nouns) is FINE — that's not a violation. Be calibrated — accept fluent translations that preserve meaning even if phrased differently. Only flag GENUINE semantic drift, not stylistic @@ -221,15 +285,16 @@ variation. } export async function validateTranslation( - sourceClues: Clue[], - translated: string[], + puzzle: Puzzle, + raw: { clues: string[] }, locale: string, validator: AIClient, ): Promise { + const sourceClues = puzzle.clues; if (sourceClues.length === 0) return []; const schema = buildSchema(sourceClues.length); - const prompt = buildPrompt(sourceClues, translated, locale); + const prompt = buildPrompt(sourceClues, raw.clues, locale); const result = await validator.completeJSON(prompt, schema); const errors: TranslationValidationError[] = []; @@ -244,7 +309,7 @@ export async function validateTranslation( err( "constraint_type_mismatch", `Clue ${pos}: translation expresses '${verdict.constraintType}' but source constraint is '${source.constraint.type}'.`, - pos, + { clueIndex: pos }, ), ); } @@ -254,7 +319,7 @@ export async function validateTranslation( err( "direction_flip", `Clue ${pos}: subject/object order is reversed for ${source.constraint.type}.`, - pos, + { clueIndex: pos }, ), ); } @@ -264,7 +329,7 @@ export async function validateTranslation( err( "numeric_changed", `Clue ${pos}: numbers or units differ from the source constraint.`, - pos, + { clueIndex: pos }, ), ); } @@ -274,7 +339,7 @@ export async function validateTranslation( err( "proper_noun_dropped", `Clue ${pos}: a proper noun or value name was changed.`, - pos, + { clueIndex: pos }, ), ); } diff --git a/packages/logic-grid-ai/src/translate.test.ts b/packages/logic-grid-ai/src/translate.test.ts index 61137ce..9eeb170 100644 --- a/packages/logic-grid-ai/src/translate.test.ts +++ b/packages/logic-grid-ai/src/translate.test.ts @@ -2,29 +2,108 @@ import { describe, it, expect, vi } from "vitest"; import { generate, deduce } from 
"logic-grid"; import { translate, TranslationError } from "./translate"; import type { AIClient } from "./types"; -import type { Clue } from "logic-grid"; +import type { Puzzle } from "logic-grid"; import * as clientModule from "./client"; -const SAMPLE_CLUES: Clue[] = [ - { - constraint: { type: "same_position", a: "Alice", b: "Coffee" }, - text: "Alice drinks coffee.", - }, - { - constraint: { type: "next_to", a: "Cat", b: "Red", axis: "House" }, - text: "The cat lives next to the red house.", - }, - { - constraint: { type: "before", a: "Alice", b: "Bob", axis: "Year" }, - text: "Alice started before Bob.", +// A small but representative fixture covering same_position, next_to, and +// before (asymmetric direction-sensitive). Built by hand instead of via +// generate() so individual clue/value text is stable across vitest runs. +const SAMPLE_PUZZLE: Puzzle = { + grid: { + size: 3, + categories: [ + { + name: "House", + values: ["1", "2", "3"], + noun: "house", + verb: ["lives in the", "does not live in the"], + ordered: true, + orderingPhrases: { + unit: ["house", "houses"], + comparators: { + before: ["lives left of", "lives right of"], + left_of: ["lives directly left of", "lives directly right of"], + next_to: "lives next to", + not_next_to: "does not live next to", + between: "lives between", + not_between: "does not live between", + exact_distance: "lives exactly", + }, + }, + }, + { + name: "Name", + values: ["Alice", "Bob", "Carol"], + noun: "", + subjectPriority: 2, + }, + { + name: "Color", + values: ["Red", "Blue", "Green"], + noun: "house", + valueSuffix: "house", + lowercase: true, + positionAdjective: ["is", "is not"], + subjectPriority: -1, + }, + ], }, + constraints: [ + { type: "same_position", a: "Alice", b: "Red" }, + { type: "next_to", a: "Bob", b: "Green", axis: "House" }, + { type: "before", a: "Carol", b: "Bob", axis: "House" }, + ], + clues: [ + { + constraint: { type: "same_position", a: "Alice", b: "Red" }, + text: "Alice lives in the red 
house.", + }, + { + constraint: { type: "next_to", a: "Bob", b: "Green", axis: "House" }, + text: "Bob lives next to the green house.", + }, + { + constraint: { type: "before", a: "Carol", b: "Bob", axis: "House" }, + text: "Carol lives left of Bob.", + }, + ], + solution: [ + { "1": 0, "2": 1, "3": 2 }, + { Alice: 0, Bob: 2, Carol: 1 }, + { Red: 0, Blue: 2, Green: 1 }, + ], + difficulty: "easy", +}; + +const VALID_CLUE_TEXT = [ + "Alice wohnt im roten Haus.", + "Bob wohnt neben dem grünen Haus.", + "Carol wohnt links von Bob.", ]; -const VALID_TRANSLATIONS = [ - "Alice trinkt Kaffee.", - "Die Katze wohnt neben dem roten Haus.", - "Alice hat vor Bob angefangen.", -]; +const VALID_CATEGORY_NAMES = { + House: "Haus", + Name: "Name", + Color: "Farbe", +}; + +const VALID_VALUE_LABELS = { + "1": "1", + "2": "2", + "3": "3", + Alice: "Alice", + Bob: "Bob", + Carol: "Carol", + Red: "Rot", + Blue: "Blau", + Green: "Grün", +}; + +const VALID_TRANSLATION = { + clues: VALID_CLUE_TEXT, + categoryNames: VALID_CATEGORY_NAMES, + valueLabels: VALID_VALUE_LABELS, +}; interface ClueVerdict { index: number; @@ -34,9 +113,9 @@ interface ClueVerdict { properNounsOk: boolean; } -function allOkVerdict(clues: Clue[]): { clues: ClueVerdict[] } { +function allOkVerdict(): { clues: ClueVerdict[] } { return { - clues: clues.map((c, i) => ({ + clues: SAMPLE_PUZZLE.clues.map((c, i) => ({ index: i + 1, constraintType: c.constraint.type, directionOk: true, @@ -47,8 +126,9 @@ function allOkVerdict(clues: Clue[]): { clues: ClueVerdict[] } { } /** - * Two-client mock: distinguishes translator from validator calls by prompt - * substring. Returns whichever payload the caller supplied for that role. + * Single-client mock that dispatches translator vs validator calls by + * prompt substring. Translator and validator share completeJSON when + * the demo / consumer doesn't pass a separate validator. 
*/ function mockSingleClient( translatorResult: unknown, @@ -56,7 +136,7 @@ function mockSingleClient( ): AIClient { return { completeJSON: (prompt: string): Promise => { - if (prompt.includes("reviewing a translation")) { + if (prompt.includes("reviewing translated clues")) { return Promise.resolve(validatorResult as T); } return Promise.resolve(translatorResult as T); @@ -65,34 +145,30 @@ function mockSingleClient( } describe("translate", () => { - it("returns translated clues from a mock client", async () => { + it("returns translated puzzle with localized clues, category names, and value labels", async () => { const result = await translate({ - clues: SAMPLE_CLUES, + puzzle: SAMPLE_PUZZLE, locale: "German", - client: mockSingleClient( - { clues: VALID_TRANSLATIONS }, - allOkVerdict(SAMPLE_CLUES), - ), + client: mockSingleClient(VALID_TRANSLATION, allOkVerdict()), }); - expect(result).toHaveLength(3); - expect(result[0].text).toBe(VALID_TRANSLATIONS[0]); - expect(result[1].text).toBe(VALID_TRANSLATIONS[1]); - expect(result[2].text).toBe(VALID_TRANSLATIONS[2]); + expect(result.clues).toHaveLength(3); + expect(result.clues[0].text).toBe(VALID_CLUE_TEXT[0]); + expect(result.categoryNames).toEqual(VALID_CATEGORY_NAMES); + expect(result.valueLabels).toEqual(VALID_VALUE_LABELS); }); it("preserves original constraints in translated clues", async () => { const result = await translate({ - clues: SAMPLE_CLUES, + puzzle: SAMPLE_PUZZLE, locale: "German", - client: mockSingleClient( - { clues: VALID_TRANSLATIONS }, - allOkVerdict(SAMPLE_CLUES), - ), + client: mockSingleClient(VALID_TRANSLATION, allOkVerdict()), }); - for (let i = 0; i < SAMPLE_CLUES.length; i++) { - expect(result[i].constraint).toBe(SAMPLE_CLUES[i].constraint); + for (let i = 0; i < SAMPLE_PUZZLE.clues.length; i++) { + expect(result.clues[i].constraint).toBe( + SAMPLE_PUZZLE.clues[i].constraint, + ); } }); @@ -100,58 +176,43 @@ describe("translate", () => { const spy = vi .spyOn(clientModule, 
"createAnthropicClient") .mockImplementation(() => - mockSingleClient( - { clues: VALID_TRANSLATIONS }, - allOkVerdict(SAMPLE_CLUES), - ), + mockSingleClient(VALID_TRANSLATION, allOkVerdict()), ); const result = await translate({ - clues: SAMPLE_CLUES, + puzzle: SAMPLE_PUZZLE, locale: "German", }); // One call for translator (no client), one for validator (temperature: 0). expect(spy).toHaveBeenCalledTimes(2); expect(spy).toHaveBeenCalledWith(undefined, { temperature: 0 }); - expect(result).toHaveLength(3); + expect(result.clues).toHaveLength(3); spy.mockRestore(); }); - it("includes locale name in the translator prompt", async () => { + it("includes locale and category list in the translator prompt", async () => { const prompts: string[] = []; const client: AIClient = { completeJSON: (prompt: string) => { prompts.push(prompt); - if (prompt.includes("reviewing a translation")) { - return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + if (prompt.includes("reviewing translated clues")) { + return Promise.resolve(allOkVerdict() as T); } - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return Promise.resolve(VALID_TRANSLATION as T); }, }; - await translate({ clues: SAMPLE_CLUES, locale: "Japanese", client }); + await translate({ puzzle: SAMPLE_PUZZLE, locale: "Japanese", client }); expect(prompts[0]).toContain("Japanese"); - }); - - it("includes constraint JSON in the translator prompt", async () => { - let translatorPrompt = ""; - const client: AIClient = { - completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { - return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); - } - translatorPrompt = prompt; - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); - }, - }; - - await translate({ clues: SAMPLE_CLUES, locale: "German", client }); - - expect(translatorPrompt).toContain('"type":"same_position"'); - expect(translatorPrompt).toContain('"type":"next_to"'); - 
expect(translatorPrompt).toContain('"type":"before"'); + // Category list is included for the translator's reference + expect(prompts[0]).toContain("House:"); + expect(prompts[0]).toContain("Color:"); + // Constraint JSON for ground truth + expect(prompts[0]).toContain('"type":"same_position"'); + expect(prompts[0]).toContain('"type":"next_to"'); + expect(prompts[0]).toContain('"type":"before"'); }); it("uses separate client and validator when both are provided", async () => { @@ -161,18 +222,18 @@ describe("translate", () => { const client: AIClient = { completeJSON: (prompt: string) => { translatorCalls.push(prompt); - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return Promise.resolve(VALID_TRANSLATION as T); }, }; const validator: AIClient = { completeJSON: (prompt: string) => { validatorCalls.push(prompt); - return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + return Promise.resolve(allOkVerdict() as T); }, }; await translate({ - clues: SAMPLE_CLUES, + puzzle: SAMPLE_PUZZLE, locale: "German", client, validator, @@ -180,8 +241,8 @@ describe("translate", () => { expect(translatorCalls).toHaveLength(1); expect(validatorCalls).toHaveLength(1); - expect(translatorCalls[0]).toContain("translating logic-puzzle clues"); - expect(validatorCalls[0]).toContain("reviewing a translation"); + expect(translatorCalls[0]).toContain("translating a logic-grid puzzle"); + expect(validatorCalls[0]).toContain("reviewing translated clues"); }); it("falls back validator to client when validator is omitted", async () => { @@ -189,102 +250,105 @@ describe("translate", () => { const client: AIClient = { completeJSON: (prompt: string) => { calls.push(prompt); - if (prompt.includes("reviewing a translation")) { - return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + if (prompt.includes("reviewing translated clues")) { + return Promise.resolve(allOkVerdict() as T); } - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return 
Promise.resolve(VALID_TRANSLATION as T); }, }; - await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + await translate({ puzzle: SAMPLE_PUZZLE, locale: "German", client }); expect(calls).toHaveLength(2); - expect(calls[0]).toContain("translating logic-puzzle clues"); - expect(calls[1]).toContain("reviewing a translation"); + expect(calls[0]).toContain("translating a logic-grid puzzle"); + expect(calls[1]).toContain("reviewing translated clues"); }); - it("retries on structural failure", async () => { + it("retries on structural failure (missing valueLabels key)", async () => { let translatorCalls = 0; const client: AIClient = { completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { - return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + if (prompt.includes("reviewing translated clues")) { + return Promise.resolve(allOkVerdict() as T); } translatorCalls++; if (translatorCalls < 3) { + // Drop one valueLabels entry to fail structural check + const { Carol: _carol, ...partial } = VALID_VALUE_LABELS; + void _carol; return Promise.resolve({ - clues: ["only one"], + ...VALID_TRANSLATION, + valueLabels: partial, } as T); } - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return Promise.resolve(VALID_TRANSLATION as T); }, }; const result = await translate({ - clues: SAMPLE_CLUES, + puzzle: SAMPLE_PUZZLE, locale: "German", client, }); expect(translatorCalls).toBe(3); - expect(result[0].text).toBe(VALID_TRANSLATIONS[0]); + expect(result.valueLabels).toEqual(VALID_VALUE_LABELS); }); it("retries on semantic failure (constraint type mismatch)", async () => { let translatorCalls = 0; const client: AIClient = { completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { + if (prompt.includes("reviewing translated clues")) { if (translatorCalls < 2) { - // First attempt: validator says constraint type drifted return Promise.resolve({ - clues: SAMPLE_CLUES.map((_, i) => ({ + clues: 
SAMPLE_PUZZLE.clues.map((_, i) => ({ index: i + 1, - constraintType: i === 1 ? "next_to" : "near", // drift on non-clue-2 entries + constraintType: i === 1 ? "next_to" : "near", directionOk: true, numericOk: true, properNounsOk: true, })), } as T); } - return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + return Promise.resolve(allOkVerdict() as T); } translatorCalls++; - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return Promise.resolve(VALID_TRANSLATION as T); }, }; const result = await translate({ - clues: SAMPLE_CLUES, + puzzle: SAMPLE_PUZZLE, locale: "German", client, }); expect(translatorCalls).toBe(2); - expect(result[0].text).toBe(VALID_TRANSLATIONS[0]); + expect(result.clues[0].text).toBe(VALID_CLUE_TEXT[0]); }); it("detects direction-flip on `before` clues", async () => { let caught: unknown; const client: AIClient = { completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { + if (prompt.includes("reviewing translated clues")) { return Promise.resolve({ - clues: SAMPLE_CLUES.map((c, i) => ({ + clues: SAMPLE_PUZZLE.clues.map((c, i) => ({ index: i + 1, constraintType: c.constraint.type, - directionOk: c.constraint.type !== "before", // flip on `before` clue + directionOk: c.constraint.type !== "before", // flip on `before` numericOk: true, properNounsOk: true, })), } as T); } - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return Promise.resolve(VALID_TRANSLATION as T); }, }; try { - await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + await translate({ puzzle: SAMPLE_PUZZLE, locale: "German", client }); } catch (e) { caught = e; } @@ -294,59 +358,12 @@ describe("translate", () => { expect(err.errors.some((e) => e.code === "direction_flip")).toBe(true); }); - it("detects polarity drop (not_between -> between)", async () => { - const polarityClues: Clue[] = [ - { - constraint: { - type: "not_between", - outer1: "A", - middle: "B", - outer2: "C", - axis: "Year", - }, - 
text: "B is not between A and C.", - }, - ]; - - let caught: unknown; - const client: AIClient = { - completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { - return Promise.resolve({ - clues: [ - { - index: 1, - constraintType: "between", // negation dropped - directionOk: true, - numericOk: true, - properNounsOk: true, - }, - ], - } as T); - } - return Promise.resolve({ clues: ["B ist zwischen A und C."] } as T); - }, - }; - - try { - await translate({ clues: polarityClues, locale: "German", client }); - } catch (e) { - caught = e; - } - - expect(caught).toBeInstanceOf(TranslationError); - const err = caught as TranslationError; - expect(err.errors.some((e) => e.code === "constraint_type_mismatch")).toBe( - true, - ); - }); - it("throws TranslationError with structured errors after max retries", async () => { const client: AIClient = { completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { + if (prompt.includes("reviewing translated clues")) { return Promise.resolve({ - clues: SAMPLE_CLUES.map((_, i) => ({ + clues: SAMPLE_PUZZLE.clues.map((_, i) => ({ index: i + 1, constraintType: "wrong_type", directionOk: true, @@ -355,13 +372,13 @@ describe("translate", () => { })), } as T); } - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return Promise.resolve(VALID_TRANSLATION as T); }, }; let caught: unknown; try { - await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + await translate({ puzzle: SAMPLE_PUZZLE, locale: "German", client }); } catch (e) { caught = e; } @@ -379,34 +396,19 @@ describe("translate", () => { }; await expect( - translate({ clues: SAMPLE_CLUES, locale: "German", client }), + translate({ puzzle: SAMPLE_PUZZLE, locale: "German", client }), ).rejects.toThrow("Network error"); }); - it("returns empty array for empty clues input", async () => { - let called = false; - const client: AIClient = { - completeJSON: () => { - called = true; - return 
Promise.resolve({ clues: [] } as T); - }, - }; - - const result = await translate({ clues: [], locale: "German", client }); - - expect(result).toEqual([]); - expect(called).toBe(false); - }); - it("throws on empty locale", async () => { await expect( - translate({ clues: SAMPLE_CLUES, locale: "" }), + translate({ puzzle: SAMPLE_PUZZLE, locale: "" }), ).rejects.toThrow("locale must be a non-empty string"); }); it("throws on whitespace-only locale", async () => { await expect( - translate({ clues: SAMPLE_CLUES, locale: " " }), + translate({ puzzle: SAMPLE_PUZZLE, locale: " " }), ).rejects.toThrow("locale must be a non-empty string"); }); @@ -415,27 +417,27 @@ describe("translate", () => { let translatorCalls = 0; const client: AIClient = { completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { + if (prompt.includes("reviewing translated clues")) { if (translatorCalls < 2) { return Promise.resolve({ - clues: SAMPLE_CLUES.map((c, i) => ({ + clues: SAMPLE_PUZZLE.clues.map((c, i) => ({ index: i + 1, constraintType: c.constraint.type, directionOk: true, - numericOk: i !== 0, // numeric drift on clue 1 + numericOk: i !== 0, properNounsOk: true, })), } as T); } - return Promise.resolve(allOkVerdict(SAMPLE_CLUES) as T); + return Promise.resolve(allOkVerdict() as T); } translatorPrompts.push(prompt); translatorCalls++; - return Promise.resolve({ clues: VALID_TRANSLATIONS } as T); + return Promise.resolve(VALID_TRANSLATION as T); }, }; - await translate({ clues: SAMPLE_CLUES, locale: "German", client }); + await translate({ puzzle: SAMPLE_PUZZLE, locale: "German", client }); expect(translatorPrompts.length).toBeGreaterThanOrEqual(2); expect(translatorPrompts[1]).toContain("Previous attempt had errors"); @@ -445,32 +447,57 @@ describe("translate", () => { it("result integrates with generate() and deduce()", async () => { const puzzle = generate({ size: 4, categories: 4, seed: 42 }); - const translations = puzzle.clues.map( + const 
translatedClues = puzzle.clues.map( (_, i) => `Klue auf Deutsch Nummer ${i + 1}.`, ); + const categoryNames: Record = {}; + for (const cat of puzzle.grid.categories) { + categoryNames[cat.name] = `[${cat.name}]`; + } + const valueLabels: Record = {}; + for (const cat of puzzle.grid.categories) { + for (const v of cat.values) { + valueLabels[v] = `[${v}]`; + } + } + + const verdicts = { + clues: puzzle.clues.map((c, i) => ({ + index: i + 1, + constraintType: c.constraint.type, + directionOk: true, + numericOk: true, + properNounsOk: true, + })), + }; const client: AIClient = { completeJSON: (prompt: string) => { - if (prompt.includes("reviewing a translation")) { - return Promise.resolve(allOkVerdict(puzzle.clues) as T); + if (prompt.includes("reviewing translated clues")) { + return Promise.resolve(verdicts as T); } - return Promise.resolve({ clues: translations } as T); + return Promise.resolve({ + clues: translatedClues, + categoryNames, + valueLabels, + } as T); }, }; const result = await translate({ - clues: puzzle.clues, + puzzle, locale: "German", client, }); - expect(result).toHaveLength(puzzle.clues.length); - for (let i = 0; i < result.length; i++) { - expect(result[i].constraint).toBe(puzzle.clues[i].constraint); - expect(result[i].text).toBe(translations[i]); + expect(result.clues).toHaveLength(puzzle.clues.length); + for (let i = 0; i < result.clues.length; i++) { + expect(result.clues[i].constraint).toBe(puzzle.clues[i].constraint); + expect(result.clues[i].text).toBe(translatedClues[i]); } - const translatedPuzzle = { ...puzzle, clues: result }; + // Constraints unchanged → puzzle still solvable from canonical state. 
+ const translatedPuzzle = { ...puzzle, clues: result.clues }; const deduction = deduce( translatedPuzzle.constraints, translatedPuzzle.grid, diff --git a/packages/logic-grid-ai/src/translate.ts b/packages/logic-grid-ai/src/translate.ts index f103cfe..f6fbcb5 100644 --- a/packages/logic-grid-ai/src/translate.ts +++ b/packages/logic-grid-ai/src/translate.ts @@ -1,10 +1,10 @@ import type { TranslateOptions, + TranslatedPuzzle, AIClient, JSONSchema, TranslationValidationError, } from "./types"; -import type { Clue } from "logic-grid"; import { createAnthropicClient } from "./client"; import { checkTranslationStructure, @@ -29,6 +29,8 @@ export class TranslationError extends Error { interface TranslateRawResult { clues: string[]; + categoryNames: Record; + valueLabels: Record; } function buildSchema(clueCount: number): JSONSchema { @@ -41,10 +43,20 @@ function buildSchema(clueCount: number): JSONSchema { minItems: clueCount, maxItems: clueCount, description: - "Translated clue texts, one per source clue, in the same order", + "Translated clue texts, one per source clue, in the same order.", + }, + categoryNames: { + type: "object", + description: + "Map from each canonical category name (English) to its localized display name. Every category from the source puzzle must appear as a key.", + }, + valueLabels: { + type: "object", + description: + "Map from each canonical category value (English) to its localized label. Every value from every category must appear as a key. Proper nouns (people, places, brands) map to themselves verbatim. Numeric/literal values (like '1972' or '8%') stay as the literal string.", }, }, - required: ["clues"], + required: ["clues", "categoryNames", "valueLabels"], }; } @@ -52,13 +64,33 @@ function buildPrompt( options: TranslateOptions, previousErrors?: string[], ): string { - const { clues, locale } = options; - - let prompt = `You are translating logic-puzzle clues from English to ${locale}. 
+ const { puzzle, locale } = options; + const { grid, clues } = puzzle; + + const categoryList = grid.categories + .map( + (c) => + `- ${c.name}: [${c.values.map((v) => `"${v}"`).join(", ")}]${ + c.noun !== undefined && c.noun !== "" + ? ` (noun phrase in clues: "${c.noun}")` + : "" + }`, + ) + .join("\n"); + + let prompt = `You are translating a logic-grid puzzle from English to ${locale}. GROUND TRUTH: For each clue, the JSON constraint defines the meaning. The -English text is a stylistic reference — if it disagrees with the constraint, -follow the constraint. +English clue text is a stylistic reference — if it disagrees with the +constraint, follow the constraint. + +You must produce three things: + +A. Localized clue text, one per source clue, in order. +B. \`categoryNames\`: a map from each canonical category name to its localized + display name. ALL category names listed below must appear as keys. +C. \`valueLabels\`: a map from each canonical category value to its localized + label. ALL values listed below must appear as keys. ## Translation rules @@ -71,10 +103,22 @@ follow the constraint. - Negative constraints (\`not_*\`) MUST preserve the negation. 2. Preserve directional asymmetry. For \`before\` and \`left_of\`, the subject is \`a\` and the object is \`b\` — do not swap them. -3. Preserve all proper nouns and category-value names verbatim - (Alice stays Alice; "Black River fund" stays "Black River fund"). -4. Preserve numeric values and units exactly. -5. Output one clue per source clue, in the same order. +3. **Proper nouns and literal values stay verbatim** in BOTH the clue text + AND \`valueLabels\`: + - People names (Alice, Bob, Carol). + - Place names, brand names, ship names, fund names. + - Numeric or unit literals like "1972", "8%", "7am". + In \`valueLabels\`, these map to themselves: \`{ "Alice": "Alice" }\`. +4. **Descriptive words and adjectives translate** in both surfaces. Color + names, animal names, common-noun categories. 
Inflections in clue text + are expected (e.g. "yellow" → "gelb" in the bare label, "gelben" / + "gelbe" in the inflected clue text — both correct). +5. Category names ARE descriptive — translate them too unless they're + already a proper noun. + +## Categories + +${categoryList} ## Source clues`; @@ -90,21 +134,23 @@ follow the constraint. } /** - * Translate puzzle clues to a target locale using AI. + * Translate a logic-grid puzzle to a target locale using AI. * * The package engine is English-only by design. This function is a * post-processing layer for ahead-of-time (AOT) puzzle pipelines that need - * localized output: generate puzzles in English, then translate the rendered - * clues here. The underlying constraints are passed through verbatim — only - * the surface text changes. + * localized output: generate puzzles in English, then translate the visible + * surfaces — clue text, category names, and value labels — here. The + * underlying constraints and the canonical `puzzle.grid` are passed through + * verbatim; only the rendered text changes. * * Two-stage AI flow: - * 1. The translator produces a localized clue per source clue, in one - * batched call. The constraint JSON is shown alongside each English - * clue as ground truth. + * 1. The translator produces localized clues + category-name map + value- + * label map in one batched call. The constraint JSON is shown alongside + * each English clue as ground truth. * 2. A validator (separately configurable client) round-trips each - * translation back to a constraint type and checks polarity, direction, - * numerics, and proper-noun preservation. + * translated clue back to a constraint type and checks polarity, + * direction, numerics, and proper-noun preservation across all three + * output surfaces. * * Validation failures are fed back to the translator on retry, mirroring * {@link rewriteClues} and {@link generateTheme}. Up to 3 attempts. @@ -120,22 +166,22 @@ follow the constraint. 
* retry attempts. Inspect `error.errors` for the structured failures. * @throws {Error} If `locale` is empty. */ -export async function translate(options: TranslateOptions): Promise { - const { clues, locale } = options; +export async function translate( + options: TranslateOptions, +): Promise { + const { puzzle, locale } = options; if (!locale || locale.trim() === "") { throw new Error("locale must be a non-empty string"); } - if (clues.length === 0) return []; - const translator: AIClient = options.client ?? createAnthropicClient(); const validator: AIClient = options.validator ?? options.client ?? createAnthropicClient(undefined, { temperature: 0 }); - const schema = buildSchema(clues.length); + const schema = buildSchema(puzzle.clues.length); let lastErrors: TranslationValidationError[] | undefined; @@ -149,23 +195,22 @@ export async function translate(options: TranslateOptions): Promise { schema, ); - const structural = checkTranslationStructure(raw, clues.length); + const structural = checkTranslationStructure(raw, puzzle); if (structural.length > 0) { lastErrors = structural; continue; } - const semantic = await validateTranslation( - clues, - raw.clues, - locale, - validator, - ); + const semantic = await validateTranslation(puzzle, raw, locale, validator); if (semantic.length === 0) { - return raw.clues.map((text, i) => ({ - constraint: clues[i].constraint, - text, - })); + return { + clues: raw.clues.map((text, i) => ({ + constraint: puzzle.clues[i].constraint, + text, + })), + categoryNames: raw.categoryNames, + valueLabels: raw.valueLabels, + }; } lastErrors = semantic; diff --git a/packages/logic-grid-ai/src/types.ts b/packages/logic-grid-ai/src/types.ts index afa85ae..8b1c9ff 100644 --- a/packages/logic-grid-ai/src/types.ts +++ b/packages/logic-grid-ai/src/types.ts @@ -1,4 +1,4 @@ -import type { Category, Clue } from "logic-grid"; +import type { Category, Clue, Puzzle } from "logic-grid"; /** Options for AI-powered theme generation. 
*/ export interface ThemeOptions { @@ -108,14 +108,15 @@ export interface RewriteCluesValidationError { clueIndex?: number; } -/** Options for AI-powered clue translation. */ +/** Options for AI-powered puzzle translation. */ export interface TranslateOptions { /** - * Source clues. The `constraint` field is the ground truth that the - * validator compares against; `text` is shown to the translator as a - * stylistic hint but may have already drifted (e.g. via {@link rewriteClues}). + * Source puzzle. The `constraints` and `grid.categories` are the ground + * truth that validation compares against; rendered clue `text` is shown + * to the translator as a stylistic hint but may have already drifted + * (e.g. via {@link rewriteClues}). */ - clues: Clue[]; + puzzle: Puzzle; /** * Target locale. Free-form string passed verbatim into the prompt — both * BCP-47 codes ("de-DE", "ja-JP") and plain language names ("German", @@ -135,12 +136,37 @@ export interface TranslateOptions { } /** - * Structured validation error for AI-translated clues. + * Result of translating a puzzle. + * + * Constraints and the canonical `grid` are NOT modified — the engine + * continues to operate on the original English keys. The renderer composes + * the original puzzle with these maps to display localized strings. + */ +export interface TranslatedPuzzle { + /** Localized clue text, in the same order as `puzzle.clues`. */ + clues: Clue[]; + /** + * Map from canonical category name → localized display name. + * E.g. `{ "House": "Haus", "Color": "Farbe" }`. + */ + categoryNames: Record; + /** + * Map from canonical value (across all categories) → localized label. + * Values are globally unique in a logic-grid puzzle, so a flat map is + * unambiguous. Proper nouns map to themselves verbatim. + * E.g. `{ "Yellow": "Gelb", "Cat": "Katze", "Alice": "Alice" }`. + */ + valueLabels: Record; +} + +/** + * Structured validation error for AI-translated puzzles. 
* * Codes split into two tiers: - * - Structural (cheap, deterministic): wrong count, non-string, empty, too long, duplicate. - * - Semantic (AI-driven): constraint type drift incl. polarity, direction flip on - * asymmetric comparators, numeric / unit drift, proper-noun drop. + * - Structural (cheap, deterministic): wrong counts, non-strings, empties, + * over-length, duplicates, missing keys. + * - Semantic (AI-driven): constraint type drift incl. polarity, direction + * flip on asymmetric comparators, numeric / unit drift, proper-noun drop. */ export type TranslationValidationCode = | "wrong_clue_count" @@ -148,6 +174,10 @@ export type TranslationValidationCode = | "empty_translation" | "long_translation" | "duplicate_translation" + | "missing_category_name" + | "empty_category_name" + | "missing_value_label" + | "empty_value_label" | "constraint_type_mismatch" | "direction_flip" | "numeric_changed" @@ -158,4 +188,6 @@ export interface TranslationValidationError { message: string; /** 1-indexed clue position when the error is scoped to a single clue. */ clueIndex?: number; + /** Canonical category or value name when the error is scoped to one. */ + key?: string; } From 9e098cc6798b77a20d6b7f2479624136807bb59e Mon Sep 17 00:00:00 2001 From: Anton Stefer <59652072+antonstefer@users.noreply.github.com> Date: Thu, 30 Apr 2026 09:22:30 +0200 Subject: [PATCH 04/25] feat(demo): localize grid headers from translation maps The /api/translate endpoint now sends the full Puzzle and returns the TranslatedPuzzle shape (clues + categoryNames + valueLabels). The puzzle state stores the translation maps in a new `localization` field, cleared whenever a new puzzle is generated. PuzzleGrid takes the maps as an optional prop and falls back to canonical names per key, so partial localization still renders gracefully. Renames the state action from translateClues to translatePuzzle and the button label from "Translate clues" to "Translate puzzle" to reflect the broader scope. 
--- packages/demo/src/lib/PuzzleGrid.svelte | 35 ++- packages/demo/src/lib/puzzle-state.svelte.ts | 32 ++- packages/demo/src/routes/+page.svelte | 5 +- .../demo/src/routes/api/translate/+server.ts | 50 +++-- .../src/routes/api/translate/server.test.ts | 202 +++++++++++++----- 5 files changed, 235 insertions(+), 89 deletions(-) diff --git a/packages/demo/src/lib/PuzzleGrid.svelte b/packages/demo/src/lib/PuzzleGrid.svelte index bd8ed73..fe93d8d 100644 --- a/packages/demo/src/lib/PuzzleGrid.svelte +++ b/packages/demo/src/lib/PuzzleGrid.svelte @@ -1,15 +1,28 @@