From 43efb6bd95c62b0fa01b6cb7efe6a139266f2300 Mon Sep 17 00:00:00 2001 From: Reza Ilmi Date: Sun, 24 May 2026 20:00:30 +0800 Subject: [PATCH 1/5] fix(student-space): stabilize onboarding and voice capture Restore the signed-out onboarding entry, keep the current GLB companion path, and make capture voice sessions English-only with guarded Realtime replies. --- src/agents/mirror.prompt.md | 1 + src/agents/openai-realtime/mirror-payloads.ts | 31 ++++++ src/agents/openai-realtime/mirror-prompt.ts | 34 +++--- .../mirror-realtime-live.prompt.md | 2 + src/components/student-space/EngineHost.tsx | 26 ++++- .../student-space/capture/AskSheet.tsx | 103 ++++++++++------- .../student-space/hud/StudentSpaceHud.tsx | 6 +- .../student-space/onboarding/EggHatcher.tsx | 31 +++--- .../student-space/world/WorldInteractions.tsx | 2 +- src/components/ui/drawer.tsx | 5 +- .../student-space/Game/State/Profile.js | 9 +- src/engine/student-space/Game/View/Kira.d.ts | 7 +- src/engine/student-space/Game/View/Kira.js | 53 ++++----- .../Game/View/Onboarding/copy.js | 2 +- src/engine/student-space/Game/View/View.js | 8 +- .../student-space/realtime-mirror-client.ts | 104 +++++++++++++++--- src/routes/_app.tsx | 42 ++++++- test/agents/openai-realtime-mirror.test.ts | 7 +- .../student-space/EngineHost.test.tsx | 10 +- .../capture/capture-stack.test.tsx | 60 +++++++++- .../realtime-mirror-client.test.ts | 90 ++++++++++++++- 21 files changed, 481 insertions(+), 152 deletions(-) diff --git a/src/agents/mirror.prompt.md b/src/agents/mirror.prompt.md index 97053df..708a6b8 100644 --- a/src/agents/mirror.prompt.md +++ b/src/agents/mirror.prompt.md @@ -22,6 +22,7 @@ If you'd find prior reflections useful — for example, you suspect the student ## Hard constraints +- **English only.** Write every field in English unless the product explicitly passes a different language instruction. - **No diagnostic language.** Do not label the student's personality, ability, or identity. Describe what they did and what they said, never who they are. - **No advice.** Do not suggest what to do. That is not your job. - **No careers, no pathways.** That is Pathfinder's job. diff --git a/src/agents/openai-realtime/mirror-payloads.ts b/src/agents/openai-realtime/mirror-payloads.ts index 840d1ca..fe9c536 100644 --- a/src/agents/openai-realtime/mirror-payloads.ts +++ b/src/agents/openai-realtime/mirror-payloads.ts @@ -8,6 +8,8 @@ import LIVE_PROMPT_RAW from './mirror-realtime-live.prompt.md?raw' const MIRROR_JSON_SHAPE = '{"validation":"","inferred_meaning":"","story_reframe":""}' export const OPENAI_REALTIME_MIRROR_VOICE = 'marin' +export const OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE = 'en' +export const OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL = 'gpt-4o-mini-transcribe' const LIVE_INSTRUCTIONS = LIVE_PROMPT_RAW.trim() @@ -15,6 +17,34 @@ export function buildRealtimeMirrorLiveInstructions(): string { return LIVE_INSTRUCTIONS } +export function buildRealtimeMirrorLiveAudioInputConfig() { + return { + transcription: { + model: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, + language: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + }, + noise_reduction: { type: 'far_field' }, + turn_detection: { + type: 'server_vad', + create_response: false, + interrupt_response: true, + threshold: 0.5, + prefix_padding_ms: 700, + silence_duration_ms: 800, + }, + } as const +} + +export function buildRealtimeMirrorLiveResponseInstructions(): string { + return [ + buildRealtimeMirrorLiveInstructions(), + '', + 'The student has just finished one English voice turn.', + 'Reply in English only.', + 'Keep this spoken reply short and natural.', + ].join('\n') +} + export function buildRealtimeMirrorUserInput(transcript: string): string { return [ 'The student had this live voice session with the Companion while looking into the mirror scene.', @@ -42,6 +72,7 @@ export function buildRealtimeMirrorRepairInput(previousText: string): string { export function buildRealtimeMirrorResponseInstructions(): string { return [ 'Use the latest student transcript item in this conversation.', + 'Write every field in English.', 'Return ONLY a JSON object with validation, inferred_meaning, and story_reframe.', `The object must match this shape: ${MIRROR_JSON_SHAPE}.`, 'Do not ask a question. Do not give advice. Do not include Markdown.', diff --git a/src/agents/openai-realtime/mirror-prompt.ts b/src/agents/openai-realtime/mirror-prompt.ts index b141f01..a74bda6 100644 --- a/src/agents/openai-realtime/mirror-prompt.ts +++ b/src/agents/openai-realtime/mirror-prompt.ts @@ -4,15 +4,21 @@ import { fileURLToPath } from 'node:url' import type { RealtimeSessionCreateRequest } from 'openai/resources/realtime/realtime' import { OPENAI_REALTIME_MIRROR_DEFAULT_MODEL } from './config' import { + buildRealtimeMirrorLiveAudioInputConfig, buildRealtimeMirrorLiveInstructions, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, OPENAI_REALTIME_MIRROR_VOICE, } from './mirror-payloads' export { + buildRealtimeMirrorLiveAudioInputConfig, buildRealtimeMirrorLiveInstructions, buildRealtimeMirrorRepairInput, buildRealtimeMirrorResponseInstructions, buildRealtimeMirrorUserInput, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, OPENAI_REALTIME_MIRROR_VOICE, } from './mirror-payloads' @@ -32,6 +38,7 @@ export function buildRealtimeMirrorInstructions(): string { getMirrorSystemPrompt(), '', '## Realtime session rules', + '- Always write the final Mirror JSON fields in English.', '- The student is not in an interview. Do not ask questions.', '- For voice input, listen until the app sends the explicit stop/commit event.', '- Return text only.', @@ -61,22 +68,17 @@ export function buildRealtimeMirrorSessionConfig({ output_modalities: [mode === 'live_audio' ? 'audio' : 'text'], max_output_tokens: 1000, audio: { - input: { - transcription: { - model: 'gpt-4o-mini-transcribe', - language: 'en', - }, - noise_reduction: { type: 'near_field' }, - turn_detection: - mode === 'live_audio' - ? { - type: 'semantic_vad', - create_response: true, - interrupt_response: true, - eagerness: 'auto', - } - : null, - }, + input: + mode === 'live_audio' + ? buildRealtimeMirrorLiveAudioInputConfig() + : { + transcription: { + model: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, + language: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + }, + noise_reduction: { type: 'far_field' }, + turn_detection: null, + }, ...(mode === 'live_audio' ? { output: { voice } } : {}), }, tool_choice: 'none', diff --git a/src/agents/openai-realtime/mirror-realtime-live.prompt.md b/src/agents/openai-realtime/mirror-realtime-live.prompt.md index 8adb3c4..0987b82 100644 --- a/src/agents/openai-realtime/mirror-realtime-live.prompt.md +++ b/src/agents/openai-realtime/mirror-realtime-live.prompt.md @@ -1,5 +1,7 @@ You are a reflective journaling companion. Students talk with you in live conversation. You are a good listener who prompts when needed but uses very short and simple acknowledgments (hmm, nodding, short ack) and prompts to go deeper when needed. +Always respond in English. If the student speaks in English, keep every spoken reply and follow-up in natural English. Do not switch to Indonesian, Malay, Singlish, or any other language unless the student explicitly asks to practice that language. + ## Two modes You are always in one of two modes. Read the conversation to know diff --git a/src/components/student-space/EngineHost.tsx b/src/components/student-space/EngineHost.tsx index a14ce9e..878d0cf 100644 --- a/src/components/student-space/EngineHost.tsx +++ b/src/components/student-space/EngineHost.tsx @@ -46,7 +46,17 @@ const SURFACES_REQUIRING_HYDRATION = new Set(['trajectory']) * is unsafe under SSR: some engine modules still expect a browser-owned * `window` / `document` during evaluation. */ -export function EngineHost({ className, children }: { className?: string; children?: ReactNode }) { +export function EngineHost({ + className, + children, + showOnboardingFlow = true, + hideCompanion = false, +}: { + className?: string + children?: ReactNode + showOnboardingFlow?: boolean + hideCompanion?: boolean +}) { const containerRef = useRef(null) const [error, setError] = useState(null) const backend = useMemo(() => createStudentSpaceBackendBridge(), []) @@ -95,6 +105,18 @@ export function EngineHost({ className, children }: { className?: string; childr game.setRenderActive(isWorldRoute) }, [game, isWorldRoute]) + useEffect(() => { + if (!game || !hideCompanion) return + const group = (game as unknown as { view?: { kira?: { group?: { visible: boolean } } } }).view + ?.kira?.group + if (!group) return + const previousVisible = group.visible + group.visible = false + return () => { + group.visible = previousVisible + } + }, [game, hideCompanion]) + useEffect(() => { document.body.classList.toggle('student-space-page-route', !isWorldRoute) return () => document.body.classList.remove('student-space-page-route') @@ -255,7 +277,7 @@ export function EngineHost({ className, children }: { className?: string; childr - + {showOnboardingFlow ? : null} {import.meta.env.DEV && game ? : null} {children} diff --git a/src/components/student-space/capture/AskSheet.tsx b/src/components/student-space/capture/AskSheet.tsx index a141bcc..b83edc4 100644 --- a/src/components/student-space/capture/AskSheet.tsx +++ b/src/components/student-space/capture/AskSheet.tsx @@ -38,7 +38,12 @@ type Reframe = { backend?: boolean } type ThreadMessage = { role: 'kira' | 'you'; text: string } -type LiveMessage = { id?: string; role?: string; text?: string; status?: string } +type LiveMessage = { + id?: string + role?: string + text?: string + status?: 'streaming' | 'final' | 'discarded' | string +} type CaptureEntry = { id?: string kind?: string @@ -79,9 +84,13 @@ type CameraInstance = { zoomTo?: (pos: Vec3Like, look: Vec3Like, duration?: number, opts?: { owner?: string }) => void restoreZoom?: (duration?: number, opts?: { owner?: string }) => void } +type KiraActor = { + group?: { position?: Vec3Like; rotation?: { y: number } } + facing?: number +} type KiraCameraView = { camera?: CameraInstance & { instance?: { position?: Vec3Like } } - kira?: { group?: { position?: Vec3Like } } + kira?: KiraActor captureFocus?: boolean } @@ -307,14 +316,14 @@ export function AskSheet() { }, [capture, open, prefilledText, readOnly, setAudioCaptureHandle, setRealtimeCaptureHandle]) // Camera dolly toward Kira + freeze her wander while Capture is open. - // Mirrors KiraNarrator's framing (`perch + unit * 2.6m` along the current - // viewing axis) so Kira lands centered without yanking the user out of - // their orientation. Restores on close. + // Uses a slightly wider composition than the first-chat framing so her face + // stays visible above the capture sheet. Restores on close. useEffect(() => { if (!open) return const view = (engine as unknown as { view?: KiraCameraView } | null)?.view const camera = view?.camera - const kira = view?.kira?.group?.position + const kiraActor = view?.kira + const kira = kiraActor?.group?.position if (!camera?.zoomTo || !kira) return const Vec = kira.constructor as new (x: number, y: number, z: number) => Vec3Like const liveCam = camera.instance?.position @@ -323,12 +332,41 @@ export function AskSheet() { const flat = Math.hypot(dx, dz) || 1 const unitX = dx / flat const unitZ = dz / flat - const camPos: Vec3Like = new Vec(kira.x + unitX * 2.6, kira.y + 1.05, kira.z + unitZ * 2.6) - const camLook: Vec3Like = new Vec(kira.x, kira.y + 0.85, kira.z) + const targetYaw = Math.atan2(-unitZ, unitX) + const camPos: Vec3Like = new Vec(kira.x + unitX * 4.2, kira.y + 1.05, kira.z + unitZ * 4.2) + const camLook: Vec3Like = new Vec(kira.x, kira.y + 0.72, kira.z) + const rotation = kiraActor?.group?.rotation + const initialYaw = rotation?.y + let yawFrame: number | null = null + let cancelled = false + const setKiraYaw = (yaw: number) => { + if (!rotation) return + rotation.y = yaw + if (kiraActor) kiraActor.facing = yaw + } + const animateKiraYaw = (to: number, duration: number) => { + if (!rotation) return + const from = rotation.y + const startedAt = performance.now() + const tick = (now: number) => { + if (cancelled) return + const t = Math.min(1, Math.max(0, (now - startedAt) / duration)) + const eased = t * t * (3 - 2 * t) + let delta = to - from + delta = ((delta + Math.PI * 3) % (Math.PI * 2)) - Math.PI + setKiraYaw(from + delta * eased) + if (t < 1) yawFrame = window.requestAnimationFrame(tick) + } + yawFrame = window.requestAnimationFrame(tick) + } camera.zoomTo(camPos, camLook, 700, { owner: 'capture' }) + animateKiraYaw(targetYaw, 700) if (view) view.captureFocus = true return () => { + cancelled = true + if (yawFrame != null) window.cancelAnimationFrame(yawFrame) camera.restoreZoom?.(620, { owner: 'capture' }) + if (typeof initialYaw === 'number') setKiraYaw(initialYaw) if (view) view.captureFocus = false } }, [open, engine]) @@ -410,11 +448,12 @@ export function AskSheet() { if (!mountedRef.current || recordingRunRef.current !== runId) return setLiveDialogue((items) => { const id = message.id || `${message.role || 'student'}-${Date.now()}` + if (message.status === 'discarded') return items.filter((item) => item.id !== id) const next = items.filter((item) => item.id !== id) next.push({ ...message, id }) return next }) - if (message.role === 'student' && message.text) { + if (message.role === 'student' && message.status === 'final' && message.text) { setReviewText((current) => [current, message.text].filter(Boolean).join(' ').trim()) } }, @@ -802,7 +841,7 @@ export function AskSheet() { } const liveStudentText = liveDialogue - .filter((message) => message.role === 'student' && message.text) + .filter((message) => message.role === 'student' && message.status === 'final' && message.text) .map((message) => message.text) .join(' ') @@ -846,6 +885,7 @@ export function AskSheet() { closeLabel={readOnly || dismissOnBack ? 'Close' : 'Back'} className="border-white/75 bg-[#fff7e8]/96 text-[#2b2620] shadow-[0_22px_60px_rgba(35,25,18,0.26)] backdrop-blur-md" popup + hideOverlay > Capture @@ -1040,40 +1080,25 @@ export function AskSheet() { ) : null} {stage === 'recording' ? ( -
-
-
-

I'm listening.

-

- Speak naturally. Pause when you're done — I'll read it back. -

+
{liveDialogue.length === 0 ? ( -
-
-
{liveHint ? ( -

{liveHint}

+

{liveHint}

) : null}