diff --git a/src/agents/mirror.prompt.md b/src/agents/mirror.prompt.md index 97053df..708a6b8 100644 --- a/src/agents/mirror.prompt.md +++ b/src/agents/mirror.prompt.md @@ -22,6 +22,7 @@ If you'd find prior reflections useful — for example, you suspect the student ## Hard constraints +- **English only.** Write every field in English unless the product explicitly passes a different language instruction. - **No diagnostic language.** Do not label the student's personality, ability, or identity. Describe what they did and what they said, never who they are. - **No advice.** Do not suggest what to do. That is not your job. - **No careers, no pathways.** That is Pathfinder's job. diff --git a/src/agents/openai-realtime/mirror-payloads.ts b/src/agents/openai-realtime/mirror-payloads.ts index 840d1ca..fe9c536 100644 --- a/src/agents/openai-realtime/mirror-payloads.ts +++ b/src/agents/openai-realtime/mirror-payloads.ts @@ -8,6 +8,8 @@ import LIVE_PROMPT_RAW from './mirror-realtime-live.prompt.md?raw' const MIRROR_JSON_SHAPE = '{"validation":"","inferred_meaning":"","story_reframe":""}' export const OPENAI_REALTIME_MIRROR_VOICE = 'marin' +export const OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE = 'en' +export const OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL = 'gpt-4o-mini-transcribe' const LIVE_INSTRUCTIONS = LIVE_PROMPT_RAW.trim() @@ -15,6 +17,34 @@ export function buildRealtimeMirrorLiveInstructions(): string { return LIVE_INSTRUCTIONS } +export function buildRealtimeMirrorLiveAudioInputConfig() { + return { + transcription: { + model: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, + language: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + }, + noise_reduction: { type: 'far_field' }, + turn_detection: { + type: 'server_vad', + create_response: false, + interrupt_response: true, + threshold: 0.5, + prefix_padding_ms: 700, + silence_duration_ms: 800, + }, + } as const +} + +export function buildRealtimeMirrorLiveResponseInstructions(): string { + return [ + buildRealtimeMirrorLiveInstructions(), + '', + 'The student has just finished one English voice turn.', + 'Reply in English only.', + 'Keep this spoken reply short and natural.', + ].join('\n') +} + export function buildRealtimeMirrorUserInput(transcript: string): string { return [ 'The student had this live voice session with the Companion while looking into the mirror scene.', @@ -42,6 +72,7 @@ export function buildRealtimeMirrorRepairInput(previousText: string): string { export function buildRealtimeMirrorResponseInstructions(): string { return [ 'Use the latest student transcript item in this conversation.', + 'Write every field in English.', 'Return ONLY a JSON object with validation, inferred_meaning, and story_reframe.', `The object must match this shape: ${MIRROR_JSON_SHAPE}.`, 'Do not ask a question. Do not give advice. Do not include Markdown.', diff --git a/src/agents/openai-realtime/mirror-prompt.ts b/src/agents/openai-realtime/mirror-prompt.ts index b141f01..a74bda6 100644 --- a/src/agents/openai-realtime/mirror-prompt.ts +++ b/src/agents/openai-realtime/mirror-prompt.ts @@ -4,15 +4,21 @@ import { fileURLToPath } from 'node:url' import type { RealtimeSessionCreateRequest } from 'openai/resources/realtime/realtime' import { OPENAI_REALTIME_MIRROR_DEFAULT_MODEL } from './config' import { + buildRealtimeMirrorLiveAudioInputConfig, buildRealtimeMirrorLiveInstructions, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, OPENAI_REALTIME_MIRROR_VOICE, } from './mirror-payloads' export { + buildRealtimeMirrorLiveAudioInputConfig, buildRealtimeMirrorLiveInstructions, buildRealtimeMirrorRepairInput, buildRealtimeMirrorResponseInstructions, buildRealtimeMirrorUserInput, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, OPENAI_REALTIME_MIRROR_VOICE, } from './mirror-payloads' @@ -32,6 +38,7 @@ export function buildRealtimeMirrorInstructions(): string { getMirrorSystemPrompt(), '', '## Realtime session rules', + '- Always write the final Mirror JSON fields in English.', '- The student is not in an interview. Do not ask questions.', '- For voice input, listen until the app sends the explicit stop/commit event.', '- Return text only.', @@ -61,22 +68,17 @@ export function buildRealtimeMirrorSessionConfig({ output_modalities: [mode === 'live_audio' ? 'audio' : 'text'], max_output_tokens: 1000, audio: { - input: { - transcription: { - model: 'gpt-4o-mini-transcribe', - language: 'en', - }, - noise_reduction: { type: 'near_field' }, - turn_detection: - mode === 'live_audio' - ? { - type: 'semantic_vad', - create_response: true, - interrupt_response: true, - eagerness: 'auto', - } - : null, - }, + input: + mode === 'live_audio' + ? buildRealtimeMirrorLiveAudioInputConfig() + : { + transcription: { + model: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_MODEL, + language: OPENAI_REALTIME_MIRROR_TRANSCRIPTION_LANGUAGE, + }, + noise_reduction: { type: 'far_field' }, + turn_detection: null, + }, ...(mode === 'live_audio' ? { output: { voice } } : {}), }, tool_choice: 'none', diff --git a/src/agents/openai-realtime/mirror-realtime-live.prompt.md b/src/agents/openai-realtime/mirror-realtime-live.prompt.md index 8adb3c4..b85c8af 100644 --- a/src/agents/openai-realtime/mirror-realtime-live.prompt.md +++ b/src/agents/openai-realtime/mirror-realtime-live.prompt.md @@ -1,5 +1,7 @@ You are a reflective journaling companion. Students talk with you in live conversation. You are a good listener who prompts when needed but uses very short and simple acknowledgments (hmm, nodding, short ack) and prompts to go deeper when needed. +Always respond in English. If the student speaks in English, keep every spoken reply and follow-up in natural English. Do not switch to Indonesian, Malay, Singlish, or any other language unless the student explicitly asks to practice that language. + ## Two modes You are always in one of two modes. Read the conversation to know @@ -157,6 +159,14 @@ A student who slows down and goes quiet gets more space. If the student is checking whether the mic works: say only "I can hear you." +If the student only says a very short or vague thing ("hi", + "hello", "I don't know", "nothing", "not sure", or one + unclear fragment), do not assume an event has already + happened. Do not ask "who were you with?", "where were you?", + or any detail that implies a story exists. Instead, invite + them gently toward school or after-school life: + "Anything interesting happen during school or after school today?" + Or: "Anything from school or after school still on your mind?" If the student asks what to talk about: give one simple invitation about something that happened recently, then leave space. diff --git a/src/components/student-space/EngineHost.tsx b/src/components/student-space/EngineHost.tsx index a14ce9e..878d0cf 100644 --- a/src/components/student-space/EngineHost.tsx +++ b/src/components/student-space/EngineHost.tsx @@ -46,7 +46,17 @@ const SURFACES_REQUIRING_HYDRATION = new Set(['trajectory']) * is unsafe under SSR: some engine modules still expect a browser-owned * `window` / `document` during evaluation. */ -export function EngineHost({ className, children }: { className?: string; children?: ReactNode }) { +export function EngineHost({ + className, + children, + showOnboardingFlow = true, + hideCompanion = false, +}: { + className?: string + children?: ReactNode + showOnboardingFlow?: boolean + hideCompanion?: boolean +}) { const containerRef = useRef(null) const [error, setError] = useState(null) const backend = useMemo(() => createStudentSpaceBackendBridge(), []) @@ -95,6 +105,18 @@ export function EngineHost({ className, children }: { className?: string; childr game.setRenderActive(isWorldRoute) }, [game, isWorldRoute]) + useEffect(() => { + if (!game || !hideCompanion) return + const group = (game as unknown as { view?: { kira?: { group?: { visible: boolean } } } }).view + ?.kira?.group + if (!group) return + const previousVisible = group.visible + group.visible = false + return () => { + group.visible = previousVisible + } + }, [game, hideCompanion]) + useEffect(() => { document.body.classList.toggle('student-space-page-route', !isWorldRoute) return () => document.body.classList.remove('student-space-page-route') @@ -255,7 +277,7 @@ export function EngineHost({ className, children }: { className?: string; childr - + {showOnboardingFlow ? : null} {import.meta.env.DEV && game ? : null} {children} diff --git a/src/components/student-space/capture/AskSheet.tsx b/src/components/student-space/capture/AskSheet.tsx index a141bcc..a8b4b86 100644 --- a/src/components/student-space/capture/AskSheet.tsx +++ b/src/components/student-space/capture/AskSheet.tsx @@ -4,6 +4,7 @@ import { type KeyboardEvent, useCallback, useEffect, + useLayoutEffect, useMemo, useRef, useState, @@ -38,7 +39,12 @@ type Reframe = { backend?: boolean } type ThreadMessage = { role: 'kira' | 'you'; text: string } -type LiveMessage = { id?: string; role?: string; text?: string; status?: string } +type LiveMessage = { + id?: string + role?: string + text?: string + status?: 'streaming' | 'final' | 'discarded' | string +} type CaptureEntry = { id?: string kind?: string @@ -79,9 +85,13 @@ type CameraInstance = { zoomTo?: (pos: Vec3Like, look: Vec3Like, duration?: number, opts?: { owner?: string }) => void restoreZoom?: (duration?: number, opts?: { owner?: string }) => void } +type KiraActor = { + group?: { position?: Vec3Like; rotation?: { y: number } } + facing?: number +} type KiraCameraView = { camera?: CameraInstance & { instance?: { position?: Vec3Like } } - kira?: { group?: { position?: Vec3Like } } + kira?: KiraActor captureFocus?: boolean } @@ -218,6 +228,7 @@ export function AskSheet() { const realtimeCaptureRef = useRef(null) const focusTimeoutRef = useRef(null) const liveDialogueRef = useRef(null) + const liveDialogueEndRef = useRef(null) const mountedRef = useRef(false) const openRef = useRef(open) const recordingRunRef = useRef(0) @@ -307,14 +318,14 @@ export function AskSheet() { }, [capture, open, prefilledText, readOnly, setAudioCaptureHandle, setRealtimeCaptureHandle]) // Camera dolly toward Kira + freeze her wander while Capture is open. - // Mirrors KiraNarrator's framing (`perch + unit * 2.6m` along the current - // viewing axis) so Kira lands centered without yanking the user out of - // their orientation. Restores on close. + // Uses a slightly wider composition than the first-chat framing so her face + // stays visible above the capture sheet. Restores on close. useEffect(() => { if (!open) return const view = (engine as unknown as { view?: KiraCameraView } | null)?.view const camera = view?.camera - const kira = view?.kira?.group?.position + const kiraActor = view?.kira + const kira = kiraActor?.group?.position if (!camera?.zoomTo || !kira) return const Vec = kira.constructor as new (x: number, y: number, z: number) => Vec3Like const liveCam = camera.instance?.position @@ -323,12 +334,41 @@ export function AskSheet() { const flat = Math.hypot(dx, dz) || 1 const unitX = dx / flat const unitZ = dz / flat - const camPos: Vec3Like = new Vec(kira.x + unitX * 2.6, kira.y + 1.05, kira.z + unitZ * 2.6) - const camLook: Vec3Like = new Vec(kira.x, kira.y + 0.85, kira.z) + const targetYaw = Math.atan2(-unitZ, unitX) + const camPos: Vec3Like = new Vec(kira.x + unitX * 4.2, kira.y + 1.05, kira.z + unitZ * 4.2) + const camLook: Vec3Like = new Vec(kira.x, kira.y + 0.72, kira.z) + const rotation = kiraActor?.group?.rotation + const initialYaw = rotation?.y + let yawFrame: number | null = null + let cancelled = false + const setKiraYaw = (yaw: number) => { + if (!rotation) return + rotation.y = yaw + if (kiraActor) kiraActor.facing = yaw + } + const animateKiraYaw = (to: number, duration: number) => { + if (!rotation) return + const from = rotation.y + const startedAt = performance.now() + const tick = (now: number) => { + if (cancelled) return + const t = Math.min(1, Math.max(0, (now - startedAt) / duration)) + const eased = t * t * (3 - 2 * t) + let delta = to - from + delta = ((delta + Math.PI * 3) % (Math.PI * 2)) - Math.PI + setKiraYaw(from + delta * eased) + if (t < 1) yawFrame = window.requestAnimationFrame(tick) + } + yawFrame = window.requestAnimationFrame(tick) + } camera.zoomTo(camPos, camLook, 700, { owner: 'capture' }) + animateKiraYaw(targetYaw, 700) if (view) view.captureFocus = true return () => { + cancelled = true + if (yawFrame != null) window.cancelAnimationFrame(yawFrame) camera.restoreZoom?.(620, { owner: 'capture' }) + if (typeof initialYaw === 'number') setKiraYaw(initialYaw) if (view) view.captureFocus = false } }, [open, engine]) @@ -410,11 +450,12 @@ export function AskSheet() { if (!mountedRef.current || recordingRunRef.current !== runId) return setLiveDialogue((items) => { const id = message.id || `${message.role || 'student'}-${Date.now()}` + if (message.status === 'discarded') return items.filter((item) => item.id !== id) const next = items.filter((item) => item.id !== id) next.push({ ...message, id }) return next }) - if (message.role === 'student' && message.text) { + if (message.role === 'student' && message.status === 'final' && message.text) { setReviewText((current) => [current, message.text].filter(Boolean).join(' ').trim()) } }, @@ -802,7 +843,7 @@ export function AskSheet() { } const liveStudentText = liveDialogue - .filter((message) => message.role === 'student' && message.text) + .filter((message) => message.role === 'student' && message.status === 'final' && message.text) .map((message) => message.text) .join(' ') @@ -810,13 +851,34 @@ export function AskSheet() { if (stage === 'recording' && liveStudentText) setReviewText(liveStudentText) }, [liveStudentText, stage]) - // biome-ignore lint/correctness/useExhaustiveDependencies: liveDialogue is the scroll trigger, not read in the body. - useEffect(() => { + const visibleLiveDialogue = useMemo( + () => + liveDialogue.length > 0 + ? liveDialogue + : [ + { + id: 'student-listening-placeholder', + role: 'student', + text: 'Listening...', + status: 'streaming', + } satisfies LiveMessage, + ], + [liveDialogue], + ) + + // biome-ignore lint/correctness/useExhaustiveDependencies: visibleLiveDialogue is the scroll trigger, not read in the body. + useLayoutEffect(() => { if (stage !== 'recording') return const node = liveDialogueRef.current if (!node) return - node.scrollTo({ top: node.scrollHeight, behavior: 'smooth' }) - }, [liveDialogue, stage]) + const scrollToBottom = () => { + node.scrollTop = node.scrollHeight + liveDialogueEndRef.current?.scrollIntoView({ block: 'end' }) + } + scrollToBottom() + const frame = window.requestAnimationFrame(scrollToBottom) + return () => window.cancelAnimationFrame(frame) + }, [visibleLiveDialogue, stage]) const [typeMode, setTypeMode] = useState(false) @@ -846,6 +908,7 @@ export function AskSheet() { closeLabel={readOnly || dismissOnBack ? 'Close' : 'Back'} className="border-white/75 bg-[#fff7e8]/96 text-[#2b2620] shadow-[0_22px_60px_rgba(35,25,18,0.26)] backdrop-blur-md" popup + hideOverlay > Capture @@ -1040,64 +1103,39 @@ export function AskSheet() { ) : null} {stage === 'recording' ? ( -
-
-
-

I'm listening.

-

- Speak naturally. Pause when you're done — I'll read it back. -

+
- {liveDialogue.length === 0 ? ( -
-
-
-
- ) : ( - liveDialogue.map((message) => ( -
- - {message.role === 'kira' ? 'Kira' : 'You'} - + {visibleLiveDialogue.map((message) => ( +
+ + {message.role === 'kira' ? companionName : 'You'} + + {message.status === 'streaming' && message.role !== 'kira' ? ( + + ) : (

{message.text}

-
- )) - )} + )} +
+ ))} +
{liveHint ? ( -

{liveHint}

+

{liveHint}

) : null}