Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions scripts/attach-agent-chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@ async function main(): Promise<void> {
const explicitAgent = argValue('--agent')
const explicitProject = argValue('--project')
const explicitName = argValue('--name')
// Optional per-attach import cap. An absent (or empty) flag leaves the cap
// undefined; nothing is defaulted here.
const maxMessagesRaw = argValue('--max-messages')
// Number() rejects the whole string when any part is non-numeric, whereas
// parseInt would silently accept "10abc" as 10 and "3.5" as 3.
const maxMessages = maxMessagesRaw ? Number(maxMessagesRaw) : undefined
if (maxMessages !== undefined && (!Number.isSafeInteger(maxMessages) || maxMessages <= 0)) {
    throw new Error(`--max-messages must be a positive integer; got ${maxMessagesRaw}`)
}

const indexHit = resolveFromIndex(token.replace(/^#/, ''), indexPath)
const chatId = (indexHit?.id ?? token).trim()
Expand Down Expand Up @@ -317,7 +322,8 @@ async function main(): Promise<void> {
chatId,
projectHint: projectPath,
dryRun,
force: hasFlag('--force')
force: hasFlag('--force'),
maxMessages
})
report.pathTaken = 'backfill-only'
report.hapiSessionId = forceSession
Expand Down Expand Up @@ -388,7 +394,8 @@ async function main(): Promise<void> {
chatId,
projectHint: projectPath,
dryRun,
force: row.c > 0
force: row.c > 0,
maxMessages
})
report.backfill = bf
report.messagesInSqlite = row.c + (dryRun ? bf.total : bf.inserted)
Expand All @@ -410,6 +417,16 @@ async function main(): Promise<void> {
}
}

// Surface backfill truncation prominently at the end of the report so it
// doesn't get lost in the per-stream warnings buried mid-output.
const bfReport = report.backfill as { truncated?: boolean; rawTranscriptLines?: number; maxMessagesApplied?: number; inserted?: number } | null
if (bfReport && bfReport.truncated) {
console.warn(
`\nwarn: BACKFILL TRUNCATED — imported ${bfReport.inserted} of ${bfReport.rawTranscriptLines} transcript records ` +
`(cap ${bfReport.maxMessagesApplied}). Re-run with --max-messages ${bfReport.rawTranscriptLines} to capture the tail.\n`
)
}

console.log(JSON.stringify(report, null, 2))
}

Expand Down
66 changes: 66 additions & 0 deletions scripts/backfill-agent-transcript.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,69 @@ describe('resolveTranscriptPath: duplicate UUID across Cursor projects', () => {
}
})
})

describe('backfillSessionMessages: truncation reporting', () => {
    // Temp directories created by the cases below; wiped after every test.
    const tempDirs: string[] = []

    afterEach(() => {
        const doomed = tempDirs.splice(0)
        doomed.forEach((dir) => {
            rmSync(dir, { recursive: true, force: true })
        })
    })

    /**
     * Create a throwaway directory holding a fresh store with one cursor
     * session plus a jsonl transcript with one user turn per entry of `texts`.
     */
    function seedFixture(fixture: {
        dirPrefix: string
        sessionTag: string
        cursorSessionId: string
        texts: string[]
    }): { dbPath: string; transcript: string; sessionId: string } {
        const dir = mkdtempSync(join(tmpdir(), fixture.dirPrefix))
        tempDirs.push(dir)
        const dbPath = join(dir, 'hapi.db')
        const transcript = join(dir, 't.jsonl')

        const jsonl = fixture.texts
            .map((text) => JSON.stringify({ role: 'user', message: { content: [{ type: 'text', text }] } }))
            .join('\n')
        writeFileSync(transcript, jsonl + '\n')

        const store = new Store(dbPath)
        const session = store.sessions.getOrCreateSession(fixture.sessionTag, {
            path: dir,
            host: 'test',
            flavor: 'cursor',
            cursorSessionId: fixture.cursorSessionId
        }, null, 'default')
        store.close()

        return { dbPath, transcript, sessionId: session.id }
    }

    it('flags truncated=true when transcript exceeds --max-messages cap', () => {
        const chatId = '22222222-2222-4222-8222-222222222222'
        // 5 user turns, cap at 3 -> truncated
        const texts = [0, 1, 2, 3, 4].map((i) => `msg ${i}`)
        const fixture = seedFixture({
            dirPrefix: 'hapi-backfill-trunc-',
            sessionTag: 'trunc-test',
            cursorSessionId: chatId,
            texts
        })

        const result = backfillSessionMessages({
            dbPath: fixture.dbPath,
            sessionId: fixture.sessionId,
            agent: 'cursor',
            chatId,
            transcriptPath: fixture.transcript,
            maxMessages: 3
        })

        expect(result.rawTranscriptLines).toBe(5)
        expect(result.maxMessagesApplied).toBe(3)
        expect(result.truncated).toBe(true)
        expect(result.inserted).toBe(3)
    })

    it('flags truncated=false when transcript fits within cap', () => {
        const chatId = '33333333-3333-4333-8333-333333333333'
        const fixture = seedFixture({
            dirPrefix: 'hapi-backfill-fit-',
            sessionTag: 'fit-test',
            cursorSessionId: chatId,
            texts: ['one and done']
        })

        // No maxMessages override: the default cap applies.
        const result = backfillSessionMessages({
            dbPath: fixture.dbPath,
            sessionId: fixture.sessionId,
            agent: 'cursor',
            chatId,
            transcriptPath: fixture.transcript
        })

        expect(result.rawTranscriptLines).toBe(1)
        expect(result.truncated).toBe(false)
        expect(result.inserted).toBe(1)
    })
})
76 changes: 66 additions & 10 deletions scripts/backfill-agent-transcript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ type AgentFlavor = 'cursor' | 'claude' | 'codex'
type HapiMessageContent = Record<string, unknown>

const BACKFILL_META = { sentFrom: 'backfill' }
const MAX_MESSAGES = 2000
// Per-attach cap on imported messages. Set high enough to cover real chats
// (jessica-story founding chat: 2974 lines; agent-notify 5.4MB chats can hit
// 6k+ lines). Override via --max-messages or HAPI_BACKFILL_MAX_MESSAGES env.
// At 50k we'd cap on roughly 30MB of jsonl input — generous, and the import
// is one-time per session so there's no steady-state cost.
const DEFAULT_MAX_MESSAGES = 50_000

function argValue(name: string): string | undefined {
const i = process.argv.indexOf(name)
Expand All @@ -34,7 +39,8 @@ function argValue(name: string): string | undefined {
function usage(): never {
console.error(`Usage: bun scripts/backfill-agent-transcript.ts \\
--session <hapiSessionId> (--agent cursor|claude|codex --chat-id <uuid> | --transcript <path>) \\
[--project <projectDir>] # tie-breaker when the same chat UUID exists under multiple Cursor/Claude projects
[--project <projectDir>] # tie-breaker when the same chat UUID exists under multiple Cursor/Claude projects
[--max-messages <N>] # per-attach import cap (default 50000; env: HAPI_BACKFILL_MAX_MESSAGES)
[--db ~/.hapi/hapi.db] [--dry-run] [--force]`)
process.exit(2)
}
Expand Down Expand Up @@ -247,7 +253,12 @@ function convertCodexTranscriptLine(row: Record<string, unknown>): { userMessage
return null
}

export function transcriptLinesToHapiMessages(agent: AgentFlavor, transcriptPath: string): HapiMessageContent[] {
export function transcriptLinesToHapiMessages(
agent: AgentFlavor,
transcriptPath: string,
opts?: { maxMessages?: number }
): HapiMessageContent[] {
const max = opts?.maxMessages ?? DEFAULT_MAX_MESSAGES
const raw = readFileSync(transcriptPath, 'utf8')
const lines = raw.split('\n').filter((l) => l.trim())
const out: HapiMessageContent[] = []
Expand Down Expand Up @@ -346,7 +357,16 @@ export function transcriptLinesToHapiMessages(agent: AgentFlavor, transcriptPath
}
}

return out.slice(0, MAX_MESSAGES)
return out.slice(0, max)
}

/**
 * Count the records in a transcript file before any per-attach cap.
 *
 * A record is any line that is non-empty after trimming whitespace. The
 * file is read in full as UTF-8 and split on newlines; lines are not
 * JSON-parsed or otherwise validated, so the result is the raw line count
 * rather than the number of messages a conversion would yield.
 *
 * @param transcriptPath Path of the transcript file to read.
 * @returns Number of non-blank lines in the file.
 */
export function countTranscriptRecords(transcriptPath: string): number {
    const contents = readFileSync(transcriptPath, 'utf8')
    let records = 0
    for (const line of contents.split('\n')) {
        if (line.trim().length > 0) {
            records += 1
        }
    }
    return records
}

function insertBackfillMessage(
Expand All @@ -366,6 +386,19 @@ function insertBackfillMessage(
return 'inserted'
}

/**
 * Summary returned by backfillSessionMessages for one backfill run,
 * including whether the per-attach message cap was hit.
 */
export type BackfillResult = {
/** Messages inserted this run; on a dry run, the number of converted messages (nothing is written). */
inserted: number
/** Messages that were processed but not counted as inserted; always 0 on a dry run. */
skipped: number
/** Transcript file that was actually read (explicit path or the resolved one). */
transcriptPath: string
/** Number of converted messages after the cap was applied. */
total: number
/** Raw line count of the source transcript before any per-attach cap. */
rawTranscriptLines: number
/** The cap actually applied this run (DEFAULT_MAX_MESSAGES or override). */
maxMessagesApplied: number
/**
 * True iff rawTranscriptLines exceeded the cap.
 * NOTE(review): the cap itself is applied to the converted message list,
 * not to raw lines — confirm the two counts line up, otherwise this flag can
 * disagree with what was actually dropped.
 */
truncated: boolean
}

export function backfillSessionMessages(opts: {
dbPath: string
sessionId: string
Expand All @@ -375,15 +408,31 @@ export function backfillSessionMessages(opts: {
projectHint?: string
dryRun?: boolean
force?: boolean
}): { inserted: number; skipped: number; transcriptPath: string; total: number } {
/** Override per-attach cap (default 50_000). Also accepts HAPI_BACKFILL_MAX_MESSAGES env. */
maxMessages?: number
}): BackfillResult {
const transcriptPath = opts.transcriptPath ?? resolveTranscriptPath(opts.agent, opts.chatId, opts.projectHint)
if (!transcriptPath) {
throw new Error(`transcript not found for ${opts.agent} chat ${opts.chatId}`)
}

const messages = transcriptLinesToHapiMessages(opts.agent, transcriptPath)
const envMax = process.env.HAPI_BACKFILL_MAX_MESSAGES
? parseInt(process.env.HAPI_BACKFILL_MAX_MESSAGES, 10)
: undefined
const maxMessagesApplied = opts.maxMessages ?? (envMax && envMax > 0 ? envMax : DEFAULT_MAX_MESSAGES)
const rawTranscriptLines = countTranscriptRecords(transcriptPath)
const truncated = rawTranscriptLines > maxMessagesApplied

if (truncated) {
console.warn(
`warn: transcript has ${rawTranscriptLines} records, capping at ${maxMessagesApplied} (${rawTranscriptLines - maxMessagesApplied} dropped). ` +
`Raise the cap via --max-messages or HAPI_BACKFILL_MAX_MESSAGES.`
)
}

const messages = transcriptLinesToHapiMessages(opts.agent, transcriptPath, { maxMessages: maxMessagesApplied })
if (messages.length === 0) {
return { inserted: 0, skipped: 0, transcriptPath, total: 0 }
return { inserted: 0, skipped: 0, transcriptPath, total: 0, rawTranscriptLines, maxMessagesApplied, truncated }
}

const prevAllowNewer = process.env.HAPI_STORE_ALLOW_NEWER_SCHEMA
Expand All @@ -402,7 +451,7 @@ export function backfillSessionMessages(opts: {
}

if (opts.dryRun) {
return { inserted: messages.length, skipped: 0, transcriptPath, total: messages.length }
return { inserted: messages.length, skipped: 0, transcriptPath, total: messages.length, rawTranscriptLines, maxMessagesApplied, truncated }
}

let inserted = 0
Expand All @@ -414,7 +463,7 @@ export function backfillSessionMessages(opts: {
else skipped += 1
}

return { inserted, skipped, transcriptPath, total: messages.length }
return { inserted, skipped, transcriptPath, total: messages.length, rawTranscriptLines, maxMessagesApplied, truncated }
} finally {
store.close()
if (prevAllowNewer === undefined) {
Expand All @@ -431,6 +480,7 @@ if (import.meta.main) {
const chatId = argValue('--chat-id')
const transcript = argValue('--transcript')
const projectHint = argValue('--project')
const maxMessagesRaw = argValue('--max-messages')
const dbPath = expandHome(argValue('--db') ?? process.env.HAPI_DB ?? join(homedir(), '.hapi', 'hapi.db'))
const dryRun = process.argv.includes('--dry-run')
const force = process.argv.includes('--force')
Expand All @@ -441,6 +491,11 @@ if (import.meta.main) {

const agent = agentRaw as AgentFlavor
const resolvedChatId = chatId ?? basename(transcript!, '.jsonl')
// Number() rejects the whole string when any part is non-numeric, whereas
// parseInt would silently accept "10abc" as 10 and "3.5" as 3.
const maxMessages = maxMessagesRaw ? Number(maxMessagesRaw) : undefined
if (maxMessages !== undefined && (!Number.isSafeInteger(maxMessages) || maxMessages <= 0)) {
    console.error(`--max-messages must be a positive integer; got ${maxMessagesRaw}`)
    // Exit code 2 matches the usage-error exit used elsewhere in this script.
    process.exit(2)
}

const result = backfillSessionMessages({
dbPath,
Expand All @@ -450,7 +505,8 @@ if (import.meta.main) {
transcriptPath: transcript ? expandHome(transcript) : undefined,
projectHint: projectHint ? expandHome(projectHint) : undefined,
dryRun,
force
force,
maxMessages
})

console.log(JSON.stringify({ ok: true, dryRun, ...result }, null, 2))
Expand Down
Loading