Skip to content

Commit 63fa316

Browse files
authored
fix(core): retry first-turn agent generation on transient errors (#125) (#163)
## Summary \`USE_AGENT_RUNTIME\` 默认 ON,99% 用户走 \`generateViaAgent → agent.prompt()\` 单次调用,**没有任何重试**。旧的 \`completeWithRetry\`(3 次指数退避)只在 USE_AGENT_RUNTIME=0 时才走。pi-ai 自身也无内建重试。结果:429/5xx/transient network 直接抛错给用户。 ## What changed - **\`packages/providers/src/retry.ts\`**:抽出通用 \`withBackoff<T>(fn, opts)\`,承担 sleep / jitter / classify / Retry-After / abort 全部逻辑。\`completeWithRetry\` 改写为薄 wrapper(注入 provider-error normalization + \`provider.error\` 日志),行为完全不变,原 15 个测试不动 - **\`packages/providers/src/index.ts\`**:export \`withBackoff\` / \`BackoffOptions\` / \`RetryDecision\` - **\`packages/core/src/agent.ts\`**:\`input.history.length === 0\`(first turn,**幂等**)时用 \`withBackoff({ maxRetries: 3 })\` 包 \`agent.prompt() + agent.waitForIdle()\`。non-first-turn 仍走 \`sendOnce()\`,避免多 turn 对话被重发污染状态 - 重试通过 \`log.warn('[generate] step=send_request.retry', ...)\` 暴露,UI feedback 留作 follow-up - \`USE_AGENT_RUNTIME\` 默认值未动;Codex 401 路径未动(follow-up) ## Test plan - [x] 9 new \`withBackoff\` tests:first-try success / 503→ok / 429 Retry-After / 4xx no retry / exhaustion / pre-aborted signal / mid-backoff abort / custom classify - [x] 4 new agent tests:first-turn 500→success / first-turn 3×500 exhaustion / first-turn 401 no retry / **non-first-turn 500 no retry**(关键:保护 multi-turn 状态) - [x] \`pnpm test\` providers (139) + core (220) all green - [x] \`pnpm typecheck\` + \`pnpm lint\` clean - [x] changeset added (patch bump) Closes #125 --------- Signed-off-by: hqhq1025 <1506751656@qq.com>
1 parent c0149f7 commit 63fa316

6 files changed

Lines changed: 399 additions & 35 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@open-codesign/core": patch
3+
"@open-codesign/providers": patch
4+
---
5+
6+
Fix: retry first-turn agent generation on transient provider errors (5xx, 429, network). The agent runtime now wraps `agent.prompt()` + `waitForIdle()` in a backoff loop for the first turn only — multi-turn requests still fail fast to avoid corrupting mid-session tool state. Extracted a generic `withBackoff` helper in `@open-codesign/providers` that shares the existing classify/jitter/Retry-After/abort logic with `completeWithRetry`. (#125)

packages/core/src/agent.test.ts

Lines changed: 175 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,20 @@ interface AgentScript {
3737
stopReason?: 'stop' | 'error' | 'aborted';
3838
errorMessage?: string;
3939
promptThrows?: Error;
40+
/**
41+
* When > 0, `promptThrows` is thrown only on the first N prompt() calls;
42+
* subsequent calls resolve normally. Lets tests script "transient failure
43+
* then success" sequences for first-turn retry coverage.
44+
*/
45+
promptThrowsTimes?: number;
46+
/**
47+
* When true together with `promptThrows`, the mock pushes a partial
48+
* assistant message onto `agent.state.messages` BEFORE throwing on
49+
* each failing attempt. Simulates "model streamed tokens / tool call
50+
* then the connection dropped" — the real pi-agent-core path where a
51+
* retry at the outer send boundary would replay tool side effects.
52+
*/
53+
promptPushesAssistantBeforeThrow?: boolean;
4054
/**
4155
* When set, the mock invokes `options.getApiKey` before emitting the
4256
* assistant response and — if it throws — converts the throw into an
@@ -64,7 +78,34 @@ vi.mock('@mariozechner/pi-agent-core', () => {
6478
}
6579
async prompt(message: unknown): Promise<void> {
6680
this.call.prompts.push({ message });
67-
if (scriptedAgent.promptThrows) throw scriptedAgent.promptThrows;
81+
if (scriptedAgent.promptThrows) {
82+
const limit = scriptedAgent.promptThrowsTimes ?? Number.POSITIVE_INFINITY;
83+
if (this.call.prompts.length <= limit) {
84+
if (scriptedAgent.promptPushesAssistantBeforeThrow) {
85+
const partial: AgentMessage = {
86+
role: 'assistant',
87+
// biome-ignore lint/suspicious/noExplicitAny: same.
88+
api: 'anthropic-messages' as any,
89+
// biome-ignore lint/suspicious/noExplicitAny: same.
90+
provider: 'anthropic' as any,
91+
model: 'mock-model',
92+
content: [{ type: 'text', text: 'partial tokens before drop' }],
93+
usage: {
94+
input: 0,
95+
output: 0,
96+
cacheRead: 0,
97+
cacheWrite: 0,
98+
totalTokens: 0,
99+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
100+
},
101+
stopReason: 'error',
102+
timestamp: Date.now(),
103+
};
104+
this.state.messages.push(partial);
105+
}
106+
throw scriptedAgent.promptThrows;
107+
}
108+
}
68109

69110
// Simulate pi-agent-core's per-turn getApiKey invocation. Real
70111
// runAgentLoop calls `await config.getApiKey(provider)` (line 156 of
@@ -406,9 +447,14 @@ describe('generateViaAgent() — Phase 1 pass-through', () => {
406447
signal: controller.signal,
407448
});
408449
controller.abort();
409-
// Mock's prompt() is synchronous enough to complete; just verify the wire-up
410-
// registered by confirming the Agent.abort() call counter after settlement.
411-
await promise;
450+
// With first-turn withBackoff the pre-call signal check may short-circuit
451+
// the prompt entirely (throwing PROVIDER_ABORTED), or the prompt may have
452+
// already completed; either way the `signal → agent.abort()` listener
453+
// registered before sending should have fired.
454+
await promise.catch(() => {
455+
// Expected when abort arrives before the withBackoff loop enters its
456+
// first iteration.
457+
});
412458
expect(agentCalls[0]?.aborted).toBe(true);
413459
});
414460

@@ -474,6 +520,131 @@ describe('generateViaAgent() — Phase 1 pass-through', () => {
474520
});
475521
});
476522

523+
describe('generateViaAgent() — first-turn retry', () => {
524+
class HttpError extends Error {
525+
constructor(
526+
message: string,
527+
public readonly status: number,
528+
) {
529+
super(message);
530+
this.name = 'HttpError';
531+
}
532+
}
533+
534+
it('retries a transient 500 on the first turn and resolves on the second attempt', async () => {
535+
vi.useFakeTimers();
536+
try {
537+
scriptedAgent = {
538+
assistantText: RESPONSE_WITH_ARTIFACT,
539+
promptThrows: new HttpError('upstream 500', 500),
540+
promptThrowsTimes: 1,
541+
};
542+
const onRetry = vi.fn();
543+
const promise = generateViaAgent(
544+
{
545+
prompt: 'design a meditation app',
546+
history: [],
547+
model: MODEL,
548+
apiKey: 'sk-test',
549+
},
550+
{ onRetry },
551+
);
552+
await vi.runAllTimersAsync();
553+
const result = await promise;
554+
expect(result.artifacts).toHaveLength(1);
555+
expect(agentCalls[0]?.prompts.length).toBe(2);
556+
expect(onRetry).toHaveBeenCalledTimes(1);
557+
expect(onRetry.mock.calls[0]?.[0].reason).toMatch(/server error/);
558+
} finally {
559+
vi.useRealTimers();
560+
}
561+
});
562+
563+
it('throws after three consecutive 500s on the first turn (retries exhausted)', async () => {
564+
vi.useFakeTimers();
565+
try {
566+
scriptedAgent = {
567+
assistantText: '',
568+
promptThrows: new HttpError('still down', 500),
569+
};
570+
const promise = generateViaAgent({
571+
prompt: 'design a dashboard',
572+
history: [],
573+
model: MODEL,
574+
apiKey: 'sk-test',
575+
});
576+
// Swallow the expected rejection while we drain timers so the test
577+
// does not surface it as an unhandled promise.
578+
const settled = promise.catch((err: unknown) => ({ rejected: err }));
579+
await vi.runAllTimersAsync();
580+
const outcome = (await settled) as { rejected?: unknown };
581+
expect(outcome.rejected).toBeDefined();
582+
expect(agentCalls[0]?.prompts.length).toBe(3);
583+
} finally {
584+
vi.useRealTimers();
585+
}
586+
});
587+
588+
it('does not retry 4xx client errors (no 401 replay)', async () => {
589+
scriptedAgent = {
590+
assistantText: '',
591+
promptThrows: new HttpError('unauthorized', 401),
592+
};
593+
await expect(
594+
generateViaAgent({
595+
prompt: 'design a dashboard',
596+
history: [],
597+
model: MODEL,
598+
apiKey: 'sk-test',
599+
}),
600+
).rejects.toBeTruthy();
601+
expect(agentCalls[0]?.prompts.length).toBe(1);
602+
});
603+
604+
it('does not retry once the agent has produced an assistant message (side-effect guard)', async () => {
605+
// First-turn + transient 500, BUT the mock pushes a partial assistant
606+
// message before throwing, simulating "model already emitted tokens /
607+
// tool calls before the connection dropped". Replaying would re-run
608+
// any text_editor / set_todos side effects, so retry must be blocked
609+
// regardless of the HTTP status. A single attempt is the only safe move.
610+
scriptedAgent = {
611+
assistantText: '',
612+
promptThrows: new HttpError('upstream 500', 500),
613+
promptPushesAssistantBeforeThrow: true,
614+
};
615+
await expect(
616+
generateViaAgent({
617+
prompt: 'design a dashboard',
618+
history: [],
619+
model: MODEL,
620+
apiKey: 'sk-test',
621+
}),
622+
).rejects.toBeTruthy();
623+
expect(agentCalls[0]?.prompts.length).toBe(1);
624+
});
625+
626+
it('does not retry when history is non-empty (protects multi-turn agent state)', async () => {
627+
scriptedAgent = {
628+
assistantText: '',
629+
promptThrows: new HttpError('upstream 500', 500),
630+
};
631+
await expect(
632+
generateViaAgent({
633+
prompt: 'refine this',
634+
history: [
635+
{ role: 'user', content: 'first request' },
636+
{ role: 'assistant', content: 'first reply' },
637+
],
638+
model: MODEL,
639+
apiKey: 'sk-test',
640+
}),
641+
).rejects.toBeTruthy();
642+
// Single attempt: replaying a partial multi-turn session would corrupt
643+
// tool state, so the second+ turn must surface transient errors directly.
644+
expect(agentCalls[0]?.prompts.length).toBe(1);
645+
});
646+
});
647+
477648
describe('FRAME_TEMPLATES — device frame starter assets', () => {
478649
it('exposes iphone, ipad, watch, android, and macos-safari frames as JSX modules with EDITMODE markers', async () => {
479650
const { FRAME_TEMPLATES } = await import('./frames/index.js');

packages/core/src/agent.ts

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@ import {
3030
} from '@mariozechner/pi-agent-core';
3131
import type { Message as PiAiMessage, Model as PiAiModel } from '@mariozechner/pi-ai';
3232
import { type ArtifactEvent, createArtifactParser } from '@open-codesign/artifacts';
33-
import type { RetryReason } from '@open-codesign/providers';
33+
import type { RetryDecision, RetryReason } from '@open-codesign/providers';
3434
import {
35+
classifyError,
3536
claudeCodeIdentityHeaders,
3637
looksLikeClaudeOAuthToken,
3738
shouldForceClaudeCodeIdentity,
39+
withBackoff,
3840
} from '@open-codesign/providers';
3941
import {
4042
type Artifact,
@@ -822,9 +824,59 @@ export async function generateViaAgent(
822824

823825
log.info('[generate] step=send_request', ctx);
824826
const sendStart = Date.now();
827+
// First-turn-only retry, further guarded by a side-effect check. Multi-turn
828+
// requests carry half-complete agent state (tool calls mid-flight, transcript
829+
// accumulated in pi-agent-core's internal loop) — retrying would replay
830+
// partial progress and corrupt the session. Even on the first turn, retrying
831+
// is safe only before any assistant message has landed in `agent.state`:
832+
// once the model has emitted tokens or tool calls, side effects (text_editor
833+
// writes, set_todos state) have already fired and a retry would re-run them.
834+
// The pre-attempt snapshot of `agent.state.messages.length` lets us detect
835+
// whether the failed attempt produced any such artefact and, if so, mark the
836+
// error as non-retryable.
837+
const isFirstTurn = input.history.length === 0;
838+
const RETRY_BLOCKED = Symbol.for('open-codesign.retry.blocked');
839+
type RetryBlockedError = Error & { [RETRY_BLOCKED]?: true };
840+
const sendOnce = async (): Promise<void> => {
841+
const preLen = agent.state.messages.length;
842+
try {
843+
await agent.prompt(userContent);
844+
await agent.waitForIdle();
845+
} catch (err) {
846+
if (agent.state.messages.length > preLen) {
847+
const tagged = (err instanceof Error ? err : new Error(String(err))) as RetryBlockedError;
848+
tagged[RETRY_BLOCKED] = true;
849+
throw tagged;
850+
}
851+
throw err;
852+
}
853+
};
825854
try {
826-
await agent.prompt(userContent);
827-
await agent.waitForIdle();
855+
if (isFirstTurn) {
856+
const retryOpts: Parameters<typeof withBackoff>[1] = {
857+
maxRetries: 3,
858+
classify: (err): RetryDecision => {
859+
if ((err as RetryBlockedError)[RETRY_BLOCKED]) {
860+
return { retry: false, reason: 'agent already produced side effects' };
861+
}
862+
return classifyError(err);
863+
},
864+
onRetry: (info: RetryReason) => {
865+
log.warn('[generate] step=send_request.retry', {
866+
...ctx,
867+
attempt: info.attempt,
868+
totalAttempts: info.totalAttempts,
869+
delayMs: info.delayMs,
870+
reason: info.reason,
871+
});
872+
deps.onRetry?.(info);
873+
},
874+
};
875+
if (input.signal) retryOpts.signal = input.signal;
876+
await withBackoff(sendOnce, retryOpts);
877+
} else {
878+
await sendOnce();
879+
}
828880
} catch (err) {
829881
log.error('[generate] step=send_request.fail', {
830882
...ctx,

packages/providers/src/index.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,13 @@ export {
411411
withClaudeCodeIdentity,
412412
} from './claude-code-compat';
413413

414-
export { completeWithRetry, classifyError, sleepWithAbort } from './retry';
415-
export type { CompleteWithRetryOptions, RetryReason } from './retry';
414+
export { completeWithRetry, classifyError, sleepWithAbort, withBackoff } from './retry';
415+
export type {
416+
BackoffOptions,
417+
CompleteWithRetryOptions,
418+
RetryDecision,
419+
RetryReason,
420+
} from './retry';
416421

417422
export { injectSkillsIntoMessages, formatSkillsForPrompt, filterActive } from './skill-injector';
418423

0 commit comments

Comments
 (0)