Skip to content

Commit a965e58

Browse files
fix: send screenshot attachments to ChatGPT Codex (#157)
## Summary

Fixes a `chatgpt-codex` bug where uploaded screenshots were only reduced to filename hints, so models like `gpt-5.4` could ignore the attached visual reference entirely. This patch keeps text attachments unchanged, encodes supported image attachments as data URLs in prompt preparation, and forwards them as Responses `input_image` parts on the ChatGPT Codex generate path.

## Type of change

- [x] Bug fix
- [ ] New feature
- [ ] Refactor (no behavior change)
- [ ] Documentation
- [ ] Build / CI / tooling
- [ ] Breaking change

## Linked issue

- Refs local repro: attached screenshot uploads were not being used by the model response.

## Checklist

- [ ] I read [`docs/VISION.md`](../docs/VISION.md), [`docs/PRINCIPLES.md`](../docs/PRINCIPLES.md), and [`CLAUDE.md`](../CLAUDE.md) before starting
- [x] Commits are signed with DCO (`git commit -s`)
- [ ] `pnpm lint && pnpm typecheck && pnpm test` passes locally
- [x] Added/updated tests for the change
- [x] Added a changeset (`pnpm changeset`) if user-visible
- [ ] Updated docs if behavior changed

Validation run for this PR:

- `corepack pnpm --filter @open-codesign/desktop test -- --run src/main/prompt-context.test.ts src/main/codex-generate.test.ts`
- `corepack pnpm --filter @open-codesign/desktop typecheck`
- pre-push checks passed: workspace typecheck + `biome check .`

Note on full test suite:

- `pnpm test` still has existing Windows baseline failures unrelated to this patch (provider token-store permission assertion, core builtin-skill loader expectation, exporter PDF timeout, and opencode path separator assertions).

## Screenshots / recordings (UI changes)

- N/A

---------

Signed-off-by: Sun-sunshine06 <Sun-sunshine06@users.noreply.github.com>
Co-authored-by: Sun-sunshine06 <Sun-sunshine06@users.noreply.github.com>
Co-authored-by: Qihan <>
1 parent b793a8f commit a965e58

6 files changed

Lines changed: 257 additions & 11 deletions

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
'@open-codesign/desktop': patch
3+
'@open-codesign/core': patch
4+
---
5+
6+
fix: send attached screenshots to ChatGPT Codex as image inputs
7+
8+
Image attachments in the desktop app were previously reduced to filename-only hints on the `chatgpt-codex` route, so models like `gpt-5.4` could ignore uploaded screenshots entirely.
9+
10+
This change keeps the existing text-attachment behavior, but reads supported image files into data URLs and forwards them as Responses `input_image` parts for ChatGPT Codex generations.

apps/desktop/src/main/prompt-context.test.ts

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,34 @@ describe('preparePromptContext', () => {
3232
});
3333

3434
it('allows binary attachments (png) up to 10MB - 500KB png passes', async () => {
35-
// Binary attachments only get filename, no content read - allowed larger
3635
await expect(
3736
preparePromptContext({
3837
attachments: [{ path: 'C:/repo/image.png', name: 'image.png', size: 543_034 }],
3938
}),
4039
).rejects.toMatchObject({
4140
code: 'ATTACHMENT_READ_FAILED',
4241
});
43-
// It fails because the file doesn't exist, but importantly - NOT ATTACHMENT_TOO_LARGE
42+
});
43+
44+
it('encodes supported image attachments as data URLs', async () => {
45+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'codesign-image-attachment-'));
46+
const filePath = path.join(dir, 'shot.png');
47+
const pngBytes = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00]);
48+
await fs.writeFile(filePath, pngBytes);
49+
50+
const result = await preparePromptContext({
51+
attachments: [{ path: filePath, name: 'shot.png', size: pngBytes.length }],
52+
});
53+
54+
expect(result.attachments).toHaveLength(1);
55+
expect(result.attachments[0]).toMatchObject({
56+
name: 'shot.png',
57+
mediaType: 'image/png',
58+
});
59+
expect(result.attachments[0]?.imageDataUrl).toBe(
60+
`data:image/png;base64,${pngBytes.toString('base64')}`,
61+
);
62+
expect(result.attachments[0]?.excerpt).toBeUndefined();
4463
});
4564

4665
it('throws ATTACHMENT_TOO_LARGE for unknown extension text > 256KB', async () => {

apps/desktop/src/main/prompt-context.ts

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,19 @@ const TEXT_EXTS = new Set([
2828
'.yml',
2929
]);
3030

31+
const IMAGE_MIME_TYPES: Record<string, string> = {
32+
'.avif': 'image/avif',
33+
'.bmp': 'image/bmp',
34+
'.gif': 'image/gif',
35+
'.jpeg': 'image/jpeg',
36+
'.jpg': 'image/jpeg',
37+
'.png': 'image/png',
38+
'.webp': 'image/webp',
39+
};
40+
3141
const MAX_ATTACHMENT_CHARS = 6_000;
3242
const MAX_TEXT_ATTACHMENT_BYTES = 256_000;
33-
const MAX_BINARY_ATTACHMENT_BYTES = 10_000_000; // 10MB - binary attachments only need filename, not content
43+
const MAX_BINARY_ATTACHMENT_BYTES = 10_000_000;
3444
const MAX_URL_EXCERPT_CHARS = 1_200;
3545
const MAX_URL_RESPONSE_BYTES = 256_000;
3646
const REFERENCE_CONTENT_TYPES = ['text/html', 'application/xhtml+xml'];
@@ -60,9 +70,8 @@ function isProbablyText(buffer: Buffer, extension: string): boolean {
6070

6171
async function readAttachment(file: LocalInputFile): Promise<AttachmentContext> {
6272
const extension = extname(file.name).toLowerCase();
73+
const imageMimeType = IMAGE_MIME_TYPES[extension];
6374

64-
// Binary attachments (images, etc) only need filename - we don't send content to LLM
65-
// So allow much larger size limit
6675
const isKnownTextExtension = TEXT_EXTS.has(extension);
6776
const maxFileBytes = isKnownTextExtension
6877
? MAX_TEXT_ATTACHMENT_BYTES
@@ -97,8 +106,18 @@ async function readAttachment(file: LocalInputFile): Promise<AttachmentContext>
97106
}
98107

99108
if (!looksText) {
100-
// Definitely binary - we don't need any more content
101-
buffer = probe;
109+
if (imageMimeType) {
110+
const length = Math.max(
111+
1,
112+
Math.min(file.size || MAX_BINARY_ATTACHMENT_BYTES, maxFileBytes),
113+
);
114+
const fullBuffer = Buffer.alloc(length);
115+
const { bytesRead } = await handle.read(fullBuffer, 0, fullBuffer.length, 0);
116+
buffer = fullBuffer.subarray(0, bytesRead);
117+
} else {
118+
// Non-image binary files stay filename-only for now.
119+
buffer = probe;
120+
}
102121
} else {
103122
// It looks like text and fits within limit - read the whole thing
104123
const length = Math.max(
@@ -127,6 +146,15 @@ async function readAttachment(file: LocalInputFile): Promise<AttachmentContext>
127146
}
128147

129148
if (!isProbablyText(buffer, extension)) {
149+
if (imageMimeType) {
150+
return {
151+
name: file.name,
152+
path: file.path,
153+
note: 'Attached as an image input. Use the visual content directly, not just the filename.',
154+
mediaType: imageMimeType,
155+
imageDataUrl: `data:${imageMimeType};base64,${buffer.toString('base64')}`,
156+
};
157+
}
130158
return {
131159
name: file.name,
132160
path: file.path,

packages/core/src/index.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ export interface AttachmentContext {
6161
path: string;
6262
excerpt?: string | undefined;
6363
note?: string | undefined;
64+
mediaType?: string | undefined;
65+
imageDataUrl?: string | undefined;
6466
}
6567

6668
export interface ReferenceUrlContext {
@@ -160,11 +162,34 @@ interface ModelRunInput {
160162
signal?: AbortSignal | undefined;
161163
onRetry?: ((info: RetryReason) => void) | undefined;
162164
messages: ChatMessage[];
165+
userImages?: Array<{ data: string; mimeType: string }> | undefined;
163166
logger?: CoreLogger | undefined;
164167
/** Log step namespace, e.g. 'generate' or 'apply_comment'. Defaults to 'generate'. */
165168
logScope?: string | undefined;
166169
}
167170

171+
function attachmentToImageInput(
172+
attachment: AttachmentContext,
173+
): { data: string; mimeType: string } | null {
174+
if (!attachment.imageDataUrl || !attachment.mediaType) return null;
175+
const prefix = `data:${attachment.mediaType};base64,`;
176+
if (!attachment.imageDataUrl.startsWith(prefix)) return null;
177+
return {
178+
data: attachment.imageDataUrl.slice(prefix.length),
179+
mimeType: attachment.mediaType,
180+
};
181+
}
182+
183+
function imageInputsForWire(
184+
attachments: AttachmentContext[] | undefined,
185+
wire: WireApi | undefined,
186+
): Array<{ data: string; mimeType: string }> {
187+
if (wire !== 'openai-codex-responses') return [];
188+
return (attachments ?? [])
189+
.map((attachment) => attachmentToImageInput(attachment))
190+
.filter((image): image is { data: string; mimeType: string } => image !== null);
191+
}
192+
168193
function createHtmlArtifact(content: string, index: number): Artifact {
169194
return {
170195
id: `design-${index + 1}`,
@@ -333,6 +358,7 @@ async function runModel(input: ModelRunInput): Promise<GenerateOutput> {
333358
...(input.baseUrl !== undefined ? { baseUrl: input.baseUrl } : {}),
334359
...(input.wire !== undefined ? { wire: input.wire } : {}),
335360
...(input.httpHeaders !== undefined ? { httpHeaders: input.httpHeaders } : {}),
361+
...(input.userImages !== undefined ? { userImages: input.userImages } : {}),
336362
...(input.allowKeyless === true ? { allowKeyless: true } : {}),
337363
...(input.signal !== undefined ? { signal: input.signal } : {}),
338364
maxTokens: MAX_OUTPUT_TOKENS,
@@ -645,6 +671,7 @@ export async function generate(input: GenerateInput): Promise<GenerateOutput> {
645671
signal: input.signal,
646672
onRetry: input.onRetry,
647673
messages,
674+
userImages: imageInputsForWire(input.attachments, input.wire),
648675
logger: input.logger,
649676
});
650677
return skillResult.warnings.length > 0
@@ -698,6 +725,7 @@ export async function applyComment(input: ApplyCommentInput): Promise<GenerateOu
698725
signal: input.signal,
699726
onRetry: input.onRetry,
700727
messages,
728+
userImages: imageInputsForWire(input.attachments, input.wire),
701729
logger: input.logger,
702730
logScope: 'apply_comment',
703731
});

packages/providers/src/index.test.ts

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,110 @@ describe('complete', () => {
197197

198198
expect(result.content).toBe('ok');
199199
});
200+
201+
it('appends image inputs to the final user turn for openai-codex-responses', async () => {
202+
getModelMock.mockReturnValue({
203+
id: 'gpt-5.4',
204+
api: 'openai-codex-responses',
205+
provider: 'openai-codex',
206+
input: ['text', 'image'],
207+
});
208+
completeSimpleMock.mockImplementationOnce(async (_model, context) => {
209+
expect(context.messages).toEqual([
210+
{
211+
role: 'user',
212+
content: 'earlier turn',
213+
timestamp: 1,
214+
},
215+
{
216+
role: 'assistant',
217+
content: [{ type: 'text', text: 'tell me more' }],
218+
api: 'openai-codex-responses',
219+
provider: 'openai-codex',
220+
model: 'gpt-5.4',
221+
usage: {
222+
input: 0,
223+
output: 0,
224+
cacheRead: 0,
225+
cacheWrite: 0,
226+
totalTokens: 0,
227+
cost: {
228+
input: 0,
229+
output: 0,
230+
cacheRead: 0,
231+
cacheWrite: 0,
232+
total: 0,
233+
},
234+
},
235+
stopReason: 'stop',
236+
timestamp: 2,
237+
},
238+
{
239+
role: 'user',
240+
content: [
241+
{ type: 'text', text: 'use this screenshot' },
242+
{ type: 'image', data: 'AAAA', mimeType: 'image/png' },
243+
],
244+
timestamp: 3,
245+
},
246+
]);
247+
return {
248+
role: 'assistant',
249+
content: [{ type: 'text', text: 'ok' }],
250+
api: 'openai-codex-responses',
251+
provider: 'openai-codex',
252+
model: 'gpt-5.4',
253+
usage: {
254+
input: 1,
255+
output: 1,
256+
cacheRead: 0,
257+
cacheWrite: 0,
258+
totalTokens: 2,
259+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
260+
},
261+
stopReason: 'stop',
262+
timestamp: Date.now(),
263+
};
264+
});
265+
266+
const result = await complete(
267+
{ provider: 'chatgpt-codex', modelId: 'gpt-5.4' },
268+
[
269+
{ role: 'user', content: 'earlier turn' },
270+
{ role: 'assistant', content: 'tell me more' },
271+
{ role: 'user', content: 'use this screenshot' },
272+
],
273+
{
274+
apiKey: 'token',
275+
wire: 'openai-codex-responses',
276+
userImages: [{ data: 'AAAA', mimeType: 'image/png' }],
277+
},
278+
);
279+
280+
expect(result.content).toBe('ok');
281+
});
282+
283+
it('rejects oversized combined image inputs for openai-codex-responses', async () => {
284+
getModelMock.mockReturnValue({
285+
id: 'gpt-5.4',
286+
api: 'openai-codex-responses',
287+
provider: 'openai-codex',
288+
input: ['text', 'image'],
289+
});
290+
291+
await expect(
292+
complete(
293+
{ provider: 'chatgpt-codex', modelId: 'gpt-5.4' },
294+
[{ role: 'user', content: 'use these screenshots' }],
295+
{
296+
apiKey: 'token',
297+
wire: 'openai-codex-responses',
298+
userImages: [
299+
{ data: 'A'.repeat(2_700_000), mimeType: 'image/png' },
300+
{ data: 'A'.repeat(2_700_000), mimeType: 'image/png' },
301+
],
302+
},
303+
),
304+
).rejects.toMatchObject({ code: 'ATTACHMENT_TOO_LARGE' });
305+
});
200306
});

0 commit comments

Comments
 (0)