Skip to content

Commit 83e81e0

Browse files
authored
fix(desktop): degrade-probe test connection for OpenAI-compat endpoints without /models (#179) (#182)
Fixes #179. ## Summary - **Test connection degrade-probe.** When `GET /models` returns 404 on `openai-chat` or `openai-responses` wires, fall back to a minimal POST against the wire's real inference endpoint (`/chat/completions` or `/responses`) before declaring the endpoint dead. A 2xx or any API-originated request-level 4xx (400 model_unknown / 402 insufficient credits / 422 / 429) counts as pass; 401/403 is re-surfaced as an auth error; a probe 5xx is surfaced as that server error; only a probe 404 or a network error keeps the original /models failure. Anthropic wires are intentionally not degraded — `/v1/models` is standard there. Successful responses now carry `probeMethod: 'models' | 'chat_completion_degraded' | 'responses_degraded'` so the renderer can surface "/models unavailable but /chat/completions responded" for gateways like Zhipu GLM. - **Suppress misleading `missingV1` diagnostic hint.** `diagnose()` in `@open-codesign/shared` now skips the "add /v1" hypothesis when the baseUrl already carries a `/v\d+` segment (GLM `/v4`, AI Studio `/v1beta`, Cloudflare Workers AI `/v1`, ...). Previously the panel would offer to corrupt a perfectly correct baseUrl by appending `/v1`. ## Reproducing the GLM scenario (issue #179) Adding Zhipu GLM as a custom provider (OpenAI Chat wire, baseUrl `https://open.bigmodel.cn/api/paas/v4`) used to fail "Test connection" with HTTP 404 plus an auto-suggest to append `/v1`. After this change: - The probe sees `/models` return 404, degrades to `POST /chat/completions`, sees a real response from the gateway, and reports `ok: true` with `probeMethod: 'chat_completion_degraded'`. - If the user's stored key is bad, the degrade probe's 401 is surfaced as the auth-error hint rather than the 404 one. - If the baseUrl really is wrong and both endpoints 404, the original 404 error is preserved (no silent pass). 
## Test plan - [x] `pnpm test` (881 tests pass) - [x] `pnpm typecheck` (clean) - [x] `pnpm lint` (clean) - [x] New Vitest cases in `apps/desktop/src/main/connection-ipc.test.ts` cover: GLM 404 /models + 200 /chat/completions, 404 + 404 (preserves original 404), 404 + 400 (model_unknown still passes), 404 + 401 (surfaces auth error), 200 /models (no probe, `probeMethod=models`), anthropic 404 (no degrade), openai-responses degrade. - [x] New `diagnostics.test.ts` cases cover: GLM `/v4`, Cloudflare Workers AI `/v1`, AI Studio `/v1beta` — all skip `missingV1`; plain host without version still suggests `missingV1`. ## PRINCIPLES §5b - Compatibility: green — response type is additive (`probeMethod` is optional when reading existing responses). - Upgradeability: green — no schema/IPC version bump needed; callers that don't read `probeMethod` keep working. - No bloat: green — ~55 lines added to one helper plus two tiny diagnostic tweaks; no new deps. - Elegance: green — degrade is scoped to the exact wires that need it, auth failures are correctly re-routed, and the fallback is invisible to happy-path providers. --------- Signed-off-by: hqhq1025 <1506751656@qq.com>
1 parent 5664e59 commit 83e81e0

4 files changed

Lines changed: 373 additions & 8 deletions

File tree

apps/desktop/src/main/connection-ipc.test.ts

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
getCacheKey,
1919
normalizeBaseUrl,
2020
normalizeOllamaBaseUrl,
21+
runProviderTest,
2122
} from './connection-ipc';
2223

2324
// ---------------------------------------------------------------------------
@@ -840,3 +841,235 @@ describe('normalizeOllamaBaseUrl', () => {
840841
expect(() => normalizeOllamaBaseUrl('http://')).toThrow(/not a valid URL/);
841842
});
842843
});
844+
845+
// ---------------------------------------------------------------------------
846+
// runProviderTest — degrade-probe when /models 404s on OpenAI-compat endpoints
847+
// (regression for Zhipu GLM and similar gateways that don't expose /models).
848+
// ---------------------------------------------------------------------------
849+
850+
interface FakeFetchCall {
851+
url: string;
852+
method: string;
853+
body: string | undefined;
854+
}
855+
856+
function installFakeFetch(
857+
handler: (url: string, init: RequestInit) => { status: number; body?: unknown },
858+
): { calls: FakeFetchCall[]; restore: () => void } {
859+
const calls: FakeFetchCall[] = [];
860+
const originalFetch = globalThis.fetch;
861+
const fake = (async (url: string, init: RequestInit = {}) => {
862+
calls.push({
863+
url,
864+
method: typeof init.method === 'string' ? init.method : 'GET',
865+
body: typeof init.body === 'string' ? init.body : undefined,
866+
});
867+
const { status, body } = handler(url, init);
868+
return new Response(body === undefined ? null : JSON.stringify(body), {
869+
status,
870+
headers: { 'content-type': 'application/json' },
871+
});
872+
}) as unknown as typeof fetch;
873+
(globalThis as { fetch: typeof fetch }).fetch = fake;
874+
return {
875+
calls,
876+
restore: () => {
877+
(globalThis as { fetch: typeof fetch }).fetch = originalFetch;
878+
},
879+
};
880+
}
881+
882+
describe('runProviderTest degrade-probe (issue #179)', () => {
883+
beforeEach(() => {
884+
// Use real timers so fetchWithTimeout's AbortController doesn't get stuck
885+
// behind vi.useFakeTimers() from the outer beforeEach.
886+
vi.useRealTimers();
887+
});
888+
889+
it('openai-chat: /models 404 but /chat/completions 200 → ok, probeMethod=chat_completion_degraded (GLM case)', async () => {
890+
const { calls, restore } = installFakeFetch((url) => {
891+
if (url.endsWith('/models')) return { status: 404, body: { error: 'not found' } };
892+
if (url.endsWith('/chat/completions')) return { status: 200, body: { id: 'probe-response' } };
893+
return { status: 500 };
894+
});
895+
try {
896+
const res = await runProviderTest({
897+
provider: 'glm',
898+
wire: 'openai-chat',
899+
apiKey: 'sk-glm-test',
900+
baseUrl: 'https://open.bigmodel.cn/api/paas/v4',
901+
});
902+
expect(res.ok).toBe(true);
903+
if (res.ok) expect(res.probeMethod).toBe('chat_completion_degraded');
904+
expect(calls).toHaveLength(2);
905+
expect(calls[0]?.url).toMatch(/\/models$/);
906+
expect(calls[1]?.url).toMatch(/\/chat\/completions$/);
907+
expect(calls[1]?.method).toBe('POST');
908+
expect(calls[1]?.body).toBeTruthy();
909+
const body = JSON.parse(calls[1]?.body ?? '{}');
910+
expect(body.max_tokens).toBe(1);
911+
expect(body.stream).toBe(false);
912+
expect(Array.isArray(body.messages)).toBe(true);
913+
} finally {
914+
restore();
915+
}
916+
});
917+
918+
it('openai-chat: /models 404 and /chat/completions also 404 → preserves original 404', async () => {
919+
const { restore } = installFakeFetch(() => ({ status: 404 }));
920+
try {
921+
const res = await runProviderTest({
922+
provider: 'broken-gateway',
923+
wire: 'openai-chat',
924+
apiKey: 'sk-test',
925+
baseUrl: 'https://broken.example.com/v1',
926+
});
927+
expect(res.ok).toBe(false);
928+
if (!res.ok) {
929+
expect(res.code).toBe('404');
930+
expect(res.message).toBe('HTTP 404');
931+
}
932+
} finally {
933+
restore();
934+
}
935+
});
936+
937+
it('openai-chat: /models 404 + /chat/completions 400 (model_unknown) → still pass (endpoint alive)', async () => {
938+
const { restore } = installFakeFetch((url) => {
939+
if (url.endsWith('/models')) return { status: 404 };
940+
return { status: 400, body: { error: { message: 'model_not_found' } } };
941+
});
942+
try {
943+
const res = await runProviderTest({
944+
provider: 'glm',
945+
wire: 'openai-chat',
946+
apiKey: 'sk-glm-test',
947+
baseUrl: 'https://open.bigmodel.cn/api/paas/v4',
948+
});
949+
expect(res.ok).toBe(true);
950+
if (res.ok) expect(res.probeMethod).toBe('chat_completion_degraded');
951+
} finally {
952+
restore();
953+
}
954+
});
955+
956+
it('openai-chat: /models 404 + /chat/completions 401 → surface auth error, not 404', async () => {
957+
const { restore } = installFakeFetch((url) => {
958+
if (url.endsWith('/models')) return { status: 404 };
959+
return { status: 401 };
960+
});
961+
try {
962+
const res = await runProviderTest({
963+
provider: 'glm',
964+
wire: 'openai-chat',
965+
apiKey: 'wrong-key',
966+
baseUrl: 'https://open.bigmodel.cn/api/paas/v4',
967+
});
968+
expect(res.ok).toBe(false);
969+
if (!res.ok) {
970+
expect(res.code).toBe('401');
971+
expect(res.message).toBe('HTTP 401');
972+
}
973+
} finally {
974+
restore();
975+
}
976+
});
977+
978+
it('openai-chat: /models 200 → no degrade probe, probeMethod=models', async () => {
979+
const { calls, restore } = installFakeFetch(() => ({ status: 200, body: { data: [] } }));
980+
try {
981+
const res = await runProviderTest({
982+
provider: 'openai',
983+
wire: 'openai-chat',
984+
apiKey: 'sk-test',
985+
baseUrl: 'https://api.openai.com/v1',
986+
});
987+
expect(res.ok).toBe(true);
988+
if (res.ok) expect(res.probeMethod).toBe('models');
989+
expect(calls).toHaveLength(1);
990+
expect(calls[0]?.method).toBe('GET');
991+
} finally {
992+
restore();
993+
}
994+
});
995+
996+
it('anthropic: /models 404 does NOT degrade (standard endpoint must stay authoritative)', async () => {
997+
const { calls, restore } = installFakeFetch(() => ({ status: 404 }));
998+
try {
999+
const res = await runProviderTest({
1000+
provider: 'anthropic-like',
1001+
wire: 'anthropic',
1002+
apiKey: 'sk-ant-test',
1003+
baseUrl: 'https://api.anthropic.com',
1004+
});
1005+
expect(res.ok).toBe(false);
1006+
if (!res.ok) expect(res.code).toBe('404');
1007+
// Only /v1/models should have been probed — no /v1/messages degrade.
1008+
expect(calls).toHaveLength(1);
1009+
expect(calls[0]?.url).toMatch(/\/v1\/models$/);
1010+
} finally {
1011+
restore();
1012+
}
1013+
});
1014+
1015+
it('openai-responses: /models 404 + /responses 2xx → probeMethod=responses_degraded', async () => {
1016+
const { calls, restore } = installFakeFetch((url) => {
1017+
if (url.endsWith('/models')) return { status: 404 };
1018+
if (url.endsWith('/responses')) return { status: 200, body: { ok: true } };
1019+
return { status: 500 };
1020+
});
1021+
try {
1022+
const res = await runProviderTest({
1023+
provider: 'responses-gateway',
1024+
wire: 'openai-responses',
1025+
apiKey: 'sk-test',
1026+
baseUrl: 'https://gateway.example.com/v1',
1027+
});
1028+
expect(res.ok).toBe(true);
1029+
if (res.ok) expect(res.probeMethod).toBe('responses_degraded');
1030+
expect(calls).toHaveLength(2);
1031+
expect(calls[0]?.url).toMatch(/\/models$/);
1032+
expect(calls[1]?.url).toMatch(/\/responses$/);
1033+
expect(calls[1]?.method).toBe('POST');
1034+
const body = JSON.parse(calls[1]?.body ?? '{}');
1035+
// Responses API shape — must NOT look like /chat/completions payload.
1036+
expect(body.max_output_tokens).toBe(1);
1037+
expect(Array.isArray(body.input)).toBe(true);
1038+
expect(body.messages).toBeUndefined();
1039+
} finally {
1040+
restore();
1041+
}
1042+
});
1043+
1044+
it('openai-responses: /models 404 + /responses 404 → preserves original 404 (no /chat/completions false-positive)', async () => {
1045+
// Regression: the previous implementation probed /chat/completions for
1046+
// every OpenAI-compat wire. A gateway that only implements /chat/completions
1047+
// would then report the connection healthy even though real inference (on
1048+
// /responses) would 404 at generate-time. We want the opposite: if the
1049+
// wire's real endpoint is dead, the test must fail.
1050+
const { calls, restore } = installFakeFetch((url) => {
1051+
if (url.endsWith('/models')) return { status: 404 };
1052+
if (url.endsWith('/responses')) return { status: 404 };
1053+
// A gateway that only has /chat/completions — must not be consulted.
1054+
if (url.endsWith('/chat/completions')) return { status: 200, body: { id: 'wrong-probe' } };
1055+
return { status: 500 };
1056+
});
1057+
try {
1058+
const res = await runProviderTest({
1059+
provider: 'chat-only-gateway',
1060+
wire: 'openai-responses',
1061+
apiKey: 'sk-test',
1062+
baseUrl: 'https://gateway.example.com/v1',
1063+
});
1064+
expect(res.ok).toBe(false);
1065+
if (!res.ok) {
1066+
expect(res.code).toBe('404');
1067+
expect(res.message).toBe('HTTP 404');
1068+
}
1069+
// /chat/completions must NOT have been probed for an openai-responses wire.
1070+
expect(calls.some((c) => c.url.endsWith('/chat/completions'))).toBe(false);
1071+
} finally {
1072+
restore();
1073+
}
1074+
});
1075+
});

apps/desktop/src/main/connection-ipc.ts

Lines changed: 96 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ interface ModelsListPayloadV1 {
3939

4040
export interface ConnectionTestResult {
4141
ok: true;
42+
/**
43+
* `models` when the standard GET /models probe succeeded.
44+
* `chat_completion_degraded` when /models 404'd but POST /chat/completions
45+
* proved the openai-chat wire is alive (e.g. Zhipu GLM — no public /models).
46+
* `responses_degraded` when /models 404'd but POST /responses proved the
47+
* openai-responses wire is alive. We probe the wire's real inference
48+
* endpoint so a gateway that only implements /chat/completions can't
49+
* false-positive for a user whose provider is on the Responses API.
50+
*/
51+
probeMethod?: 'models' | 'chat_completion_degraded' | 'responses_degraded';
4252
}
4353

4454
export interface ConnectionTestError {
@@ -345,7 +355,7 @@ export function _getModelsCache(): Map<string, CacheEntry> {
345355
// IPC registration
346356
// ---------------------------------------------------------------------------
347357

348-
interface ActiveProviderCredentials {
358+
export interface ActiveProviderCredentials {
349359
provider: string;
350360
wire: WireApi;
351361
apiKey: string;
@@ -446,7 +456,9 @@ async function testChatGPTCodexOAuth(): Promise<ConnectionTestResponse> {
446456
return { ok: true };
447457
}
448458

449-
async function runProviderTest(creds: ActiveProviderCredentials): Promise<ConnectionTestResponse> {
459+
export async function runProviderTest(
460+
creds: ActiveProviderCredentials,
461+
): Promise<ConnectionTestResponse> {
450462
// ChatGPT subscription uses OAuth + ChatGPT-Account-Id headers; its host
451463
// has no `/models` endpoint that a generic Bearer probe can reach. A plain
452464
// HTTP probe would return 401 here and render as the misleading "API key
@@ -456,7 +468,7 @@ async function runProviderTest(creds: ActiveProviderCredentials): Promise<Connec
456468
return testChatGPTCodexOAuth();
457469
}
458470

459-
const { url } = buildEndpointForWire(creds.wire, creds.baseUrl);
471+
const { url, normalizedBaseUrl } = buildEndpointForWire(creds.wire, creds.baseUrl);
460472
const headers = buildAuthHeadersForWire(
461473
creds.wire,
462474
creds.apiKey,
@@ -477,10 +489,90 @@ async function runProviderTest(creds: ActiveProviderCredentials): Promise<Connec
477489
};
478490
}
479491
if (!res.ok) {
492+
// Some OpenAI-compatible gateways (Zhipu GLM, a handful of self-hosted
493+
// proxies) don't expose /models but their /chat/completions works fine.
494+
// If the primary probe 404s on those wires, degrade-probe with a tiny
495+
// chat request before declaring the endpoint dead. We intentionally do
496+
// not degrade anthropic — its /v1/models is standard, and skipping it
497+
// would mask real path-shape mistakes.
498+
if (res.status === 404 && (creds.wire === 'openai-chat' || creds.wire === 'openai-responses')) {
499+
const probe = await probeInferenceEndpoint(creds.wire, normalizedBaseUrl, headers);
500+
if (probe.kind === 'pass') {
501+
return {
502+
ok: true,
503+
probeMethod:
504+
creds.wire === 'openai-responses' ? 'responses_degraded' : 'chat_completion_degraded',
505+
};
506+
}
507+
if (probe.kind === 'http' && probe.status !== 404) {
508+
const { code, hint } = classifyHttpError(probe.status);
509+
return { ok: false, code, message: `HTTP ${probe.status}`, hint };
510+
}
511+
// Inference endpoint also 404'd (or the network dropped) — fall through
512+
// and report the original /models 404.
513+
}
480514
const { code, hint } = classifyHttpError(res.status);
481515
return { ok: false, code, message: `HTTP ${res.status}`, hint };
482516
}
483-
return { ok: true };
517+
return { ok: true, probeMethod: 'models' };
518+
}
519+
520+
type ProbeResult =
521+
| { kind: 'pass' }
522+
| { kind: 'http'; status: number }
523+
| { kind: 'network'; message: string };
524+
525+
/**
526+
* POST a minimal inference request to verify the endpoint is alive when GET
527+
* /models returned 404. We dispatch by wire so that providers on the
528+
* Responses API (which may not implement /chat/completions at all) can't
529+
* false-positive via a gateway that only speaks the other shape. A 2xx
530+
* response or any API-originated 4xx (400 model_unknown, 402 insufficient
531+
* credits, 422, 429 — and 401/403 too, which we surface as auth) counts as
532+
* "endpoint reachable". Only 404 and 5xx count as a real failure. The
533+
* request body is intentionally minimal; if the gateway rejects the payload
534+
* shape with a 4xx we still know the route exists.
535+
*/
536+
async function probeInferenceEndpoint(
537+
wire: 'openai-chat' | 'openai-responses',
538+
normalizedBaseUrl: string,
539+
headers: Record<string, string>,
540+
): Promise<ProbeResult> {
541+
const url =
542+
wire === 'openai-responses'
543+
? `${normalizedBaseUrl}/responses`
544+
: `${normalizedBaseUrl}/chat/completions`;
545+
const body =
546+
wire === 'openai-responses'
547+
? JSON.stringify({
548+
model: 'probe',
549+
input: [{ role: 'user', content: [{ type: 'input_text', text: 'ping' }] }],
550+
max_output_tokens: 1,
551+
stream: false,
552+
})
553+
: JSON.stringify({
554+
model: 'probe',
555+
messages: [{ role: 'user', content: 'ping' }],
556+
max_tokens: 1,
557+
stream: false,
558+
});
559+
let res: Response;
560+
try {
561+
res = await fetchWithTimeout(url, {
562+
method: 'POST',
563+
headers: { ...headers, 'content-type': 'application/json' },
564+
body,
565+
});
566+
} catch (err) {
567+
return { kind: 'network', message: err instanceof Error ? err.message : String(err) };
568+
}
569+
if (res.ok) return { kind: 'pass' };
570+
if (res.status === 404 || res.status >= 500) return { kind: 'http', status: res.status };
571+
// 401/403 — endpoint alive but auth rejected; surface as auth error so the
572+
// diagnostics panel shows the key-invalid hint instead of the 404 one.
573+
if (res.status === 401 || res.status === 403) return { kind: 'http', status: res.status };
574+
// 400/402/422/429 etc. — endpoint alive, request-level rejection.
575+
return { kind: 'pass' };
484576
}
485577

486578
export function registerConnectionIpc(): void {

0 commit comments

Comments
 (0)