From 524e9a96b1e25e7b9270e00aec40704352a620b1 Mon Sep 17 00:00:00 2001 From: BukJiang <1171971708@qq.com> Date: Wed, 10 Jun 2026 20:44:07 +0800 Subject: [PATCH 1/2] fix(tool): allow custom default vision model in OpenAIMultiModalTool --- .../tool/multimodal/OpenAIMultiModalTool.java | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java b/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java index d9ed5ab016..17cb9b56ff 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java +++ b/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java @@ -67,13 +67,16 @@ public class OpenAIMultiModalTool { /** Base URL for OpenAI API (defaults to https://api.openai.com). */ private final String baseUrl; + /** Default vision model used when the caller does not specify one. */ + private final String defaultModelName; + /** * Create a new OpenAIMultiModalTool with default base URL. * * @param apiKey the OpenAI API key */ public OpenAIMultiModalTool(String apiKey) { - this(apiKey, null); + this(apiKey, null, "gpt-4o"); } /** @@ -83,11 +86,27 @@ public OpenAIMultiModalTool(String apiKey) { * @param baseUrl the base URL (null for default https://api.openai.com) */ public OpenAIMultiModalTool(String apiKey, String baseUrl) { + this(apiKey, baseUrl, "gpt-4o"); + } + + /** + * Create a new OpenAIMultiModalTool with custom base URL and default vision model. + * + * @param apiKey the OpenAI API key + * @param baseUrl the base URL (null for default https://api.openai.com) + * @param defaultModelName the default vision model name used when the caller omits the model + * parameter (e.g., "gpt-4o" for OpenAI, or your backend's vision model name) + */ + public OpenAIMultiModalTool(String apiKey, String baseUrl, String defaultModelName) { if (apiKey == null || apiKey.trim().isEmpty()) { throw new IllegalArgumentException("OpenAI API key cannot be empty."); } + if (defaultModelName == null || defaultModelName.trim().isEmpty()) { + throw new IllegalArgumentException("defaultModelName cannot be empty."); + } this.apiKey = apiKey; this.baseUrl = baseUrl; + this.defaultModelName = defaultModelName; this.client = new OpenAIClient(); } @@ -99,6 +118,7 @@ public OpenAIMultiModalTool(String apiKey, String baseUrl) { protected OpenAIMultiModalTool(OpenAIClient client) { this.apiKey = "test-key"; this.baseUrl = null; + this.defaultModelName = "gpt-4o"; this.client = client; } @@ -249,7 +269,7 @@ public Mono openaiTextToImage( * * @param imageUrls the URLs of the images to analyze * @param prompt the text prompt describing what to extract from the images - * @param model the vision model to use (e.g., "gpt-4o", "gpt-4-vision-preview") + * @param model the vision model to use (leave empty to use the configured default) * @param maxTokens the maximum number of tokens in the response * @return a ToolResultBlock containing the text description of the images */ @@ -272,8 +292,8 @@ public Mono openaiImageToText( @ToolParam( name = "model", description = - "The vision model to use, e.g., 'gpt-4o'," - + " 'gpt-4-vision-preview'", + "The vision model to use (leave empty to use the configured" + + " default)", required = false) String model, @ToolParam( @@ -283,7 +303,9 @@ public Mono openaiImageToText( Integer maxTokens) { String finalModel = - Optional.ofNullable(model).filter(s -> !s.trim().isEmpty()).orElse("gpt-4o"); + Optional.ofNullable(model) + .filter(s -> !s.trim().isEmpty()) + .orElse(this.defaultModelName); String finalPrompt = Optional.ofNullable(prompt) .filter(s -> !s.trim().isEmpty()) From ac883254e4f389cca5788b7cfea6707e2d3e045b Mon Sep 17 00:00:00 2001 From: BukJiang <1171971708@qq.com> Date: Wed, 10 Jun 2026 21:35:28 +0800 Subject: [PATCH 2/2] test(tool): add unit tests for defaultModelName in OpenAIMultiModalTool --- .../tool/multimodal/OpenAIMultiModalTool.java | 15 ++++++- .../multimodal/OpenAIMultiModalToolTest.java | 39 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java b/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java index 17cb9b56ff..2ef48e46c4 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java +++ b/agentscope-core/src/main/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalTool.java @@ -116,9 +116,22 @@ public OpenAIMultiModalTool(String apiKey, String baseUrl, String defaultModelNa * @param client the OpenAI client */ protected OpenAIMultiModalTool(OpenAIClient client) { + this(client, "gpt-4o"); + } + + /** + * Create a new OpenAIMultiModalTool with custom client and default model (for testing). + * + * @param client the OpenAI client + * @param defaultModelName the default vision model name + */ + protected OpenAIMultiModalTool(OpenAIClient client, String defaultModelName) { + if (defaultModelName == null || defaultModelName.trim().isEmpty()) { + throw new IllegalArgumentException("defaultModelName cannot be empty."); + } this.apiKey = "test-key"; this.baseUrl = null; - this.defaultModelName = "gpt-4o"; + this.defaultModelName = defaultModelName; this.client = client; } diff --git a/agentscope-core/src/test/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalToolTest.java b/agentscope-core/src/test/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalToolTest.java index 80bfc47045..26a34cdb40 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalToolTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/tool/multimodal/OpenAIMultiModalToolTest.java @@ -17,8 +17,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -65,4 +67,41 @@ void testTextToImage_Url() { assertTrue(image.getSource() instanceof URLSource); assertEquals("https://example.com/cat.png", ((URLSource) image.getSource()).getUrl()); } + + @Test + void testImageToText_usesCustomDefaultModel() { + String imageUrl = "https://example.com/image.png"; + String jsonResponse = + "{\"choices\": [{\"message\": {\"content\": \"A cat sitting on a mat.\"}}]}"; + + when(client.callApi( + any(), + any(), + eq("/v1/chat/completions"), + argThat( + req -> { + @SuppressWarnings("unchecked") + java.util.Map map = + (java.util.Map) req; + return "my-custom-vision-model".equals(map.get("model")); + }))) + .thenReturn(jsonResponse); + + OpenAIMultiModalTool toolWithCustomModel = + new OpenAIMultiModalTool(client, "my-custom-vision-model"); + + Mono resultMono = + toolWithCustomModel.openaiImageToText(imageUrl, null, null, null); + ToolResultBlock result = resultMono.block(); + + assertNotNull(result); + } + + @Test + void testConstructor_rejectsBlankDefaultModelName() { + assertThrows( + IllegalArgumentException.class, () -> new OpenAIMultiModalTool("key", null, "")); + assertThrows( + IllegalArgumentException.class, () -> new OpenAIMultiModalTool("key", null, null)); + } }