Skip to content

Commit 655e65f

Browse files
authored
[Update] Update model settings for 2026.2 live leaderboard. (#1492)
* [Update] Update models for 2026.2 live leaderboard. * fix bug * fix setting
1 parent f765748 commit 655e65f

11 files changed

Lines changed: 444 additions & 28 deletions

File tree

vlmeval/api/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from .reka import Reka
88
from .glm_vision import GLMVisionAPI
99
from .cloudwalk import CWWrapper
10-
from .sensechat_vision import SenseChatVisionAPI
10+
from .sensechat_vision import SenseChatVisionAPI, SenseChatVisionV2API
1111
from .siliconflow import SiliconFlowAPI, TeleMMAPI
1212
from .telemm import TeleMM2_API
1313
from .telemm_thinking import TeleMM2Thinking_API
@@ -40,5 +40,5 @@
4040
'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI", 'KimiVLAPIWrapper', 'KimiVLAPI',
4141
'RBdashMMChat3_API', 'RBdashChat3_5_API', 'RBdashMMChat3_78B_API', 'RBdashMMChat3_5_38B_API',
4242
'VideoChatOnlineV2API', 'TeleMM2_API', 'TeleMM2Thinking_API', 'TogetherAPI', 'GCPVertexAPI',
43-
'BedrockAPI'
43+
'BedrockAPI', 'SenseChatVisionV2API'
4444
]

vlmeval/api/gpt.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
import sys
44
from .base import BaseAPI
5+
import math
56

67
APIBASES = {
78
'OFFICIAL': 'https://api.openai.com/v1/chat/completions',
@@ -44,6 +45,7 @@ def __init__(self,
4445
api_base: str = None,
4546
max_tokens: int = 2048,
4647
img_size: int = -1,
48+
total_img_size: int = -1,
4749
img_detail: str = 'low',
4850
use_azure: bool = False,
4951
**kwargs):
@@ -109,6 +111,8 @@ def __init__(self,
109111
self.key = key
110112
assert img_size > 0 or img_size == -1
111113
self.img_size = img_size
114+
assert total_img_size > 0 or total_img_size == -1
115+
self.total_img_size = total_img_size
112116
assert img_detail in ['high', 'low']
113117
self.img_detail = img_detail
114118
self.timeout = timeout
@@ -160,6 +164,7 @@ def __init__(self,
160164
def prepare_itlist(self, inputs):
161165
assert np.all([isinstance(x, dict) for x in inputs])
162166
has_images = np.sum([x['type'] == 'image' for x in inputs])
167+
image_num = len([x['type'] == 'image' for x in inputs])
163168
if has_images:
164169
content_list = []
165170
for msg in inputs:
@@ -168,7 +173,13 @@ def prepare_itlist(self, inputs):
168173
elif msg['type'] == 'image':
169174
from PIL import Image
170175
img = Image.open(msg['value'])
171-
b64 = encode_image_to_base64(img, target_size=self.img_size)
176+
target_size = math.inf
177+
if self.img_size > 0:
178+
target_size = self.img_size
179+
if self.total_img_size > 0:
180+
target_size = min(target_size, int(self.img_size / (image_num**0.5)))
181+
target_size = -1 if math.isinf(target_size) else target_size
182+
b64 = encode_image_to_base64(img, target_size=target_size)
172183
img_struct = dict(url=f'data:image/jpeg;base64,{b64}', detail=self.img_detail)
173184
content_list.append(dict(type='image_url', image_url=img_struct))
174185
else:

vlmeval/api/sensechat_vision.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,3 +305,110 @@ def generate_inner(self, inputs, **kwargs) -> str:
305305
class SenseChatVisionAPI(SenseChatVisionWrapper):
306306
def generate(self, message, dataset=None):
307307
return super(SenseChatVisionAPI, self).generate(message, dataset=dataset)
308+
309+
310+
class SenseChatVisionV2API(BaseAPI):
    """OpenAI-compatible chat-completions client for the SenseNova v2 endpoint.

    Builds multimodal (text + base64-encoded JPEG image) messages and POSTs
    them to the SenseNova compatible-mode chat-completions URL, returning the
    first choice's message content.
    """

    is_api: bool = True

    def __init__(self,
                 model: str = 'SenseNova-V6-5-Pro-20251215',
                 retry: int = 5,
                 key: str = None,
                 verbose: bool = False,
                 system_prompt: str = None,
                 temperature: float = 0,
                 timeout: int = 300,
                 api_base: str = "https://api.sensenova.cn/compatible-mode/v2/chat/completions",
                 max_completion_tokens: int = 4096,
                 img_size: int = -1,
                 **kwargs):
        """
        Args:
            model: Model identifier sent in the request payload.
            retry: Retry count forwarded to BaseAPI.
            key: Bearer token used in the Authorization header.
            verbose: When True, log details of failed responses.
            system_prompt: Optional system message prepended to every request.
            temperature: Sampling temperature (stored; not currently included
                in the payload — TODO confirm whether the endpoint should
                receive it).
            timeout: Request timeout in seconds; a 10% margin is added on POST.
            api_base: Full URL of the chat-completions endpoint.
            max_completion_tokens: Stored completion-token budget (not
                currently included in the payload).
            img_size: Per-image resize target for encode_image_to_base64;
                -1 disables resizing. Must be positive or -1.
        """
        self.model = model
        self.fail_msg = 'Failed to obtain answer via API. '
        self.max_completion_tokens = max_completion_tokens
        self.temperature = temperature
        self.api_base = api_base
        self.key = key
        assert img_size > 0 or img_size == -1
        self.img_size = img_size
        self.timeout = timeout
        super().__init__(retry=retry, system_prompt=system_prompt, verbose=verbose, **kwargs)

        # Never log the raw credential; show only a short suffix for debugging.
        masked_key = f'****{self.key[-4:]}' if isinstance(self.key, str) and len(self.key) >= 4 else 'N/A'
        self.logger.info(f'Using API Base: {self.api_base}; API Key: {masked_key}')

    def generate(self, message, dataset=None):
        # `dataset` is accepted for interface parity with sibling APIs but
        # intentionally unused.
        return super(SenseChatVisionV2API, self).generate(message)

    def prepare_itlist(self, inputs):
        """Convert a list of ``{'type', 'value'}`` dicts into API content parts.

        Text entries become ``{'type': 'text', ...}`` parts; image entries are
        opened with PIL and embedded as base64 data URLs. A pure-text input
        list is collapsed into a single joined text part.
        """
        import numpy as np
        from vlmeval.smp import encode_image_to_base64

        assert np.all([isinstance(x, dict) for x in inputs])
        # Count only the image messages. (A `len([...])` here would count
        # every message — the comprehension yields one boolean per input —
        # which over-shrinks images in mixed text/image prompts.)
        image_num = int(np.sum([x['type'] == 'image' for x in inputs]))
        if image_num:
            # Scale each image down by sqrt(image_num) so the combined pixel
            # budget stays near img_size**2; -1 keeps the sentinel meaning
            # "no resize" instead of producing a negative target.
            if self.img_size > 0:
                target_size = max(1, int(self.img_size / (image_num ** 0.5)))
            else:
                target_size = -1
            content_list = []
            for msg in inputs:
                if msg['type'] == 'text':
                    content_list.append(dict(type='text', text=msg['value']))
                elif msg['type'] == 'image':
                    from PIL import Image
                    img = Image.open(msg['value'])
                    b64 = encode_image_to_base64(img, target_size=target_size)
                    img_struct = dict(url=f'data:image/jpeg;base64,{b64}')
                    content_list.append(dict(type='image_url', image_url=img_struct))
        else:
            assert all([x['type'] == 'text' for x in inputs])
            text = '\n'.join([x['value'] for x in inputs])
            content_list = [dict(type='text', text=text)]
        return content_list

    def prepare_inputs(self, inputs):
        """Wrap raw inputs into role-tagged chat messages.

        Accepts either a flat list of ``{'type', 'value'}`` dicts (treated as
        a single user turn) or a list of ``{'role', 'content'}`` dicts for
        multi-turn conversations; the last turn must be from the user.
        """
        input_msgs = []
        if self.system_prompt is not None:
            input_msgs.append(dict(role='system', content=self.system_prompt))
        assert isinstance(inputs, list) and isinstance(inputs[0], dict)
        assert all(['type' in x for x in inputs]) or all(['role' in x for x in inputs]), inputs
        if 'role' in inputs[0]:
            assert inputs[-1]['role'] == 'user', inputs[-1]
            for item in inputs:
                input_msgs.append(dict(role=item['role'], content=self.prepare_itlist(item['content'])))
        else:
            input_msgs.append(dict(role='user', content=self.prepare_itlist(inputs)))
        return input_msgs

    def generate_inner(self, inputs, **kwargs) -> str:
        """POST one chat-completions request; return (ret_code, answer, response).

        ret_code is 0 on any 2xx status, otherwise the raw HTTP status code;
        answer falls back to ``self.fail_msg`` when the response cannot be
        parsed.
        """
        import json
        input_msgs = self.prepare_inputs(inputs)

        headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {self.key}'}

        payload = dict(model=self.model, messages=input_msgs, stream=False, **kwargs)

        # Honor standard proxy environment variables; pass None when unset so
        # requests falls back to its own defaults.
        proxies = {}
        if os.getenv('http_proxy'):
            proxies['http'] = os.getenv('http_proxy')
        if os.getenv('https_proxy'):
            proxies['https'] = os.getenv('https_proxy')
        proxies = proxies or None

        response = requests.post(
            self.api_base,
            headers=headers,
            data=json.dumps(payload),
            proxies=proxies,
            timeout=self.timeout * 1.1,  # small margin over the configured timeout
        )
        ret_code = response.status_code
        ret_code = 0 if (200 <= int(ret_code) < 300) else ret_code
        answer = self.fail_msg
        try:
            resp_struct = json.loads(response.text)
            answer = resp_struct['choices'][0]['message']['content'].strip()
        except Exception as err:
            if self.verbose:
                self.logger.error(f'{type(err)}: {err}')
                self.logger.error(response.text if hasattr(response, 'text') else response)

        return ret_code, answer, response

0 commit comments

Comments
 (0)