@@ -305,3 +305,110 @@ def generate_inner(self, inputs, **kwargs) -> str:
class SenseChatVisionAPI(SenseChatVisionWrapper):
    """Thin alias of ``SenseChatVisionWrapper`` that exposes ``generate`` directly."""

    def generate(self, message, dataset=None):
        """Delegate to the parent ``generate``, forwarding ``dataset`` unchanged."""
        parent = super()
        return parent.generate(message, dataset=dataset)
308+
309+
class SenseChatVisionV2API(BaseAPI):
    """Chat-completions client that POSTs text/image messages to ``api_base``.

    Messages are converted into a list of ``{'role': ..., 'content': [...]}``
    entries whose content parts are ``text`` or ``image_url`` (base64 data
    URL) items, and sent as JSON with a ``Bearer`` authorization header.
    """

    is_api: bool = True

    def __init__(self,
                 model: str = 'SenseNova-V6-5-Pro-20251215',
                 retry: int = 5,
                 key: str = None,
                 verbose: bool = False,
                 system_prompt: str = None,
                 temperature: float = 0,
                 timeout: int = 300,
                 api_base: str = "https://api.sensenova.cn/compatible-mode/v2/chat/completions",
                 max_completion_tokens: int = 4096,
                 img_size: int = -1,
                 **kwargs):
        """Store the request settings and initialise the ``BaseAPI`` parent.

        Args:
            model: Model identifier placed in the request payload.
            retry: Forwarded to ``BaseAPI.__init__``.
            key: API key sent as ``Authorization: Bearer <key>``.
            verbose: Forwarded to ``BaseAPI.__init__``.
            system_prompt: Forwarded to ``BaseAPI.__init__``; when set it is
                sent as the leading ``system`` message.
            temperature: Stored on the instance.
            timeout: Base request timeout in seconds (the HTTP call uses
                ``timeout * 1.1``).
            api_base: Full URL the request is POSTed to.
            max_completion_tokens: Stored on the instance.
            img_size: Total image size budget; ``-1`` disables resizing,
                otherwise must be positive.
            **kwargs: Forwarded to ``BaseAPI.__init__``.

        NOTE(review): ``temperature`` and ``max_completion_tokens`` are stored
        but ``generate_inner`` does not add them to the payload; confirm
        whether they should be sent and under which parameter names.
        """
        self.model = model
        self.fail_msg = 'Failed to obtain answer via API. '
        self.max_completion_tokens = max_completion_tokens
        self.temperature = temperature
        self.api_base = api_base
        self.key = key
        assert img_size > 0 or img_size == -1
        self.img_size = img_size
        self.timeout = timeout
        super().__init__(retry=retry, system_prompt=system_prompt, verbose=verbose, **kwargs)

        # Never write the raw credential to the log; show a masked form only.
        self.logger.info(f'Using API Base: {self.api_base}; API Key: {self._mask_key(self.key)}')

    @staticmethod
    def _mask_key(key):
        """Return a log-safe rendering of ``key`` (at most its last 4 characters)."""
        if not key:
            return '<not set>'
        key = str(key)
        return f'***{key[-4:]}' if len(key) > 8 else '***'

    def generate(self, message, dataset=None):
        """Run the parent ``generate`` on ``message``.

        ``dataset`` is accepted for signature compatibility but is not
        forwarded to the parent.
        """
        # NOTE(review): presumably dropped so it cannot reach generate_inner's
        # **kwargs, which are merged into the request payload -- confirm
        # against BaseAPI.generate.
        return super().generate(message)

    def prepare_itlist(self, inputs):
        """Convert a list of ``{'type', 'value'}`` items into content parts.

        Args:
            inputs: List of dicts. ``type == 'text'`` items carry the text in
                ``value``; ``type == 'image'`` items carry something
                ``PIL.Image.open`` accepts in ``value``.

        Returns:
            A list of ``{'type': 'text', ...}`` / ``{'type': 'image_url', ...}``
            dicts. When no image is present, all texts are joined with
            newlines into a single text part.
        """
        from vlmeval.smp import encode_image_to_base64

        assert all(isinstance(x, dict) for x in inputs)
        # Count only the image parts: the size budget is shared among images,
        # so text parts must not shrink it.
        image_num = sum(x['type'] == 'image' for x in inputs)
        if image_num:
            if self.img_size > 0:
                # Split the budget so total pixel area stays roughly constant;
                # clamp to 1 so a positive budget never truncates to 0.
                target_size = max(1, int(self.img_size / (image_num ** 0.5)))
            else:
                # Keep the "no resize" sentinel intact instead of scaling it.
                # NOTE(review): assumes encode_image_to_base64 treats -1 as
                # "do not resize" -- confirm against vlmeval.smp.
                target_size = -1

            content_list = []
            for msg in inputs:
                if msg['type'] == 'text':
                    content_list.append(dict(type='text', text=msg['value']))
                elif msg['type'] == 'image':
                    from PIL import Image
                    img = Image.open(msg['value'])
                    b64 = encode_image_to_base64(img, target_size=target_size)
                    img_struct = dict(url=f'data:image/jpeg;base64,{b64}')
                    content_list.append(dict(type='image_url', image_url=img_struct))
                # Parts of any other type are skipped, as before.
        else:
            assert all([x['type'] == 'text' for x in inputs])
            text = '\n'.join([x['value'] for x in inputs])
            content_list = [dict(type='text', text=text)]
        return content_list

    def prepare_inputs(self, inputs):
        """Build the ``messages`` list for the request.

        Args:
            inputs: Either a flat list of ``{'type', 'value'}`` items (sent as
                one ``user`` message) or a list of ``{'role', 'content'}``
                turns whose last turn must have role ``user``.

        Returns:
            A list of ``{'role', 'content'}`` dicts, prefixed with a ``system``
            message when ``self.system_prompt`` is set.
        """
        input_msgs = []
        if self.system_prompt is not None:
            input_msgs.append(dict(role='system', content=self.system_prompt))
        assert isinstance(inputs, list) and isinstance(inputs[0], dict)
        assert all(['type' in x for x in inputs]) or all(['role' in x for x in inputs]), inputs
        if 'role' in inputs[0]:
            assert inputs[-1]['role'] == 'user', inputs[-1]
            for item in inputs:
                input_msgs.append(dict(role=item['role'], content=self.prepare_itlist(item['content'])))
        else:
            input_msgs.append(dict(role='user', content=self.prepare_itlist(inputs)))
        return input_msgs

    def generate_inner(self, inputs, **kwargs) -> str:
        """POST one chat-completion request and parse the reply.

        Args:
            inputs: Messages in either format accepted by ``prepare_inputs``.
            **kwargs: Extra fields merged verbatim into the JSON payload.

        Returns:
            Despite the ``-> str`` annotation, a 3-tuple
            ``(ret_code, answer, response)``: ``ret_code`` is ``0`` for any
            2xx status and the raw HTTP status otherwise; ``answer`` is the
            stripped content of the first choice, or ``self.fail_msg`` when
            the body cannot be parsed; ``response`` is the ``requests``
            response object.
        """
        import json
        input_msgs = self.prepare_inputs(inputs)

        headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {self.key}'}

        payload = dict(model=self.model, messages=input_msgs, stream=False, **kwargs)

        # Honour lowercase proxy environment variables when present.
        proxies = {}
        if os.getenv('http_proxy'):
            proxies['http'] = os.getenv('http_proxy')
        if os.getenv('https_proxy'):
            proxies['https'] = os.getenv('https_proxy')
        proxies = proxies or None

        response = requests.post(
            self.api_base,
            headers=headers,
            data=json.dumps(payload),
            proxies=proxies,
            timeout=self.timeout * 1.1,
        )
        ret_code = response.status_code
        # Collapse every 2xx status to 0; other statuses are returned as-is.
        ret_code = 0 if (200 <= int(ret_code) < 300) else ret_code
        answer = self.fail_msg
        try:
            resp_struct = json.loads(response.text)
            answer = resp_struct['choices'][0]['message']['content'].strip()
        except Exception as err:
            # Best effort: keep fail_msg as the answer and only log when verbose.
            if self.verbose:
                self.logger.error(f'{type(err)}: {err}')
                self.logger.error(response.text if hasattr(response, 'text') else response)

        return ret_code, answer, response
0 commit comments