20 changes: 12 additions & 8 deletions README.md
@@ -57,14 +57,18 @@
 | ChatGLM-6B             | text2vec-large-chinese                |
 | ChatGLM-6B-int8        | ernie-3.0-base-zh                     |
 | ChatGLM-6B-int4        | ernie-3.0-nano-zh                     |
 | ChatGLM-6B-int4-qe     | ernie-3.0-xbase-zh                    |
 | Vicuna-7b-1.1          | simbert-base-chinese                  |
 | Vicuna-13b-1.1         | paraphrase-multilingual-MiniLM-L12-v2 |
 | BELLE-LLaMA-7B-2M      |                                       |
 | BELLE-LLaMA-13B-2M     |                                       |
 | internlm-chat-7b-8k    |                                       |
 | internlm-chat-7b-v1_1  |                                       |
 | internlm-chat-7b       |                                       |
+| [MiniMax-M2.7](https://platform.minimaxi.com/) (Cloud API)           |                                       |
+| [MiniMax-M2.7-highspeed](https://platform.minimaxi.com/) (Cloud API) |                                       |
 
+> **MiniMax Cloud LLM**: In addition to the local models above, the project can also call an LLM through the [MiniMax](https://platform.minimaxi.com/) cloud API, so no GPU is required. Set the `MINIMAX_API_KEY` environment variable, then select MiniMax-M2.7 or MiniMax-M2.7-highspeed from the model dropdown.
+
 ## 💪 Changelog

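The MiniMax note in the README hunk above covers the webui flow; the wrapper this PR adds in `minimax_llm.py` can also be driven directly as a quick smoke test outside the UI. A minimal sketch, assuming the PR's `minimax_llm.py` is on the import path and you substitute a real key:

```python
import os

# Placeholder key for illustration; the actual API call will fail without a
# real key from platform.minimaxi.com.
os.environ.setdefault("MINIMAX_API_KEY", "<your-key>")

from minimax_llm import ChatMiniMaxLLM

llm = ChatMiniMaxLLM(model_name="MiniMax-M2.7")
# LangChain's base LLM makes instances callable with a prompt string.
print(llm("Summarize what a vector store is in one sentence."))
```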
56 changes: 30 additions & 26 deletions app.py
@@ -72,32 +72,36 @@ def init_model_config(
         )
         self.llm = None
         torch.cuda.empty_cache()
-        self.llm = ChatLLM()
-        if 'chatglm2' in large_language_model.lower():
-            self.llm.model_type = 'chatglm2'
-            self.llm.model_name_or_path = llm_model_dict['chatglm2'][
-                large_language_model]
-        elif 'chatglm' in large_language_model.lower():
-            self.llm.model_type = 'chatglm'
-            self.llm.model_name_or_path = llm_model_dict['chatglm'][
-                large_language_model]
-        elif 'belle' in large_language_model.lower():
-            self.llm.model_type = 'belle'
-            self.llm.model_name_or_path = llm_model_dict['belle'][
-                large_language_model]
-        elif 'vicuna' in large_language_model.lower():
-            self.llm.model_type = 'vicuna'
-            self.llm.model_name_or_path = llm_model_dict['vicuna'][
-                large_language_model]
-        elif 'internlm' in large_language_model.lower():
-            self.llm.model_type = 'internlm'
-            self.llm.model_name_or_path = llm_model_dict['internlm'][
-                large_language_model]
-        elif 'yuan2' in large_language_model.lower():
-            self.llm.model_type = 'yuan2'
-            self.llm.model_name_or_path = llm_model_dict['yuan2'][large_language_model]
-
-        self.llm.load_llm(llm_device=LLM_DEVICE, num_gpus=num_gpus)
+        if 'minimax' in large_language_model.lower():
+            from minimax_llm import ChatMiniMaxLLM
+            self.llm = ChatMiniMaxLLM(
+                model_name=llm_model_dict['minimax'][large_language_model])
+        else:
+            self.llm = ChatLLM()
+            if 'chatglm2' in large_language_model.lower():
+                self.llm.model_type = 'chatglm2'
+                self.llm.model_name_or_path = llm_model_dict['chatglm2'][
+                    large_language_model]
+            elif 'chatglm' in large_language_model.lower():
+                self.llm.model_type = 'chatglm'
+                self.llm.model_name_or_path = llm_model_dict['chatglm'][
+                    large_language_model]
+            elif 'belle' in large_language_model.lower():
+                self.llm.model_type = 'belle'
+                self.llm.model_name_or_path = llm_model_dict['belle'][
+                    large_language_model]
+            elif 'vicuna' in large_language_model.lower():
+                self.llm.model_type = 'vicuna'
+                self.llm.model_name_or_path = llm_model_dict['vicuna'][
+                    large_language_model]
+            elif 'internlm' in large_language_model.lower():
+                self.llm.model_type = 'internlm'
+                self.llm.model_name_or_path = llm_model_dict['internlm'][
+                    large_language_model]
+            elif 'yuan2' in large_language_model.lower():
+                self.llm.model_type = 'yuan2'
+                self.llm.model_name_or_path = llm_model_dict['yuan2'][large_language_model]
+            self.llm.load_llm(llm_device=LLM_DEVICE, num_gpus=num_gpus)
 
     def init_knowledge_vector_store(self, filepath):

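The hunk above dispatches by plain substring matching on the dropdown label, with 'minimax' checked first so cloud models never reach `load_llm`. A standalone sketch of that routing logic (the `pick_provider` helper is hypothetical, for illustration only; the branch order mirrors the PR):

```python
def pick_provider(large_language_model: str) -> str:
    """Mirror the if/elif chain in init_model_config: first substring wins."""
    name = large_language_model.lower()
    # 'chatglm2' must precede 'chatglm', or ChatGLM2 models would be
    # misrouted to the plain chatglm loader.
    for key in ("minimax", "chatglm2", "chatglm", "belle",
                "vicuna", "internlm", "yuan2"):
        if key in name:
            return key
    raise ValueError(f"no provider matches {large_language_model!r}")

assert pick_provider("MiniMax-M2.7-highspeed") == "minimax"
assert pick_provider("internlm-chat-7b-8k") == "internlm"
```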
6 changes: 5 additions & 1 deletion config.py
@@ -57,8 +57,12 @@
     "internlm-chat-7b-8k": "internlm/internlm-chat-7b-8k",
     "internlm-chat-7b": "internlm/internlm-chat-7b",
     "internlm-chat-7b-v1_1": "internlm/internlm-chat-7b-v1_1",
-    }
+    },
     "yuan2":{
         "Yuan2-2B-hf":"IEITYuan/Yuan2-2B-hf"
     },
+    "minimax": {
+        "MiniMax-M2.7": "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed",
+    }
 }
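For the cloud entries the mapping is intentionally an identity: the dropdown label doubles as the API model id, whereas local entries map to a HuggingFace path. A one-line sanity check, assuming `llm_model_dict` is the dict shown above in `config.py`:

```python
from config import llm_model_dict

# Cloud models carry no local weights path, just the API model id.
assert llm_model_dict["minimax"]["MiniMax-M2.7"] == "MiniMax-M2.7"
assert llm_model_dict["internlm"]["internlm-chat-7b"] == "internlm/internlm-chat-7b"
```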
94 changes: 94 additions & 0 deletions minimax_llm.py
@@ -0,0 +1,94 @@
"""MiniMax Cloud LLM provider for LangChain-ChatGLM-Webui.

Uses the MiniMax OpenAI-compatible API (https://api.minimax.io/v1) to provide
cloud-based LLM inference as an alternative to locally-loaded models.
"""

import os
import re
from typing import Any, Dict, List, Optional

from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from openai import OpenAI


class ChatMiniMaxLLM(LLM):
"""LangChain LLM wrapper for MiniMax cloud models via OpenAI-compatible API."""

model_name: str = "MiniMax-M2.7"
temperature: float = 0.1
max_token: int = 10000
top_p: float = 0.9
history: list = []
api_key: str = ""
api_base: str = "https://api.minimax.io/v1"
client: Any = None

def __init__(self, model_name: str = "MiniMax-M2.7", **kwargs):
super().__init__(**kwargs)
self.model_name = model_name
self.api_key = os.environ.get("MINIMAX_API_KEY", "")
if not self.api_key:
raise ValueError(
"MINIMAX_API_KEY environment variable is required. "
"Get your API key from https://platform.minimaxi.com/"
)
self._init_client()

def _init_client(self):
"""Initialize the OpenAI client pointing to MiniMax API."""
self.client = OpenAI(
api_key=self.api_key,
base_url=self.api_base,
)

@property
def _llm_type(self) -> str:
return "ChatMiniMaxLLM"

def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
# MiniMax requires temperature in (0.0, 1.0]
temperature = max(0.01, min(self.temperature, 1.0))

messages = []
# Include conversation history
for h in self.history:
if h and len(h) == 2:
if h[0]:
messages.append({"role": "user", "content": h[0]})
if h[1]:
messages.append({"role": "assistant", "content": h[1]})
messages.append({"role": "user", "content": prompt})

response = self.client.chat.completions.create(
model=self.model_name,
messages=messages,
temperature=temperature,
max_tokens=self.max_token,
top_p=self.top_p,
)

result = response.choices[0].message.content or ""

# Strip thinking tags that MiniMax M2.7 may produce
result = re.sub(r"<think>.*?</think>\s*", "", result, flags=re.DOTALL)

if stop is not None:
result = enforce_stop_tokens(result, stop)

self.history = self.history + [[None, result]]
return result

def load_llm(self, **kwargs):
"""No-op for cloud models (no local model to load)."""
pass

@property
def _identifying_params(self) -> Dict[str, Any]:
return {
"model_name": self.model_name,
"api_base": self.api_base,
"temperature": self.temperature,
"max_token": self.max_token,
}
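For reference, the request `_call` ultimately issues can be reproduced with the `openai` SDK alone. A minimal sketch using the same endpoint and default parameters as the class above (the prompt string is arbitrary, and a real `MINIMAX_API_KEY` is assumed to be exported):

```python
import os

from openai import OpenAI

client = OpenAI(
    api_key=os.environ["MINIMAX_API_KEY"],
    base_url="https://api.minimax.io/v1",  # MiniMax's OpenAI-compatible endpoint
)
resp = client.chat.completions.create(
    model="MiniMax-M2.7",
    messages=[{"role": "user", "content": "Say hello in five words."}],
    temperature=0.1,   # MiniMax accepts temperature in (0.0, 1.0]
    max_tokens=10000,
    top_p=0.9,
)
print(resp.choices[0].message.content)
```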
1 change: 1 addition & 0 deletions requirements.txt
@@ -20,3 +20,4 @@ langchain-serve # needed for the API version
 protobuf==4.25.2
 langchain-community
 einops
+openai>=1.0.0 # MiniMax Cloud LLM (OpenAI-compatible API)