20 changes: 12 additions & 8 deletions README.md
@@ -57,14 +57,18 @@
 | ChatGLM-6B             | text2vec-large-chinese                |
 | ChatGLM-6B-int8        | ernie-3.0-base-zh                     |
 | ChatGLM-6B-int4        | ernie-3.0-nano-zh                     |
 | ChatGLM-6B-int4-qe     | ernie-3.0-xbase-zh                    |
 | Vicuna-7b-1.1          | simbert-base-chinese                  |
 | Vicuna-13b-1.1         | paraphrase-multilingual-MiniLM-L12-v2 |
 | BELLE-LLaMA-7B-2M      |                                       |
 | BELLE-LLaMA-13B-2M     |                                       |
 | internlm-chat-7b-8k    |                                       |
 | internlm-chat-7b-v1_1  |                                       |
 | internlm-chat-7b       |                                       |
+| [MiniMax-M2.7](https://platform.minimaxi.com/) (Cloud API)           |                                       |
+| [MiniMax-M2.7-highspeed](https://platform.minimaxi.com/) (Cloud API) |                                       |
 
+> **MiniMax Cloud LLM**: In addition to the local models above, the project can also call an LLM through the [MiniMax](https://platform.minimaxi.com/) cloud API, so no GPU is required. Set the `MINIMAX_API_KEY` environment variable, then select MiniMax-M2.7 or MiniMax-M2.7-highspeed from the model dropdown.
+
 ## 💪 Changelog

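The MiniMax note in the README hunk above covers the webui flow; the wrapper this PR adds in `minimax_llm.py` can also be driven directly as a quick smoke test outside the UI. A minimal sketch, assuming the PR's `minimax_llm.py` is on the import path and you substitute a real key:

```python
import os

# Placeholder key for illustration; the actual API call will fail without a
# real key from platform.minimaxi.com.
os.environ.setdefault("MINIMAX_API_KEY", "<your-key>")

from minimax_llm import ChatMiniMaxLLM

llm = ChatMiniMaxLLM(model_name="MiniMax-M2.7")
# LangChain's base LLM makes instances callable with a prompt string.
print(llm("Summarize what a vector store is in one sentence."))
```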
56 changes: 30 additions & 26 deletions app.py
@@ -72,32 +72,36 @@ def init_model_config(
         )
         self.llm = None
         torch.cuda.empty_cache()
-        self.llm = ChatLLM()
-        if 'chatglm2' in large_language_model.lower():
-            self.llm.model_type = 'chatglm2'
-            self.llm.model_name_or_path = llm_model_dict['chatglm2'][
-                large_language_model]
-        elif 'chatglm' in large_language_model.lower():
-            self.llm.model_type = 'chatglm'
-            self.llm.model_name_or_path = llm_model_dict['chatglm'][
-                large_language_model]
-        elif 'belle' in large_language_model.lower():
-            self.llm.model_type = 'belle'
-            self.llm.model_name_or_path = llm_model_dict['belle'][
-                large_language_model]
-        elif 'vicuna' in large_language_model.lower():
-            self.llm.model_type = 'vicuna'
-            self.llm.model_name_or_path = llm_model_dict['vicuna'][
-                large_language_model]
-        elif 'internlm' in large_language_model.lower():
-            self.llm.model_type = 'internlm'
-            self.llm.model_name_or_path = llm_model_dict['internlm'][
-                large_language_model]
-        elif 'yuan2' in large_language_model.lower():
-            self.llm.model_type = 'yuan2'
-            self.llm.model_name_or_path = llm_model_dict['yuan2'][large_language_model]
-
-        self.llm.load_llm(llm_device=LLM_DEVICE, num_gpus=num_gpus)
+        if 'minimax' in large_language_model.lower():
+            from minimax_llm import ChatMiniMaxLLM
+            self.llm = ChatMiniMaxLLM(
+                model_name=llm_model_dict['minimax'][large_language_model])
+        else:
+            self.llm = ChatLLM()
+            if 'chatglm2' in large_language_model.lower():
+                self.llm.model_type = 'chatglm2'
+                self.llm.model_name_or_path = llm_model_dict['chatglm2'][
+                    large_language_model]
+            elif 'chatglm' in large_language_model.lower():
+                self.llm.model_type = 'chatglm'
+                self.llm.model_name_or_path = llm_model_dict['chatglm'][
+                    large_language_model]
+            elif 'belle' in large_language_model.lower():
+                self.llm.model_type = 'belle'
+                self.llm.model_name_or_path = llm_model_dict['belle'][
+                    large_language_model]
+            elif 'vicuna' in large_language_model.lower():
+                self.llm.model_type = 'vicuna'
+                self.llm.model_name_or_path = llm_model_dict['vicuna'][
+                    large_language_model]
+            elif 'internlm' in large_language_model.lower():
+                self.llm.model_type = 'internlm'
+                self.llm.model_name_or_path = llm_model_dict['internlm'][
+                    large_language_model]
+            elif 'yuan2' in large_language_model.lower():
+                self.llm.model_type = 'yuan2'
+                self.llm.model_name_or_path = llm_model_dict['yuan2'][large_language_model]
+            self.llm.load_llm(llm_device=LLM_DEVICE, num_gpus=num_gpus)
 
     def init_knowledge_vector_store(self, filepath):

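The hunk above dispatches by plain substring matching on the dropdown label, with 'minimax' checked first so cloud models never reach `load_llm`. A standalone sketch of that routing logic (the `pick_provider` helper is hypothetical, for illustration only; the branch order mirrors the PR):

```python
def pick_provider(large_language_model: str) -> str:
    """Mirror the if/elif chain in init_model_config: first substring wins."""
    name = large_language_model.lower()
    # 'chatglm2' must precede 'chatglm', or ChatGLM2 models would be
    # misrouted to the plain chatglm loader.
    for key in ("minimax", "chatglm2", "chatglm", "belle",
                "vicuna", "internlm", "yuan2"):
        if key in name:
            return key
    raise ValueError(f"no provider matches {large_language_model!r}")

assert pick_provider("MiniMax-M2.7-highspeed") == "minimax"
assert pick_provider("internlm-chat-7b-8k") == "internlm"
```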
6 changes: 5 additions & 1 deletion config.py
@@ -57,8 +57,12 @@
     "internlm-chat-7b-8k": "internlm/internlm-chat-7b-8k",
     "internlm-chat-7b": "internlm/internlm-chat-7b",
     "internlm-chat-7b-v1_1": "internlm/internlm-chat-7b-v1_1",
-    }
+    },
     "yuan2":{
         "Yuan2-2B-hf":"IEITYuan/Yuan2-2B-hf"
     },
+    "minimax": {
+        "MiniMax-M2.7": "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed": "MiniMax-M2.7-highspeed",
+    }
 }
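For the cloud entries the mapping is intentionally an identity: the dropdown label doubles as the API model id, whereas local entries map to a HuggingFace path. A one-line sanity check, assuming `llm_model_dict` is the dict shown above in `config.py`:

```python
from config import llm_model_dict

# Cloud models carry no local weights path, just the API model id.
assert llm_model_dict["minimax"]["MiniMax-M2.7"] == "MiniMax-M2.7"
assert llm_model_dict["internlm"]["internlm-chat-7b"] == "internlm/internlm-chat-7b"
```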
94 changes: 94 additions & 0 deletions minimax_llm.py
@@ -0,0 +1,94 @@
"""MiniMax Cloud LLM provider for LangChain-ChatGLM-Webui.

Uses the MiniMax OpenAI-compatible API (https://api.minimax.io/v1) to provide
cloud-based LLM inference as an alternative to locally-loaded models.
"""

import os
import re
from typing import Any, Dict, List, Optional

from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from openai import OpenAI


class ChatMiniMaxLLM(LLM):
"""LangChain LLM wrapper for MiniMax cloud models via OpenAI-compatible API."""

model_name: str = "MiniMax-M2.7"
temperature: float = 0.1
max_token: int = 10000
top_p: float = 0.9
history: list = []
api_key: str = ""
api_base: str = "https://api.minimax.io/v1"
client: Any = None

def __init__(self, model_name: str = "MiniMax-M2.7", **kwargs):
super().__init__(**kwargs)
self.model_name = model_name
self.api_key = os.environ.get("MINIMAX_API_KEY", "")
if not self.api_key:
raise ValueError(
"MINIMAX_API_KEY environment variable is required. "
"Get your API key from https://platform.minimaxi.com/"
)
self._init_client()

def _init_client(self):
"""Initialize the OpenAI client pointing to MiniMax API."""
self.client = OpenAI(
api_key=self.api_key,
base_url=self.api_base,
)

@property
def _llm_type(self) -> str:
return "ChatMiniMaxLLM"

def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
# MiniMax requires temperature in (0.0, 1.0]
temperature = max(0.01, min(self.temperature, 1.0))

messages = []
# Include conversation history
for h in self.history:
if h and len(h) == 2:
if h[0]:
messages.append({"role": "user", "content": h[0]})
if h[1]:
messages.append({"role": "assistant", "content": h[1]})
messages.append({"role": "user", "content": prompt})

response = self.client.chat.completions.create(
model=self.model_name,
messages=messages,
temperature=temperature,
max_tokens=self.max_token,
top_p=self.top_p,
)

result = response.choices[0].message.content or ""

# Strip thinking tags that MiniMax M2.7 may produce
result = re.sub(r"<think>.*?</think>\s*", "", result, flags=re.DOTALL)

if stop is not None:
result = enforce_stop_tokens(result, stop)

self.history = self.history + [[None, result]]
return result

def load_llm(self, **kwargs):
"""No-op for cloud models (no local model to load)."""
pass

@property
def _identifying_params(self) -> Dict[str, Any]:
return {
"model_name": self.model_name,
"api_base": self.api_base,
"temperature": self.temperature,
"max_token": self.max_token,
}
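For reference, the request `_call` ultimately issues can be reproduced with the `openai` SDK alone. A minimal sketch using the same endpoint and default parameters as the class above (the prompt string is arbitrary, and a real `MINIMAX_API_KEY` is assumed to be exported):

```python
import os

from openai import OpenAI

client = OpenAI(
    api_key=os.environ["MINIMAX_API_KEY"],
    base_url="https://api.minimax.io/v1",  # MiniMax's OpenAI-compatible endpoint
)
resp = client.chat.completions.create(
    model="MiniMax-M2.7",
    messages=[{"role": "user", "content": "Say hello in five words."}],
    temperature=0.1,   # MiniMax accepts temperature in (0.0, 1.0]
    max_tokens=10000,
    top_p=0.9,
)
print(resp.choices[0].message.content)
```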
1 change: 1 addition & 0 deletions requirements.txt
@@ -20,3 +20,4 @@ langchain-serve # needed for the API version
 protobuf==4.25.2
 langchain-community
 einops
+openai>=1.0.0 # MiniMax Cloud LLM (OpenAI-compatible API)