diff --git a/docs/language-models/hosted-models/nvidia-nim.mdx b/docs/language-models/hosted-models/nvidia-nim.mdx new file mode 100644 index 0000000000..50e97357f5 --- /dev/null +++ b/docs/language-models/hosted-models/nvidia-nim.mdx @@ -0,0 +1,60 @@ +--- +title: NVIDIA NIM +--- + +Open Interpreter supports NVIDIA NIM via LiteLLM's OpenAI-compatible provider route. + +Use the `nvidia_nim/` model prefix, for example: + + + +```bash Terminal +interpreter --model nvidia_nim/meta/llama-3.1-8b-instruct +``` + +```python Python +from interpreter import interpreter + +interpreter.llm.model = "nvidia_nim/meta/llama-3.1-8b-instruct" +interpreter.chat() +``` + + + +## Shorthand aliases + +Open Interpreter also supports these shorthand model names: + +- `llama-3.1-8b` → `nvidia_nim/meta/llama-3.1-8b-instruct` +- `llama-3.1-70b` → `nvidia_nim/meta/llama-3.1-70b-instruct` +- `llama-3.1-405b` → `nvidia_nim/meta/llama-3.1-405b-instruct` +- `llama-3.3-70b` → `nvidia_nim/meta/llama-3.3-70b-instruct` +- `llama-4-maverick` → `nvidia_nim/meta/llama-4-maverick-17b-128e-instruct` +- `nemotron-70b` → `nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct` +- `nemotron-ultra` → `nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1` +- `nemotron-340b` → `nvidia_nim/nvidia/nemotron-4-340b-instruct` +- `deepseek-v3` → `nvidia_nim/deepseek-ai/deepseek-v3.2` +- `qwen3-coder` → `nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct` + +You can also use `nvidia/...` and Open Interpreter will normalize it to `nvidia_nim/...`. 
+ +## Required environment variables + +Set one of the following environment variables: + +| Environment Variable | Description | +| ---------------------- | ----------------------------------- | +| `NVIDIA_API_KEY` | NVIDIA API key for NIM requests | +| `NVIDIA_NIM_API_KEY` | Alternate environment variable name | + +## Optional settings + +By default, Open Interpreter uses: + +- `api_base = https://integrate.api.nvidia.com/v1` +- Model-specific `context_window` values for common NIM models +- `max_tokens` default of 20% of context window (capped at 4096) + +## Notes + +NIM is OpenAI-compatible, so Open Interpreter uses the standard LiteLLM request path. diff --git a/docs/mint.json b/docs/mint.json index 943db2e055..8dfcbd44d4 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -72,6 +72,7 @@ "language-models/hosted-models/aws-sagemaker", "language-models/hosted-models/baseten", "language-models/hosted-models/cloudflare", + "language-models/hosted-models/nvidia-nim", "language-models/hosted-models/cohere", "language-models/hosted-models/ai21", "language-models/hosted-models/deepinfra", diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py index 6da9b8e1e2..8a6bd040a5 100644 --- a/interpreter/core/llm/llm.py +++ b/interpreter/core/llm/llm.py @@ -29,6 +29,40 @@ # Create or get the logger logger = logging.getLogger("LiteLLM") +NVIDIA_MODEL_ALIASES = { + "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", + "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", + "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", + "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct", + "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", + "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", + "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", + "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", + "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", + "qwen3-coder": 
"nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", +} + +NVIDIA_CONTEXT_WINDOWS = { + "meta/llama-3.1-8b-instruct": 128000, + "meta/llama-3.1-70b-instruct": 128000, + "meta/llama-3.1-405b-instruct": 128000, + "meta/llama-3.3-70b-instruct": 128000, + "meta/llama-4-maverick-17b-128e-instruct": 1000000, + "nvidia/llama-3.1-nemotron-70b-instruct": 32768, + "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000, + "nvidia/nemotron-4-340b-instruct": 4096, + "mistralai/mixtral-8x22b-instruct-v0.1": 65536, + "mistralai/mistral-large-2-instruct": 128000, + "deepseek-ai/deepseek-v3.2": 131072, + "qwen/qwen3-coder-480b-a35b-instruct": 32768, +} + +NVIDIA_NIM_API_BASE = "https://integrate.api.nvidia.com/v1" + +def _get_nvidia_api_key(): + """Get NVIDIA API key from environment, checking both legacy and standard keys.""" + return os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY") + class SuppressDebugFilter(logging.Filter): def filter(self, record): @@ -116,6 +150,11 @@ def run(self, messages): ]: model = "claude-3-5-sonnet-20240620" self.model = "claude-3-5-sonnet-20240620" + + if model in NVIDIA_MODEL_ALIASES: + model = NVIDIA_MODEL_ALIASES[model] + self.model = model + # Setup our model endpoint if model == "i": model = "openai/i" @@ -340,6 +379,12 @@ def load(self): if self._is_loaded: return + if self.model in NVIDIA_MODEL_ALIASES: + self.model = NVIDIA_MODEL_ALIASES[self.model] + + if self.model.startswith("nvidia/"): + self.model = self.model.replace("nvidia/", "nvidia_nim/", 1) + if self.model.startswith("ollama/") and not ":" in self.model: self.model = self.model + ":latest" @@ -375,7 +420,7 @@ def load(self): requests.post(f"{api_base}/api/pull", json={"name": model_name}) # Get context window if not set - if self.context_window == None: + if self.context_window is None: response = requests.post( f"{api_base}/api/show", json={"name": model_name} ) @@ -387,7 +432,7 @@ def load(self): break if context_length is not None: self.context_window = context_length - if 
self.max_tokens == None:
+            if self.max_tokens is None:
                 if self.context_window != None:
                     self.max_tokens = int(self.context_window * 0.2)
@@ -402,6 +447,27 @@ def load(self):
 
         self.interpreter.display_message("*Model loaded.*\n")
 
+        # NVIDIA NIM defaults apply only to nvidia_nim/ models.
+        if self.model.startswith("nvidia_nim/"):
+            model_name = self.model.replace("nvidia_nim/", "", 1)
+
+            if not self.api_base:
+                self.api_base = NVIDIA_NIM_API_BASE
+
+            if not self.api_key:
+                self.api_key = _get_nvidia_api_key()
+            if not self.api_key:
+                self.interpreter.display_message(
+                    f"> Missing NVIDIA API key\n\nTo use `{model_name}`, set either `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY`.\n"
+                )
+                exit()
+
+            model_context_window = NVIDIA_CONTEXT_WINDOWS.get(model_name, 8192)
+
+            if self.context_window is None:
+                self.context_window = model_context_window
+            if self.max_tokens is None:
+                self.max_tokens = min(int(model_context_window * 0.2), 4096)
 
     # Validate LLM should be moved here!!
     if self.context_window == None: