From 3c9a5143779c67dc9df624092a30abc0e6643cfa Mon Sep 17 00:00:00 2001 From: Thanniru Sai Teja Date: Wed, 15 Apr 2026 21:37:36 +0530 Subject: [PATCH 1/4] feat: add NVIDIA NIM provider support in llm.py --- interpreter/core/llm/llm.py | 72 +++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py index 6da9b8e1e2..6ec04cd0a4 100644 --- a/interpreter/core/llm/llm.py +++ b/interpreter/core/llm/llm.py @@ -116,6 +116,23 @@ def run(self, messages): ]: model = "claude-3-5-sonnet-20240620" self.model = "claude-3-5-sonnet-20240620" + + NVIDIA_MODEL_ALIASES = { + "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", + "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", + "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", + "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct", + "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", + "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", + "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", + "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", + "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", + "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", + } + if model in NVIDIA_MODEL_ALIASES: + model = NVIDIA_MODEL_ALIASES[model] + self.model = model + # Setup our model endpoint if model == "i": model = "openai/i" @@ -340,6 +357,24 @@ def load(self): if self._is_loaded: return + NVIDIA_MODEL_ALIASES = { + "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", + "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", + "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", + "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct", + "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", + "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", + "nemotron-ultra": 
"nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", + "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", + "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", + "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", + } + if self.model in NVIDIA_MODEL_ALIASES: + self.model = NVIDIA_MODEL_ALIASES[self.model] + + if self.model.startswith("nvidia/"): + self.model = self.model.replace("nvidia/", "nvidia_nim/", 1) + if self.model.startswith("ollama/") and not ":" in self.model: self.model = self.model + ":latest" @@ -402,6 +437,43 @@ def load(self): self.interpreter.display_message("*Model loaded.*\n") + if self.model.startswith("nvidia_nim/"): + model_name = self.model.replace("nvidia_nim/", "", 1) + + if not self.api_base: + self.api_base = "https://integrate.api.nvidia.com/v1" + + if not self.api_key: + self.api_key = os.getenv("NVIDIA_API_KEY") or os.getenv( + "NVIDIA_NIM_API_KEY" + ) + if not self.api_key: + self.interpreter.display_message( + f"> Missing NVIDIA API key\n\nTo use `{model_name}`, set either `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY`.\n" + ) + exit() + + nvidia_context_windows = { + "meta/llama-3.1-8b-instruct": 128000, + "meta/llama-3.1-70b-instruct": 128000, + "meta/llama-3.1-405b-instruct": 128000, + "meta/llama-3.3-70b-instruct": 128000, + "meta/llama-4-maverick-17b-128e-instruct": 1000000, + "nvidia/llama-3.1-nemotron-70b-instruct": 32768, + "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000, + "nvidia/nemotron-4-340b-instruct": 4096, + "mistralai/mixtral-8x22b-instruct-v0.1": 65536, + "mistralai/mistral-large-2-instruct": 128000, + "deepseek-ai/deepseek-v3.2": 131072, + "qwen/qwen3-coder-480b-a35b-instruct": 32768, + } + model_context_window = nvidia_context_windows.get(model_name, 8192) + + if self.context_window == None: + self.context_window = model_context_window + if self.max_tokens is None: + self.max_tokens = min(int(model_context_window * 0.2), 4096) + # Validate LLM should be moved here!! 
if self.context_window == None: From f557e1af4d46f28280e402ae9e577ce3c0f4e8be Mon Sep 17 00:00:00 2001 From: Thanniru Sai Teja Date: Wed, 15 Apr 2026 21:55:48 +0530 Subject: [PATCH 2/4] docs: add NVIDIA NIM hosted model guide --- .../hosted-models/nvidia-nim.mdx | 60 +++++++++++++++++++ docs/mint.json | 1 + 2 files changed, 61 insertions(+) create mode 100644 docs/language-models/hosted-models/nvidia-nim.mdx diff --git a/docs/language-models/hosted-models/nvidia-nim.mdx b/docs/language-models/hosted-models/nvidia-nim.mdx new file mode 100644 index 0000000000..50e97357f5 --- /dev/null +++ b/docs/language-models/hosted-models/nvidia-nim.mdx @@ -0,0 +1,60 @@ +--- +title: NVIDIA NIM +--- + +Open Interpreter supports NVIDIA NIM via LiteLLM's OpenAI-compatible provider route. + +Use the `nvidia_nim/` model prefix, for example: + + + +```bash Terminal +interpreter --model nvidia_nim/meta/llama-3.1-8b-instruct +``` + +```python Python +from interpreter import interpreter + +interpreter.llm.model = "nvidia_nim/meta/llama-3.1-8b-instruct" +interpreter.chat() +``` + + + +## Shorthand aliases + +Open Interpreter also supports these shorthand model names: + +- `llama-3.1-8b` → `nvidia_nim/meta/llama-3.1-8b-instruct` +- `llama-3.1-70b` → `nvidia_nim/meta/llama-3.1-70b-instruct` +- `llama-3.1-405b` → `nvidia_nim/meta/llama-3.1-405b-instruct` +- `llama-3.3-70b` → `nvidia_nim/meta/llama-3.3-70b-instruct` +- `llama-4-maverick` → `nvidia_nim/meta/llama-4-maverick-17b-128e-instruct` +- `nemotron-70b` → `nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct` +- `nemotron-ultra` → `nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1` +- `nemotron-340b` → `nvidia_nim/nvidia/nemotron-4-340b-instruct` +- `deepseek-v3` → `nvidia_nim/deepseek-ai/deepseek-v3.2` +- `qwen3-coder` → `nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct` + +You can also use `nvidia/...` and Open Interpreter will normalize it to `nvidia_nim/...`. 
+ +## Required environment variables + +Set one of the following environment variables: + +| Environment Variable | Description | +| ---------------------- | ----------------------------------- | +| `NVIDIA_API_KEY` | NVIDIA API key for NIM requests | +| `NVIDIA_NIM_API_KEY` | Alternate environment variable name | + +## Optional settings + +By default, Open Interpreter uses: + +- `api_base = https://integrate.api.nvidia.com/v1` +- Model-specific `context_window` values for common NIM models +- `max_tokens` default of 20% of context window (capped at 4096) + +## Notes + +NIM is OpenAI-compatible, so Open Interpreter uses the standard LiteLLM request path. diff --git a/docs/mint.json b/docs/mint.json index 943db2e055..8dfcbd44d4 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -72,6 +72,7 @@ "language-models/hosted-models/aws-sagemaker", "language-models/hosted-models/baseten", "language-models/hosted-models/cloudflare", + "language-models/hosted-models/nvidia-nim", "language-models/hosted-models/cohere", "language-models/hosted-models/ai21", "language-models/hosted-models/deepinfra", From ac560a4054c47b305f10047d05be870064b9d6c8 Mon Sep 17 00:00:00 2001 From: Thanniru Sai Teja Date: Thu, 16 Apr 2026 17:16:07 +0530 Subject: [PATCH 3/4] Fix NVIDIA NIM integration issues - Move NVIDIA_MODEL_ALIASES to module-level constant (no duplication) - Add NVIDIA_CONTEXT_WINDOWS and NVIDIA_NIM_API_BASE constants - Create _get_nvidia_api_key() helper function - Fix API base to only set for nvidia_nim/ models - Make None checks consistent (== None style) - Optimize performance by eliminating dict recreation --- interpreter/core/llm/llm.py | 84 +++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py index 6ec04cd0a4..d28f6677e3 100644 --- a/interpreter/core/llm/llm.py +++ b/interpreter/core/llm/llm.py @@ -29,6 +29,40 @@ # Create or get the logger logger = 
logging.getLogger("LiteLLM") +NVIDIA_MODEL_ALIASES = { + "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", + "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", + "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", + "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct", + "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", + "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", + "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", + "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", + "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", + "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", +} + +NVIDIA_CONTEXT_WINDOWS = { + "meta/llama-3.1-8b-instruct": 128000, + "meta/llama-3.1-70b-instruct": 128000, + "meta/llama-3.1-405b-instruct": 128000, + "meta/llama-3.3-70b-instruct": 128000, + "meta/llama-4-maverick-17b-128e-instruct": 1000000, + "nvidia/llama-3.1-nemotron-70b-instruct": 32768, + "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000, + "nvidia/nemotron-4-340b-instruct": 4096, + "mistralai/mixtral-8x22b-instruct-v0.1": 65536, + "mistralai/mistral-large-2-instruct": 128000, + "deepseek-ai/deepseek-v3.2": 131072, + "qwen/qwen3-coder-480b-a35b-instruct": 32768, +} + +NVIDIA_NIM_API_BASE = "https://integrate.api.nvidia.com/v1" + +def _get_nvidia_api_key(): + """Get NVIDIA API key from environment, checking both legacy and standard keys.""" + return os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY") + class SuppressDebugFilter(logging.Filter): def filter(self, record): @@ -117,18 +151,6 @@ def run(self, messages): model = "claude-3-5-sonnet-20240620" self.model = "claude-3-5-sonnet-20240620" - NVIDIA_MODEL_ALIASES = { - "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", - "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", - "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", - "llama-3.3-70b": 
"nvidia_nim/meta/llama-3.3-70b-instruct", - "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", - "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", - "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", - "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", - "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", - "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", - } if model in NVIDIA_MODEL_ALIASES: model = NVIDIA_MODEL_ALIASES[model] self.model = model @@ -357,18 +379,6 @@ def load(self): if self._is_loaded: return - NVIDIA_MODEL_ALIASES = { - "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct", - "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct", - "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct", - "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct", - "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct", - "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct", - "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", - "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct", - "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2", - "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct", - } if self.model in NVIDIA_MODEL_ALIASES: self.model = NVIDIA_MODEL_ALIASES[self.model] @@ -440,38 +450,22 @@ def load(self): if self.model.startswith("nvidia_nim/"): model_name = self.model.replace("nvidia_nim/", "", 1) - if not self.api_base: - self.api_base = "https://integrate.api.nvidia.com/v1" + if self.model.startswith("nvidia_nim/") and not self.api_base: + self.api_base = NVIDIA_NIM_API_BASE if not self.api_key: - self.api_key = os.getenv("NVIDIA_API_KEY") or os.getenv( - "NVIDIA_NIM_API_KEY" - ) + self.api_key = _get_nvidia_api_key() if not self.api_key: self.interpreter.display_message( f"> Missing NVIDIA API key\n\nTo use `{model_name}`, set either `NVIDIA_API_KEY` or 
`NVIDIA_NIM_API_KEY`.\n" ) exit() - nvidia_context_windows = { - "meta/llama-3.1-8b-instruct": 128000, - "meta/llama-3.1-70b-instruct": 128000, - "meta/llama-3.1-405b-instruct": 128000, - "meta/llama-3.3-70b-instruct": 128000, - "meta/llama-4-maverick-17b-128e-instruct": 1000000, - "nvidia/llama-3.1-nemotron-70b-instruct": 32768, - "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000, - "nvidia/nemotron-4-340b-instruct": 4096, - "mistralai/mixtral-8x22b-instruct-v0.1": 65536, - "mistralai/mistral-large-2-instruct": 128000, - "deepseek-ai/deepseek-v3.2": 131072, - "qwen/qwen3-coder-480b-a35b-instruct": 32768, - } - model_context_window = nvidia_context_windows.get(model_name, 8192) + model_context_window = NVIDIA_CONTEXT_WINDOWS.get(model_name, 8192) if self.context_window == None: self.context_window = model_context_window - if self.max_tokens is None: + if self.max_tokens == None: self.max_tokens = min(int(model_context_window * 0.2), 4096) # Validate LLM should be moved here!! From 2e99e3e47941d3366a467ecf35fdd3ad693388d0 Mon Sep 17 00:00:00 2001 From: Thanniru Sai Teja Date: Thu, 16 Apr 2026 23:02:56 +0530 Subject: [PATCH 4/4] Fix PEP8 None comparisons in load() Change == None to is None for singleton comparisons as per PEP8, in both the /api/show context-window lookup and the NVIDIA NIM block --- interpreter/core/llm/llm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py index d28f6677e3..8a6bd040a5 100644 --- a/interpreter/core/llm/llm.py +++ b/interpreter/core/llm/llm.py @@ -420,7 +420,7 @@ def load(self): requests.post(f"{api_base}/api/pull", json={"name": model_name}) # Get context window if not set - if self.context_window == None: + if self.context_window is None: response = requests.post( f"{api_base}/api/show", json={"name": model_name} ) @@ -432,7 +432,7 @@ def load(self): break if context_length is not None: self.context_window = context_length - if self.max_tokens == None: + if self.max_tokens is None: if self.context_window != None: 
self.max_tokens = int(self.context_window * 0.2) @@ -463,9 +463,9 @@ def load(self): model_context_window = NVIDIA_CONTEXT_WINDOWS.get(model_name, 8192) - if self.context_window == None: + if self.context_window is None: self.context_window = model_context_window - if self.max_tokens == None: + if self.max_tokens is None: self.max_tokens = min(int(model_context_window * 0.2), 4096) # Validate LLM should be moved here!!