From 3c9a5143779c67dc9df624092a30abc0e6643cfa Mon Sep 17 00:00:00 2001
From: Thanniru Sai Teja <iamsaitejathanniru@gmail.com>
Date: Wed, 15 Apr 2026 21:37:36 +0530
Subject: [PATCH 1/4] feat: add NVIDIA NIM provider support in llm.py

---
 interpreter/core/llm/llm.py | 72 +++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py
index 6da9b8e1e2..6ec04cd0a4 100644
--- a/interpreter/core/llm/llm.py
+++ b/interpreter/core/llm/llm.py
@@ -116,6 +116,23 @@ def run(self, messages):
         ]:
             model = "claude-3-5-sonnet-20240620"
             self.model = "claude-3-5-sonnet-20240620"
+
+        NVIDIA_MODEL_ALIASES = {
+            "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct",
+            "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct",
+            "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct",
+            "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct",
+            "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct",
+            "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct",
+            "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1",
+            "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct",
+            "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2",
+            "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
+        }
+        if model in NVIDIA_MODEL_ALIASES:
+            model = NVIDIA_MODEL_ALIASES[model]
+            self.model = model
+
         # Setup our model endpoint
         if model == "i":
             model = "openai/i"
@@ -340,6 +357,24 @@ def load(self):
         if self._is_loaded:
             return
 
+        NVIDIA_MODEL_ALIASES = {
+            "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct",
+            "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct",
+            "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct",
+            "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct",
+            "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct",
+            "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct",
+            "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1",
+            "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct",
+            "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2",
+            "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
+        }
+        if self.model in NVIDIA_MODEL_ALIASES:
+            self.model = NVIDIA_MODEL_ALIASES[self.model]
+
+        if self.model.startswith("nvidia/"):
+            self.model = self.model.replace("nvidia/", "nvidia_nim/", 1)
+
         if self.model.startswith("ollama/") and not ":" in self.model:
             self.model = self.model + ":latest"
 
@@ -402,6 +437,43 @@ def load(self):
 
             self.interpreter.display_message("*Model loaded.*\n")
 
+        if self.model.startswith("nvidia_nim/"):
+            model_name = self.model.replace("nvidia_nim/", "", 1)
+
+            if not self.api_base:
+                self.api_base = "https://integrate.api.nvidia.com/v1"
+
+            if not self.api_key:
+                self.api_key = os.getenv("NVIDIA_API_KEY") or os.getenv(
+                    "NVIDIA_NIM_API_KEY"
+                )
+                if not self.api_key:
+                    self.interpreter.display_message(
+                        f"> Missing NVIDIA API key\n\nTo use `{model_name}`, set either `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY`.\n"
+                    )
+                    exit()
+
+            nvidia_context_windows = {
+                "meta/llama-3.1-8b-instruct": 128000,
+                "meta/llama-3.1-70b-instruct": 128000,
+                "meta/llama-3.1-405b-instruct": 128000,
+                "meta/llama-3.3-70b-instruct": 128000,
+                "meta/llama-4-maverick-17b-128e-instruct": 1000000,
+                "nvidia/llama-3.1-nemotron-70b-instruct": 32768,
+                "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000,
+                "nvidia/nemotron-4-340b-instruct": 4096,
+                "mistralai/mixtral-8x22b-instruct-v0.1": 65536,
+                "mistralai/mistral-large-2-instruct": 128000,
+                "deepseek-ai/deepseek-v3.2": 131072,
+                "qwen/qwen3-coder-480b-a35b-instruct": 32768,
+            }
+            model_context_window = nvidia_context_windows.get(model_name, 8192)
+
+            if self.context_window == None:
+                self.context_window = model_context_window
+            if self.max_tokens is None:
+                self.max_tokens = min(int(model_context_window * 0.2), 4096)
+
         # Validate LLM should be moved here!!
 
         if self.context_window == None:

From f557e1af4d46f28280e402ae9e577ce3c0f4e8be Mon Sep 17 00:00:00 2001
From: Thanniru Sai Teja <iamsaitejathanniru@gmail.com>
Date: Wed, 15 Apr 2026 21:55:48 +0530
Subject: [PATCH 2/4] docs: add NVIDIA NIM hosted model guide

---
 .../hosted-models/nvidia-nim.mdx              | 60 +++++++++++++++++++
 docs/mint.json                                |  1 +
 2 files changed, 61 insertions(+)
 create mode 100644 docs/language-models/hosted-models/nvidia-nim.mdx

diff --git a/docs/language-models/hosted-models/nvidia-nim.mdx b/docs/language-models/hosted-models/nvidia-nim.mdx
new file mode 100644
index 0000000000..50e97357f5
--- /dev/null
+++ b/docs/language-models/hosted-models/nvidia-nim.mdx
@@ -0,0 +1,60 @@
+---
+title: NVIDIA NIM
+---
+
+Open Interpreter supports NVIDIA NIM via LiteLLM's OpenAI-compatible provider route.
+
+Use the `nvidia_nim/` model prefix, for example:
+
+<CodeGroup>
+
+```bash Terminal
+interpreter --model nvidia_nim/meta/llama-3.1-8b-instruct
+```
+
+```python Python
+from interpreter import interpreter
+
+interpreter.llm.model = "nvidia_nim/meta/llama-3.1-8b-instruct"
+interpreter.chat()
+```
+
+</CodeGroup>
+
+## Shorthand aliases
+
+Open Interpreter also supports these shorthand model names:
+
+- `llama-3.1-8b` → `nvidia_nim/meta/llama-3.1-8b-instruct`
+- `llama-3.1-70b` → `nvidia_nim/meta/llama-3.1-70b-instruct`
+- `llama-3.1-405b` → `nvidia_nim/meta/llama-3.1-405b-instruct`
+- `llama-3.3-70b` → `nvidia_nim/meta/llama-3.3-70b-instruct`
+- `llama-4-maverick` → `nvidia_nim/meta/llama-4-maverick-17b-128e-instruct`
+- `nemotron-70b` → `nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct`
+- `nemotron-ultra` → `nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1`
+- `nemotron-340b` → `nvidia_nim/nvidia/nemotron-4-340b-instruct`
+- `deepseek-v3` → `nvidia_nim/deepseek-ai/deepseek-v3.2`
+- `qwen3-coder` → `nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct`
+
+You can also use `nvidia/...` and Open Interpreter will normalize it to `nvidia_nim/...`.
+
+## Required environment variables
+
+Set one of the following environment variables:
+
+| Environment Variable   | Description                         |
+| ---------------------- | ----------------------------------- |
+| `NVIDIA_API_KEY`       | NVIDIA API key for NIM requests     |
+| `NVIDIA_NIM_API_KEY`   | Used if `NVIDIA_API_KEY` is not set |
+
+## Optional settings
+
+By default, Open Interpreter uses:
+
+- `api_base = https://integrate.api.nvidia.com/v1`
+- Model-specific `context_window` values for common NIM models (8192 for models not in the built-in table)
+- `max_tokens` default of 20% of context window (capped at 4096)
+
+## Notes
+
+NIM is OpenAI-compatible, so Open Interpreter uses the standard LiteLLM request path.
diff --git a/docs/mint.json b/docs/mint.json
index 943db2e055..8dfcbd44d4 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -72,6 +72,7 @@
             "language-models/hosted-models/aws-sagemaker",
             "language-models/hosted-models/baseten",
             "language-models/hosted-models/cloudflare",
+            "language-models/hosted-models/nvidia-nim",
             "language-models/hosted-models/cohere",
             "language-models/hosted-models/ai21",
             "language-models/hosted-models/deepinfra",

From ac560a4054c47b305f10047d05be870064b9d6c8 Mon Sep 17 00:00:00 2001
From: Thanniru Sai Teja <iamsaitejathanniru@gmail.com>
Date: Thu, 16 Apr 2026 17:16:07 +0530
Subject: [PATCH 3/4] Fix NVIDIA NIM integration issues

- Move NVIDIA_MODEL_ALIASES to module-level constant (no duplication)
- Add NVIDIA_CONTEXT_WINDOWS and NVIDIA_NIM_API_BASE constants
- Create _get_nvidia_api_key() helper function
- Fix API base to only set for nvidia_nim/ models
- Make None checks consistent (== None style)
- Optimize performance by eliminating dict recreation
---
 interpreter/core/llm/llm.py | 84 +++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 45 deletions(-)

diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py
index 6ec04cd0a4..d28f6677e3 100644
--- a/interpreter/core/llm/llm.py
+++ b/interpreter/core/llm/llm.py
@@ -29,6 +29,40 @@
 # Create or get the logger
 logger = logging.getLogger("LiteLLM")
 
+NVIDIA_MODEL_ALIASES = {  # shorthand model name -> full "nvidia_nim/..." model id
+    "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct",
+    "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct",
+    "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct",
+    "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct",
+    "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct",
+    "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct",
+    "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1",
+    "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct",
+    "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2",
+    "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
+}
+
+NVIDIA_CONTEXT_WINDOWS = {  # context window per model id, keyed without the "nvidia_nim/" prefix
+    "meta/llama-3.1-8b-instruct": 128000,
+    "meta/llama-3.1-70b-instruct": 128000,
+    "meta/llama-3.1-405b-instruct": 128000,
+    "meta/llama-3.3-70b-instruct": 128000,
+    "meta/llama-4-maverick-17b-128e-instruct": 1000000,
+    "nvidia/llama-3.1-nemotron-70b-instruct": 32768,
+    "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000,
+    "nvidia/nemotron-4-340b-instruct": 4096,
+    "mistralai/mixtral-8x22b-instruct-v0.1": 65536,
+    "mistralai/mistral-large-2-instruct": 128000,
+    "deepseek-ai/deepseek-v3.2": 131072,
+    "qwen/qwen3-coder-480b-a35b-instruct": 32768,
+}
+
+NVIDIA_NIM_API_BASE = "https://integrate.api.nvidia.com/v1"  # default api_base for nvidia_nim/ models when none is set
+
+def _get_nvidia_api_key():
+    """Return the NVIDIA API key from NVIDIA_API_KEY, falling back to NVIDIA_NIM_API_KEY; None if neither is set."""
+    return os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY")
+
 
 class SuppressDebugFilter(logging.Filter):
     def filter(self, record):
@@ -117,18 +151,6 @@ def run(self, messages):
             model = "claude-3-5-sonnet-20240620"
             self.model = "claude-3-5-sonnet-20240620"
 
-        NVIDIA_MODEL_ALIASES = {
-            "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct",
-            "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct",
-            "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct",
-            "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct",
-            "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct",
-            "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct",
-            "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1",
-            "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct",
-            "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2",
-            "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
-        }
         if model in NVIDIA_MODEL_ALIASES:
             model = NVIDIA_MODEL_ALIASES[model]
             self.model = model
@@ -357,18 +379,6 @@ def load(self):
         if self._is_loaded:
             return
 
-        NVIDIA_MODEL_ALIASES = {
-            "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct",
-            "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct",
-            "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct",
-            "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct",
-            "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct",
-            "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct",
-            "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1",
-            "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct",
-            "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2",
-            "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
-        }
         if self.model in NVIDIA_MODEL_ALIASES:
             self.model = NVIDIA_MODEL_ALIASES[self.model]
 
@@ -440,38 +450,22 @@ def load(self):
         if self.model.startswith("nvidia_nim/"):
             model_name = self.model.replace("nvidia_nim/", "", 1)
 
-            if not self.api_base:
-                self.api_base = "https://integrate.api.nvidia.com/v1"
+            if self.model.startswith("nvidia_nim/") and not self.api_base:
+                self.api_base = NVIDIA_NIM_API_BASE
 
             if not self.api_key:
-                self.api_key = os.getenv("NVIDIA_API_KEY") or os.getenv(
-                    "NVIDIA_NIM_API_KEY"
-                )
+                self.api_key = _get_nvidia_api_key()
                 if not self.api_key:
                     self.interpreter.display_message(
                         f"> Missing NVIDIA API key\n\nTo use `{model_name}`, set either `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY`.\n"
                     )
                     exit()
 
-            nvidia_context_windows = {
-                "meta/llama-3.1-8b-instruct": 128000,
-                "meta/llama-3.1-70b-instruct": 128000,
-                "meta/llama-3.1-405b-instruct": 128000,
-                "meta/llama-3.3-70b-instruct": 128000,
-                "meta/llama-4-maverick-17b-128e-instruct": 1000000,
-                "nvidia/llama-3.1-nemotron-70b-instruct": 32768,
-                "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000,
-                "nvidia/nemotron-4-340b-instruct": 4096,
-                "mistralai/mixtral-8x22b-instruct-v0.1": 65536,
-                "mistralai/mistral-large-2-instruct": 128000,
-                "deepseek-ai/deepseek-v3.2": 131072,
-                "qwen/qwen3-coder-480b-a35b-instruct": 32768,
-            }
-            model_context_window = nvidia_context_windows.get(model_name, 8192)
+            model_context_window = NVIDIA_CONTEXT_WINDOWS.get(model_name, 8192)
 
             if self.context_window == None:
                 self.context_window = model_context_window
-            if self.max_tokens is None:
+            if self.max_tokens == None:
                 self.max_tokens = min(int(model_context_window * 0.2), 4096)
 
         # Validate LLM should be moved here!!

From 2e99e3e47941d3366a467ecf35fdd3ad693388d0 Mon Sep 17 00:00:00 2001
From: Thanniru Sai Teja <iamsaitejathanniru@gmail.com>
Date: Thu, 16 Apr 2026 23:02:56 +0530
Subject: [PATCH 4/4] Fix PEP8 None comparison in NVIDIA block

Change == None to is None for singleton comparisons as per PEP8
---
 interpreter/core/llm/llm.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py
index d28f6677e3..8a6bd040a5 100644
--- a/interpreter/core/llm/llm.py
+++ b/interpreter/core/llm/llm.py
@@ -420,7 +420,7 @@ def load(self):
                 requests.post(f"{api_base}/api/pull", json={"name": model_name})
 
             # Get context window if not set
-            if self.context_window == None:
+            if self.context_window is None:
                 response = requests.post(
                     f"{api_base}/api/show", json={"name": model_name}
                 )
@@ -432,7 +432,7 @@ def load(self):
                         break
                 if context_length is not None:
                     self.context_window = context_length
-            if self.max_tokens == None:
+            if self.max_tokens is None:
                 if self.context_window != None:
                     self.max_tokens = int(self.context_window * 0.2)
 
@@ -463,9 +463,9 @@ def load(self):
 
             model_context_window = NVIDIA_CONTEXT_WINDOWS.get(model_name, 8192)
 
-            if self.context_window == None:
+            if self.context_window is None:
                 self.context_window = model_context_window
-            if self.max_tokens == None:
+            if self.max_tokens is None:
                 self.max_tokens = min(int(model_context_window * 0.2), 4096)
 
         # Validate LLM should be moved here!!