Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions docs/language-models/hosted-models/nvidia-nim.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
title: NVIDIA NIM
---

Open Interpreter supports NVIDIA NIM via LiteLLM's OpenAI-compatible provider route.

Use the `nvidia_nim/` model prefix, for example:

<CodeGroup>

```bash Terminal
interpreter --model nvidia_nim/meta/llama-3.1-8b-instruct
```

```python Python
from interpreter import interpreter

interpreter.llm.model = "nvidia_nim/meta/llama-3.1-8b-instruct"
interpreter.chat()
```

</CodeGroup>

## Shorthand aliases

Open Interpreter also supports these shorthand model names:

- `llama-3.1-8b` → `nvidia_nim/meta/llama-3.1-8b-instruct`
- `llama-3.1-70b` → `nvidia_nim/meta/llama-3.1-70b-instruct`
- `llama-3.1-405b` → `nvidia_nim/meta/llama-3.1-405b-instruct`
- `llama-3.3-70b` → `nvidia_nim/meta/llama-3.3-70b-instruct`
- `llama-4-maverick` → `nvidia_nim/meta/llama-4-maverick-17b-128e-instruct`
- `nemotron-70b` → `nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct`
- `nemotron-ultra` → `nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1`
- `nemotron-340b` → `nvidia_nim/nvidia/nemotron-4-340b-instruct`
- `deepseek-v3` → `nvidia_nim/deepseek-ai/deepseek-v3.2`
- `qwen3-coder` → `nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct`

You can also use `nvidia/...` and Open Interpreter will normalize it to `nvidia_nim/...`.

## Required environment variables

Set one of the following environment variables:

| Environment Variable | Description |
| ---------------------- | ----------------------------------- |
| `NVIDIA_API_KEY` | NVIDIA API key for NIM requests |
| `NVIDIA_NIM_API_KEY` | Alternate environment variable name |

## Optional settings

By default, Open Interpreter uses:

- `api_base = https://integrate.api.nvidia.com/v1`
- Model-specific `context_window` values for common NIM models
- `max_tokens` defaults to 20% of the context window, capped at 4096 tokens

## Notes

NIM is OpenAI-compatible, so Open Interpreter uses the standard LiteLLM request path.
1 change: 1 addition & 0 deletions docs/mint.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"language-models/hosted-models/aws-sagemaker",
"language-models/hosted-models/baseten",
"language-models/hosted-models/cloudflare",
"language-models/hosted-models/nvidia-nim",
"language-models/hosted-models/cohere",
"language-models/hosted-models/ai21",
"language-models/hosted-models/deepinfra",
Expand Down
70 changes: 68 additions & 2 deletions interpreter/core/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,40 @@
# Create or get the logger
logger = logging.getLogger("LiteLLM")

# Shorthand model names accepted on the CLI / API, mapped to their
# fully-qualified `nvidia_nim/<publisher>/<model>` identifiers.
NVIDIA_MODEL_ALIASES = {
    "llama-3.1-8b": "nvidia_nim/meta/llama-3.1-8b-instruct",
    "llama-3.1-70b": "nvidia_nim/meta/llama-3.1-70b-instruct",
    "llama-3.1-405b": "nvidia_nim/meta/llama-3.1-405b-instruct",
    "llama-3.3-70b": "nvidia_nim/meta/llama-3.3-70b-instruct",
    "llama-4-maverick": "nvidia_nim/meta/llama-4-maverick-17b-128e-instruct",
    "nemotron-70b": "nvidia_nim/nvidia/llama-3.1-nemotron-70b-instruct",
    "nemotron-ultra": "nvidia_nim/nvidia/llama-3.1-nemotron-ultra-253b-v1",
    "nemotron-340b": "nvidia_nim/nvidia/nemotron-4-340b-instruct",
    "deepseek-v3": "nvidia_nim/deepseek-ai/deepseek-v3.2",
    "qwen3-coder": "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
}

# Context-window sizes (in tokens) for common NIM models, keyed by the model
# name without the `nvidia_nim/` prefix. Used as a default when the user has
# not set `context_window`/`max_tokens` explicitly; unlisted models fall back
# to a separate default at load time.
NVIDIA_CONTEXT_WINDOWS = {
    "meta/llama-3.1-8b-instruct": 128000,
    "meta/llama-3.1-70b-instruct": 128000,
    "meta/llama-3.1-405b-instruct": 128000,
    "meta/llama-3.3-70b-instruct": 128000,
    "meta/llama-4-maverick-17b-128e-instruct": 1000000,
    "nvidia/llama-3.1-nemotron-70b-instruct": 32768,
    "nvidia/llama-3.1-nemotron-ultra-253b-v1": 128000,
    "nvidia/nemotron-4-340b-instruct": 4096,
    "mistralai/mixtral-8x22b-instruct-v0.1": 65536,
    "mistralai/mistral-large-2-instruct": 128000,
    "deepseek-ai/deepseek-v3.2": 131072,
    "qwen/qwen3-coder-480b-a35b-instruct": 32768,
}

NVIDIA_NIM_API_BASE = "https://integrate.api.nvidia.com/v1"

def _get_nvidia_api_key():
"""Get NVIDIA API key from environment, checking both legacy and standard keys."""
return os.getenv("NVIDIA_API_KEY") or os.getenv("NVIDIA_NIM_API_KEY")


class SuppressDebugFilter(logging.Filter):
def filter(self, record):
Expand Down Expand Up @@ -116,6 +150,11 @@ def run(self, messages):
]:
model = "claude-3-5-sonnet-20240620"
self.model = "claude-3-5-sonnet-20240620"

if model in NVIDIA_MODEL_ALIASES:
model = NVIDIA_MODEL_ALIASES[model]
self.model = model

# Setup our model endpoint
if model == "i":
model = "openai/i"
Expand Down Expand Up @@ -340,6 +379,12 @@ def load(self):
if self._is_loaded:
return

if self.model in NVIDIA_MODEL_ALIASES:
self.model = NVIDIA_MODEL_ALIASES[self.model]

if self.model.startswith("nvidia/"):
self.model = self.model.replace("nvidia/", "nvidia_nim/", 1)

if self.model.startswith("ollama/") and not ":" in self.model:
self.model = self.model + ":latest"

Expand Down Expand Up @@ -375,7 +420,7 @@ def load(self):
requests.post(f"{api_base}/api/pull", json={"name": model_name})

# Get context window if not set
if self.context_window == None:
if self.context_window is None:
response = requests.post(
f"{api_base}/api/show", json={"name": model_name}
)
Expand All @@ -387,7 +432,7 @@ def load(self):
break
if context_length is not None:
self.context_window = context_length
if self.max_tokens == None:
if self.max_tokens is None:
if self.context_window != None:
self.max_tokens = int(self.context_window * 0.2)

Expand All @@ -402,6 +447,27 @@ def load(self):

self.interpreter.display_message("*Model loaded.*\n")

if self.model.startswith("nvidia_nim/"):
model_name = self.model.replace("nvidia_nim/", "", 1)

if self.model.startswith("nvidia_nim/") and not self.api_base:
self.api_base = NVIDIA_NIM_API_BASE

if not self.api_key:
self.api_key = _get_nvidia_api_key()
if not self.api_key:
self.interpreter.display_message(
f"> Missing NVIDIA API key\n\nTo use `{model_name}`, set either `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY`.\n"
)
exit()

model_context_window = NVIDIA_CONTEXT_WINDOWS.get(model_name, 8192)

if self.context_window is None:
self.context_window = model_context_window
if self.max_tokens is None:
self.max_tokens = min(int(model_context_window * 0.2), 4096)

# Validate LLM should be moved here!!

if self.context_window == None:
Expand Down
Loading