2 changes: 1 addition & 1 deletion architecture/docker-driver.md
@@ -60,7 +60,7 @@ enforced by the supervisor in that nested namespace.
| `cap_add` | `SYS_ADMIN`, `NET_ADMIN`, `SYS_PTRACE`, `SYSLOG` | Required for supervisor isolation setup and process inspection |
| `security_opt` | `apparmor=unconfined` | Docker's default AppArmor profile blocks mount operations required by network namespace setup |
| `restart_policy` | `unless-stopped` | Resume managed sandboxes after Docker or gateway restarts |
| `device_requests` | CDI all-GPU request when `spec.gpu` is true | Enables Docker CDI GPU sandboxes when daemon support is detected |
| `device_requests` | CDI GPU request when `spec.gpu` is present | Uses the shared CDI GPU resolver: empty `device_id` defaults to `nvidia.com/gpu=all`, explicit IDs pass through |

## Gateway Callback

2 changes: 1 addition & 1 deletion architecture/gateway-single-node.md
@@ -391,7 +391,7 @@ When `openshell sandbox create` cannot connect to a gateway (connection refused,
1. `should_attempt_bootstrap()` in `crates/openshell-cli/src/bootstrap.rs` checks the error type. It returns `true` for connectivity errors and missing default TLS materials, but `false` for TLS handshake/auth errors.
2. If running in a terminal, the user is prompted to confirm.
3. `run_bootstrap()` deploys a gateway named `"openshell"`, sets it as active, and returns fresh `TlsOptions` pointing to the newly-written mTLS certs.
4. When `sandbox create` requests GPU explicitly (`--gpu`) or infers it from an image whose final name component contains `gpu` (such as `nvidia-gpu`), the bootstrap path enables gateway GPU support before retrying sandbox creation, using the same CDI-or-fallback selection as `gateway start --gpu`.
4. When `sandbox create` sends a GPU request, whether explicit (`--gpu`) or inferred from an image whose final name component contains `gpu` (such as `nvidia-gpu`), the bootstrap path enables gateway GPU support before retrying sandbox creation, using the same CDI-or-fallback selection as `gateway start --gpu`.

## Container Environment Variables

2 changes: 1 addition & 1 deletion architecture/podman-driver.md
@@ -43,7 +43,7 @@ graph TB
| Supervisor delivery | hostPath volume (read-only) | Embedded in rootfs tarball | OCI image volume (read-only) |
| Network model | Supervisor creates netns inside pod | gvproxy virtio-net (192.168.127.0/24) | Supervisor creates netns inside container |
| Credential injection | Plaintext env var + K8s Secret volume (0400) | Rootfs file copy (0600) + env vars | Podman `secret_env` API + env vars |
| GPU support | Yes (nvidia.com/gpu resource) | No | Yes (CDI device) |
| GPU support | Yes (nvidia.com/gpu resource) | Yes (single passthrough GPU) | Yes (CDI device IDs via shared resolver) |
| `stop_sandbox` | Unimplemented | Unimplemented | Implemented (graceful stop) |
| State storage | Kubernetes API (CRD) | In-memory HashMap + filesystem | Podman daemon (container state) |
| Endpoint resolution | Pod IP / cluster DNS | 127.0.0.1 + allocated port | 127.0.0.1 + ephemeral port |
2 changes: 1 addition & 1 deletion architecture/sandbox-custom-containers.md
@@ -27,7 +27,7 @@ The community registry prefix defaults to `ghcr.io/nvidia/openshell-community/sa

### GPU image-name detection

`sandbox create` also infers GPU intent from the final image name. The current rule matches when the last image name component contains `gpu` (for example `ghcr.io/nvidia/openshell-community/sandboxes/nvidia-gpu:latest` or `registry.example.com/team/my-gpu-image:latest`). When that rule matches, the sandbox request is treated the same as passing `--gpu`.
`sandbox create` also infers GPU intent from the final image name. The current rule matches when the last image name component contains `gpu` (for example `ghcr.io/nvidia/openshell-community/sandboxes/nvidia-gpu:latest` or `registry.example.com/team/my-gpu-image:latest`). When that rule matches, the sandbox request includes a GPU request with no device IDs, the same shape produced by passing `--gpu`.
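The inference rule above can be sketched as a small Rust helper. This is a hypothetical re-implementation for illustration only; the real `image_requests_gpu` in `crates/openshell-cli/src/run.rs` may differ in details such as digest handling:

```rust
/// Sketch of the image-name GPU inference rule: take the last path
/// component of the image reference, strip any tag, and match when it
/// contains "gpu". Illustrative only, not the production implementation.
fn image_requests_gpu(image: &str) -> bool {
    let last_component = image.rsplit('/').next().unwrap_or(image);
    let name = last_component.split(':').next().unwrap_or(last_component);
    name.contains("gpu")
}

fn main() {
    assert!(image_requests_gpu(
        "ghcr.io/nvidia/openshell-community/sandboxes/nvidia-gpu:latest"
    ));
    assert!(image_requests_gpu("registry.example.com/team/my-gpu-image:latest"));
    // The registry host is not the final component, so this does not match.
    assert!(!image_requests_gpu("gpu-registry.example.com/team/base:latest"));
}
```

Because only the final component is inspected, a `gpu` substring in the registry host or an intermediate path segment does not trigger the inference.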

### Dockerfile build flow

47 changes: 38 additions & 9 deletions crates/openshell-cli/src/run.rs
@@ -27,11 +27,12 @@ use openshell_core::proto::{
CreateProviderRequest, CreateSandboxRequest, DeleteProviderRequest, DeleteSandboxRequest,
ExecSandboxRequest, GetClusterInferenceRequest, GetDraftHistoryRequest, GetDraftPolicyRequest,
GetGatewayConfigRequest, GetProviderRequest, GetSandboxConfigRequest, GetSandboxLogsRequest,
GetSandboxPolicyStatusRequest, GetSandboxRequest, HealthRequest, ListProvidersRequest,
ListSandboxPoliciesRequest, ListSandboxesRequest, PolicySource, PolicyStatus, Provider,
RejectDraftChunkRequest, Sandbox, SandboxPhase, SandboxPolicy, SandboxSpec, SandboxTemplate,
SetClusterInferenceRequest, SettingScope, SettingValue, UpdateConfigRequest,
UpdateProviderRequest, WatchSandboxRequest, exec_sandbox_event, setting_value,
GetSandboxPolicyStatusRequest, GetSandboxRequest, GpuRequestSpec, HealthRequest,
ListProvidersRequest, ListSandboxPoliciesRequest, ListSandboxesRequest, PolicySource,
PolicyStatus, Provider, RejectDraftChunkRequest, Sandbox, SandboxPhase, SandboxPolicy,
SandboxSpec, SandboxTemplate, SetClusterInferenceRequest, SettingScope, SettingValue,
UpdateConfigRequest, UpdateProviderRequest, WatchSandboxRequest, exec_sandbox_event,
setting_value,
};
use openshell_core::settings::{self, SettingValueKind};
use openshell_core::{ObjectId, ObjectName};
@@ -2324,8 +2325,7 @@ pub async fn sandbox_create(

let request = CreateSandboxRequest {
spec: Some(SandboxSpec {
gpu: requested_gpu,
gpu_device: gpu_device.unwrap_or_default().to_string(),
gpu: gpu_request_from_cli(requested_gpu, gpu_device),
policy,
providers: configured_providers,
template,
@@ -2757,6 +2757,15 @@ pub async fn sandbox_create(
}
}

fn gpu_request_from_cli(requested_gpu: bool, gpu_device: Option<&str>) -> Option<GpuRequestSpec> {
requested_gpu.then(|| GpuRequestSpec {
device_id: gpu_device
.filter(|device_id| !device_id.is_empty())
.map(|device_id| vec![device_id.to_string()])
.unwrap_or_default(),
})
}

/// Resolved source for the `--from` flag on `sandbox create`.
#[derive(Debug)]
enum ResolvedSource {
@@ -5794,8 +5803,8 @@ mod tests {
use super::{
GatewayControlTarget, TlsOptions, dockerfile_sources_supported_for_gateway,
format_gateway_select_header, format_gateway_select_items, gateway_add, gateway_auth_label,
gateway_select_with, gateway_type_label, git_sync_files, http_health_check,
image_requests_gpu, inferred_provider_type, parse_cli_setting_value,
gateway_select_with, gateway_type_label, git_sync_files, gpu_request_from_cli,
http_health_check, image_requests_gpu, inferred_provider_type, parse_cli_setting_value,
parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message,
ready_false_condition_message, resolve_from, resolve_gateway_control_target_from,
sandbox_should_persist, shell_escape, source_requests_gpu, validate_gateway_name,
@@ -6045,6 +6054,26 @@
assert!(!source_requests_gpu("base"));
}

#[test]
fn gpu_request_from_cli_uses_presence_with_empty_device_ids_for_default_gpu() {
let request = gpu_request_from_cli(true, None).expect("gpu request should be present");

assert!(request.device_id.is_empty());
}

#[test]
fn gpu_request_from_cli_maps_gpu_device_to_one_device_id() {
let request = gpu_request_from_cli(true, Some("0000:2d:00.0"))
.expect("gpu request should be present");

assert_eq!(request.device_id, vec!["0000:2d:00.0"]);
}

#[test]
fn gpu_request_from_cli_omits_gpu_request_when_not_requested() {
assert!(gpu_request_from_cli(false, Some("0")).is_none());
}

#[test]
fn resolve_from_classifies_existing_dockerfile_path() {
let temp = tempfile::tempdir().expect("failed to create tempdir");
70 changes: 70 additions & 0 deletions crates/openshell-core/src/gpu.rs
@@ -0,0 +1,70 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Shared GPU request helpers.

use crate::config::CDI_GPU_DEVICE_ALL;
use crate::proto::compute::v1::GpuRequestSpec;

/// Resolve a driver GPU request into CDI device identifiers.
///
/// `None` means no GPU was requested. Presence with no explicit device IDs
/// uses the CDI all-GPU request; otherwise the driver-native IDs pass through.
#[must_use]
pub fn cdi_gpu_device_ids(gpu: Option<&GpuRequestSpec>) -> Option<Vec<String>> {
match gpu {
Some(gpu) if gpu.device_id.is_empty() => Some(vec![CDI_GPU_DEVICE_ALL.to_string()]),
Some(gpu) => Some(gpu.device_id.clone()),
None => None,
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn cdi_gpu_device_ids_returns_none_when_absent() {
assert_eq!(cdi_gpu_device_ids(None), None);
}

#[test]
fn cdi_gpu_device_ids_defaults_empty_request_to_all_gpus() {
let request = GpuRequestSpec { device_id: vec![] };

assert_eq!(
cdi_gpu_device_ids(Some(&request)),
Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
);
}

#[test]
fn cdi_gpu_device_ids_passes_single_device_id_through() {
let request = GpuRequestSpec {
device_id: vec!["nvidia.com/gpu=0".to_string()],
};

assert_eq!(
cdi_gpu_device_ids(Some(&request)),
Some(vec!["nvidia.com/gpu=0".to_string()])
);
}

#[test]
fn cdi_gpu_device_ids_passes_multiple_device_ids_through() {
let request = GpuRequestSpec {
device_id: vec![
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string(),
],
};

assert_eq!(
cdi_gpu_device_ids(Some(&request)),
Some(vec![
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string()
])
);
}
}
1 change: 1 addition & 0 deletions crates/openshell-core/src/lib.rs
@@ -12,6 +12,7 @@
pub mod config;
pub mod error;
pub mod forward;
pub mod gpu;
pub mod image;
pub mod inference;
pub mod metadata;
39 changes: 23 additions & 16 deletions crates/openshell-driver-docker/src/lib.rs
@@ -18,17 +18,16 @@ use bollard::query_parameters::{
};
use bytes::Bytes;
use futures::{Stream, StreamExt};
use openshell_core::config::{
CDI_GPU_DEVICE_ALL, DEFAULT_DOCKER_NETWORK_NAME, DEFAULT_STOP_TIMEOUT_SECS,
};
use openshell_core::config::{DEFAULT_DOCKER_NETWORK_NAME, DEFAULT_STOP_TIMEOUT_SECS};
use openshell_core::gpu::cdi_gpu_device_ids;
use openshell_core::proto::compute::v1::{
CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse,
DriverCondition, DriverSandbox, DriverSandboxStatus, DriverSandboxTemplate,
GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse,
ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest, StopSandboxResponse,
ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesDeletedEvent,
WatchSandboxesEvent, WatchSandboxesRequest, WatchSandboxesSandboxEvent,
compute_driver_server::ComputeDriver, watch_sandboxes_event,
GpuRequestSpec, ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest,
StopSandboxResponse, ValidateSandboxCreateRequest, ValidateSandboxCreateResponse,
WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesRequest,
WatchSandboxesSandboxEvent, compute_driver_server::ComputeDriver, watch_sandboxes_event,
};
use openshell_core::{Config, Error, Result as CoreResult};
use std::collections::HashMap;
@@ -306,11 +305,7 @@ impl DockerComputeDriver {
"docker sandboxes require a template image",
));
}
if spec.gpu && !config.supports_gpu {
return Err(Status::failed_precondition(
"docker GPU sandboxes require Docker CDI support. Enable CDI on the Docker daemon, then restart the OpenShell gateway/server so GPU capability is detected.",
));
}
Self::validate_gpu_request(spec.gpu.as_ref(), config.supports_gpu)?;
if !template.agent_socket_path.trim().is_empty() {
return Err(Status::failed_precondition(
"docker compute driver does not support template.agent_socket_path",
@@ -330,6 +325,18 @@
Ok(())
}

fn validate_gpu_request(
gpu: Option<&GpuRequestSpec>,
supports_gpu: bool,
) -> Result<(), Status> {
if gpu.is_some() && !supports_gpu {
return Err(Status::failed_precondition(
"docker GPU sandboxes require Docker CDI support. Enable CDI on the Docker daemon, then restart the OpenShell gateway/server so GPU capability is detected.",
));
}
Ok(())
}

async fn get_sandbox_snapshot(
&self,
sandbox_id: &str,
@@ -932,11 +939,11 @@ fn build_environment(sandbox: &DriverSandbox, config: &DockerDriverRuntimeConfig
.collect()
}

fn docker_gpu_device_requests(gpu: bool) -> Option<Vec<DeviceRequest>> {
gpu.then(|| {
fn docker_gpu_device_requests(gpu: Option<&GpuRequestSpec>) -> Option<Vec<DeviceRequest>> {
cdi_gpu_device_ids(gpu).map(|device_ids| {
vec![DeviceRequest {
driver: Some("cdi".to_string()),
device_ids: Some(vec![CDI_GPU_DEVICE_ALL.to_string()]),
device_ids: Some(device_ids),
..Default::default()
}]
})
@@ -983,7 +990,7 @@ fn build_container_create_body(
host_config: Some(HostConfig {
nano_cpus: resource_limits.nano_cpus,
memory: resource_limits.memory_bytes,
device_requests: docker_gpu_device_requests(spec.gpu),
device_requests: docker_gpu_device_requests(spec.gpu.as_ref()),
mounts: Some(build_mounts(config)),
restart_policy: Some(RestartPolicy {
name: Some(RestartPolicyNameEnum::UNLESS_STOPPED),
41 changes: 35 additions & 6 deletions crates/openshell-driver-docker/src/tests.rs
@@ -2,9 +2,9 @@
// SPDX-License-Identifier: Apache-2.0

use super::*;
use openshell_core::config::DEFAULT_SERVER_PORT;
use openshell_core::config::{CDI_GPU_DEVICE_ALL, DEFAULT_SERVER_PORT};
use openshell_core::proto::compute::v1::{
DriverResourceRequirements, DriverSandboxSpec, DriverSandboxTemplate,
DriverResourceRequirements, DriverSandboxSpec, DriverSandboxTemplate, GpuRequestSpec,
};
use std::fs;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
@@ -31,8 +31,7 @@ fn test_sandbox() -> DriverSandbox {
resources: None,
platform_config: None,
}),
gpu: false,
gpu_device: String::new(),
gpu: None,
}),
status: None,
}
@@ -348,7 +347,7 @@ fn build_container_create_body_clears_inherited_cmd() {
fn validate_sandbox_rejects_gpu_when_cdi_unavailable() {
let config = runtime_config();
let mut sandbox = test_sandbox();
sandbox.spec.as_mut().unwrap().gpu = true;
sandbox.spec.as_mut().unwrap().gpu = Some(GpuRequestSpec { device_id: vec![] });

let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();

@@ -361,7 +360,7 @@ fn build_container_create_body_maps_gpu_to_all_cdi_device() {
let mut config = runtime_config();
config.supports_gpu = true;
let mut sandbox = test_sandbox();
sandbox.spec.as_mut().unwrap().gpu = true;
sandbox.spec.as_mut().unwrap().gpu = Some(GpuRequestSpec { device_id: vec![] });

let create_body = build_container_create_body(&sandbox, &config).unwrap();
let request = create_body
@@ -378,6 +377,36 @@
);
}

#[test]
fn build_container_create_body_passes_explicit_cdi_device_ids_through() {
let mut config = runtime_config();
config.supports_gpu = true;
let mut sandbox = test_sandbox();
sandbox.spec.as_mut().unwrap().gpu = Some(GpuRequestSpec {
device_id: vec![
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string(),
],
});

let create_body = build_container_create_body(&sandbox, &config).unwrap();
let request = create_body
.host_config
.as_ref()
.and_then(|host_config| host_config.device_requests.as_ref())
.and_then(|requests| requests.first())
.expect("GPU request should add a Docker device request");

assert_eq!(request.driver.as_deref(), Some("cdi"));
assert_eq!(
request.device_ids.as_ref().unwrap(),
&vec![
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string()
]
);
}

#[test]
fn require_sandbox_identifier_rejects_when_id_and_name_are_empty() {
// Regression test: `delete_sandbox` (and the other identifier-keyed