Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ jobs:
DOCKER_BUILDER: openshell
run: |
set -euo pipefail
mise exec -- tasks/scripts/docker-build-image.sh "${{ inputs.component }}" \
mise exec -- tasks/scripts/container-build-image.sh "${{ inputs.component }}" \
--cache-from "type=gha,scope=${{ inputs.component }}-${{ matrix.arch }}" \
--cache-to "type=gha,mode=max,scope=${{ inputs.component }}-${{ matrix.arch }}"

Expand Down
4 changes: 2 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,9 @@ ocsf_emit!(event);

- Always use `uv` for Python commands (e.g., `uv pip install`, `uv run`, `uv venv`)

## Docker
## Containers

- Always prefer `mise` commands over direct docker builds (e.g., `mise run docker:build` instead of `docker build`)
- Always prefer `mise` commands over direct container builds (e.g., `mise run build:container` instead of `docker build` or `podman build`)

## Cluster Infrastructure Changes

Expand Down
2 changes: 1 addition & 1 deletion architecture/build-containers.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ The chart remains the supported deployment artifact for Kubernetes.

`deploy/docker/Dockerfile.images` no longer compiles Rust. CI calls `.github/workflows/shadow-rust-native-build.yml` through `workflow_call` to build `openshell-gateway` or `openshell-sandbox` natively on the target architecture. `.github/workflows/docker-build.yml` downloads the resulting artifact, stages it at `deploy/docker/.build/prebuilt-binaries/<arch>/`, builds the per-arch image with the local Buildx driver, and merges multi-arch pushes with `docker buildx imagetools create`. Callers normally publish the GitHub SHA tag, but can pass `image-tag` to publish isolated temporary tags for validation.

Local image builds use `tasks/scripts/stage-prebuilt-binaries.sh` through `tasks/scripts/docker-build-image.sh` before invoking Docker, so clean checkouts do not need to create the staging directory manually.
Local image builds use `tasks/scripts/stage-prebuilt-binaries.sh` through `tasks/scripts/container-build-image.sh` before invoking the container engine (Docker or Podman), so clean checkouts do not need to create the staging directory manually.

## Supervisor Delivery

Expand Down
2 changes: 1 addition & 1 deletion crates/openshell-cli/src/doctor_llm_prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ If DNS is broken, all image pulls from the distribution registry will fail, as w
| `tls handshake eof` from `openshell status` | Server not running or mTLS credentials missing/mismatched | Check StatefulSet replicas (Step 3) and mTLS files (Step 6) |
| StatefulSet `0/0` replicas | StatefulSet scaled to zero (failed deploy, manual scale-down, or Helm misconfiguration) | `openshell doctor exec -- kubectl -n openshell scale statefulset openshell --replicas=1` |
| Local mTLS files missing | Deploy was interrupted before credentials were persisted | Extract from cluster secret `openshell-client-tls` (Step 6) |
| Container not found | Image not built | `mise run docker:build:cluster` (local) or re-deploy (remote) |
| Container not found | Image not built | `mise run build:container:cluster` (local) or re-deploy (remote) |
| Container exited, OOMKilled | Insufficient memory | Increase host memory or reduce workload |
| Container exited, non-zero exit | k3s crash, port conflict, privilege issue | Check `openshell doctor logs` for details |
| `/readyz` fails | k3s still starting or crashed | Wait longer or check container logs for k3s errors |
Expand Down
10 changes: 5 additions & 5 deletions crates/openshell-vm/scripts/build-rootfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ fi

case "$GUEST_ARCH" in
aarch64)
DOCKER_PLATFORM="linux/arm64"
CONTAINER_PLATFORM="linux/arm64"
K3S_BINARY_SUFFIX="-arm64"
K3S_CHECKSUM_VAR="K3S_ARM64_SHA256"
RUST_TARGET="aarch64-unknown-linux-gnu"
;;
x86_64)
DOCKER_PLATFORM="linux/amd64"
CONTAINER_PLATFORM="linux/amd64"
K3S_BINARY_SUFFIX="" # x86_64 binary has no suffix
K3S_CHECKSUM_VAR="K3S_AMD64_SHA256"
RUST_TARGET="x86_64-unknown-linux-gnu"
Expand Down Expand Up @@ -294,7 +294,7 @@ fi
ce rm -f "${CONTAINER_NAME}" 2>/dev/null || true

echo "==> Building base image..."
ce build --platform "${DOCKER_PLATFORM}" -t "${BASE_IMAGE_TAG}" \
ce build --platform "${CONTAINER_PLATFORM}" -t "${BASE_IMAGE_TAG}" \
--build-arg "BASE_IMAGE=${VM_BASE_IMAGE}" -f - . <<'DOCKERFILE'
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
Expand All @@ -318,7 +318,7 @@ DOCKERFILE

# Create a container and export the filesystem
echo "==> Creating container..."
ce create --platform "${DOCKER_PLATFORM}" --name "${CONTAINER_NAME}" "${BASE_IMAGE_TAG}" /bin/true
ce create --platform "${CONTAINER_PLATFORM}" --name "${CONTAINER_NAME}" "${BASE_IMAGE_TAG}" /bin/true

echo "==> Exporting filesystem..."
# Previous builds may leave overlayfs work/ dirs with permissions that
Expand Down Expand Up @@ -513,7 +513,7 @@ pull_and_save() {
# Try to pull; if the registry is unavailable, fall back to the
# local Docker image cache (image may exist from a previous pull).
echo " pulling: ${image}..."
if ! ce pull --platform "${DOCKER_PLATFORM}" "${image}" --quiet 2>/dev/null; then
if ! ce pull --platform "${CONTAINER_PLATFORM}" "${image}" --quiet 2>/dev/null; then
echo " pull failed, checking local image cache..."
if ! ce image inspect "${image}" >/dev/null 2>&1; then
echo "ERROR: image ${image} not available locally or from registry"
Expand Down
2 changes: 1 addition & 1 deletion e2e/with-docker-gateway.sh
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ else
CONTAINER_ENGINE=docker \
DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \
DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \
bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output
bash "${ROOT}/tasks/scripts/container-build-image.sh" supervisor-output
fi

if [ ! -f "${SUPERVISOR_BIN}" ]; then
Expand Down
4 changes: 2 additions & 2 deletions scripts/build-benchmark/cluster-deploy-fast-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ run_fast_deploy() {
BUILDKIT_PROGRESS=plain \
CLUSTER_NAME="${CLUSTER_NAME}" \
DEPLOY_FAST_STATE_FILE="${state_file}" \
DOCKER_BUILD_CACHE_DIR="${CACHE_DIR}" \
CONTAINER_BUILD_CACHE_DIR="${CACHE_DIR}" \
"$@" \
mise run cluster
) >"${log_file}" 2>&1 || true
Expand All @@ -252,7 +252,7 @@ run_fast_deploy_args() {
BUILDKIT_PROGRESS=plain \
CLUSTER_NAME="${CLUSTER_NAME}" \
DEPLOY_FAST_STATE_FILE="${state_file}" \
DOCKER_BUILD_CACHE_DIR="${CACHE_DIR}" \
CONTAINER_BUILD_CACHE_DIR="${CACHE_DIR}" \
mise run cluster -- "$@"
) >"${log_file}" 2>&1 || true
end=$(date +%s)
Expand Down
4 changes: 2 additions & 2 deletions scripts/docker-cleanup.sh → scripts/container-cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Clean up stale Docker images, volumes, and build cache that are not in use
# Clean up stale container images, volumes, and build cache that are not in use
# by the currently deployed OpenShell cluster.
#
# Preserves:
Expand All @@ -13,7 +13,7 @@
# - Volumes attached to running containers
#
# Usage:
# ./scripts/docker-cleanup.sh [options]
# ./scripts/container-cleanup.sh [options]
#
# Options:
# --dry-run Show what would be removed without deleting anything
Expand Down
4 changes: 2 additions & 2 deletions scripts/remote-deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ rm -f .env
echo "==> Building container images (tag=${IMAGE_TAG})..."
export OPENSHELL_CARGO_VERSION="${CARGO_VERSION}"
export IMAGE_TAG
mise exec -- tasks/scripts/docker-build-image.sh cluster
mise exec -- tasks/scripts/docker-build-image.sh gateway
mise exec -- tasks/scripts/container-build-image.sh cluster
mise exec -- tasks/scripts/container-build-image.sh gateway

export OPENSHELL_CLUSTER_IMAGE="openshell/cluster:${IMAGE_TAG}"
export OPENSHELL_PUSH_IMAGES="openshell/gateway:${IMAGE_TAG}"
Expand Down
2 changes: 1 addition & 1 deletion tasks/ci.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

[build]
description = "Build the whole project"
depends = ["build:rust:workspace", "build:docker", "build:python:wheel"]
depends = ["build:rust:workspace", "build:container", "build:python:wheel"]

["build:rust"]
description = "Alias for build:rust:workspace"
Expand Down
2 changes: 1 addition & 1 deletion tasks/cluster.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ run = "tasks/scripts/cluster.sh"
["cluster:build:full"]
description = "Build and deploy local k3s cluster with OpenShell"
depends = [
"build:docker:gateway",
"build:container:gateway",
]
run = "tasks/scripts/cluster-bootstrap.sh build"
hide = true
Expand Down
46 changes: 46 additions & 0 deletions tasks/container.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Container image build tasks (engine-neutral: Docker or Podman)

["build:container"]
description = "Build all container images"
depends = [
"build:container:gateway",
"build:container:cluster",
"build:container:supervisor",
]
hide = true

["build:container:ci"]
description = "Build the CI container image"
run = "tasks/scripts/container-build-ci.sh"
hide = true

["build:container:gateway"]
description = "Build the gateway container image"
run = "tasks/scripts/container-build-image.sh gateway"
hide = true

["build:container:supervisor"]
description = "Build the standalone supervisor container image (Ubuntu-based, for K8s pods)"
run = "tasks/scripts/container-build-image.sh supervisor"
hide = true

["build:container:cluster"]
description = "Build the k3s cluster image (component images pulled at runtime from registry)"
run = "tasks/scripts/container-build-image.sh cluster"
hide = true

["build:container:cluster:multiarch"]
description = "Build multi-arch cluster image and push to a registry"
run = "tasks/scripts/container-publish-multiarch.sh"
hide = true

["container:cleanup"]
description = "Remove stale images, volumes, and build cache not used by the current cluster"
run = "scripts/container-cleanup.sh --force"

["container:cleanup:dry-run"]
description = "Preview what container:cleanup would remove without deleting anything"
run = "scripts/container-cleanup.sh --dry-run"
61 changes: 29 additions & 32 deletions tasks/docker.toml
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Docker image build tasks
# Backwards-compatible aliases: build:docker:* → build:container:*
# These exist so CI workflows and muscle memory keep working.

["build:docker"]
description = "Build all Docker images"
depends = [
"build:docker:gateway",
"build:docker:cluster",
"build:docker:supervisor",
]
description = "Alias → build:container (all container images)"
depends = ["build:container"]
hide = true

["build:docker:ci"]
description = "Build the CI Docker image"
run = "tasks/scripts/docker-build-ci.sh"
description = "Alias → build:container:ci"
depends = ["build:container:ci"]
hide = true

["build:docker:prebuilt"]
Expand All @@ -23,49 +20,49 @@ run = "tasks/scripts/stage-prebuilt-binaries.sh all"
hide = true

["build:docker:gateway"]
description = "Build the gateway Docker image"
run = "tasks/scripts/docker-build-image.sh gateway"
description = "Alias → build:container:gateway"
depends = ["build:container:gateway"]
hide = true

["build:docker:supervisor"]
description = "Build the supervisor image (FROM scratch, binary only)"
run = "tasks/scripts/docker-build-image.sh supervisor"
description = "Alias → build:container:supervisor"
depends = ["build:container:supervisor"]
hide = true

["build:docker:cluster"]
description = "Build the k3s cluster image (component images pulled at runtime from registry)"
run = "tasks/scripts/docker-build-image.sh cluster"
description = "Alias → build:container:cluster"
depends = ["build:container:cluster"]
hide = true

["build:docker:cluster:multiarch"]
description = "Alias → build:container:cluster:multiarch"
depends = ["build:container:cluster:multiarch"]
hide = true

["docker:build:gateway"]
description = "Alias for build:docker:gateway"
depends = ["build:docker:gateway"]
description = "Alias → build:container:gateway"
depends = ["build:container:gateway"]
hide = true

["docker:build:supervisor"]
description = "Alias for build:docker:supervisor"
depends = ["build:docker:supervisor"]
description = "Alias → build:container:supervisor"
depends = ["build:container:supervisor"]
hide = true

["docker:build:cluster"]
description = "Alias for build:docker:cluster"
depends = ["build:docker:cluster"]
hide = true

["build:docker:cluster:multiarch"]
description = "Build multi-arch cluster image and push to a registry"
run = "tasks/scripts/docker-publish-multiarch.sh"
description = "Alias → build:container:cluster"
depends = ["build:container:cluster"]
hide = true

["docker:build:cluster:multiarch"]
description = "Alias for build:docker:cluster:multiarch"
depends = ["build:docker:cluster:multiarch"]
description = "Alias → build:container:cluster:multiarch"
depends = ["build:container:cluster:multiarch"]
hide = true

["docker:cleanup"]
description = "Remove stale images, volumes, and build cache not used by the current cluster"
run = "scripts/docker-cleanup.sh --force"
description = "Alias → container:cleanup"
depends = ["container:cleanup"]

["docker:cleanup:dry-run"]
description = "Preview what docker:cleanup would remove without deleting anything"
run = "scripts/docker-cleanup.sh --dry-run"
description = "Alias → container:cleanup:dry-run"
depends = ["container:cleanup:dry-run"]
4 changes: 3 additions & 1 deletion tasks/python.toml
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,10 @@ CARGO_TARGET_CACHE_SCOPE=$(printf '%s' "$CACHE_SCOPE_INPUT" | sha256_16_stdin)
mkdir -p target/wheels
CONTAINERFILE=$(ce_resolve_containerfile deploy/docker python-wheels-macos)
ce build \
-f deploy/docker/Dockerfile.python-wheels-macos \
-f "${CONTAINERFILE}" \
--target wheels \
--build-arg "OSXCROSS_IMAGE=${OSXCROSS_IMAGE_REF}" \
--build-arg "CARGO_TARGET_CACHE_SCOPE=${CARGO_TARGET_CACHE_SCOPE}" \
Expand Down
4 changes: 2 additions & 2 deletions tasks/scripts/cluster-bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,12 @@ fi
# and entrypoint from the working tree. This ensures the k3s container
# always starts with the correct chart version.
if [ "${SKIP_CLUSTER_IMAGE_BUILD:-}" != "1" ]; then
tasks/scripts/docker-build-image.sh cluster
tasks/scripts/container-build-image.sh cluster
fi

# In fast/build modes, use the locally-built cluster image rather than the
# remote distribution registry image. The local image is built by
# `docker-build-image.sh cluster` and contains the bundled Helm chart and
# `container-build-image.sh cluster` and contains the bundled Helm chart and
# manifests from the current working tree.
if [ -z "${OPENSHELL_CLUSTER_IMAGE:-}" ]; then
export OPENSHELL_CLUSTER_IMAGE="openshell/cluster:${IMAGE_TAG}"
Expand Down
Loading
Loading