diff --git a/ci/scripts/package/README.md b/ci/scripts/package/README.md
index f123fd1383b8..972c3153098d 100644
--- a/ci/scripts/package/README.md
+++ b/ci/scripts/package/README.md
@@ -23,7 +23,9 @@ and `[tool.scikit-build]`). This directory holds the few helper scripts that flo
 invokes:
 
 - `manylinux_build_libtvm_runtime_cuda.sh` — run by the `build_cuda_runtime` CI
-  stage; builds the `libtvm_runtime_cuda.so` sidecar inside the manylinux container.
+  stage; builds the `libtvm_runtime_cuda.so` sidecar inside the prebuilt
+  `quay.io/manylinux_cuda` image (CUDA toolkit preinstalled).
 - `windows_build_libtvm_runtime_cuda.bat` — the Windows equivalent (run with
-  `shell: cmd`), building `tvm_runtime_cuda.dll`.
+  `shell: cmd`); installs the CUDA toolkit via conda and builds
+  `tvm_runtime_cuda.dll`.
 - `build-environment.yaml` — conda environment for building the wheel.
diff --git a/ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh b/ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
index 66e3d787b426..ea9e563046f3 100755
--- a/ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
+++ b/ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
@@ -27,14 +27,14 @@ set -euxo pipefail
 
 repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
 build_dir="${repo_root}/build-wheel-cuda"
-python_bin="/opt/python/cp310-cp310/bin/python"
 parallel="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)"
 
 # Build the CUDA runtime sidecar with CUDA on and LLVM off, so it does not need
-# the LLVM prefix; the main CPU wheel links LLVM statically. pip-install
-# cmake/ninja rather than relying on whatever the image ships.
+# the LLVM prefix; the main CPU wheel links LLVM statically. The manylinux CUDA
+# image already ships cmake and make, and the build uses the default Makefiles
+# generator (no Ninja), so no build tools are installed here. Put the bundled
+# CPython and CUDA toolchain on PATH for the CMake configure step and nvcc.
 export PATH="/opt/python/cp310-cp310/bin:/usr/local/cuda/bin:${PATH}"
-"${python_bin}" -m pip install -U pip cmake ninja
 nvcc --version
 
 rm -rf "${build_dir}"
diff --git a/ci/scripts/package/windows_build_libtvm_runtime_cuda.bat b/ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
index 20523394ee1b..4864c18a80d2 100644
--- a/ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
+++ b/ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
@@ -36,10 +36,11 @@ set "conda_exe=conda"
 where conda >nul 2>nul || set "conda_exe=%CONDA%\Scripts\conda.exe"
 
 rem Install the pinned CUDA toolkit via conda from the nvidia channel, mirroring the
-rem LLVM-via-conda install used elsewhere. The win-64 channel caps at 13.0.x, matching
-rem the Linux hook's CUDA 13.0.2. The nvidia CDN occasionally returns a transient
-rem HTTP 5xx, so retry once; a half-finished first attempt can leave the prefix
-rem partially populated, so wipe it before retrying.
+rem LLVM-via-conda install used elsewhere. The win-64 channel caps at 13.0.x, so this
+rem pins 13.0.2 -- slightly behind the Linux image's CUDA 13.1, which is harmless: the
+rem sidecar has no device code and links the CUDA runtime by soname only. The nvidia
+rem CDN occasionally returns a transient HTTP 5xx, so retry once; a half-finished first
+rem attempt can leave the prefix partially populated, so wipe it before retrying.
 if not exist "%cuda_prefix%\Library\bin\nvcc.exe" (
   call "%conda_exe%" create -q -p "%cuda_prefix%" -c nvidia/label/cuda-13.0.2 cuda-toolkit -y
   if errorlevel 1 (