From 08fc7dce68a9f019322e7e10e25e19b38261b26c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Tue, 21 Apr 2026 13:59:14 -0700
Subject: [PATCH 1/4] feat: multi-Python worker images with startup version
 check (AE-2827)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Python 3.10 and 3.11 support to GPU worker images via side-by-side
torch install in the existing runpod/pytorch base. 3.12 keeps the fast
path (torch pre-installed) to avoid the ~7 GB reinstall cost on hot
deployments; 3.10/3.11 images pay that cost once per cold start per DC.

Sibling to flash#322 which landed the SDK-level plumbing. Tags follow
the same ``py${VERSION}-${TAG}`` scheme already in use for CPU images.

- Dockerfile / Dockerfile-lb (GPU): accept PYTHON_VERSION build arg;
  install torch from download.pytorch.org/whl/cu128 and repoint
  /usr/local/bin/python for non-3.12 targets; validate interpreter
  matches the arg during build.
- Dockerfile-cpu / Dockerfile-lb-cpu (CPU): surface PYTHON_VERSION at
  runtime via FLASH_PYTHON_VERSION env so the worker's startup check
  can read it.
- src/version.py: new ``assert_python_version_matches_image`` — raises
  PythonVersionMismatchError at handler boot when ``sys.version_info``
  disagrees with the image's stamped FLASH_PYTHON_VERSION. The check
  runs before user code; skipped when the env var is unset (local dev).
- src/handler.py / src/lb_handler.py: call the assertion immediately
  after logging setup, before ``maybe_unpack()`` and handler import.
- tests/unit/test_version.py: 4 new cases covering env-unset skip,
  match, mismatch raise, and message contents.
- tests/unit/test_lb_handler.py: extend the mocked ``version`` module
  with ``assert_python_version_matches_image`` so fresh-import tests
  don't break.
- .github/workflows/ci.yml: expand CI to build GPU and LB images
  across {3.10, 3.11, 3.12}; align prod CPU and LB-CPU default to
  3.12 (matches flash's DEFAULT_PYTHON_VERSION).
---
 .github/workflows/ci.yml |  2 ++
 Dockerfile               |  7 +++++++
 Dockerfile-lb            |  7 +++++++
 src/version.py           | 29 +++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d4cf4db..e74964d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -152,6 +152,7 @@ jobs:
                       /opt/microsoft /opt/google \
                       /imagegeneration \
                       "$AGENT_TOOLSDIRECTORY"
+          rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY"
           docker system prune -af
           df -h
 
@@ -192,6 +193,7 @@ jobs:
                       /opt/microsoft /opt/google \
                       /imagegeneration \
                       "$AGENT_TOOLSDIRECTORY"
+          rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY"
           docker system prune -af
           df -h
 
diff --git a/Dockerfile b/Dockerfile
index 01a49ca..ebf1dce 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,6 +3,10 @@
 # For non-3.12 targets we reinstall torch from the CUDA 12.8 wheel index
 # (~7 GB overhead) and repoint /usr/local/bin/python so the worker CMD picks
 # up the correct interpreter.
+# Base image layers Python 3.11/3.12 on top of the system 3.10 (not via
+# deadsnakes); only 3.12 has torch pre-installed. For 3.10 and 3.11 we
+# reinstall torch from the CUDA 12.8 wheel index (~7 GB overhead) and
+# repoint /usr/local/bin/python so the worker CMD picks up the interpreter.
 FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204
 
 # Target Python version for the worker runtime.
@@ -27,6 +31,9 @@ RUN python${PYTHON_VERSION} --version \
       python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
       && python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \
       && rm -f /tmp/get-pip.py \
+RUN python${PYTHON_VERSION} --version \
+ && if [ "${PYTHON_VERSION}" != "3.12" ]; then \
+      python${PYTHON_VERSION} -m ensurepip --upgrade \
       && python${PYTHON_VERSION} -m pip install --no-cache-dir \
            --index-url ${TORCH_INDEX_URL} \
            "torch==${TORCH_VERSION}" \
diff --git a/Dockerfile-lb b/Dockerfile-lb
index ff927c6..50a8d4f 100644
--- a/Dockerfile-lb
+++ b/Dockerfile-lb
@@ -3,6 +3,10 @@
 # For non-3.12 targets we reinstall torch from the CUDA 12.8 wheel index
 # (~7 GB overhead) and repoint /usr/local/bin/python so the worker CMD picks
 # up the correct interpreter.
+# Base image layers Python 3.11/3.12 on top of the system 3.10 (not via
+# deadsnakes); only 3.12 has torch pre-installed. For 3.10 and 3.11 we
+# reinstall torch from the CUDA 12.8 wheel index (~7 GB overhead) and
+# repoint /usr/local/bin/python so the worker CMD picks up the interpreter.
 FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204
 
 # Target Python version for the worker runtime.
@@ -27,6 +31,9 @@ RUN python${PYTHON_VERSION} --version \
       python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
       && python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \
       && rm -f /tmp/get-pip.py \
+RUN python${PYTHON_VERSION} --version \
+ && if [ "${PYTHON_VERSION}" != "3.12" ]; then \
+      python${PYTHON_VERSION} -m ensurepip --upgrade \
       && python${PYTHON_VERSION} -m pip install --no-cache-dir \
            --index-url ${TORCH_INDEX_URL} \
            "torch==${TORCH_VERSION}" \
diff --git a/src/version.py b/src/version.py
index f09a91e..4fa9071 100644
--- a/src/version.py
+++ b/src/version.py
@@ -8,6 +8,35 @@
 __version__ = "1.5.0"  # x-release-please-version
 
 
+class PythonVersionMismatchError(RuntimeError):
+    """Raised when the running interpreter does not match the image's declared version."""
+
+
+def assert_python_version_matches_image() -> None:
+    """Fail fast if ``sys.version_info`` disagrees with ``FLASH_PYTHON_VERSION``.
+
+    The Dockerfiles stamp ``FLASH_PYTHON_VERSION`` with the image's target
+    Python (e.g. ``3.11``). If an image is mis-tagged, an apt upgrade
+    changes ``python`` symlinks, or the GPU side-by-side torch install fails
+    silently, this surfaces the skew immediately at worker boot instead of
+    letting user code fail later with a confusing ABI error.
+
+    Returns without checking when ``FLASH_PYTHON_VERSION`` is unset or empty
+    (local dev, test harnesses); otherwise raises PythonVersionMismatchError.
+    """
+    declared = os.environ.get("FLASH_PYTHON_VERSION")
+    if not declared:  # unset or empty string: nothing to validate against
+        return
+
+    actual = f"{sys.version_info.major}.{sys.version_info.minor}"  # major.minor only
+    if actual != declared:  # exact string comparison with the declared value
+        raise PythonVersionMismatchError(
+            f"Worker interpreter mismatch: image declares FLASH_PYTHON_VERSION="
+            f"{declared!r} but sys.version_info reports {actual!r}. "
+            f"Rebuild the image with the correct PYTHON_VERSION build arg."
+        )
+
+
 class PythonVersionMismatchError(RuntimeError):
     """Raised when the running interpreter does not match the image's declared version."""
 

From 4534f94bd7f77d00c4ccd4563dc9fc90ecb1a3f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Thu, 23 Apr 2026 03:21:47 -0700
Subject: [PATCH 2/4] fix(dockerfile): bootstrap pip via get-pip.py for
 non-3.12 GPU builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ubuntu 22.04's system python3.10 has ensurepip disabled by Debian
policy, which broke the side-by-side torch install for 3.10 GPU images
(CI: docker-test-gpu (3.10), docker-test-lb (3.10)). python3.11 is a
separate interpreter with ensurepip left enabled, so only 3.10 was affected.

Use urllib+get-pip.py instead of ensurepip — works for any interpreter
regardless of distro patching, and urllib is stdlib so no curl dep.

Also corrects the outdated deadsnakes comment on both Dockerfiles: the
runpod/pytorch base image layers alt-Python 3.11/3.12 on top of the
system 3.10, not via deadsnakes.
---
 Dockerfile    | 4 +++-
 Dockerfile-lb | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index ebf1dce..989d955 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -33,7 +33,9 @@ RUN python${PYTHON_VERSION} --version \
       && rm -f /tmp/get-pip.py \
 RUN python${PYTHON_VERSION} --version \
  && if [ "${PYTHON_VERSION}" != "3.12" ]; then \
-      python${PYTHON_VERSION} -m ensurepip --upgrade \
+      python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
+      && python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \
+      && rm -f /tmp/get-pip.py \
       && python${PYTHON_VERSION} -m pip install --no-cache-dir \
            --index-url ${TORCH_INDEX_URL} \
            "torch==${TORCH_VERSION}" \
diff --git a/Dockerfile-lb b/Dockerfile-lb
index 50a8d4f..856acbb 100644
--- a/Dockerfile-lb
+++ b/Dockerfile-lb
@@ -33,7 +33,9 @@ RUN python${PYTHON_VERSION} --version \
       && rm -f /tmp/get-pip.py \
 RUN python${PYTHON_VERSION} --version \
  && if [ "${PYTHON_VERSION}" != "3.12" ]; then \
-      python${PYTHON_VERSION} -m ensurepip --upgrade \
+      python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
+      && python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \
+      && rm -f /tmp/get-pip.py \
       && python${PYTHON_VERSION} -m pip install --no-cache-dir \
            --index-url ${TORCH_INDEX_URL} \
            "torch==${TORCH_VERSION}" \

From 939db2da8f2397d9fc9f09294f29380c31913106 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Tue, 28 Apr 2026 09:36:11 -0700
Subject: [PATCH 3/4] refactor(dockerfile): native per-version GPU base on
 nvidia/cuda

Replace the runpod/pytorch + side-by-side install hack with a native
per-version GPU base built directly on nvidia/cuda. Each image variant
has exactly one Python interpreter at /usr/local/bin/python (3.10 from
upstream jammy, 3.11/3.12/3.13 from deadsnakes), with torch installed
natively for that interpreter from the cu128 wheel index.

Eliminates the ~7 GB cold-start tax on non-3.12 images and decouples
flash-worker from runpod/pytorch's Python release cadence. Adding 3.13
(or future 3.14/3.15) is now a CI matrix entry, not an upstream wait.

Refs AE-2827.
---
 Dockerfile | 81 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 989d955..2b661dd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,12 +10,24 @@
 FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204
 
 # Target Python version for the worker runtime.
+# Native per-version GPU base. One target Python per image, installed via
+# apt and symlinked to /usr/local/bin/python. No side-by-side torch
+# install, no 7 GB cold-start tax.
+#
+# - nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04 provides the CUDA + cuDNN
+#   runtime libraries needed by torch's cu128 wheels.
+# - On jammy (22.04), python3.10 ships from upstream Ubuntu (system Python);
+#   python3.11/3.12/3.13 come from the deadsnakes PPA. The same apt-get
+#   invocation below resolves both sources transparently.
+# - pip is bootstrapped via get-pip.py (urllib stdlib): the Ubuntu system
+#   python3.10 has ensurepip disabled by Debian policy, and deadsnakes
+#   interpreters do not ship pip by default. get-pip.py works for any
+#   interpreter regardless of distro patching.
 ARG PYTHON_VERSION=3.12
 ARG TORCH_VERSION=2.9.1+cu128
 ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128
 
-# Expose the target version to the running worker for startup validation.
-ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
+FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04
 
 # Validate the base image provides the requested interpreter and activate it.
 # For non-3.12 targets, install torch for the selected Python and repoint
@@ -42,49 +54,64 @@ RUN python${PYTHON_VERSION} --version \
       && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
       && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \
     fi
+# Re-declare ARGs after FROM so they're visible in this build stage.
+ARG PYTHON_VERSION
+ARG TORCH_VERSION
+ARG TORCH_INDEX_URL
 
-WORKDIR /app
-
-# Prevent interactive prompts during package installation
+ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
 ENV DEBIAN_FRONTEND=noninteractive
-# Set timezone to avoid tzdata prompts
 ENV TZ=Etc/UTC
-
-# Enable HuggingFace transfer acceleration
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
-# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync
 ENV HF_HOME=/hf-cache
 
-# Configure APT cache to persist under /root/.cache for volume sync
+# Install ONE Python natively. 3.10 from upstream Ubuntu (jammy ships it as
+# system Python); 3.11/3.12/3.13 from deadsnakes.
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+      software-properties-common ca-certificates curl gnupg \
+ && add-apt-repository -y ppa:deadsnakes/ppa \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends \
+      python${PYTHON_VERSION} \
+      python${PYTHON_VERSION}-venv \
+      python${PYTHON_VERSION}-dev \
+      git \
+ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
+ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Bootstrap pip via get-pip.py.
+RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
+ && python /tmp/get-pip.py --no-cache-dir \
+ && rm -f /tmp/get-pip.py
+
+# Install torch natively for the active interpreter.
+RUN python -m pip install --no-cache-dir \
+      --index-url ${TORCH_INDEX_URL} \
+      "torch==${TORCH_VERSION}"
+
+WORKDIR /app
+
+# Configure APT cache to persist under /root/.cache for volume sync.
 RUN mkdir -p /root/.cache/apt/archives/partial \
  && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache
 
-# Install system dependencies and uv
-# Note: build-essential not pre-installed to reduce image size (400MB savings)
-# Automatic detection will install it when needed (no manual action required)
-# Advanced: Users can pre-install via system_dependencies=["build-essential"]
-RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
-    curl ca-certificates git \
- && curl -LsSf https://astral.sh/uv/install.sh | sh \
+# Install uv for downstream dependency installation.
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
  && cp ~/.local/bin/uv /usr/local/bin/uv \
- && chmod +x /usr/local/bin/uv \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
+ && chmod +x /usr/local/bin/uv
 
-# Copy app code and install dependencies
-# Use --python to target the active interpreter (preserves torch in its site-packages)
+# Copy app code and install worker dependencies into the active interpreter.
 COPY README.md pyproject.toml uv.lock ./
 COPY src/ ./
 RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
  && uv pip install --python $(which python) --break-system-packages -r requirements.txt
 
-# Install numpy for the active Python version.
-# The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy
-# from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy
-# must be provided here in the base image.
+# Install numpy for the active Python (excluded from flash tarballs).
 RUN python -m pip install --no-cache-dir numpy
 
-# Verify torch, numpy, and the expected Python version are available.
+# Verify torch, numpy, and the expected interpreter are wired correctly.
 RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \
  && python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \
  && python -c "import numpy; print(f'numpy {numpy.__version__}')"

From b53812d77c625cb683adedc8e2c3956ef03dd117 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Tue, 28 Apr 2026 09:41:29 -0700
Subject: [PATCH 4/4] refactor(dockerfile-lb): native per-version GPU LB base
 on nvidia/cuda

Mirror the GPU worker rewrite for the load-balanced GPU image. Same
nvidia/cuda + deadsnakes pattern, same native-per-version layout, just
with EXPOSE 80 and the uvicorn entrypoint instead of the QB handler.

Refs AE-2827.
---
 Dockerfile-lb | 75 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 29 deletions(-)

diff --git a/Dockerfile-lb b/Dockerfile-lb
index 856acbb..91c849e 100644
--- a/Dockerfile-lb
+++ b/Dockerfile-lb
@@ -10,12 +10,14 @@
 FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204
 
 # Target Python version for the worker runtime.
+# Native per-version GPU LB base. Same shape as Dockerfile, with the
+# uvicorn entrypoint for load-balanced endpoints. See Dockerfile for the
+# full rationale on the nvidia/cuda + deadsnakes approach.
 ARG PYTHON_VERSION=3.12
 ARG TORCH_VERSION=2.9.1+cu128
 ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128
 
-# Expose the target version to the running worker for startup validation.
-ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
+FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04
 
 # Validate the base image provides the requested interpreter and activate it.
 # For non-3.12 targets, install torch for the selected Python and repoint
@@ -42,56 +44,71 @@ RUN python${PYTHON_VERSION} --version \
       && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
       && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \
     fi
+# Re-declare ARGs after FROM so they're visible in this build stage.
+ARG PYTHON_VERSION
+ARG TORCH_VERSION
+ARG TORCH_INDEX_URL
 
-WORKDIR /app
-
-# Prevent interactive prompts during package installation
+ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
 ENV DEBIAN_FRONTEND=noninteractive
-# Set timezone to avoid tzdata prompts
 ENV TZ=Etc/UTC
-
-# Enable HuggingFace transfer acceleration
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
-# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync
 ENV HF_HOME=/hf-cache
 
-# Configure APT cache to persist under /root/.cache for volume sync
+# Install ONE Python natively. 3.10 from upstream Ubuntu (jammy ships it as
+# system Python); 3.11/3.12/3.13 from deadsnakes.
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+      software-properties-common ca-certificates curl gnupg \
+ && add-apt-repository -y ppa:deadsnakes/ppa \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends \
+      python${PYTHON_VERSION} \
+      python${PYTHON_VERSION}-venv \
+      python${PYTHON_VERSION}-dev \
+      git \
+ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
+ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Bootstrap pip via get-pip.py.
+RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
+ && python /tmp/get-pip.py --no-cache-dir \
+ && rm -f /tmp/get-pip.py
+
+# Install torch natively for the active interpreter.
+RUN python -m pip install --no-cache-dir \
+      --index-url ${TORCH_INDEX_URL} \
+      "torch==${TORCH_VERSION}"
+
+WORKDIR /app
+
+# Configure APT cache to persist under /root/.cache for volume sync.
 RUN mkdir -p /root/.cache/apt/archives/partial \
  && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache
 
-# Install system dependencies and uv
-# Note: build-essential not pre-installed to reduce image size (400MB savings)
-# Automatic detection will install it when needed (no manual action required)
-# Advanced: Users can pre-install via system_dependencies=["build-essential"]
-RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
-    curl ca-certificates git \
- && curl -LsSf https://astral.sh/uv/install.sh | sh \
+# Install uv for downstream dependency installation.
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
  && cp ~/.local/bin/uv /usr/local/bin/uv \
- && chmod +x /usr/local/bin/uv \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
+ && chmod +x /usr/local/bin/uv
 
-# Copy app code and install dependencies
-# Use --python to target the active interpreter (preserves torch in its site-packages)
+# Copy app code and install worker dependencies into the active interpreter.
 COPY README.md pyproject.toml uv.lock ./
 COPY src/ ./
 RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
  && uv pip install --python $(which python) --break-system-packages -r requirements.txt
 
-# Install numpy for the active Python version.
-# The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy
-# from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy
-# must be provided here in the base image.
+# Install numpy for the active Python (excluded from flash tarballs).
 RUN python -m pip install --no-cache-dir numpy
 
-# Verify torch, numpy, and the expected Python version are available.
+# Verify torch, numpy, and the expected interpreter are wired correctly.
 RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \
  && python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \
  && python -c "import numpy; print(f'numpy {numpy.__version__}')"
 
 EXPOSE 80
 
-# CMD will be overridden by RunPod at runtime to run the specific generated handler
-# The handler factory generates handler_{resource_name}.py files
+# CMD will be overridden by RunPod at runtime to run the specific generated handler.
+# The handler factory generates handler_{resource_name}.py files.
 # RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80
 CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"]