diff --git a/src/programbench/container.py b/src/programbench/container.py index 4c24e57..3703dee 100644 --- a/src/programbench/container.py +++ b/src/programbench/container.py @@ -6,15 +6,53 @@ import logging import subprocess +import tempfile +import time import uuid from pathlib import Path -from typing import Any +from typing import Any, Protocol from programbench.constants import DOCKER_CP_TIMEOUT, DOCKER_RUN_TIMEOUT log = logging.getLogger(__name__) +class Environment(Protocol): + """The minimal surface ``Evaluator`` needs from a per-task container. + + Implementations don't have to inherit from this — duck-typed protocols + are fine — but you do have to provide every method below. ``cwd`` and + ``default_timeout`` are read directly by ``Evaluator`` so they need to + be regular attributes. + """ + + cwd: str + default_timeout: int + + def execute(self, command: str, *, timeout: int | None = None) -> dict[str, Any]: ... + def copy_in(self, local_path: Path, container_path: str) -> None: ... + def copy_in_tar(self, tar_path: Path, container_path: str) -> None: ... + def copy_out(self, container_path: str, *, timeout: int = 60) -> tuple[str, str]: ... + def commit(self, image_ref: str) -> str: ... + def cleanup(self) -> None: ... + + +class ContainerBackend(Protocol): + """Factory that produces ``Environment`` instances for the Evaluator. + + The Evaluator asks the backend for one ``Environment`` per phase + (compile, then one per test branch). The default implementation + (:class:`DockerBackend`) returns :class:`ContainerEnvironment` — + i.e., real Docker containers. Custom backends can swap in + non-Docker isolation (firecracker VMs, gVisor sandboxes, no + isolation at all if you trust the input, etc.). + """ + + def new_env(self, image: str, *, cwd: str, timeout: int, cpus: int, + env: dict[str, str] | None, run_args: list[str] | None) -> Environment: ... + def remove_image(self, image_ref: str) -> None: ... + + class ContainerEnvironment: """Manage a long-running container for command execution and file injection.""" @@ -116,6 +154,32 @@ def copy_in(self, local_path: Path, container_path: str) -> None: if result.returncode != 0: raise RuntimeError(f"docker cp failed: {result.stderr.strip()}") + def copy_out(self, container_path: str, *, timeout: int = 60) -> tuple[str, str]: + """Read a file out of the container via ``docker cp``. + + Returns ``(contents, command_string)``. The command string is + included so callers (notably :meth:`Evaluator._copy_file_from_container`) + can record it in their step logs without having to know the + backend's wire format. Raises ``RuntimeError`` with the + underlying stderr (or ``"... timed out after Ns"``) on failure. + + Bypasses bash so login-shell stderr (``mesg: ttyname failed`` etc.) + can't pollute the bytes the way ``cat `` would. + """ + host_tmp = Path(tempfile.mkstemp(suffix=Path(container_path).suffix or ".out")[1]) + cmd_list = [self.executable, "cp", f"{self.container_id}:{container_path}", str(host_tmp)] + cmd_str = " ".join(cmd_list) + try: + try: + cp = subprocess.run(cmd_list, capture_output=True, text=True, timeout=timeout) + except subprocess.TimeoutExpired: + raise RuntimeError(f"docker cp timed out after {timeout}s") + if cp.returncode != 0: + raise RuntimeError((cp.stdout + cp.stderr).strip()) + return host_tmp.read_text(), cmd_str + finally: + host_tmp.unlink(missing_ok=True) + def copy_in_tar(self, tar_path: Path, container_path: str) -> None: """Stream an on-disk tar(.gz) into the container via the container's tar. @@ -206,3 +270,38 @@ def remove_image(image_ref: str, *, executable: str = "docker") -> None: ) except Exception: pass + + +class DockerBackend: + """The default ``ContainerBackend``: spawns real Docker containers. + + Drop-in for any callsite that previously instantiated + :class:`ContainerEnvironment` directly — same args, same behavior. + """ + + def __init__(self, *, executable: str = "docker", run_args: list[str] | None = None): + self.executable = executable + self.default_run_args = list(run_args or []) + + def new_env( + self, + image: str, + *, + cwd: str = "/", + timeout: int = 30, + cpus: int = 10, + env: dict[str, str] | None = None, + run_args: list[str] | None = None, + ) -> ContainerEnvironment: + return ContainerEnvironment( + image=image, + cwd=cwd, + executable=self.executable, + timeout=timeout, + cpus=cpus, + env=env, + run_args=list(self.default_run_args) + list(run_args or []), + ) + + def remove_image(self, image_ref: str) -> None: + remove_image(image_ref, executable=self.executable) diff --git a/src/programbench/eval/eval.py b/src/programbench/eval/eval.py index b423f3d..a69dafb 100644 --- a/src/programbench/eval/eval.py +++ b/src/programbench/eval/eval.py @@ -20,8 +20,6 @@ """ import logging -import subprocess -import tempfile import threading import time import uuid @@ -40,7 +38,7 @@ DOCKER_RUN_ARGS, WORKSPACE_DIR, ) -from programbench.container import ContainerEnvironment, remove_image +from programbench.container import ContainerBackend, ContainerEnvironment, DockerBackend from programbench.exceptions import EmptyTestResultError, EvalStepError, XmlParseError log = logging.getLogger(__name__) @@ -295,6 +293,7 @@ def __init__( docker_cpus: int = DOCKER_CPUS, branch_workers: int = 1, branch_retries: int = 1, + backend: ContainerBackend | None = None, ): self.image_name = image_name self.solution_branch = solution_branch @@ -310,6 +309,7 @@ def __init__( self.docker_cpus = docker_cpus self.branch_workers = max(1, branch_workers) self.branch_retries = max(0, branch_retries) + self.backend = backend or DockerBackend(executable=DOCKER_EXECUTABLE, run_args=list(DOCKER_RUN_ARGS)) self._has_rerunfailures = False self._log_lock = threading.Lock() self._from_existing = from_existing @@ -394,52 +394,38 @@ def _copy_file_from_container( step_name: str, timeout: int = 60, ) -> str: - """Copy a file out of the container via ``docker cp`` and return its contents. + """Copy a file out of the container and return its contents. - Bypasses bash so login-shell stderr (``mesg: ttyname failed`` etc.) can't - pollute the bytes the way ``cat `` would. Logs to ``log_buf`` with - the same shape as ``_run_step``; on success the entry's ``output`` holds - the file contents so ``from_existing`` replay keeps working. + Delegates to ``env.copy_out``; the backend decides how to fetch the + bytes (Docker's ``docker cp``, a non-Docker backend's filesystem + read, etc.). Logs to ``log_buf`` with the same shape as + ``_run_step``; on success the entry's ``output`` holds the file + contents so ``from_existing`` replay keeps working. """ - host_tmp = Path(tempfile.mkstemp(suffix=Path(container_path).suffix or ".out")[1]) - cmd_list = [env.executable, "cp", f"{env.container_id}:{container_path}", str(host_tmp)] - cmd_str = " ".join(cmd_list) - log.debug("Running step: %s", cmd_str) t0 = time.monotonic() try: - try: - cp = subprocess.run(cmd_list, capture_output=True, text=True, timeout=timeout) - rc = cp.returncode - err = (cp.stdout + cp.stderr).strip() - except subprocess.TimeoutExpired: - rc, err = -1, f"docker cp timed out after {timeout}s" + contents, cmd_str = env.copy_out(container_path, timeout=timeout) + except RuntimeError as exc: wall_time = time.monotonic() - t0 - if rc != 0: - log_buf.append( - { - "step": step_name, - "command": cmd_str, - "wall_time": wall_time, - "output": err, - "returncode": rc, - "exception_info": "", - } - ) - raise EvalStepError(f"{step_name}_failed", err) - contents = host_tmp.read_text() - log_buf.append( - { - "step": step_name, - "command": cmd_str, - "wall_time": wall_time, - "output": contents, - "returncode": 0, - "exception_info": "", - } - ) - return contents - finally: - host_tmp.unlink(missing_ok=True) + log_buf.append({ + "step": step_name, + "command": f"copy_out({container_path})", + "wall_time": wall_time, + "output": str(exc), + "returncode": -1, + "exception_info": "", + }) + raise EvalStepError(f"{step_name}_failed", str(exc)) + wall_time = time.monotonic() - t0 + log_buf.append({ + "step": step_name, + "command": cmd_str, + "wall_time": wall_time, + "output": contents, + "returncode": 0, + "exception_info": "", + }) + return contents def _new_env(self, image: str, *, serial_pytest: bool = False) -> ContainerEnvironment: # Baseline xdist hardening that always works (xdist ships with @@ -456,14 +442,13 @@ def _new_env(self, image: str, *, serial_pytest: bool = False) -> ContainerEnvir env = {"PYTEST_ADDOPTS": addopts} if serial_pytest: env["PYTEST_XDIST_AUTO_NUM_WORKERS"] = "1" - return ContainerEnvironment( - image=image, + return self.backend.new_env( + image, cwd=WORKSPACE_DIR, - executable=DOCKER_EXECUTABLE, timeout=600, cpus=self.docker_cpus, env=env, - run_args=[*DOCKER_RUN_ARGS, "--init"], + run_args=["--init"], ) def _remove_hashed_files(self, env: ContainerEnvironment, log_buf: list[dict]) -> None: @@ -839,7 +824,7 @@ def run(self) -> EvaluationResult: if compile_env is not None: compile_env.cleanup() if committed_image is not None: - remove_image(committed_image, executable=DOCKER_EXECUTABLE) + self.backend.remove_image(committed_image) def parse_test_results(results_xml: str, branch: str = "") -> EvaluationResult: diff --git a/tests/test_pluggable_backend.py b/tests/test_pluggable_backend.py new file mode 100644 index 0000000..0a1fdf8 --- /dev/null +++ b/tests/test_pluggable_backend.py @@ -0,0 +1,119 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +"""Tests for the pluggable ContainerBackend / Environment abstraction. + +The point of this PR is that downstream callers shouldn't have to monkey- +patch internals to run the eval against a non-Docker isolation primitive +(e.g. Daytona sandboxes, gVisor, firecracker, or just a bare host shell). +These tests exercise the substitution: build a fake in-memory backend, +pass it to the Evaluator, and verify the eval calls the fake's methods +instead of shelling Docker. +""" + +from pathlib import Path + +from programbench.container import ContainerBackend, DockerBackend +from programbench.eval.eval import Evaluator + + +class FakeEnv: + """Records every method call so the test can assert the surface.""" + + def __init__(self, *, image, cwd, timeout, cpus, env, run_args): + self.image = image + self.cwd = cwd + self.default_timeout = timeout + self.cpus = cpus + self.env = env + self.run_args = run_args + self.calls: list[tuple] = [] + self.canned_copy_out: dict[str, str] = {} + + def execute(self, command, *, timeout=None): + self.calls.append(("execute", command)) + return {"output": "", "returncode": 0, "exception_info": ""} + + def copy_in(self, local_path, container_path): + self.calls.append(("copy_in", str(local_path), container_path)) + + def copy_in_tar(self, tar_path, container_path): + self.calls.append(("copy_in_tar", str(tar_path), container_path)) + + def copy_out(self, container_path, *, timeout=60): + self.calls.append(("copy_out", container_path)) + contents = self.canned_copy_out.get(container_path, "") + return contents, f"fake-cp {container_path}" + + def commit(self, image_ref): + self.calls.append(("commit", image_ref)) + return image_ref + + def cleanup(self): + self.calls.append(("cleanup",)) + + +class FakeBackend: + """Hands out FakeEnv instances and records image removals.""" + + def __init__(self): + self.envs: list[FakeEnv] = [] + self.removed: list[str] = [] + + def new_env(self, image, *, cwd, timeout, cpus, env, run_args): + env_obj = FakeEnv(image=image, cwd=cwd, timeout=timeout, + cpus=cpus, env=env, run_args=run_args) + self.envs.append(env_obj) + return env_obj + + def remove_image(self, image_ref): + self.removed.append(image_ref) + + +def test_evaluator_uses_injected_backend(): + """Constructing an Evaluator with a custom backend routes all container + operations through it — _new_env calls backend.new_env, and + _copy_file_from_container calls env.copy_out.""" + backend = FakeBackend() + e = Evaluator( + tests_branches=["abc123"], + image_name="example/example.deadbeef", + backend=backend, + ) + + env = e._new_env("example/example.deadbeef:task") + assert isinstance(env, FakeEnv) + assert env.image == "example/example.deadbeef:task" + assert env.cwd # WORKSPACE_DIR + assert len(backend.envs) == 1 + + log_buf: list[dict] = [] + env.canned_copy_out["/workspace/eval/results.xml"] = "" + contents = e._copy_file_from_container( + env=env, log_buf=log_buf, + container_path="/workspace/eval/results.xml", + step_name="results_read", + ) + assert contents == "" + assert ("copy_out", "/workspace/eval/results.xml") in env.calls + # Step was logged with the env's reported command-string, not "docker cp". + assert log_buf[0]["step"] == "results_read" + assert log_buf[0]["command"].startswith("fake-cp ") + + +def test_evaluator_default_backend_is_docker(): + """No `backend=` kwarg → DockerBackend (preserves pre-PR behavior).""" + e = Evaluator(tests_branches=[], image_name="foo") + assert isinstance(e.backend, DockerBackend) + + +def test_docker_backend_satisfies_protocol(): + """DockerBackend implements the ContainerBackend protocol surface.""" + backend: ContainerBackend = DockerBackend() + # Protocol membership is structural — this assignment compiles only + # because DockerBackend has the required methods. Smoke-touch them. + assert callable(backend.new_env) + assert callable(backend.remove_image)