From 9ae3ce85b85aa66ce72b671c2a745b469fd0825d Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Fri, 12 Jun 2026 17:42:53 -0400
Subject: [PATCH] test: add import-integrity and packaging guards; fix 8 latent
 bugs they found
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to OpenAdaptAI/OpenAdapt#999: green CI shipped a fully broken
CLI because no test exercised lazy imports, internal call seams, or the
built wheel. This adds three guards and fixes everything they caught on
current main:

New tests (dependency-free, <3s total):
- tests/test_import_integrity.py::test_no_phantom_imports — AST-walks
  every `from openadapt_ml.x import y` (including inside function
  bodies) and verifies y exists in x
- tests/test_import_integrity.py::test_no_phantom_kwargs — verifies
  keyword args passed to internal functions exist in their signatures
- tests/test_packaging.py — builds the wheel and asserts bundled
  configs, core modules, and version match

Latent bugs the new tests caught on main, fixed here:
- cloud/local.py: cmd_serve called regenerate_local_dashboard with a
  keep_polling kwarg that no longer exists (dashboard regeneration
  failed on every serve, downgraded to a warning)
- scripts/compare.py + experiments/demo_prompt/run_experiment.py:
  capture_to_episode(goal=) — same kwarg bug as #999 bug 3, two more
  call sites
- ingest/__init__.py: re-exported capture_to_session and
  load_captures_as_sessions, which don't exist; the except ImportError
  guard meant capture_to_episode was silently never exported either
- evals/grounding.py: TYPE_CHECKING import from missing module
  openadapt_ml.data.types (Episode lives in openadapt_ml.schema)
- cloud/azure_inference.py: imported QwenVLAdapter from missing module
  openadapt_ml.adapters.qwen (lives in models.qwen_vl) and constructed
  it with a non-existent model_name kwarg (needs from_pretrained);
  imported generate_comparison which was refactored away — restored as
  a thin wrapper over generate_comparison_data/_html in compare.py

Also:
- release.yml: file/append a GitHub issue when the release workflow
  fails. Releases failed silently Mar-Jun 2026 while PyPI went stale,
  which is what forced #999's reporter onto git installs
- pyproject: add build to the dev extra for the packaging tests

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .github/workflows/release.yml                 |  18 ++
 openadapt_ml/cloud/azure_inference.py         |   4 +-
 openadapt_ml/cloud/local.py                   |   4 +-
 openadapt_ml/evals/grounding.py               |   2 +-
 .../experiments/demo_prompt/run_experiment.py |   2 +-
 openadapt_ml/ingest/__init__.py               |  12 +-
 openadapt_ml/scripts/compare.py               |  25 +-
 pyproject.toml                                |   1 +
 tests/test_import_integrity.py                | 274 ++++++++++++++++++
 tests/test_packaging.py                       |  76 +++++
 10 files changed, 404 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_import_integrity.py
 create mode 100644 tests/test_packaging.py

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 16e8a87..04bd62a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -57,3 +57,21 @@ jobs:
         uses: python-semantic-release/publish-action@v9.15.2
         with:
           github_token: ${{ secrets.ADMIN_TOKEN }}
+
+      # Releases failed silently for 3 months (Mar-Jun 2026) while PyPI
+      # went stale; see OpenAdaptAI/OpenAdapt#999. Fail loudly instead.
+      - name: File issue on release failure
+        if: failure()
+        env:
+          GH_TOKEN: ${{ secrets.ADMIN_TOKEN }}
+        run: |
+          TITLE="Release workflow failed on main"
+          BODY="The release workflow failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+          Until this is fixed, merged fix/feat commits are NOT being published to PyPI, and users install stale versions."
+          EXISTING=$(gh issue list --repo "${{ github.repository }}" --state open --search "in:title \"$TITLE\"" --json number --jq '.[0].number // empty')
+          if [ -n "$EXISTING" ]; then
+            gh issue comment "$EXISTING" --repo "${{ github.repository }}" --body "$BODY"
+          else
+            gh issue create --repo "${{ github.repository }}" --title "$TITLE" --body "$BODY"
+          fi
diff --git a/openadapt_ml/cloud/azure_inference.py b/openadapt_ml/cloud/azure_inference.py
index f5a49d3..f572746 100644
--- a/openadapt_ml/cloud/azure_inference.py
+++ b/openadapt_ml/cloud/azure_inference.py
@@ -424,10 +424,10 @@ def main():
 
     elif args.command == "inference-worker":
         # Start inference worker
-        from openadapt_ml.adapters.qwen import QwenVLAdapter
+        from openadapt_ml.models.qwen_vl import QwenVLAdapter
 
         print(f"Starting inference worker with model: {args.model}")
-        adapter = QwenVLAdapter(model_name=args.model)
+        adapter = QwenVLAdapter.from_pretrained(args.model)
         queue.poll_and_process(adapter)
 
     elif args.command == "inference-watch":
diff --git a/openadapt_ml/cloud/local.py b/openadapt_ml/cloud/local.py
index 12dee8f..0c06d93 100644
--- a/openadapt_ml/cloud/local.py
+++ b/openadapt_ml/cloud/local.py
@@ -499,9 +499,7 @@ def cmd_serve(args: argparse.Namespace) -> int:
         if not args.no_regenerate:
             print("Regenerating dashboard and viewer...")
             try:
-                # Use keep_polling=True so JavaScript fetches live data from training_log.json
-                # This ensures the dashboard shows current data instead of stale embedded data
-                regenerate_local_dashboard(str(serve_dir), keep_polling=True)
+                regenerate_local_dashboard(str(serve_dir))
                 # Also regenerate viewer if comparison data exists
                 _regenerate_viewer_if_possible(serve_dir)
             except Exception as e:
diff --git a/openadapt_ml/evals/grounding.py b/openadapt_ml/evals/grounding.py
index 2a675aa..cd3ffbb 100644
--- a/openadapt_ml/evals/grounding.py
+++ b/openadapt_ml/evals/grounding.py
@@ -19,8 +19,8 @@
 if TYPE_CHECKING:
     from PIL import Image
 
-    from openadapt_ml.data.types import Episode
     from openadapt_ml.grounding.base import GroundingModule, RegionCandidate
+    from openadapt_ml.schema import Episode
 
 
 @dataclass
diff --git a/openadapt_ml/experiments/demo_prompt/run_experiment.py b/openadapt_ml/experiments/demo_prompt/run_experiment.py
index 3c38579..4fec51f 100644
--- a/openadapt_ml/experiments/demo_prompt/run_experiment.py
+++ b/openadapt_ml/experiments/demo_prompt/run_experiment.py
@@ -423,7 +423,7 @@ def run_experiment(
     from openadapt_ml.ingest.capture import capture_to_episode
 
     print(f"Loading demo from: {demo_capture_path}")
-    episode = capture_to_episode(demo_capture_path, goal=goal)
+    episode = capture_to_episode(demo_capture_path, instruction=goal)
     print(f"  Loaded {len(episode.steps)} steps, goal: {episode.goal}")
 
     print(f"\nTest task: {test_task}")
diff --git a/openadapt_ml/ingest/__init__.py b/openadapt_ml/ingest/__init__.py
index 3475f75..1ddd9c7 100644
--- a/openadapt_ml/ingest/__init__.py
+++ b/openadapt_ml/ingest/__init__.py
@@ -11,8 +11,8 @@
     - load_episodes(): Load Episodes from JSON files (primary entry point)
     - save_episodes(): Save Episodes to JSON file
     - capture_to_episode(): Converts one openadapt-capture recording → one Episode
-    - capture_to_session(): Converts one recording → Session containing one Episode
-    - load_captures_as_sessions(): Loads multiple recordings → list of Sessions
+    - capture_to_episodes(): Converts one recording → list of Episodes
+    - load_captures_as_episodes(): Loads multiple recordings → list of Episodes
     - generate_synthetic_episodes(): Creates synthetic training data
 """
 
@@ -29,15 +29,15 @@
 try:
     from openadapt_ml.ingest.capture import (  # noqa: F401
         capture_to_episode,
-        capture_to_session,
-        load_captures_as_sessions,
+        capture_to_episodes,
+        load_captures_as_episodes,
     )
 
     __all__.extend(
         [
             "capture_to_episode",
-            "capture_to_session",
-            "load_captures_as_sessions",
+            "capture_to_episodes",
+            "load_captures_as_episodes",
         ]
     )
 except ImportError:
diff --git a/openadapt_ml/scripts/compare.py b/openadapt_ml/scripts/compare.py
index ac41701..8a70c3f 100644
--- a/openadapt_ml/scripts/compare.py
+++ b/openadapt_ml/scripts/compare.py
@@ -202,6 +202,29 @@ def generate_comparison_data(
     return comparison_data
 
 
+def generate_comparison(
+    capture_path: str | Path,
+    adapter=None,
+    output_path: str | Path | None = None,
+) -> Path:
+    """Generate a comparison HTML for a capture using a loaded adapter.
+
+    Convenience wrapper over generate_comparison_data and
+    generate_comparison_html for callers that hold a capture path and a
+    model adapter (e.g. openadapt_ml.cloud.azure_inference).
+    """
+    from openadapt_ml.ingest.capture import capture_to_episode
+
+    capture_path = Path(capture_path)
+    episode = capture_to_episode(capture_path)
+    comparison_data = generate_comparison_data(episode, model=adapter)
+    if output_path is None:
+        output_path = capture_path / "comparison.html"
+    output_path = Path(output_path)
+    generate_comparison_html(capture_path, episode, comparison_data, output_path)
+    return output_path
+
+
 def generate_comparison_html(
     capture_path: Path,
     episode: Episode,
@@ -804,7 +827,7 @@ def main():
 
     # Convert capture to episode
     print(f"Loading capture from: {capture_path}")
-    episode = capture_to_episode(capture_path, goal=args.goal)
+    episode = capture_to_episode(capture_path, instruction=args.goal)
     print(f"Loaded {len(episode.steps)} steps")
 
     # Load model if checkpoint provided
diff --git a/pyproject.toml b/pyproject.toml
index 5bb266e..4729c55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,7 @@ training = [
 # See https://docs.unsloth.ai/get-started/installation
 
 dev = [
+    "build>=1.0.0",  # tests/test_packaging.py builds the wheel
     "pytest>=9.0.0",
     "ruff>=0.1.0",
 ]
diff --git a/tests/test_import_integrity.py b/tests/test_import_integrity.py
new file mode 100644
index 0000000..4e835ec
--- /dev/null
+++ b/tests/test_import_integrity.py
@@ -0,0 +1,274 @@
+"""Static import-integrity checks for the openadapt_ml package.
+
+Guards against the failure class behind OpenAdaptAI/OpenAdapt#999:
+``from openadapt_ml.cloud.local import serve_dashboard`` parsed fine,
+only exploded at call time, and a bare ``except ImportError`` reported
+it as "openadapt-ml not installed". Imports inside function bodies are
+invisible to plain import-the-module tests, so these checks walk the
+AST instead and need no heavy runtime dependencies.
+
+Two checks:
+
+1. test_no_phantom_imports — every ``from openadapt_ml.x import y``
+   anywhere in the package (including inside functions) names something
+   that actually exists in the target module.
+2. test_no_phantom_kwargs — every call to a function imported from an
+   internal module passes only keyword arguments that exist in that
+   function's signature. Conservative: decorated functions, classes,
+   and functions taking **kwargs are skipped.
+"""
+
+from __future__ import annotations
+
+import ast
+from pathlib import Path
+
+PACKAGE_NAME = "openadapt_ml"
+PACKAGE_ROOT = Path(__file__).resolve().parent.parent / PACKAGE_NAME
+
+# Known-acceptable exceptions, as (module, imported_name). Keep empty
+# unless a module defines names dynamically in a way the AST walk
+# cannot see.
+PHANTOM_IMPORT_ALLOWLIST: set[tuple[str, str]] = set()
+
+
+# ---------------------------------------------------------------------------
+# Module discovery
+# ---------------------------------------------------------------------------
+
+
+def _module_map() -> dict[str, Path]:
+    """Map dotted module names to file paths for the whole package."""
+    modules: dict[str, Path] = {}
+    for path in PACKAGE_ROOT.rglob("*.py"):
+        rel = path.relative_to(PACKAGE_ROOT.parent)
+        parts = list(rel.with_suffix("").parts)
+        if parts[-1] == "__init__":
+            parts = parts[:-1]
+        modules[".".join(parts)] = path
+    return modules
+
+
+MODULES = _module_map()
+
+
+# ---------------------------------------------------------------------------
+# Definition collection
+# ---------------------------------------------------------------------------
+
+
+def _collect_defined(tree: ast.Module) -> tuple[set[str], bool]:
+    """Names defined at module level, and whether the module is dynamic.
+
+    Walks module-level statements, descending into If/Try/With bodies
+    (TYPE_CHECKING guards, optional-import fallbacks) but not into
+    function or class bodies. A module is "dynamic" if it star-imports
+    or defines module-level __getattr__; we skip checking those.
+    """
+    defined: set[str] = set()
+    dynamic = False
+
+    def visit_body(body: list[ast.stmt]) -> None:
+        nonlocal dynamic
+        for node in body:
+            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                defined.add(node.name)
+                if node.name == "__getattr__":
+                    dynamic = True
+            elif isinstance(node, ast.ClassDef):
+                defined.add(node.name)
+            elif isinstance(node, ast.Assign):
+                for target in node.targets:
+                    for name_node in ast.walk(target):
+                        if isinstance(name_node, ast.Name):
+                            defined.add(name_node.id)
+            elif isinstance(node, (ast.AnnAssign, ast.AugAssign)):
+                if isinstance(node.target, ast.Name):
+                    defined.add(node.target.id)
+            elif isinstance(node, ast.Import):
+                for alias in node.names:
+                    defined.add((alias.asname or alias.name).split(".")[0])
+            elif isinstance(node, ast.ImportFrom):
+                for alias in node.names:
+                    if alias.name == "*":
+                        dynamic = True
+                    else:
+                        defined.add(alias.asname or alias.name)
+            elif isinstance(node, (ast.If, ast.Try, ast.With)):
+                for sub in ast.iter_child_nodes(node):
+                    if isinstance(sub, list):
+                        continue
+                visit_body(getattr(node, "body", []))
+                visit_body(getattr(node, "orelse", []))
+                visit_body(getattr(node, "finalbody", []))
+                for handler in getattr(node, "handlers", []):
+                    visit_body(handler.body)
+
+    visit_body(tree.body)
+    return defined, dynamic
+
+
+def _parse(path: Path) -> ast.Module:
+    return ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
+
+
+_DEFINED_CACHE: dict[str, tuple[set[str], bool]] = {}
+
+
+def _defined_in(module: str) -> tuple[set[str], bool] | None:
+    """Defined names for a module in the package, or None if not ours."""
+    if module not in MODULES:
+        return None
+    if module not in _DEFINED_CACHE:
+        _DEFINED_CACHE[module] = _collect_defined(_parse(MODULES[module]))
+    return _DEFINED_CACHE[module]
+
+
+def _resolve_relative(current_module: str, node: ast.ImportFrom) -> str | None:
+    """Resolve a (possibly relative) ImportFrom to a dotted module name."""
+    if node.level == 0:
+        return node.module
+    parts = current_module.split(".")
+    # level=1 from a module means its containing package; packages
+    # (__init__) count as themselves.
+    if MODULES.get(current_module, Path()).name != "__init__.py":
+        parts = parts[:-1]
+    cut = node.level - 1
+    if cut:
+        parts = parts[:-cut] if cut <= len(parts) else []
+    base = ".".join(parts)
+    if node.module:
+        return f"{base}.{node.module}" if base else node.module
+    return base or None
+
+
+# ---------------------------------------------------------------------------
+# Check 1: phantom imports
+# ---------------------------------------------------------------------------
+
+
+def test_no_phantom_imports():
+    problems: list[str] = []
+
+    for current, path in sorted(MODULES.items()):
+        tree = _parse(path)
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.ImportFrom):
+                continue
+            target = _resolve_relative(current, node)
+            if not target or not (
+                target == PACKAGE_NAME or target.startswith(PACKAGE_NAME + ".")
+            ):
+                continue
+            info = _defined_in(target)
+            if info is None:
+                # Importing from a module we can't find at all.
+                if target in MODULES or f"{target}.__init__" in MODULES:
+                    continue
+                problems.append(
+                    f"{path}:{node.lineno}: imports from missing module '{target}'"
+                )
+                continue
+            defined, dynamic = info
+            if dynamic:
+                continue
+            for alias in node.names:
+                if alias.name == "*":
+                    continue
+                if alias.name in defined:
+                    continue
+                # Importing a submodule: from openadapt_ml.cloud import local
+                if f"{target}.{alias.name}" in MODULES:
+                    continue
+                if (target, alias.name) in PHANTOM_IMPORT_ALLOWLIST:
+                    continue
+                problems.append(
+                    f"{path}:{node.lineno}: 'from {target} import "
+                    f"{alias.name}' but '{alias.name}' is not defined in "
+                    f"{MODULES[target]}"
+                )
+
+    assert not problems, (
+        "Phantom imports detected (names imported from internal modules "
+        "that do not exist there). These typically only explode at call "
+        "time and get masked by 'except ImportError':\n  " + "\n  ".join(problems)
+    )
+
+
+# ---------------------------------------------------------------------------
+# Check 2: phantom keyword arguments
+# ---------------------------------------------------------------------------
+
+
+def _function_params(module: str, func_name: str) -> set[str] | None:
+    """Param names of an undecorated top-level function, else None.
+
+    None means "cannot safely check" (missing, decorated, a class,
+    has **kwargs, or module is dynamic).
+    """
+    info = _defined_in(module)
+    if info is None or info[1]:
+        return None
+    tree = _parse(MODULES[module])
+    for node in tree.body:
+        if (
+            isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
+            and node.name == func_name
+        ):
+            if node.decorator_list or node.args.kwarg is not None:
+                return None
+            params = [a.arg for a in node.args.posonlyargs]
+            params += [a.arg for a in node.args.args]
+            params += [a.arg for a in node.args.kwonlyargs]
+            return set(params)
+    return None
+
+
+def test_no_phantom_kwargs():
+    problems: list[str] = []
+
+    for current, path in sorted(MODULES.items()):
+        tree = _parse(path)
+
+        # local alias -> (target_module, original_name), from ALL
+        # ImportFroms in the file, including inside function bodies.
+        imported: dict[str, tuple[str, str]] = {}
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ImportFrom):
+                target = _resolve_relative(current, node)
+                if target and (
+                    target == PACKAGE_NAME or target.startswith(PACKAGE_NAME + ".")
+                ):
+                    for alias in node.names:
+                        if alias.name != "*":
+                            imported[alias.asname or alias.name] = (
+                                target,
+                                alias.name,
+                            )
+
+        if not imported:
+            continue
+
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.Call):
+                continue
+            if not isinstance(node.func, ast.Name):
+                continue
+            if node.func.id not in imported:
+                continue
+            target_module, original = imported[node.func.id]
+            params = _function_params(target_module, original)
+            if params is None:
+                continue
+            for kw in node.keywords:
+                if kw.arg is not None and kw.arg not in params:
+                    problems.append(
+                        f"{path}:{node.lineno}: call to "
+                        f"{target_module}.{original}(... {kw.arg}=...) but "
+                        f"its parameters are {sorted(params)}"
+                    )
+
+    assert not problems, (
+        "Keyword arguments passed to internal functions that do not "
+        "accept them (TypeError at call time):\n  " + "\n  ".join(problems)
+    )
diff --git a/tests/test_packaging.py b/tests/test_packaging.py
new file mode 100644
index 0000000..86e2a6e
--- /dev/null
+++ b/tests/test_packaging.py
@@ -0,0 +1,76 @@
+"""Built-artifact checks.
+
+CI normally tests the repo checkout, where files like configs/ exist
+regardless of packaging configuration. OpenAdaptAI/OpenAdapt#999 bug 5
+shipped because the wheel silently lacked the configs/ directory and
+nothing ever inspected a built artifact. These tests build the wheel
+and assert its contents.
+"""
+
+from __future__ import annotations
+
+import re
+import subprocess
+import sys
+import zipfile
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+
+
+@pytest.fixture(scope="module")
+def wheel_path(tmp_path_factory) -> Path:
+    out_dir = tmp_path_factory.mktemp("wheel")
+    result = subprocess.run(
+        [sys.executable, "-m", "build", "--wheel", "--outdir", str(out_dir)],
+        cwd=REPO_ROOT,
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        pytest.skip(
+            "could not build wheel (is 'build' installed?): " + result.stderr[-500:]
+        )
+    wheels = list(out_dir.glob("*.whl"))
+    assert len(wheels) == 1, f"expected exactly one wheel, got {wheels}"
+    return wheels[0]
+
+
+def test_wheel_contains_bundled_configs(wheel_path: Path):
+    with zipfile.ZipFile(wheel_path) as zf:
+        config_yamls = [
+            n
+            for n in zf.namelist()
+            if n.startswith("openadapt_ml/configs/") and n.endswith(".yaml")
+        ]
+    assert len(config_yamls) >= 5, (
+        "Wheel is missing bundled training configs; cmd_train and the "
+        "openadapt CLI resolve default configs from openadapt_ml/configs/ "
+        f"inside the installed package. Found: {config_yamls}"
+    )
+
+
+def test_wheel_contains_core_subpackages(wheel_path: Path):
+    required = {
+        "openadapt_ml/__init__.py",
+        "openadapt_ml/cloud/local.py",
+        "openadapt_ml/scripts/train.py",
+        "openadapt_ml/training/trainer.py",
+        "openadapt_ml/ingest/capture.py",
+        "openadapt_ml/schema/__init__.py",
+    }
+    with zipfile.ZipFile(wheel_path) as zf:
+        names = set(zf.namelist())
+    missing = required - names
+    assert not missing, f"Wheel is missing core modules: {sorted(missing)}"
+
+
+def test_wheel_version_matches_pyproject(wheel_path: Path):
+    pyproject = (REPO_ROOT / "pyproject.toml").read_text()
+    match = re.search(r'^version = "([^"]+)"', pyproject, re.MULTILINE)
+    assert match, "could not find version in pyproject.toml"
+    assert f"-{match.group(1)}-" in wheel_path.name, (
+        f"wheel {wheel_path.name} does not match pyproject version {match.group(1)}"
+    )