From 9ae3ce85b85aa66ce72b671c2a745b469fd0825d Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Fri, 12 Jun 2026 17:42:53 -0400 Subject: [PATCH] test: add import-integrity and packaging guards; fix 8 latent bugs they found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to OpenAdaptAI/OpenAdapt#999: green CI shipped a fully broken CLI because no test exercised lazy imports, internal call seams, or the built wheel. This adds three guards and fixes everything they caught on current main: New tests (dependency-free, <3s total): - tests/test_import_integrity.py::test_no_phantom_imports — AST-walks every `from openadapt_ml.x import y` (including inside function bodies) and verifies y exists in x - tests/test_import_integrity.py::test_no_phantom_kwargs — verifies keyword args passed to internal functions exist in their signatures - tests/test_packaging.py — builds the wheel and asserts bundled configs, core modules, and version match Latent bugs the new tests caught on main, fixed here: - cloud/local.py: cmd_serve called regenerate_local_dashboard with a keep_polling kwarg that no longer exists (dashboard regeneration failed on every serve, downgraded to a warning) - scripts/compare.py + experiments/demo_prompt/run_experiment.py: capture_to_episode(goal=) — same kwarg bug as #999 bug 3, two more call sites - ingest/__init__.py: re-exported capture_to_session and load_captures_as_sessions, which don't exist; the except ImportError guard meant capture_to_episode was silently never exported either - evals/grounding.py: TYPE_CHECKING import from missing module openadapt_ml.data.types (Episode lives in openadapt_ml.schema) - cloud/azure_inference.py: imported QwenVLAdapter from missing module openadapt_ml.adapters.qwen (lives in models.qwen_vl) and constructed it with a non-existent model_name kwarg (needs from_pretrained); imported generate_comparison which was refactored away — restored as a thin wrapper over generate_comparison_data/_html in compare.py Also: - release.yml: file/append a GitHub issue when the release workflow fails. Releases failed silently Mar-Jun 2026 while PyPI went stale, which is what forced #999's reporter onto git installs - pyproject: add build to the dev extra for the packaging tests Co-Authored-By: Claude Fable 5 --- .github/workflows/release.yml | 18 ++ openadapt_ml/cloud/azure_inference.py | 4 +- openadapt_ml/cloud/local.py | 4 +- openadapt_ml/evals/grounding.py | 2 +- .../experiments/demo_prompt/run_experiment.py | 2 +- openadapt_ml/ingest/__init__.py | 12 +- openadapt_ml/scripts/compare.py | 25 +- pyproject.toml | 1 + tests/test_import_integrity.py | 274 ++++++++++++++++++ tests/test_packaging.py | 76 +++++ 10 files changed, 404 insertions(+), 14 deletions(-) create mode 100644 tests/test_import_integrity.py create mode 100644 tests/test_packaging.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 16e8a87..04bd62a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -57,3 +57,21 @@ jobs: uses: python-semantic-release/publish-action@v9.15.2 with: github_token: ${{ secrets.ADMIN_TOKEN }} + + # Releases failed silently for 3 months (Mar-Jun 2026) while PyPI + # went stale; see OpenAdaptAI/OpenAdapt#999. Fail loudly instead. + - name: File issue on release failure + if: failure() + env: + GH_TOKEN: ${{ secrets.ADMIN_TOKEN }} + run: | + TITLE="Release workflow failed on main" + BODY="The release workflow failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + Until this is fixed, merged fix/feat commits are NOT being published to PyPI, and users install stale versions." + EXISTING=$(gh issue list --repo "${{ github.repository }}" --state open --search "in:title \"$TITLE\"" --json number --jq '.[0].number // empty') + if [ -n "$EXISTING" ]; then + gh issue comment "$EXISTING" --repo "${{ github.repository }}" --body "$BODY" + else + gh issue create --repo "${{ github.repository }}" --title "$TITLE" --body "$BODY" + fi diff --git a/openadapt_ml/cloud/azure_inference.py b/openadapt_ml/cloud/azure_inference.py index f5a49d3..f572746 100644 --- a/openadapt_ml/cloud/azure_inference.py +++ b/openadapt_ml/cloud/azure_inference.py @@ -424,10 +424,10 @@ def main(): elif args.command == "inference-worker": # Start inference worker - from openadapt_ml.adapters.qwen import QwenVLAdapter + from openadapt_ml.models.qwen_vl import QwenVLAdapter print(f"Starting inference worker with model: {args.model}") - adapter = QwenVLAdapter(model_name=args.model) + adapter = QwenVLAdapter.from_pretrained(args.model) queue.poll_and_process(adapter) elif args.command == "inference-watch": diff --git a/openadapt_ml/cloud/local.py b/openadapt_ml/cloud/local.py index 12dee8f..0c06d93 100644 --- a/openadapt_ml/cloud/local.py +++ b/openadapt_ml/cloud/local.py @@ -499,9 +499,7 @@ def cmd_serve(args: argparse.Namespace) -> int: if not args.no_regenerate: print("Regenerating dashboard and viewer...") try: - # Use keep_polling=True so JavaScript fetches live data from training_log.json - # This ensures the dashboard shows current data instead of stale embedded data - regenerate_local_dashboard(str(serve_dir), keep_polling=True) + regenerate_local_dashboard(str(serve_dir)) # Also regenerate viewer if comparison data exists _regenerate_viewer_if_possible(serve_dir) except Exception as e: diff --git a/openadapt_ml/evals/grounding.py b/openadapt_ml/evals/grounding.py index 2a675aa..cd3ffbb 100644 --- a/openadapt_ml/evals/grounding.py +++ b/openadapt_ml/evals/grounding.py @@ -19,8 +19,8 @@ if TYPE_CHECKING: from PIL import Image - from openadapt_ml.data.types import Episode from openadapt_ml.grounding.base import GroundingModule, RegionCandidate + from openadapt_ml.schema import Episode @dataclass diff --git a/openadapt_ml/experiments/demo_prompt/run_experiment.py b/openadapt_ml/experiments/demo_prompt/run_experiment.py index 3c38579..4fec51f 100644 --- a/openadapt_ml/experiments/demo_prompt/run_experiment.py +++ b/openadapt_ml/experiments/demo_prompt/run_experiment.py @@ -423,7 +423,7 @@ def run_experiment( from openadapt_ml.ingest.capture import capture_to_episode print(f"Loading demo from: {demo_capture_path}") - episode = capture_to_episode(demo_capture_path, goal=goal) + episode = capture_to_episode(demo_capture_path, instruction=goal) print(f" Loaded {len(episode.steps)} steps, goal: {episode.goal}") print(f"\nTest task: {test_task}") diff --git a/openadapt_ml/ingest/__init__.py b/openadapt_ml/ingest/__init__.py index 3475f75..1ddd9c7 100644 --- a/openadapt_ml/ingest/__init__.py +++ b/openadapt_ml/ingest/__init__.py @@ -11,8 +11,8 @@ - load_episodes(): Load Episodes from JSON files (primary entry point) - save_episodes(): Save Episodes to JSON file - capture_to_episode(): Converts one openadapt-capture recording → one Episode - - capture_to_session(): Converts one recording → Session containing one Episode - - load_captures_as_sessions(): Loads multiple recordings → list of Sessions + - capture_to_episodes(): Converts one recording → list of Episodes + - load_captures_as_episodes(): Loads multiple recordings → list of Episodes - generate_synthetic_episodes(): Creates synthetic training data """ @@ -29,15 +29,15 @@ try: from openadapt_ml.ingest.capture import ( # noqa: F401 capture_to_episode, - capture_to_session, - load_captures_as_sessions, + capture_to_episodes, + load_captures_as_episodes, ) __all__.extend( [ "capture_to_episode", - "capture_to_session", - "load_captures_as_sessions", + "capture_to_episodes", + "load_captures_as_episodes", ] ) except ImportError: diff --git a/openadapt_ml/scripts/compare.py b/openadapt_ml/scripts/compare.py index ac41701..8a70c3f 100644 --- a/openadapt_ml/scripts/compare.py +++ b/openadapt_ml/scripts/compare.py @@ -202,6 +202,29 @@ def generate_comparison_data( return comparison_data +def generate_comparison( + capture_path: str | Path, + adapter=None, + output_path: str | Path | None = None, +) -> Path: + """Generate a comparison HTML for a capture using a loaded adapter. + + Convenience wrapper over generate_comparison_data and + generate_comparison_html for callers that hold a capture path and a + model adapter (e.g. openadapt_ml.cloud.azure_inference). + """ + from openadapt_ml.ingest.capture import capture_to_episode + + capture_path = Path(capture_path) + episode = capture_to_episode(capture_path) + comparison_data = generate_comparison_data(episode, model=adapter) + if output_path is None: + output_path = capture_path / "comparison.html" + output_path = Path(output_path) + generate_comparison_html(capture_path, episode, comparison_data, output_path) + return output_path + + def generate_comparison_html( capture_path: Path, episode: Episode, @@ -804,7 +827,7 @@ def main(): # Convert capture to episode print(f"Loading capture from: {capture_path}") - episode = capture_to_episode(capture_path, goal=args.goal) + episode = capture_to_episode(capture_path, instruction=args.goal) print(f"Loaded {len(episode.steps)} steps") # Load model if checkpoint provided diff --git a/pyproject.toml b/pyproject.toml index 5bb266e..4729c55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ training = [ # See https://docs.unsloth.ai/get-started/installation dev = [ + "build>=1.0.0", # tests/test_packaging.py builds the wheel "pytest>=9.0.0", "ruff>=0.1.0", ] diff --git a/tests/test_import_integrity.py b/tests/test_import_integrity.py new file mode 100644 index 0000000..4e835ec --- /dev/null +++ b/tests/test_import_integrity.py @@ -0,0 +1,274 @@ +"""Static import-integrity checks for the openadapt_ml package. + +Guards against the failure class behind OpenAdaptAI/OpenAdapt#999: +``from openadapt_ml.cloud.local import serve_dashboard`` parsed fine, +only exploded at call time, and a bare ``except ImportError`` reported +it as "openadapt-ml not installed". Imports inside function bodies are +invisible to plain import-the-module tests, so these checks walk the +AST instead and need no heavy runtime dependencies. + +Two checks: + +1. test_no_phantom_imports — every ``from openadapt_ml.x import y`` + anywhere in the package (including inside functions) names something + that actually exists in the target module. +2. test_no_phantom_kwargs — every call to a function imported from an + internal module passes only keyword arguments that exist in that + function's signature. Conservative: decorated functions, classes, + and functions taking **kwargs are skipped. +""" + +from __future__ import annotations + +import ast +from pathlib import Path + +PACKAGE_NAME = "openadapt_ml" +PACKAGE_ROOT = Path(__file__).resolve().parent.parent / PACKAGE_NAME + +# Known-acceptable exceptions, as (module, imported_name). Keep empty +# unless a module defines names dynamically in a way the AST walk +# cannot see. +PHANTOM_IMPORT_ALLOWLIST: set[tuple[str, str]] = set() + + +# --------------------------------------------------------------------------- +# Module discovery +# --------------------------------------------------------------------------- + + +def _module_map() -> dict[str, Path]: + """Map dotted module names to file paths for the whole package.""" + modules: dict[str, Path] = {} + for path in PACKAGE_ROOT.rglob("*.py"): + rel = path.relative_to(PACKAGE_ROOT.parent) + parts = list(rel.with_suffix("").parts) + if parts[-1] == "__init__": + parts = parts[:-1] + modules[".".join(parts)] = path + return modules + + +MODULES = _module_map() + + +# --------------------------------------------------------------------------- +# Definition collection +# --------------------------------------------------------------------------- + + +def _collect_defined(tree: ast.Module) -> tuple[set[str], bool]: + """Names defined at module level, and whether the module is dynamic. + + Walks module-level statements, descending into If/Try/With bodies + (TYPE_CHECKING guards, optional-import fallbacks) but not into + function or class bodies. A module is "dynamic" if it star-imports + or defines module-level __getattr__; we skip checking those. + """ + defined: set[str] = set() + dynamic = False + + def visit_body(body: list[ast.stmt]) -> None: + nonlocal dynamic + for node in body: + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + defined.add(node.name) + if node.name == "__getattr__": + dynamic = True + elif isinstance(node, ast.ClassDef): + defined.add(node.name) + elif isinstance(node, ast.Assign): + for target in node.targets: + for name_node in ast.walk(target): + if isinstance(name_node, ast.Name): + defined.add(name_node.id) + elif isinstance(node, (ast.AnnAssign, ast.AugAssign)): + if isinstance(node.target, ast.Name): + defined.add(node.target.id) + elif isinstance(node, ast.Import): + for alias in node.names: + defined.add((alias.asname or alias.name).split(".")[0]) + elif isinstance(node, ast.ImportFrom): + for alias in node.names: + if alias.name == "*": + dynamic = True + else: + defined.add(alias.asname or alias.name) + elif isinstance(node, (ast.If, ast.Try, ast.With)): + for sub in ast.iter_child_nodes(node): + if isinstance(sub, list): + continue + visit_body(getattr(node, "body", [])) + visit_body(getattr(node, "orelse", [])) + visit_body(getattr(node, "finalbody", [])) + for handler in getattr(node, "handlers", []): + visit_body(handler.body) + + visit_body(tree.body) + return defined, dynamic + + +def _parse(path: Path) -> ast.Module: + return ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + + +_DEFINED_CACHE: dict[str, tuple[set[str], bool]] = {} + + +def _defined_in(module: str) -> tuple[set[str], bool] | None: + """Defined names for a module in the package, or None if not ours.""" + if module not in MODULES: + return None + if module not in _DEFINED_CACHE: + _DEFINED_CACHE[module] = _collect_defined(_parse(MODULES[module])) + return _DEFINED_CACHE[module] + + +def _resolve_relative(current_module: str, node: ast.ImportFrom) -> str | None: + """Resolve a (possibly relative) ImportFrom to a dotted module name.""" + if node.level == 0: + return node.module + parts = current_module.split(".") + # level=1 from a module means its containing package; packages + # (__init__) count as themselves. + if MODULES.get(current_module, Path()).name != "__init__.py": + parts = parts[:-1] + cut = node.level - 1 + if cut: + parts = parts[:-cut] if cut <= len(parts) else [] + base = ".".join(parts) + if node.module: + return f"{base}.{node.module}" if base else node.module + return base or None + + +# --------------------------------------------------------------------------- +# Check 1: phantom imports +# --------------------------------------------------------------------------- + + +def test_no_phantom_imports(): + problems: list[str] = [] + + for current, path in sorted(MODULES.items()): + tree = _parse(path) + for node in ast.walk(tree): + if not isinstance(node, ast.ImportFrom): + continue + target = _resolve_relative(current, node) + if not target or not ( + target == PACKAGE_NAME or target.startswith(PACKAGE_NAME + ".") + ): + continue + info = _defined_in(target) + if info is None: + # Importing from a module we can't find at all. + if target in MODULES or f"{target}.__init__" in MODULES: + continue + problems.append( + f"{path}:{node.lineno}: imports from missing module '{target}'" + ) + continue + defined, dynamic = info + if dynamic: + continue + for alias in node.names: + if alias.name == "*": + continue + if alias.name in defined: + continue + # Importing a submodule: from openadapt_ml.cloud import local + if f"{target}.{alias.name}" in MODULES: + continue + if (target, alias.name) in PHANTOM_IMPORT_ALLOWLIST: + continue + problems.append( + f"{path}:{node.lineno}: 'from {target} import " + f"{alias.name}' but '{alias.name}' is not defined in " + f"{MODULES[target]}" + ) + + assert not problems, ( + "Phantom imports detected (names imported from internal modules " + "that do not exist there). These typically only explode at call " + "time and get masked by 'except ImportError':\n " + "\n ".join(problems) + ) + + +# --------------------------------------------------------------------------- +# Check 2: phantom keyword arguments +# --------------------------------------------------------------------------- + + +def _function_params(module: str, func_name: str) -> set[str] | None: + """Param names of an undecorated top-level function, else None. + + None means "cannot safely check" (missing, decorated, a class, + has **kwargs, or module is dynamic). + """ + info = _defined_in(module) + if info is None or info[1]: + return None + tree = _parse(MODULES[module]) + for node in tree.body: + if ( + isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) + and node.name == func_name + ): + if node.decorator_list or node.args.kwarg is not None: + return None + params = [a.arg for a in node.args.posonlyargs] + params += [a.arg for a in node.args.args] + params += [a.arg for a in node.args.kwonlyargs] + return set(params) + return None + + +def test_no_phantom_kwargs(): + problems: list[str] = [] + + for current, path in sorted(MODULES.items()): + tree = _parse(path) + + # local alias -> (target_module, original_name), from ALL + # ImportFroms in the file, including inside function bodies. + imported: dict[str, tuple[str, str]] = {} + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom): + target = _resolve_relative(current, node) + if target and ( + target == PACKAGE_NAME or target.startswith(PACKAGE_NAME + ".") + ): + for alias in node.names: + if alias.name != "*": + imported[alias.asname or alias.name] = ( + target, + alias.name, + ) + + if not imported: + continue + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if not isinstance(node.func, ast.Name): + continue + if node.func.id not in imported: + continue + target_module, original = imported[node.func.id] + params = _function_params(target_module, original) + if params is None: + continue + for kw in node.keywords: + if kw.arg is not None and kw.arg not in params: + problems.append( + f"{path}:{node.lineno}: call to " + f"{target_module}.{original}(... {kw.arg}=...) but " + f"its parameters are {sorted(params)}" + ) + + assert not problems, ( + "Keyword arguments passed to internal functions that do not " + "accept them (TypeError at call time):\n " + "\n ".join(problems) + ) diff --git a/tests/test_packaging.py b/tests/test_packaging.py new file mode 100644 index 0000000..86e2a6e --- /dev/null +++ b/tests/test_packaging.py @@ -0,0 +1,76 @@ +"""Built-artifact checks. + +CI normally tests the repo checkout, where files like configs/ exist +regardless of packaging configuration. OpenAdaptAI/OpenAdapt#999 bug 5 +shipped because the wheel silently lacked the configs/ directory and +nothing ever inspected a built artifact. These tests build the wheel +and assert its contents. +""" + +from __future__ import annotations + +import re +import subprocess +import sys +import zipfile +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent + + +@pytest.fixture(scope="module") +def wheel_path(tmp_path_factory) -> Path: + out_dir = tmp_path_factory.mktemp("wheel") + result = subprocess.run( + [sys.executable, "-m", "build", "--wheel", "--outdir", str(out_dir)], + cwd=REPO_ROOT, + capture_output=True, + text=True, + ) + if result.returncode != 0: + pytest.skip( + "could not build wheel (is 'build' installed?): " + result.stderr[-500:] + ) + wheels = list(out_dir.glob("*.whl")) + assert len(wheels) == 1, f"expected exactly one wheel, got {wheels}" + return wheels[0] + + +def test_wheel_contains_bundled_configs(wheel_path: Path): + with zipfile.ZipFile(wheel_path) as zf: + config_yamls = [ + n + for n in zf.namelist() + if n.startswith("openadapt_ml/configs/") and n.endswith(".yaml") + ] + assert len(config_yamls) >= 5, ( + "Wheel is missing bundled training configs; cmd_train and the " + "openadapt CLI resolve default configs from openadapt_ml/configs/ " + f"inside the installed package. Found: {config_yamls}" + ) + + +def test_wheel_contains_core_subpackages(wheel_path: Path): + required = { + "openadapt_ml/__init__.py", + "openadapt_ml/cloud/local.py", + "openadapt_ml/scripts/train.py", + "openadapt_ml/training/trainer.py", + "openadapt_ml/ingest/capture.py", + "openadapt_ml/schema/__init__.py", + } + with zipfile.ZipFile(wheel_path) as zf: + names = set(zf.namelist()) + missing = required - names + assert not missing, f"Wheel is missing core modules: {sorted(missing)}" + + +def test_wheel_version_matches_pyproject(wheel_path: Path): + pyproject = (REPO_ROOT / "pyproject.toml").read_text() + match = re.search(r'^version = "([^"]+)"', pyproject, re.MULTILINE) + assert match, "could not find version in pyproject.toml" + assert f"-{match.group(1)}-" in wheel_path.name, ( + f"wheel {wheel_path.name} does not match pyproject version {match.group(1)}" + )