diff --git a/openadapt_ml/cloud/local.py b/openadapt_ml/cloud/local.py index e11d203..12dee8f 100644 --- a/openadapt_ml/cloud/local.py +++ b/openadapt_ml/cloud/local.py @@ -54,6 +54,22 @@ def get_current_output_dir() -> Path: return TRAINING_OUTPUT +def resolve_config_path(config: str | Path) -> Path: + """Resolve a training config path, falling back to packaged configs. + + Relative paths like "configs/qwen3vl_capture.yaml" only exist when + running from a repo checkout. When openadapt-ml is pip-installed, the + bundled copies under openadapt_ml/configs/ are used instead. + """ + path = Path(config) + if path.exists(): + return path + packaged = Path(__file__).resolve().parent.parent / "configs" / path.name + if packaged.exists(): + return packaged + return path + + def _regenerate_viewer_if_possible(output_dir: Path) -> bool: """Regenerate viewer.html if comparison data exists. @@ -324,7 +340,7 @@ def cmd_train(args: argparse.Namespace) -> int: else: config = "configs/qwen3vl_capture_4bit.yaml" - config_path = Path(config) + config_path = resolve_config_path(config) if not config_path.exists(): print(f"Error: Config not found: {config_path}") return 1 diff --git a/openadapt_ml/scripts/demo_policy.py b/openadapt_ml/scripts/demo_policy.py index 9014db0..ba5210e 100644 --- a/openadapt_ml/scripts/demo_policy.py +++ b/openadapt_ml/scripts/demo_policy.py @@ -3,7 +3,7 @@ import argparse from openadapt_ml.datasets.next_action import build_next_action_sft_samples -from openadapt_ml.ingest.synthetic import generate_synthetic_sessions +from openadapt_ml.ingest.synthetic import generate_synthetic_episodes from openadapt_ml.models.dummy_adapter import DummyAdapter from openadapt_ml.models.qwen_vl import QwenVLAdapter from openadapt_ml.models.api_adapter import ApiVLMAdapter @@ -20,10 +20,9 @@ def main() -> None: args = parser.parse_args() # Use synthetic data to build one SFT-style sample - sessions = generate_synthetic_sessions( - num_sessions=1, seed=99, output_dir="synthetic/demo" + episodes = generate_synthetic_episodes( + num_episodes=1, seed=99, output_dir="synthetic/demo" ) - episodes = [ep for sess in sessions for ep in sess.episodes] samples = build_next_action_sft_samples(episodes) # Load first sample and overwrite assistant content so the dummy adapter diff --git a/openadapt_ml/scripts/train.py b/openadapt_ml/scripts/train.py index 3541d80..9fcf003 100644 --- a/openadapt_ml/scripts/train.py +++ b/openadapt_ml/scripts/train.py @@ -38,7 +38,7 @@ def _load_capture_episodes(capture_path: str | Path, goal: str | None = None) -> from openadapt_ml.ingest.capture import capture_to_episode capture_path = Path(capture_path) - episode = capture_to_episode(capture_path, goal=goal) + episode = capture_to_episode(capture_path, instruction=goal) return [episode] diff --git a/openadapt_ml/training/grpo/reward.py b/openadapt_ml/training/grpo/reward.py index e6f549f..1d90678 100644 --- a/openadapt_ml/training/grpo/reward.py +++ b/openadapt_ml/training/grpo/reward.py @@ -100,8 +100,7 @@ def evaluate_milestones_screenshot( # Only evaluate screenshot-type milestones locally screenshot_milestones = [ - ms for ms in milestones - if getattr(ms.check, "check", None) == "screenshot" + ms for ms in milestones if getattr(ms.check, "check", None) == "screenshot" ] if not screenshot_milestones: return 0.0 diff --git a/openadapt_ml/training/grpo/trainer.py b/openadapt_ml/training/grpo/trainer.py index 05f249a..b13656f 100644 --- a/openadapt_ml/training/grpo/trainer.py +++ b/openadapt_ml/training/grpo/trainer.py @@ -195,6 +195,7 @@ class BenchmarkAction: # type: ignore[no-redef] if json_match: try: import json as _json + action_data = _json.loads(json_match.group()) atype = action_data.get("action_type", "").lower() coord = action_data.get("coordinate", action_data.get("coords", [])) @@ -205,9 +206,7 @@ class BenchmarkAction: # type: ignore[no-redef] x_val, y_val = x_val * width, y_val * height return BenchmarkAction(type="click", x=int(x_val), y=int(y_val)) if atype == "type": - return BenchmarkAction( - type="type", text=action_data.get("text", "") - ) + return BenchmarkAction(type="type", text=action_data.get("text", "")) if atype in ("done", "wait"): return BenchmarkAction(type=atype) except Exception: diff --git a/openadapt_ml/training/trainer.py b/openadapt_ml/training/trainer.py index f0be6ab..3d07b9d 100644 --- a/openadapt_ml/training/trainer.py +++ b/openadapt_ml/training/trainer.py @@ -79,6 +79,59 @@ def get_current_job_directory(base_dir: str | Path) -> Path | None: return None +def update_current_symlink_to_latest( + base_dir: str | Path = "training_output", +) -> Path | None: + """Point the 'current' symlink at the most recent job directory. + + Scans base_dir for job directories (any real subdirectory other than + the 'current' symlink itself) and atomically updates the symlink to + the most recently modified one. + + Args: + base_dir: Base output directory containing job directories. + + Returns: + Path to the latest job directory, or None if none exist. + """ + base_dir = Path(base_dir) + if not base_dir.is_dir(): + return None + + job_dirs = [ + d + for d in base_dir.iterdir() + if d.is_dir() and not d.is_symlink() and not d.name.startswith(".") + ] + if not job_dirs: + return None + + # Prefer directories that look like training runs over stray dirs + # (e.g. a top-level "checkpoints" directory from the old flat layout). + run_like = [ + d + for d in job_dirs + if (d / "training_log.json").exists() or (d / "dashboard.html").exists() + ] + candidates = run_like or job_dirs + + latest = max(candidates, key=lambda d: d.stat().st_mtime) + + current_link = base_dir / "current" + temp_link = base_dir / f".current_temp_{latest.name}" + try: + if temp_link.exists() or temp_link.is_symlink(): + temp_link.unlink() + temp_link.symlink_to(latest.name) + temp_link.rename(current_link) + except Exception as e: + if temp_link.exists() or temp_link.is_symlink(): + temp_link.unlink() + raise RuntimeError(f"Failed to update current symlink: {e}") + + return latest + + @dataclass class TrainingConfig: # Model / LoRA-related fields are handled elsewhere; this covers loop hyperparams. diff --git a/openadapt_ml/training/trl_trainer.py b/openadapt_ml/training/trl_trainer.py index b82af7f..1c13511 100644 --- a/openadapt_ml/training/trl_trainer.py +++ b/openadapt_ml/training/trl_trainer.py @@ -409,6 +409,13 @@ def _run_sft_training( callbacks=[callback], ) else: + import torch + + has_cuda = torch.cuda.is_available() + has_mps = ( + getattr(torch.backends, "mps", None) is not None + and torch.backends.mps.is_available() + ) training_args = SFTConfig( output_dir=config.output_dir, per_device_train_batch_size=config.batch_size, @@ -423,6 +430,9 @@ def _run_sft_training( save_strategy=config.save_strategy, max_length=None, # Critical for VLMs assistant_only_loss=False, # Not supported for VL models yet + use_cpu=not (has_cuda or has_mps), + bf16=has_cuda and torch.cuda.is_bf16_supported(), + fp16=False, ) trainer = SFTTrainer( diff --git a/pyproject.toml b/pyproject.toml index e71e961..7f0bae3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,9 @@ build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["openadapt_ml"] +[tool.hatch.build.targets.wheel.force-include] +"configs" = "openadapt_ml/configs" + [tool.uv.sources] openadapt-capture = { path = "../openadapt-capture", editable = true }