From 12bbc798be8b3831b1079725ce46530f50420baf Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 30 May 2026 13:25:37 +0000
Subject: [PATCH 1/2] feat(training): persisted configurable splits, full
 HP/aug passthrough, richer metrics capture

---
 backend/src/app/api/assets.py             |  40 +++++
 backend/src/app/api/experiments.py        |  58 ++++++-
 backend/src/app/jobs/tasks/training.py    | 189 ++++++++++++++++----
 backend/src/app/schemas/experiment.py     |   1 +
 backend/src/app/schemas/split.py          |  29 ++++
 backend/src/app/services/split_service.py | 200 ++++++++++++++++++++++
 backend/src/app/services/storage.py       |  35 ++++
 7 files changed, 521 insertions(+), 31 deletions(-)
 create mode 100644 backend/src/app/schemas/split.py
 create mode 100644 backend/src/app/services/split_service.py

diff --git a/backend/src/app/api/assets.py b/backend/src/app/api/assets.py
index d0b55c9..ab5b3bf 100644
--- a/backend/src/app/api/assets.py
+++ b/backend/src/app/api/assets.py
@@ -9,6 +9,8 @@
 
 from app.db.deps import get_current_user, get_db
 from app.models.user import User
+from app.schemas.split import SplitConfig, SplitSummary
+from app.services import split_service
 from app.services.annotation_service import get_asset_annotations
 from app.services.asset_service import (
     confirm_upload,
@@ -102,6 +104,7 @@ def list_dataset_assets(
     dataset_id: str = Path(...),
     version_id: str | None = Query(None),
     label_status: str | None = Query(None),
+    split: str | None = Query(None, description="Filter by train/val/test split"),
     limit: int = Query(100, ge=1, le=500),
     offset: int = Query(0, ge=0),
     db: Session = Depends(get_db),
@@ -112,6 +115,7 @@ def list_dataset_assets(
         dataset_id,
         version_id=version_id,
         label_status=label_status,
+        split=split,
         limit=limit,
         offset=offset,
     )
@@ -120,10 +124,12 @@ def list_dataset_assets(
             {
                 "id": a.id,
                 "uri": a.uri,
+                "download_url": _presign_download(a.uri),
                 "mime_type": a.mime_type,
                 "width": a.width,
                 "height": a.height,
                 "label_status": a.label_status,
+                "split": split_service.asset_split(a),
                 "created_at": a.created_at.isoformat() if a.created_at else None,
             }
             for a in assets
@@ -134,6 +140,40 @@ def list_dataset_assets(
     }
 
 
+@router.get("/datasets/{dataset_id}/versions/{version_id}/split", response_model=SplitSummary)
+def get_version_split(
+    dataset_id: str = Path(...),
+    version_id: str = Path(...),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """Return the persisted split summary of a version (``dataset_id`` is currently unused)."""
+    return split_service.get_split_summary(db, version_id)
+
+
+@router.post("/datasets/{dataset_id}/versions/{version_id}/split", response_model=SplitSummary)
+def assign_version_split(
+    body: SplitConfig,
+    dataset_id: str = Path(...),
+    version_id: str = Path(...),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """(Re)assign and persist the split of every asset in a version; bad ratios give HTTP 400."""
+    try:
+        return split_service.assign_splits(
+            db,
+            version_id,
+            train=body.train,
+            val=body.val,
+            test=body.test,
+            seed=body.seed,
+            stratify=body.stratify,
+        )
+    except split_service.SplitConfigError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+
+
 @router.get("/datasets/{dataset_id}/stats")
 def dataset_stats(
     dataset_id: str = Path(...),
diff --git a/backend/src/app/api/experiments.py b/backend/src/app/api/experiments.py
index 86c48bd..e65bace 100644
--- a/backend/src/app/api/experiments.py
+++ b/backend/src/app/api/experiments.py
@@ -23,6 +23,7 @@ def _run_to_schema(e: ExperimentModel) -> ExperimentSchema:
         params_json=e.params_json,
         dataset_version_id=e.dataset_version_id,
         metrics_json=e.metrics_json,
+        artifacts=e.artifacts,
         status=e.status,
         code_hash=e.code_hash,
         started_at=e.started_at,
@@ -100,12 +101,15 @@ def get_metrics(
     if not e:
         raise HTTPException(status_code=404, detail="Run not found")
     metrics: list = []
+    summary: dict | None = None
+    plots: list = []
+    split: dict | None = None
     if e.metrics_json:
         try:
             data = json.loads(e.metrics_json)
             # metrics_json may be:
             #   - a list of epoch dicts: [{epoch, mAP50, ...}, ...]
-            #   - {"epochs": [{epoch, mAP50, ...}, ...]} as written by train_task
+            #   - {"epochs": [...], "summary": {...}, "plots": [...], "split": {...}}
             #   - {"error": "..."} on failure
             if isinstance(data, list):
                 metrics = data
@@ -114,6 +118,56 @@ def get_metrics(
                     metrics = data["epochs"]
                 elif "error" not in data:
                     metrics = [data]
+                summary = data.get("summary")
+                plots = data.get("plots") or []
+                split = data.get("split")
         except Exception:
             pass
-    return {"run_id": runId, "status": e.status, "metrics": metrics}
+    return {
+        "run_id": runId,
+        "status": e.status,
+        "metrics": metrics,
+        "summary": summary,
+        "plots": plots,
+        "split": split,
+    }
+
+
+@router.get("/runs/{runId}/plots/{name}")
+def get_plot(
+    runId: str = Path(...),
+    name: str = Path(...),
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    """Stream a plot image of a run, looked up by its plot name or file name.
+
+    404 if the run, its plots, or a usable entry for ``name`` is missing; 502 on storage errors.
+    """
+    import io
+    import os
+
+    from fastapi.responses import StreamingResponse
+
+    e = db.get(ExperimentModel, runId)
+    if not e or not e.metrics_json:
+        raise HTTPException(status_code=404, detail="Run or plots not found")
+    try:
+        plots = list(json.loads(e.metrics_json).get("plots") or [])
+    except Exception:
+        plots = []  # unparsable, list-shaped or otherwise unexpected metrics carry no plots
+    # Skip malformed entries (non-dict, or without a string key) instead of failing with a 500.
+    usable = (p for p in plots if isinstance(p, dict) and isinstance(p.get("key"), str))
+    record = next((p for p in usable if name in (p.get("name"), p.get("file"))), None)
+    if not record:
+        raise HTTPException(status_code=404, detail="Plot not found")
+    key = record["key"]
+    try:
+        from app.services import storage
+
+        bucket = os.getenv("MINIO_BUCKET", os.getenv("S3_BUCKET", "visionforge"))
+        data = storage.get_bytes(storage.get_minio_client(), key, bucket=bucket)
+    except Exception as exc:  # pragma: no cover - storage failure path
+        raise HTTPException(status_code=502, detail=f"could not fetch plot: {exc}") from exc
+    media = "image/jpeg" if key.lower().endswith((".jpg", ".jpeg")) else "image/png"
+    return StreamingResponse(io.BytesIO(data), media_type=media)
diff --git a/backend/src/app/jobs/tasks/training.py b/backend/src/app/jobs/tasks/training.py
index 1550243..ef81c9b 100644
--- a/backend/src/app/jobs/tasks/training.py
+++ b/backend/src/app/jobs/tasks/training.py
@@ -43,6 +43,103 @@ def _extract_minio_key(uri: str) -> str:
     return uri
 
 
+# Ultralytics emits verbose metric keys (e.g. "metrics/mAP50(B)",
+# "train/box_loss", "lr/pg0"). Map them to clean, stable keys the frontend
+# charts read. Raw keys are preserved alongside these for completeness.
+_METRIC_KEY_MAP = {
+    "metrics/mAP50(B)": "mAP50",
+    "metrics/mAP50-95(B)": "mAP50_95",
+    "metrics/precision(B)": "precision",
+    "metrics/recall(B)": "recall",
+    "train/box_loss": "train_box_loss",
+    "train/cls_loss": "train_cls_loss",
+    "train/dfl_loss": "train_dfl_loss",
+    "val/box_loss": "val_box_loss",
+    "val/cls_loss": "val_cls_loss",
+    "val/dfl_loss": "val_dfl_loss",
+    "lr/pg0": "lr",
+    "metrics/accuracy_top1": "top1",
+    "metrics/accuracy_top5": "top5",
+}
+
+# Allow-list of Ultralytics ``model.train()`` arguments users may tune. Keys not
+# present here are ignored so callers cannot inject unsafe/irrelevant kwargs.
+# (data / project / name / device / plots are handled separately.)
+ULTRALYTICS_TRAIN_ARGS: dict[str, Any] = {
+    # Core
+    "epochs": 50,
+    "imgsz": 640,
+    "batch": 16,
+    "patience": 100,
+    "rect": False,
+    "single_cls": False,
+    "seed": 0,
+    # Optimizer & schedule
+    "optimizer": "auto",
+    "lr0": 0.01,
+    "lrf": 0.01,
+    "momentum": 0.937,
+    "weight_decay": 0.0005,
+    "warmup_epochs": 3.0,
+    "warmup_momentum": 0.8,
+    "warmup_bias_lr": 0.1,
+    "cos_lr": False,
+    "close_mosaic": 10,
+    "nbs": 64,
+    "amp": True,
+    # Regularization / loss gains
+    "dropout": 0.0,
+    "label_smoothing": 0.0,
+    "box": 7.5,
+    "cls": 0.5,
+    "dfl": 1.5,
+    "overlap_mask": True,
+    "mask_ratio": 4,
+    "freeze": None,
+    # Augmentation
+    "hsv_h": 0.015,
+    "hsv_s": 0.7,
+    "hsv_v": 0.4,
+    "degrees": 0.0,
+    "translate": 0.1,
+    "scale": 0.5,
+    "shear": 0.0,
+    "perspective": 0.0,
+    "flipud": 0.0,
+    "fliplr": 0.5,
+    "bgr": 0.0,
+    "mosaic": 1.0,
+    "mixup": 0.0,
+    "copy_paste": 0.0,
+    "erasing": 0.4,
+    "crop_fraction": 1.0,
+    "auto_augment": "randaugment",
+}
+
+# Ultralytics writes these plot images into the run directory when plots=True.
+_PLOT_FILES = [
+    "results.png",
+    "PR_curve.png",
+    "P_curve.png",
+    "R_curve.png",
+    "F1_curve.png",
+    "confusion_matrix.png",
+    "confusion_matrix_normalized.png",
+    "labels.jpg",
+    "BoxPR_curve.png",
+]
+
+
+def _normalize_metrics(raw: dict) -> dict:
+    """Return a copy of ``raw`` that also carries a clean alias for every mapped key."""
+    normalized: dict[str, Any] = {}
+    for key, value in raw.items():
+        if (alias := _METRIC_KEY_MAP.get(key)) is not None:
+            normalized[alias] = value
+        normalized[key] = value
+    return normalized
+
+
 def _build_yolo_dataset(
     assets: list[Any],
     annotations_by_asset: dict[str, list[Any]],
@@ -51,8 +148,14 @@ def _build_yolo_dataset(
     minio_client: Any,
     bucket: str,
     task_type: str = "detect",
+    splits: dict[str, str] | None = None,
 ) -> Path:
-    """Export assets and annotations to YOLO dataset format and return path to data.yaml."""
+    """Export assets and annotations to YOLO dataset format and return path to data.yaml.
+
+    ``splits`` maps asset id -> "train" | "val" | "test". Test assets are held
+    out entirely (never written) so they are a true unseen set for evaluation.
+    """
+    splits = splits or {}
     images_train = output_dir / "train" / "images"
     images_val = output_dir / "val" / "images"
     labels_train = output_dir / "train" / "labels"
@@ -68,8 +171,11 @@ def _build_yolo_dataset(
 
     class_idx: dict[str, int] = {name: i for i, name in enumerate(class_names)}
 
-    for i, asset in enumerate(assets):
-        split = "val" if i % 5 == 4 else "train"
+    for asset in assets:
+        split = splits.get(asset.id, "train")
+        if split == "test":
+            # Held out — never exported into the training/val set.
+            continue
         ext = Path(asset.uri).suffix or ".jpg"
         img_name = f"{asset.id}{ext}"
 
@@ -231,6 +337,30 @@ def train_task(payload: dict) -> dict:
                             seen.append(ann.class_name)
                 class_names = seen or ["object"]
 
+        # Resolve the train/val/test split for every asset. Honor a split that
+        # was already persisted on the version (via the split endpoint); fall
+        # back to a deterministic, reproducible hash split for any asset that
+        # has none, using the ratios/seed carried in the run params.
+        from app.services.split_service import (
+            DEFAULT_SEED,
+            asset_split,
+            normalize_ratios,
+            resolve_split,
+        )
+
+        ratios = normalize_ratios(
+            params.get("split_train", 0.8),
+            params.get("split_val", 0.2),
+            params.get("split_test", 0.0),
+        )
+        split_seed = int(params.get("split_seed", DEFAULT_SEED))
+        splits: dict[str, str] = {}
+        split_counts = {"train": 0, "val": 0, "test": 0}
+        for a in assets:
+            s = asset_split(a) or resolve_split(a.id, ratios, split_seed)
+            splits[a.id] = s
+            split_counts[s] = split_counts.get(s, 0) + 1
+
         if job_id:
             update_job_status(db, job_id, status="running", progress=0.1)
 
@@ -245,6 +375,19 @@ def train_task(payload: dict) -> dict:
 
         epoch_metrics: list[dict] = []
         best_pt_path: Path | None = None
+        # Single metrics blob persisted throughout the run: per-epoch history,
+        # the resolved split, a final summary, and links to generated plots.
+        metrics_blob: dict[str, Any] = {
+            "epochs": epoch_metrics,
+            "split": {
+                "counts": split_counts,
+                "ratios": {"train": ratios[0], "val": ratios[1], "test": ratios[2]},
+                "seed": split_seed,
+            },
+        }
+        run.metrics_json = json.dumps(metrics_blob)
+        db.add(run)
+        db.commit()
 
         with tempfile.TemporaryDirectory() as tmp:
             tmp_path = Path(tmp)
@@ -261,6 +404,7 @@ def train_task(payload: dict) -> dict:
                 minio_client=minio_client,
                 bucket=bucket,
                 task_type=task_type,
+                splits=splits,
             )
 
             if job_id:
@@ -285,12 +429,12 @@ def on_train_epoch_end(trainer: Any) -> None:  # noqa: ANN001
                     if hasattr(trainer, "loss"):
                         loss_val = trainer.loss
                         metrics_dict["loss"] = float(loss_val) if loss_val is not None else None
-                    entry = {"epoch": epoch_num, **metrics_dict}
+                    entry = {"epoch": epoch_num, **_normalize_metrics(metrics_dict)}
                     epoch_metrics.append(entry)
 
-                    # Persist metrics to DB
+                    # Persist metrics to DB (epoch history lives inside the blob)
                     try:
-                        run.metrics_json = json.dumps({"epochs": epoch_metrics})
+                        run.metrics_json = json.dumps(metrics_blob)
                         db.add(run)
                         db.commit()
                     except Exception:
@@ -308,33 +452,20 @@ def on_train_epoch_end(trainer: Any) -> None:  # noqa: ANN001
 
                 train_kwargs: dict = {
                     "data": str(data_yaml),
-                    "epochs": total_epochs,
-                    "imgsz": params.get("imgsz", 640),
-                    "batch": params.get("batch", 16),
                     "device": params.get("device", "cpu"),
                     "project": str(output_dir),
                     "name": "train",
-                    # Learning rate hyperparameters
-                    "lr0": params.get("lr0", 0.01),
-                    "lrf": params.get("lrf", 0.01),
-                    "momentum": params.get("momentum", 0.937),
-                    "weight_decay": params.get("weight_decay", 0.0005),
-                    "warmup_epochs": params.get("warmup_epochs", 3.0),
-                    # Augmentation parameters
-                    "hsv_h": params.get("hsv_h", 0.015),
-                    "hsv_s": params.get("hsv_s", 0.7),
-                    "hsv_v": params.get("hsv_v", 0.4),
-                    "degrees": params.get("degrees", 0.0),
-                    "translate": params.get("translate", 0.1),
-                    "scale": params.get("scale", 0.5),
-                    "shear": params.get("shear", 0.0),
-                    "perspective": params.get("perspective", 0.0),
-                    "flipud": params.get("flipud", 0.0),
-                    "fliplr": params.get("fliplr", 0.5),
-                    "mosaic": params.get("mosaic", 1.0),
-                    "mixup": params.get("mixup", 0.0),
-                    "copy_paste": params.get("copy_paste", 0.0),
+                    "plots": True,  # emit PR/confusion/results plots for the UI
                 }
+                # Pull every tunable hyperparameter / augmentation knob from the
+                # allow-list, taking the user's value when present and the
+                # Ultralytics default otherwise. `epochs` stays bound to
+                # total_epochs so progress reporting matches.
+                for key, default in ULTRALYTICS_TRAIN_ARGS.items():
+                    val = params.get(key, default)
+                    if val is not None:
+                        train_kwargs[key] = val
+                train_kwargs["epochs"] = total_epochs
                 model.train(**train_kwargs)
 
                 # Locate best.pt
diff --git a/backend/src/app/schemas/experiment.py b/backend/src/app/schemas/experiment.py
index 823ab17..ba88f35 100644
--- a/backend/src/app/schemas/experiment.py
+++ b/backend/src/app/schemas/experiment.py
@@ -22,6 +22,7 @@ class Experiment(BaseModel):
     dataset_version_id: str | None = None
     params_json: str | None = None
     metrics_json: str | None = None
+    artifacts: str | None = None
     status: str
     code_hash: str | None = None
     started_at: datetime | None = None
diff --git a/backend/src/app/schemas/split.py b/backend/src/app/schemas/split.py
new file mode 100644
index 0000000..6cc0c14
--- /dev/null
+++ b/backend/src/app/schemas/split.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+
+class SplitConfig(BaseModel):
+    """Requested train/val/test split; ratios are relative weights, normalized by the service."""
+
+    train: float = Field(0.8, ge=0)
+    val: float = Field(0.2, ge=0)
+    test: float = Field(0.0, ge=0)
+    seed: int = 42
+    stratify: bool = True
+
+
+class SplitCounts(BaseModel):
+    train: int = 0
+    val: int = 0
+    test: int = 0
+    unassigned: int = 0
+
+
+class SplitSummary(BaseModel):
+    total: int
+    counts: SplitCounts
+    per_class: dict[str, dict[str, int]] = {}
+    ratios: dict[str, float] | None = None
+    seed: int | None = None
+    stratify: bool | None = None
diff --git a/backend/src/app/services/split_service.py b/backend/src/app/services/split_service.py
new file mode 100644
index 0000000..fbbb875
--- /dev/null
+++ b/backend/src/app/services/split_service.py
@@ -0,0 +1,200 @@
+from __future__ import annotations
+
+import hashlib
+import json
+import random
+from collections import defaultdict
+from typing import Any
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from app.models.annotation import Annotation
+from app.models.asset import Asset
+
+VALID_SPLITS = ("train", "val", "test")
+DEFAULT_RATIOS = (0.8, 0.2, 0.0)
+DEFAULT_SEED = 42
+
+
+class SplitConfigError(ValueError):
+    """Raised when split ratios are invalid (negative or all-zero)."""
+
+
+def asset_split(asset: Any) -> str | None:
+    """Return the split tag ("train", "val" or "test") stored on an asset, if any.
+
+    The tag is read from the ``split`` key (falling back to ``subset``) of the
+    asset's ``meta_data``, which may be a JSON string or an already-parsed dict.
+    Missing, unparsable or unrecognized values all yield ``None``.
+    """
+    raw = getattr(asset, "meta_data", None)
+    if not raw:
+        return None
+    if isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except Exception:
+            return None
+    if not isinstance(raw, dict):
+        return None
+    tag = str(raw.get("split") or raw.get("subset") or "").lower()
+    # "" (no tag stored) is not in VALID_SPLITS, so it falls through to None as well.
+    return tag if tag in VALID_SPLITS else None
+
+
+def normalize_ratios(train: float, val: float, test: float) -> tuple[float, float, float]:
+    """Return the ratios L1-normalized to sum to 1.0; raise SplitConfigError if invalid."""
+    parts = [float(train), float(val), float(test)]
+    if not all(0 <= p < float("inf") for p in parts):  # NaN fails both comparisons too
+        raise SplitConfigError("split ratios must be finite and non-negative")
+    total = sum(parts)
+    if total <= 0:
+        raise SplitConfigError("split ratios must not all be zero")
+    return parts[0] / total, parts[1] / total, parts[2] / total
+
+
+def resolve_split(asset_id: str, ratios: tuple[float, float, float], seed: int) -> str:
+    """Deterministically assign an asset to a split via a seeded hash.
+
+    Used as a fallback when an asset has no persisted split, so a training run is
+    always reproducible regardless of whether splits were pre-assigned.
+    """
+    train_r, val_r, test_r = ratios
+    digest = hashlib.sha256(f"{seed}:{asset_id}".encode()).hexdigest()
+    frac = int(digest[:8], 16) / 0x100000000  # 2**32 keeps frac in [0, 1), never 1.0
+    if frac < train_r:
+        return "train"
+    if frac < train_r + val_r or test_r <= 0:  # a zero test ratio must leave test empty
+        return "val"
+    return "test"
+
+
+def _dominant_class(annotations: list[Annotation]) -> str:
+    """Return the asset's most frequent class name, or "__none__" if it has none."""
+    tally: dict[str, int] = {}
+    for annotation in annotations:
+        name = annotation.class_name
+        if name:
+            tally[name] = tally.get(name, 0) + 1
+    # max() keeps the first-seen class on ties, since dicts preserve insertion order.
+    return max(tally, key=tally.__getitem__) if tally else "__none__"
+
+
+def _annotations_by_asset(db: Session, asset_ids: list[str]) -> dict[str, list[Annotation]]:
+    """Load the annotations of ``asset_ids`` in one query, grouped by ``asset_id``."""
+    grouped: dict[str, list[Annotation]] = defaultdict(list)
+    if asset_ids:
+        query = select(Annotation).where(Annotation.asset_id.in_(asset_ids))
+        for annotation in db.scalars(query).all():
+            grouped[annotation.asset_id].append(annotation)
+    return grouped
+
+
+def _slice_counts(n: int, ratios: tuple[float, float, float]) -> tuple[int, int, int]:
+    """Split ``n`` items into (train, val, test) counts that always sum to ``n``."""
+    train_r, val_r, test_r = ratios
+    n_train = min(int(round(n * train_r)), n)
+    n_val = min(int(round(n * val_r)), n - n_train)
+    # Rounding leftovers normally land in test. When test is disabled (ratio 0) they
+    # must not: e.g. 5 items at 0.5/0.5/0 round to 2 + 2, which would leak 1 into test.
+    if test_r <= 0:
+        n_train, n_val = (n_train, n - n_train) if val_r > 0 else (n, 0)
+    return n_train, n_val, n - n_train - n_val
+
+
+def assign_splits(
+    db: Session,
+    version_id: str,
+    *,
+    train: float = 0.8,
+    val: float = 0.2,
+    test: float = 0.0,
+    seed: int = DEFAULT_SEED,
+    stratify: bool = True,
+) -> dict:
+    """Assign train/val/test to every asset of a version, reproducibly for a given seed.
+
+    The chosen split is stored under ``split`` in each ``asset.meta_data`` JSON and
+    committed. With ``stratify`` the ratios are applied inside each dominant-class
+    bucket rather than across the whole version. The returned summary has the
+    same shape as the one from :func:`get_split_summary`.
+    """
+    ratios = normalize_ratios(train, val, test)
+    assets = list(db.scalars(select(Asset).where(Asset.version_id == version_id)).all())
+    anns_by_asset: dict[str, list[Annotation]] = (
+        _annotations_by_asset(db, [asset.id for asset in assets]) if stratify else {}
+    )
+
+    # Group assets by dominant class; without stratification all share one group.
+    buckets: dict[str, list[Asset]] = defaultdict(list)
+    for asset in assets:
+        bucket_key = "__all__"
+        if stratify:
+            bucket_key = _dominant_class(anns_by_asset.get(asset.id, []))
+        buckets[bucket_key].append(asset)
+
+    # One RNG is consumed across the buckets in sorted key order and each bucket is
+    # sorted by id before shuffling, so the same assets + seed give the same result.
+    rng = random.Random(seed)
+    assignment: dict[str, str] = {}
+    for bucket_key in sorted(buckets):
+        members = sorted(buckets[bucket_key], key=lambda asset: asset.id)
+        rng.shuffle(members)
+        n_train, n_val, _ = _slice_counts(len(members), ratios)
+        for position, asset in enumerate(members):
+            in_train, in_val = position < n_train, position < n_train + n_val
+            assignment[asset.id] = "train" if in_train else "val" if in_val else "test"
+
+    # Persist the tag while keeping any other metadata keys that parse as a JSON object.
+    for asset in assets:
+        try:
+            meta = json.loads(asset.meta_data) if asset.meta_data else {}
+        except Exception:
+            meta = {}
+        if not isinstance(meta, dict):
+            meta = {}  # unreadable or non-object metadata is replaced, not merged
+        meta["split"] = assignment[asset.id]
+        asset.meta_data = json.dumps(meta)
+        db.add(asset)
+    db.commit()
+
+    return _summarize(assets, anns_by_asset, ratios, seed, stratify)
+
+
+def get_split_summary(db: Session, version_id: str) -> dict:
+    """Summarize the persisted split of a version: counts plus a per-class breakdown."""
+    version_assets = list(db.scalars(select(Asset).where(Asset.version_id == version_id)).all())
+    annotations = _annotations_by_asset(db, [asset.id for asset in version_assets])
+    return _summarize(version_assets, annotations, ratios=None, seed=None, stratify=None)
+
+
+def _summarize(
+    assets: list[Asset],
+    anns_by_asset: dict[str, list[Annotation]],
+    ratios: tuple[float, float, float] | None,
+    seed: int | None,
+    stratify: bool | None,
+) -> dict:
+    """Count assets per persisted split, overall and per dominant class.
+
+    ``ratios``, ``seed`` and ``stratify`` are echoed into the result only when not None.
+    """
+    totals = {"train": 0, "val": 0, "test": 0, "unassigned": 0}
+    by_class: dict[str, dict[str, int]] = {}
+    for asset in assets:
+        label = asset_split(asset)
+        if label is None:
+            totals["unassigned"] += 1
+            continue
+        totals[label] += 1
+        # With an empty annotation mapping every asset is reported under "__all__".
+        cls = _dominant_class(anns_by_asset.get(asset.id, [])) if anns_by_asset else "__all__"
+        by_class.setdefault(cls, {"train": 0, "val": 0, "test": 0})[label] += 1
+    result: dict[str, Any] = {"total": len(assets), "counts": totals}
+    result["per_class"] = dict(sorted(by_class.items()))
+    if ratios is not None:
+        result["ratios"] = {"train": ratios[0], "val": ratios[1], "test": ratios[2]}
+    optional = {"seed": seed, "stratify": stratify}
+    result.update({k: v for k, v in optional.items() if v is not None})
+    return result
diff --git a/backend/src/app/services/storage.py b/backend/src/app/services/storage.py
index 7f9b3f5..df91424 100644
--- a/backend/src/app/services/storage.py
+++ b/backend/src/app/services/storage.py
@@ -1,10 +1,45 @@
 from __future__ import annotations
 
+import io
 import os
 
 from minio import Minio
 
 
+def _default_bucket() -> str:
+    return os.getenv("MINIO_BUCKET", os.getenv("S3_BUCKET", "visionforge"))
+
+
+def put_bytes(
+    client: Minio,
+    object_key: str,
+    data: bytes,
+    content_type: str = "application/octet-stream",
+    bucket: str | None = None,
+) -> str:
+    """Upload ``data`` as ``object_key`` (default bucket if none given) and return the key."""
+    bucket = bucket or _default_bucket()
+    client.put_object(
+        bucket,
+        object_key,
+        io.BytesIO(data),
+        length=len(data),
+        content_type=content_type,
+    )
+    return object_key
+
+
+def get_bytes(client: Minio, object_key: str, bucket: str | None = None) -> bytes:
+    """Read a whole MinIO object into memory, always closing and releasing the connection."""
+    bucket = bucket or _default_bucket()
+    response = client.get_object(bucket, object_key)
+    try:
+        return response.read()
+    finally:
+        response.close()
+        response.release_conn()
+
+
 def get_minio_client() -> Minio:
     endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000")
     access_key = os.getenv("MINIO_ACCESS_KEY", "minioadmin")

From e9792e6907bf208724a8ec19612e984dd0373e3a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 30 May 2026 19:28:02 +0000
Subject: [PATCH 2/2] =?UTF-8?q?feat(training):=20seamless=20training=20UX?=
 =?UTF-8?q?=20=E2=80=94=20config-driven=20HP/aug=20form,=20persisted=20spl?=
 =?UTF-8?q?its=20UI,=20rich=20metrics=20+=20native=20plots,=20one-click=20?=
 =?UTF-8?q?eval?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Frontend: grouped config-driven hyperparameter/augmentation form, SplitPanel
  with ratio/seed/stratify + per-class breakdown, multi-panel run-detail charts
  (loss/mAP/PR/LR), summary tiles, native plot gallery, Run Evaluation button
- Backend: split GET/POST endpoints + ?split= asset filter, metric-key
  normalization, summary/plots/split in metrics_json, plot streaming endpoint,
  run detail exposes artifacts
- Tests: split_service unit coverage; specs/training-ux docs
---
 backend/src/app/api/experiments.py            |  23 +
 backend/src/app/jobs/tasks/training.py        |  39 +
 backend/src/app/services/asset_service.py     |  12 +
 backend/tests/unit/test_split_service.py      | 110 +++
 frontend/src/components/common/SplitPanel.tsx | 252 ++++++
 frontend/src/pages/experiments/[runId].tsx    | 435 +++++++---
 frontend/src/pages/experiments/new.tsx        | 740 +++++++++++++-----
 specs/training-ux/plan.md                     |  40 +
 specs/training-ux/spec.md                     |  68 ++
 specs/training-ux/tasks.md                    |  16 +
 10 files changed, 1467 insertions(+), 268 deletions(-)
 create mode 100644 backend/tests/unit/test_split_service.py
 create mode 100644 frontend/src/components/common/SplitPanel.tsx
 create mode 100644 specs/training-ux/plan.md
 create mode 100644 specs/training-ux/spec.md
 create mode 100644 specs/training-ux/tasks.md

diff --git a/backend/src/app/api/experiments.py b/backend/src/app/api/experiments.py
index e65bace..94a3a59 100644
--- a/backend/src/app/api/experiments.py
+++ b/backend/src/app/api/experiments.py
@@ -123,6 +123,29 @@ def get_metrics(
                 split = data.get("split")
         except Exception:
             pass
+
+    # Attach presigned GET urls so the frontend can render plots in <img> tags
+    # without forwarding the auth header (mirrors asset download_url).
+    if plots:
+        plots = [dict(p) for p in plots]
+        try:
+            import os
+            from datetime import timedelta
+
+            from app.services import storage
+
+            client = storage.get_minio_client()
+            bucket = os.getenv("MINIO_BUCKET", os.getenv("S3_BUCKET", "visionforge"))
+            for p in plots:
+                if p.get("key"):
+                    try:
+                        p["url"] = client.presigned_get_object(
+                            bucket, p["key"], expires=timedelta(hours=1)
+                        )
+                    except Exception:
+                        p["url"] = None
+        except Exception:
+            pass
     return {
         "run_id": runId,
         "status": e.status,
diff --git a/backend/src/app/jobs/tasks/training.py b/backend/src/app/jobs/tasks/training.py
index ef81c9b..dcd7b42 100644
--- a/backend/src/app/jobs/tasks/training.py
+++ b/backend/src/app/jobs/tasks/training.py
@@ -521,6 +521,45 @@ def on_train_epoch_end(trainer: Any) -> None:  # noqa: ANN001
                     # Log but don't fail the task
                     epoch_metrics.append({"warning": f"model upload failed: {upload_err}"})
 
+            # Upload the Ultralytics plots that exist in the run directory so the UI
+            # can render them, then record a final metric summary taken from the
+            # last real epoch entry. Must run inside the temp-dir block so the plot
+            # files still exist on disk.
+            plot_records: list[dict] = []
+            results_dir = output_dir / "train"
+            if minio_client and results_dir.exists():
+                from app.services import storage as _storage
+
+                for fname in _PLOT_FILES:
+                    fpath = results_dir / fname
+                    if not fpath.exists():
+                        continue
+                    try:
+                        ext = fpath.suffix.lstrip(".").lower()
+                        ctype = "image/jpeg" if ext in ("jpg", "jpeg") else "image/png"
+                        key = f"models/{experiment_id}/plots/{fname}"
+                        _storage.put_bytes(
+                            minio_client,
+                            key,
+                            fpath.read_bytes(),
+                            content_type=ctype,
+                            bucket=bucket,
+                        )
+                        plot_records.append({"name": fpath.stem, "file": fname, "key": key})
+                    except Exception:
+                        continue
+            metrics_blob["plots"] = plot_records
+            # Entries without "epoch" (e.g. a model-upload warning) are not epochs.
+            last_epoch = next((m for m in reversed(epoch_metrics) if "epoch" in m), None)
+            if last_epoch is not None:
+                metrics_blob["summary"] = {k: v for k, v in last_epoch.items() if k != "epoch"}
+            try:
+                run.metrics_json = json.dumps(metrics_blob)
+                db.add(run)
+                db.commit()
+            except Exception:
+                db.rollback()  # keep the session usable for the job-status update below
+
             if job_id:
                 update_job_status(db, job_id, status="running", progress=0.97)
 
diff --git a/backend/src/app/services/asset_service.py b/backend/src/app/services/asset_service.py
index 14ed9d9..b356942 100644
--- a/backend/src/app/services/asset_service.py
+++ b/backend/src/app/services/asset_service.py
@@ -26,6 +26,7 @@ def list_assets(
     *,
     version_id: str | None = None,
     label_status: str | None = None,
+    split: str | None = None,
     limit: int = 100,
     offset: int = 0,
 ) -> tuple[list[Asset], int]:
@@ -34,6 +35,17 @@ def list_assets(
         q = q.where(Asset.version_id == version_id)
     if label_status:
         q = q.where(Asset.label_status == label_status)
+
+    # The split lives inside the meta_data JSON blob, which is not portably
+    # queryable across SQLite (tests) and Postgres, so filter it in Python.
+    if split:
+        from app.services.split_service import asset_split
+
+        want = split.lower()
+        ordered = q.order_by(Asset.created_at.asc(), Asset.id.asc())
+        matched = [a for a in db.scalars(ordered).all() if asset_split(a) == want]
+        return matched[offset : offset + limit], len(matched)
+
     total = db.scalar(select(func.count()).select_from(q.subquery()))
     assets = list(db.scalars(q.offset(offset).limit(limit)).all())
     return assets, total or 0
diff --git a/backend/tests/unit/test_split_service.py b/backend/tests/unit/test_split_service.py
new file mode 100644
index 0000000..f3eb482
--- /dev/null
+++ b/backend/tests/unit/test_split_service.py
@@ -0,0 +1,110 @@
+from __future__ import annotations
+
+import json
+
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+from app.db.base import Base
+from app.models.asset import Asset
+from app.services import split_service
+
+
+@pytest.fixture()
+def db():
+    """Yield a session on a fresh in-memory SQLite DB; model imports register the tables."""
+    import app.models.annotation  # noqa: F401
+    import app.models.dataset  # noqa: F401
+    import app.models.dataset_version  # noqa: F401
+    engine = create_engine("sqlite+pysqlite:///:memory:", connect_args={"check_same_thread": False})
+    Base.metadata.create_all(engine)
+    session = sessionmaker(bind=engine, autoflush=False, autocommit=False)()
+    yield session
+    session.close()
+    engine.dispose()  # also close the pooled SQLite connection, not just the session
+
+
+def _seed_assets(db, n: int, version_id: str = "v1") -> None:
+    """Add ``n`` JPEG assets (ids a0000, a0001, ...) under dataset d1, then commit once."""
+    for idx in range(n):
+        row = Asset(
+            id=f"a{idx:04d}",
+            dataset_id="d1",
+            version_id=version_id,
+            uri=f"key/{idx}.jpg",
+            mime_type="image/jpeg",
+        )
+        db.add(row)
+    db.commit()
+
+
+def test_normalize_ratios_normalizes_and_validates():
+    normalize = split_service.normalize_ratios
+    # Whole-number weights are scaled to fractions; already-fractional ratios are preserved.
+    assert normalize(8, 1, 1) == pytest.approx((0.8, 0.1, 0.1))
+    assert normalize(0.7, 0.2, 0.1) == pytest.approx((0.7, 0.2, 0.1))
+    for rejected in ((0, 0, 0), (-1, 1, 1)):  # all-zero and negative weights
+        with pytest.raises(split_service.SplitConfigError):
+            normalize(*rejected)
+
+
+def test_resolve_split_is_deterministic_and_seed_sensitive():
+    resolve = split_service.resolve_split
+    # Identical id, ratios and seed must give the same (valid) split every time.
+    first = resolve("asset-x", (0.8, 0.1, 0.1), 42)
+    second = resolve("asset-x", (0.8, 0.1, 0.1), 42)
+    assert first == second
+    assert first in split_service.VALID_SPLITS
+    # A different seed should change at least some assignments.
+    even = (0.34, 0.33, 0.33)
+    asset_ids = [f"a{i}" for i in range(200)]
+    moved = [aid for aid in asset_ids if resolve(aid, even, 1) != resolve(aid, even, 2)]
+    assert len(moved) > 0
+
+
+def test_slice_counts_never_loses_items():
+    for total in (0, 1, 7, 10, 99):
+        n_train, n_val, n_test = split_service._slice_counts(total, (0.8, 0.1, 0.1))
+        assert n_train + n_val + n_test == total  # sizes add back up to the input
+        assert n_train >= 0 and n_val >= 0 and n_test >= 0
+
+
+def test_assign_splits_persists_and_summarizes(db):
+    _seed_assets(db, 100)
+    config = {"train": 0.8, "val": 0.1, "test": 0.1, "seed": 42, "stratify": False}
+    result = split_service.assign_splits(db, "v1", **config)
+    tally = result["counts"]
+    # 100 assets at 80/10/10 divide exactly, leaving nothing unassigned.
+    assert sum(tally[name] for name in ("train", "val", "test")) == 100
+    assert (tally["train"], tally["val"], tally["test"]) == (80, 10, 10)
+    assert tally["unassigned"] == 0
+    assert result["seed"] == 42
+
+    # The split is stored in the asset's meta_data JSON and re-read by get_split_summary.
+    stored = json.loads(db.get(Asset, "a0000").meta_data)
+    assert stored["split"] in split_service.VALID_SPLITS
+    reread = split_service.get_split_summary(db, "v1")
+    assert reread["counts"] == tally
+
+
+def test_assign_splits_is_reproducible(db):
+    _seed_assets(db, 50)
+    snapshots = []
+    # Assign twice with the same seed and capture the id → split mapping after each run.
+    for _ in range(2):
+        split_service.assign_splits(db, "v1", seed=7, stratify=False)
+        snapshots.append({a.id: split_service.asset_split(a) for a in db.query(Asset).all()})
+    assert snapshots[0] == snapshots[1]
+
+
+def test_asset_split_reads_meta_data():
+    class Stub:
+        """Minimal stand-in exposing only the ``meta_data`` JSON string."""
+
+        def __init__(self, payload: dict) -> None:
+            self.meta_data = json.dumps(payload)
+
+    # An upper-case stored value comes back lowercased; a missing key yields None.
+    assert split_service.asset_split(Stub({"split": "TEST"})) == "test"
+    assert split_service.asset_split(Stub({"other": 1})) is None
diff --git a/frontend/src/components/common/SplitPanel.tsx b/frontend/src/components/common/SplitPanel.tsx
new file mode 100644
index 0000000..d2df832
--- /dev/null
+++ b/frontend/src/components/common/SplitPanel.tsx
@@ -0,0 +1,252 @@
+import React, { useEffect, useState, useCallback } from 'react';
+import Input from '@/components/ui/Input';
+import Button from '@/components/ui/Button';
+import { apiGet, apiPost } from '@/services/api';
+
+export interface SplitConfig {
+  train: number;
+  val: number;
+  test: number;
+  seed: number;
+  stratify: boolean;
+}
+
+interface SplitSummary {
+  total: number;
+  counts: { train: number; val: number; test: number; unassigned: number };
+  per_class: Record<string, { train: number; val: number; test: number }>;
+  ratios?: { train: number; val: number; test: number };
+  seed?: number;
+  stratify?: boolean;
+}
+
+export const DEFAULT_SPLIT: SplitConfig = {
+  train: 0.8,
+  val: 0.2,
+  test: 0.0,
+  seed: 42,
+  stratify: true,
+};
+
+const SPLIT_COLORS: Record<string, string> = {
+  train: 'oklch(0.60 0.10 155)', // green
+  val: 'oklch(0.72 0.10 82)', // amber
+  test: 'oklch(0.72 0.08 230)', // blue
+  unassigned: 'oklch(0.45 0.02 250)', // muted
+};
+
+function StackedBar({ counts }: { counts: SplitSummary['counts'] }) {
+  // Bar segments are sized by share of all assets; the legend always lists every bucket.
+  const buckets = ['train', 'val', 'test', 'unassigned'] as const;
+  const total = buckets.reduce((acc, k) => acc + counts[k], 0) || 1; // 1 when all are 0
+  const shown = buckets.filter((k) => counts[k] > 0);
+  return (
+    <div>
+      <div className="flex h-5 w-full overflow-hidden border border-[var(--hud-border-subtle)]">
+        {shown.map((k) => (
+          <div
+            key={k}
+            style={{ width: `${(counts[k] / total) * 100}%`, backgroundColor: SPLIT_COLORS[k] }}
+            title={`${k}: ${counts[k]}`}
+          />
+        ))}
+      </div>
+      <div className="mt-2 flex flex-wrap gap-3 text-xs font-mono">
+        {buckets.map((k) => (
+          <div key={k} className="flex items-center gap-1.5">
+            <span
+              className="inline-block h-2.5 w-2.5"
+              style={{ backgroundColor: SPLIT_COLORS[k] }}
+            />
+            <span className="text-[var(--hud-text-muted)]">{k}</span>
+            <span className="text-[var(--hud-text-data)]">{counts[k]}</span>
+          </div>
+        ))}
+      </div>
+    </div>
+  );
+}
+
+interface Props {
+  datasetId?: string;
+  versionId?: string;
+  /** Notifies parent of the current ratio/seed config (for training params). */
+  onConfigChange?: (cfg: SplitConfig) => void;
+  /** When false, hides the apply/persist controls (read-only display). */
+  editable?: boolean;
+}
+
+export default function SplitPanel({
+  datasetId,
+  versionId,
+  onConfigChange,
+  editable = true,
+}: Props) { // Split ratio/seed editor plus the persisted-split summary for one dataset version.
+  const [cfg, setCfg] = useState<SplitConfig>(DEFAULT_SPLIT);
+  const [summary, setSummary] = useState<SplitSummary | null>(null);
+  const [applying, setApplying] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  // Ratios are fractions that must each be >= 0 and sum to 1 (0.001 tolerance for float error).
+  const sum = cfg.train + cfg.val + cfg.test;
+  const ratiosValid = Math.abs(sum - 1) < 0.001 && cfg.train >= 0 && cfg.val >= 0 && cfg.test >= 0;
+  // Report the config to the parent on mount and whenever one of its fields changes.
+  useEffect(() => {
+    onConfigChange?.(cfg);
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [cfg.train, cfg.val, cfg.test, cfg.seed, cfg.stratify]);
+  // Fetch the persisted summary and adopt its ratios/seed/stratify when the response has them.
+  const loadSummary = useCallback(() => {
+    if (!datasetId || !versionId) return setSummary(null); // nothing selected: drop stale summary
+    apiGet<SplitSummary>(`/api/datasets/${datasetId}/versions/${versionId}/split`)
+      .then((s) => {
+        setSummary(s);
+        if (s.ratios) {
+          setCfg((prev) => ({
+            ...prev,
+            train: s.ratios!.train,
+            val: s.ratios!.val,
+            test: s.ratios!.test,
+            seed: s.seed ?? prev.seed,
+            stratify: s.stratify ?? prev.stratify,
+          }));
+        }
+      })
+      .catch(() => setSummary(null)); // a failed load falls back to the "no split" message
+  }, [datasetId, versionId]);
+
+  useEffect(() => {
+    loadSummary();
+  }, [loadSummary]);
+  // POST the current config; the response is the new summary, any failure is shown inline.
+  async function applySplit() {
+    if (!datasetId || !versionId || !ratiosValid) return;
+    setApplying(true);
+    setError(null);
+    try {
+      const s = await apiPost<SplitSummary>(
+        `/api/datasets/${datasetId}/versions/${versionId}/split`,
+        cfg,
+      );
+      setSummary(s);
+    } catch (err) {
+      setError(err instanceof Error ? err.message : 'Failed to assign split');
+    } finally {
+      setApplying(false);
+    }
+  }
+
+  function setRatio(key: 'train' | 'val' | 'test', value: number) {
+    setCfg((prev) => ({ ...prev, [key]: Number.isFinite(value) ? value : 0 })); // NaN (empty) → 0
+  }
+
+  const topClasses = summary
+    ? Object.entries(summary.per_class)
+        .filter(([name]) => name !== '__all__' && name !== '__none__')
+        .slice(0, 12) // show at most 12 classes in the table
+    : [];
+
+  return (
+    <div className="space-y-3">
+      {editable && (
+        <>
+          <div className="grid grid-cols-3 gap-3">
+            {(['train', 'val', 'test'] as const).map((k) => (
+              <div key={k}>
+                <label className="label-overline mb-1 block" htmlFor={`split-${k}`}>
+                  {k} ratio
+                </label>
+                <Input
+                  id={`split-${k}`}
+                  type="number"
+                  min={0}
+                  max={1}
+                  step={0.05}
+                  value={cfg[k]}
+                  onChange={(e) => setRatio(k, parseFloat(e.target.value))}
+                />
+              </div>
+            ))}
+          </div>
+          <div className="grid grid-cols-2 gap-3">
+            <div>
+              <label className="label-overline mb-1 block" htmlFor="split-seed">
+                seed
+              </label>
+              <Input
+                id="split-seed"
+                type="number"
+                value={cfg.seed}
+                onChange={(e) => setCfg((p) => ({ ...p, seed: parseInt(e.target.value, 10) || 0 }))}
+              />
+            </div>
+            <label className="flex items-end gap-2 pb-2 text-xs font-mono text-[var(--hud-text-muted)]">
+              <input
+                type="checkbox"
+                checked={cfg.stratify}
+                onChange={(e) => setCfg((p) => ({ ...p, stratify: e.target.checked }))}
+              />
+              stratify by class
+            </label>
+          </div>
+          {!ratiosValid && (
+            <p className="text-[0.6875rem] font-mono text-[var(--hud-danger-text)]">
+              Ratios must be ≥ 0 and sum to 1.0 (current: {sum.toFixed(2)}).
+            </p>
+          )}
+          {error && (
+            <p className="text-[0.6875rem] font-mono text-[var(--hud-danger-text)]">{error}</p>
+          )}
+          {datasetId && versionId && (
+            <Button
+              type="button"
+              size="sm"
+              variant="outline"
+              disabled={!ratiosValid || applying}
+              onClick={applySplit}
+            >
+              {applying ? 'Assigning…' : 'Apply & persist split'}
+            </Button>
+          )}
+        </>
+      )}
+
+      {summary ? (
+        <div className="space-y-3 pt-1">
+          <StackedBar counts={summary.counts} />
+          {topClasses.length > 0 && (
+            <div className="overflow-x-auto">
+              <table className="w-full text-xs font-mono">
+                <thead>
+                  <tr className="text-[var(--hud-text-muted)]">
+                    <th className="py-1 pr-4 text-left label-overline">class</th>
+                    <th className="py-1 pr-4 text-right label-overline">train</th>
+                    <th className="py-1 pr-4 text-right label-overline">val</th>
+                    <th className="py-1 text-right label-overline">test</th>
+                  </tr>
+                </thead>
+                <tbody>
+                  {topClasses.map(([name, c]) => (
+                    <tr key={name} className="border-t border-[var(--hud-border-subtle)]">
+                      <td className="py-1 pr-4 text-[var(--hud-text-secondary)]">{name}</td>
+                      <td className="py-1 pr-4 text-right text-[var(--hud-text-data)]">
+                        {c.train}
+                      </td>
+                      <td className="py-1 pr-4 text-right text-[var(--hud-text-data)]">{c.val}</td>
+                      <td className="py-1 text-right text-[var(--hud-text-data)]">{c.test}</td>
+                    </tr>
+                  ))}
+                </tbody>
+              </table>
+            </div>
+          )}
+        </div>
+      ) : (
+        <p className="text-xs font-mono text-[var(--hud-text-muted)]">
+          {versionId
+            ? 'No split assigned yet.'
+            : 'Select a dataset version to configure its split.'}
+        </p>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/pages/experiments/[runId].tsx b/frontend/src/pages/experiments/[runId].tsx
index fd52e3c..c04582e 100644
--- a/frontend/src/pages/experiments/[runId].tsx
+++ b/frontend/src/pages/experiments/[runId].tsx
@@ -1,5 +1,5 @@
 import React, { useState, useEffect, useRef } from 'react';
-import { useParams, Link } from 'react-router-dom';
+import { useParams, Link, useNavigate } from 'react-router-dom';
 import Badge from '@/components/ui/Badge';
 import Button from '@/components/ui/Button';
 import Loading from '@/components/common/Loading';
@@ -12,79 +12,129 @@ interface Run {
   status: string;
   params_json?: string;
   metrics_json?: string;
+  artifacts?: string;
+  dataset_version_id?: string;
   created_at: string;
   completed_at?: string;
   project_id?: string;
 }
 
-interface MetricPoint {
-  epoch: number;
-  box_loss?: number;
-  cls_loss?: number;
-  mAP50?: number;
-  precision?: number;
-  recall?: number;
+type MetricPoint = Record<string, number | undefined> & { epoch: number };
+
+interface PlotRecord {
+  name: string;
+  file: string;
+  key: string;
+  url?: string | null;
+}
+
+interface SplitInfo {
+  counts: { train: number; val: number; test: number; unassigned?: number };
+  ratios?: { train: number; val: number; test: number };
+  seed?: number;
 }
 
-const METRIC_KEYS: Array<keyof Omit<MetricPoint, 'epoch'>> = ['mAP50', 'box_loss', 'cls_loss', 'precision', 'recall'];
+interface MetricsResponse {
+  status: string;
+  metrics: MetricPoint[];
+  summary?: Record<string, number> | null;
+  plots?: PlotRecord[];
+  split?: SplitInfo | null;
+}
 
-// HUD-palette colors for chart lines (muted, not neon)
 const COLORS = [
-  'oklch(0.72 0.10 82)',   // amber/accent
-  'oklch(0.60 0.10 155)', // success green
-  'oklch(0.68 0.16 20)',  // danger-text
-  'oklch(0.72 0.08 230)', // info blue
-  'oklch(0.70 0.10 75)',  // warning
+  'oklch(0.72 0.10 82)',
+  'oklch(0.60 0.10 155)',
+  'oklch(0.68 0.16 20)',
+  'oklch(0.72 0.08 230)',
+  'oklch(0.70 0.10 75)',
+  'oklch(0.66 0.14 300)',
+];
+
+// Chart panels: each groups same-unit series so a shared y-scale is meaningful.
+const PANELS: { title: string; keys: string[] }[] = [
+  {
+    title: 'Loss (train vs val)',
+    keys: [
+      'train_box_loss',
+      'val_box_loss',
+      'train_cls_loss',
+      'val_cls_loss',
+      'train_dfl_loss',
+      'val_dfl_loss',
+      'loss',
+    ],
+  },
+  { title: 'mAP', keys: ['mAP50', 'mAP50_95'] },
+  { title: 'Precision / Recall', keys: ['precision', 'recall'] },
+  { title: 'Accuracy', keys: ['top1', 'top5'] },
+  { title: 'Learning Rate', keys: ['lr'] },
 ];
 
+const SPLIT_COLORS: Record<string, string> = {
+  train: 'oklch(0.60 0.10 155)',
+  val: 'oklch(0.72 0.10 82)',
+  test: 'oklch(0.72 0.08 230)',
+};
+
 function statusVariant(status: string): 'default' | 'success' | 'warning' | 'danger' {
   switch (status) {
-    case 'succeeded': return 'success';
-    case 'running':   return 'warning';
-    case 'failed':    return 'danger';
-    default:          return 'default';
+    case 'succeeded':
+      return 'success';
+    case 'running':
+      return 'warning';
+    case 'failed':
+      return 'danger';
+    default:
+      return 'default';
   }
 }
 
-function MetricsChart({ data, keys }: { data: MetricPoint[]; keys: string[] }) {
-  if (!data.length) {
-    return <p className="text-xs font-mono text-[var(--hud-text-muted)] py-4">No metrics recorded yet…</p>;
+function LineChart({ data, keys }: { data: MetricPoint[]; keys: string[] }) {
+  const present = keys.filter((k) => data.some((d) => d[k] != null));
+  if (!present.length) {
+    return <p className="text-xs font-mono text-[var(--hud-text-muted)] py-4">No data.</p>;
   }
-
-  const W = 500, H = 180, PAD = 36;
+  const W = 460;
+  const H = 160;
+  const PAD = 34;
+  const all = present.flatMap((k) => data.map((d) => d[k]).filter((v): v is number => v != null));
+  const min = Math.min(...all);
+  const max = Math.max(...all);
+  const norm = (v: number) => (max === min ? 0.5 : (v - min) / (max - min));
 
   return (
     <div className="overflow-x-auto">
-      <svg viewBox={`0 0 ${W} ${H}`} className="w-full" aria-label="Metrics chart" style={{ background: 'var(--hud-inset)' }}>
-        {/* Grid lines */}
-        {[0.25, 0.5, 0.75, 1].map((t) => (
+      <svg viewBox={`0 0 ${W} ${H}`} className="w-full" style={{ background: 'var(--hud-inset)' }}>
+        {[0, 0.25, 0.5, 0.75, 1].map((t) => (
           <line
             key={t}
-            x1={PAD} y1={PAD + (1 - t) * (H - PAD * 2)}
-            x2={W - PAD} y2={PAD + (1 - t) * (H - PAD * 2)}
-            stroke="var(--hud-border-subtle)" strokeWidth="1"
+            x1={PAD}
+            y1={PAD + (1 - t) * (H - PAD * 2)}
+            x2={W - PAD}
+            y2={PAD + (1 - t) * (H - PAD * 2)}
+            stroke="var(--hud-border-subtle)"
+            strokeWidth="1"
           />
         ))}
-        {/* Axes */}
-        <line x1={PAD} y1={H - PAD} x2={W - PAD} y2={H - PAD} stroke="var(--hud-border-default)" strokeWidth="1" />
-        <line x1={PAD} y1={PAD}     x2={PAD}     y2={H - PAD} stroke="var(--hud-border-default)" strokeWidth="1" />
-
-        {keys.map((key, ki) => {
-          const vals = data.map((d) => (d as Record<string, number | undefined>)[key]).filter((v) => v != null) as number[];
-          if (!vals.length) return null;
-          const min = Math.min(...vals);
-          const max = Math.max(...vals);
-          const norm = (v: number) => (max === min ? 0.5 : (v - min) / (max - min));
+        {/* y-axis min/max labels */}
+        <text x={4} y={PAD + 4} fontSize="8" fill="var(--hud-text-muted)" fontFamily="monospace">
+          {max.toFixed(3)}
+        </text>
+        <text x={4} y={H - PAD} fontSize="8" fill="var(--hud-text-muted)" fontFamily="monospace">
+          {min.toFixed(3)}
+        </text>
+        {present.map((key, ki) => {
           const pts = data
             .map((d, i) => {
-              const val = (d as Record<string, number | undefined>)[key];
+              const val = d[key];
               if (val == null) return null;
               const x = PAD + (i / Math.max(data.length - 1, 1)) * (W - PAD * 2);
               const y = PAD + (1 - norm(val)) * (H - PAD * 2);
               return `${x},${y}`;
             })
-            .filter(Boolean).join(' ');
-
+            .filter(Boolean)
+            .join(' ');
           return (
             <polyline
               key={key}
@@ -95,15 +145,19 @@ function MetricsChart({ data, keys }: { data: MetricPoint[]; keys: string[] }) {
             />
           );
         })}
-
-        <text x={W / 2} y={H - 6} textAnchor="middle" fontSize="9" fill="var(--hud-text-muted)" fontFamily="monospace">
+        <text
+          x={W / 2}
+          y={H - 4}
+          textAnchor="middle"
+          fontSize="9"
+          fill="var(--hud-text-muted)"
+          fontFamily="monospace"
+        >
           EPOCH
         </text>
       </svg>
-
-      {/* Legend */}
       <div className="flex flex-wrap gap-3 mt-2">
-        {keys.map((key, ki) => (
+        {present.map((key, ki) => (
           <div key={key} className="flex items-center gap-1.5 text-xs font-mono">
             <div className="w-4 h-0.5" style={{ backgroundColor: COLORS[ki % COLORS.length] }} />
             <span className="text-[var(--hud-text-muted)]">{key}</span>
@@ -114,17 +168,51 @@ function MetricsChart({ data, keys }: { data: MetricPoint[]; keys: string[] }) {
   );
 }
 
+function Panel({ title, children }: { title: string; children: React.ReactNode }) { // Bordered card: accent-dot title bar on top, padded children below.
+  return (
+    <div className="border border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
+      <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center gap-2">
+        <div className="h-1.5 w-1.5 bg-[var(--hud-accent)]" />
+        <span className="label-overline">{title}</span>
+      </div>
+      <div className="p-4">{children}</div>
+    </div>
+  );
+}
+
+function fmtMetric(key: string, v: number): string { // Format one metric value for display.
+  if (/loss|lr/i.test(key)) return v.toFixed(4); // losses and learning rate: raw, 4 decimals
+  return v <= 1 ? `${(v * 100).toFixed(1)}%` : v.toFixed(3); // up to and incl. 1.0 → percent
+}
+
 export default function ExperimentDetail() {
   const { runId } = useParams<{ runId: string }>();
+  const navigate = useNavigate();
   const [run, setRun] = useState<Run | null>(null);
-  const [metrics, setMetrics] = useState<MetricPoint[]>([]);
+  const [m, setM] = useState<MetricsResponse | null>(null);
+  const [evals, setEvals] = useState<
+    { id: string; status: string; primary_metric?: number; metrics?: Record<string, number> }[]
+  >([]);
   const [loading, setLoading] = useState(true);
   const [error, setError] = useState<string | null>(null);
   const [exporting, setExporting] = useState(false);
   const [exportJobId, setExportJobId] = useState<string | null>(null);
-  const runStatusRef = useRef<string | undefined>(undefined);
+  const [zoom, setZoom] = useState<string | null>(null);
+  const statusRef = useRef<string | undefined>(undefined);
 
-  useEffect(() => { runStatusRef.current = run?.status; }, [run?.status]);
+  useEffect(() => {
+    statusRef.current = run?.status;
+  }, [run?.status]);
+
+  function modelArtifactId(r: Run | null): string | null {
+    if (!r?.artifacts) return null;
+    try {
+      const arr = JSON.parse(r.artifacts) as { id: string; type: string }[];
+      return (arr.find((a) => a.type === 'weights') || arr[0])?.id || null;
+    } catch {
+      return null;
+    }
+  }
 
   useEffect(() => {
     if (!runId) return;
@@ -132,11 +220,17 @@ export default function ExperimentDetail() {
       try {
         const [runData, metricsData] = await Promise.all([
           apiGet<Run>(`/api/experiments/runs/${runId}`),
-          apiGet<{ metrics: MetricPoint[] }>(`/api/experiments/runs/${runId}/metrics`),
+          apiGet<MetricsResponse>(`/api/experiments/runs/${runId}/metrics`),
         ]);
         setRun(runData);
-        setMetrics(metricsData.metrics || []);
+        setM(metricsData);
         setLoading(false);
+        const artId = modelArtifactId(runData);
+        if (artId && runData.status === 'succeeded') {
+          apiGet<{ evaluations?: typeof evals }>(`/api/artifacts/models/${artId}/lineage`)
+            .then((d) => setEvals(d.evaluations || []))
+            .catch(() => {});
+        }
       } catch (err) {
         setError(err instanceof Error ? err.message : 'Failed to load run');
         setLoading(false);
@@ -144,8 +238,8 @@ export default function ExperimentDetail() {
     };
     poll();
     const interval = setInterval(() => {
-      const status = runStatusRef.current;
-      if (status === 'running' || status === 'queued') poll();
+      const s = statusRef.current;
+      if (s === 'running' || s === 'queued') poll();
     }, 3000);
     return () => clearInterval(interval);
   }, [runId]);
@@ -154,7 +248,10 @@ export default function ExperimentDetail() {
     if (!run) return;
     setExporting(true);
     try {
-      const j = await apiPost<{ id: string; status: string }>(`/api/artifacts/models/${run.id}/export`, {});
+      const j = await apiPost<{ id: string; status: string }>(
+        `/api/artifacts/models/${run.id}/export`,
+        {},
+      );
       setExportJobId(j.id);
     } catch (err) {
       console.error(err);
@@ -163,26 +260,51 @@ export default function ExperimentDetail() {
     }
   }
 
-  if (loading) return <div className="py-6"><Loading label="Loading experiment…" /></div>;
-  if (error)   return <ErrorState title="Failed to load experiment" description={error} />;
-  if (!run)    return <ErrorState title="Run not found" />;
+  function handleEvaluate() {
+    if (!run) return;
+    const artId = modelArtifactId(run);
+    const q = new URLSearchParams();
+    if (artId) q.set('artifact_id', artId);
+    if (run.dataset_version_id) q.set('dataset_version_id', run.dataset_version_id);
+    navigate(`/evaluations/new?${q.toString()}`);
+  }
+
+  if (loading)
+    return (
+      <div className="py-6">
+        <Loading label="Loading experiment…" />
+      </div>
+    );
+  if (error) return <ErrorState title="Failed to load experiment" description={error} />;
+  if (!run) return <ErrorState title="Run not found" />;
 
   let params: Record<string, unknown> = {};
-  try { if (run.params_json) params = JSON.parse(run.params_json); } catch { /* ignore */ }
+  try {
+    if (run.params_json) params = JSON.parse(run.params_json);
+  } catch {
+    /* ignore */
+  }
 
+  const metrics = m?.metrics || [];
+  const summary = m?.summary || null;
+  const plots = (m?.plots || []).filter((p) => p.url);
+  const split = m?.split || null;
   const isActive = run.status === 'running' || run.status === 'queued';
-  const activeMetricKeys = METRIC_KEYS.filter((k) =>
-    metrics.some((m) => (m as Record<string, number | undefined>)[k] != null)
-  );
+  const activePanels = PANELS.filter((p) => p.keys.some((k) => metrics.some((d) => d[k] != null)));
+  const splitTotal = split ? split.counts.train + split.counts.val + split.counts.test : 0;
 
   return (
     <div className="space-y-4">
       {/* Header */}
       <div className="border-b border-[var(--hud-border-subtle)] pb-3">
         <nav className="label-overline mb-1">
-          <Link to="/experiments" className="hover:text-[var(--hud-accent)] transition-colors">EXPERIMENTS</Link>
+          <Link to="/experiments" className="hover:text-[var(--hud-accent)] transition-colors">
+            EXPERIMENTS
+          </Link>
           <span className="mx-1.5 text-[var(--hud-border-strong)]">/</span>
-          <span className="text-[var(--hud-text-secondary)]">{(run.name || run.id.slice(0, 8)).toUpperCase()}</span>
+          <span className="text-[var(--hud-text-secondary)]">
+            {(run.name || run.id.slice(0, 8)).toUpperCase()}
+          </span>
         </nav>
         <div className="flex items-center justify-between gap-4">
           <h1 className="flex items-center gap-2 flex-wrap">
@@ -195,45 +317,108 @@ export default function ExperimentDetail() {
             )}
           </h1>
           {run.status === 'succeeded' && (
-            <Button onClick={handleExport} disabled={exporting || !!exportJobId} size="sm">
-              {exporting ? 'Exporting…' : exportJobId ? `Job ${exportJobId.slice(0, 8)}` : 'Export ONNX'}
-            </Button>
+            <div className="flex gap-2">
+              <Button onClick={handleEvaluate} size="sm" variant="outline">
+                Run Evaluation
+              </Button>
+              <Button onClick={handleExport} disabled={exporting || !!exportJobId} size="sm">
+                {exporting
+                  ? 'Exporting…'
+                  : exportJobId
+                    ? `Job ${exportJobId.slice(0, 8)}`
+                    : 'Export ONNX'}
+              </Button>
+            </div>
           )}
         </div>
       </div>
 
-      {/* Main content grid */}
-      <div className="grid gap-4 lg:grid-cols-[1.5fr_1fr]">
-        {/* Metrics chart */}
-        <div className="border border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
-          <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center gap-2">
-            <div className="h-1.5 w-1.5 bg-[var(--hud-accent)]" />
-            <span className="label-overline">Training Metrics</span>
-          </div>
-          <div className="p-4">
-            <MetricsChart
-              data={metrics}
-              keys={(activeMetricKeys.length > 0 ? activeMetricKeys : METRIC_KEYS) as unknown as string[]}
-            />
-          </div>
+      {/* Summary tiles */}
+      {summary && (
+        <div className="grid grid-cols-2 sm:grid-cols-4 lg:grid-cols-6 gap-2">
+          {['mAP50', 'mAP50_95', 'precision', 'recall', 'top1', 'top5']
+            .filter((k) => summary[k] != null)
+            .map((k) => (
+              <div
+                key={k}
+                className="border border-[var(--hud-border-default)] bg-[var(--hud-surface)] px-3 py-2"
+              >
+                <div className="label-overline mb-1">{k}</div>
+                <div className="text-lg font-mono text-[var(--hud-text-data)]">
+                  {fmtMetric(k, summary[k]!)}
+                </div>
+              </div>
+            ))}
         </div>
+      )}
+
+      {/* Metric charts */}
+      <div className="grid gap-4 lg:grid-cols-2">
+        {(activePanels.length ? activePanels : PANELS.slice(0, 2)).map((p) => (
+          <Panel key={p.title} title={p.title}>
+            <LineChart data={metrics} keys={p.keys} />
+          </Panel>
+        ))}
+      </div>
+
+      {/* Split + params */}
+      <div className="grid gap-4 lg:grid-cols-2">
+        {split && splitTotal > 0 && (
+          <Panel title="Dataset Split">
+            <div className="flex h-5 w-full overflow-hidden border border-[var(--hud-border-subtle)]">
+              {(['train', 'val', 'test'] as const)
+                .filter((k) => split.counts[k] > 0)
+                .map((k) => (
+                  <div
+                    key={k}
+                    style={{
+                      width: `${(split.counts[k] / splitTotal) * 100}%`,
+                      backgroundColor: SPLIT_COLORS[k],
+                    }}
+                    title={`${k}: ${split.counts[k]}`}
+                  />
+                ))}
+            </div>
+            <div className="mt-2 flex flex-wrap gap-3 text-xs font-mono">
+              {(['train', 'val', 'test'] as const).map((k) => (
+                <div key={k} className="flex items-center gap-1.5">
+                  <span
+                    className="inline-block h-2.5 w-2.5"
+                    style={{ backgroundColor: SPLIT_COLORS[k] }}
+                  />
+                  <span className="text-[var(--hud-text-muted)]">{k}</span>
+                  <span className="text-[var(--hud-text-data)]">{split.counts[k]}</span>
+                </div>
+              ))}
+              {split.seed != null && (
+                <span className="text-[var(--hud-text-muted)]">seed {split.seed}</span>
+              )}
+            </div>
+          </Panel>
+        )}
 
-        {/* Parameters */}
         <div className="border border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
           <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center gap-2">
             <div className="h-1.5 w-1.5 bg-[var(--hud-border-strong)]" />
             <span className="label-overline">Parameters</span>
           </div>
-          <div className="p-4">
+          <div className="p-4 max-h-64 overflow-y-auto">
             {Object.keys(params).length === 0 ? (
-              <p className="text-xs font-mono text-[var(--hud-text-muted)]">No parameters recorded.</p>
+              <p className="text-xs font-mono text-[var(--hud-text-muted)]">
+                No parameters recorded.
+              </p>
             ) : (
               <table className="w-full text-xs font-mono">
                 <tbody>
                   {Object.entries(params).map(([k, v]) => (
-                    <tr key={k} className="border-b border-[var(--hud-border-subtle)] last:border-0">
-                      <td className="py-1.5 pr-4 text-[var(--hud-text-muted)] uppercase tracking-wide">{k}</td>
-                      <td className="py-1.5 text-[var(--hud-text-data)]">{String(v)}</td>
+                    <tr
+                      key={k}
+                      className="border-b border-[var(--hud-border-subtle)] last:border-0"
+                    >
+                      <td className="py-1 pr-4 text-[var(--hud-text-muted)] uppercase tracking-wide">
+                        {k}
+                      </td>
+                      <td className="py-1 text-[var(--hud-text-data)]">{String(v)}</td>
                     </tr>
                   ))}
                 </tbody>
@@ -243,6 +428,58 @@ export default function ExperimentDetail() {
         </div>
       </div>
 
+      {/* Plots gallery */}
+      {plots.length > 0 && (
+        <Panel title="Plots">
+          <div className="grid grid-cols-2 sm:grid-cols-3 gap-3">
+            {plots.map((p) => (
+              <button
+                key={p.file}
+                type="button"
+                onClick={() => setZoom(p.url || null)}
+                className="border border-[var(--hud-border-subtle)] bg-[var(--hud-inset)] p-1 hover:border-[var(--hud-accent)]"
+              >
+                <img src={p.url || ''} alt={p.name} className="w-full h-auto" loading="lazy" />
+                <div className="label-overline mt-1 text-center">{p.name}</div>
+              </button>
+            ))}
+          </div>
+        </Panel>
+      )}
+
+      {/* Evaluations of this model */}
+      {run.status === 'succeeded' && (
+        <Panel title="Evaluations">
+          {evals.length === 0 ? (
+            <p className="text-xs font-mono text-[var(--hud-text-muted)]">
+              No evaluations yet. Use{' '}
+              <span className="text-[var(--hud-accent)]">Run Evaluation</span> to score this model
+              on the test split.
+            </p>
+          ) : (
+            <ul className="space-y-1.5">
+              {evals.map((ev) => {
+                const metric = ev.primary_metric ?? ev.metrics?.mAP50 ?? ev.metrics?.accuracy;
+                return (
+                  <li key={ev.id}>
+                    <Link
+                      to={`/evaluations/${ev.id}`}
+                      className="flex items-center justify-between text-xs font-mono hover:text-[var(--hud-accent)]"
+                    >
+                      <span className="text-[var(--hud-text-data)]">{ev.id.slice(0, 8)}</span>
+                      <Badge variant={statusVariant(ev.status)}>{ev.status}</Badge>
+                      <span className="text-[var(--hud-text-muted)]">
+                        {metric != null ? fmtMetric('mAP50', metric) : '—'}
+                      </span>
+                    </Link>
+                  </li>
+                );
+              })}
+            </ul>
+          )}
+        </Panel>
+      )}
+
       {/* Run details */}
       <div className="border border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
         <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center gap-2">
@@ -252,7 +489,9 @@ export default function ExperimentDetail() {
         <div className="px-4 py-3 grid grid-cols-2 sm:grid-cols-4 gap-4">
           <div>
             <div className="label-overline mb-1">Run ID</div>
-            <code className="text-[0.6875rem] font-mono text-[var(--hud-text-data)] break-all">{run.id}</code>
+            <code className="text-[0.6875rem] font-mono text-[var(--hud-text-data)] break-all">
+              {run.id}
+            </code>
           </div>
           <div>
             <div className="label-overline mb-1">Status</div>
@@ -277,12 +516,26 @@ export default function ExperimentDetail() {
 
       {exportJobId && (
         <div className="border border-[var(--hud-success)] border-l-2 bg-[var(--hud-success-dim)] px-4 py-2.5 text-xs font-mono text-[var(--hud-success-text)]">
-          ONNX export queued · JOB <span className="text-[var(--hud-text-data)]">{exportJobId}</span>{' '}
-          <Link to="/artifacts" className="text-[var(--hud-accent)] hover:underline underline-offset-2 ml-2">
+          ONNX export queued · JOB{' '}
+          <span className="text-[var(--hud-text-data)]">{exportJobId}</span>{' '}
+          <Link
+            to="/artifacts"
+            className="text-[var(--hud-accent)] hover:underline underline-offset-2 ml-2"
+          >
             View Artifacts →
           </Link>
         </div>
       )}
+
+      {/* Zoom lightbox */}
+      {zoom && (
+        <div
+          className="fixed inset-0 z-50 flex items-center justify-center bg-black/80 p-6"
+          onClick={() => setZoom(null)}
+        >
+          <img src={zoom} alt="plot" className="max-h-full max-w-full" />
+        </div>
+      )}
     </div>
   );
 }
diff --git a/frontend/src/pages/experiments/new.tsx b/frontend/src/pages/experiments/new.tsx
index 079412f..29c2f92 100644
--- a/frontend/src/pages/experiments/new.tsx
+++ b/frontend/src/pages/experiments/new.tsx
@@ -6,26 +6,299 @@ import Button from '@/components/ui/Button';
 import Alert from '@/components/ui/Alert';
 import Spinner from '@/components/ui/Spinner';
 import ClusterSelect from '@/components/common/ClusterSelect';
+import SplitPanel, { SplitConfig, DEFAULT_SPLIT } from '@/components/common/SplitPanel';
 import { apiGet, apiPost } from '@/services/api';
 
-interface Project { id: string; name: string; }
-interface Dataset { id: string; name: string; latest_version_id?: string; }
+interface Project {
+  id: string;
+  name: string; // rendered as the option label in the project picker
+}
+interface Dataset {
+  id: string;
+  name: string;
+  latest_version_id?: string; // unset: picker uses id as the value and appends ' (no versions)'
+}
 
 const BASE_MODELS = ['yolov8n.pt', 'yolov8s.pt', 'yolov8m.pt', 'yolov8l.pt', 'yolov8x.pt'];
 
-const DEFAULT_AUGMENTATIONS = {
-  hsv_h: 0.015,
-  hsv_s: 0.7,
-  hsv_v: 0.4,
-  degrees: 0.0,
-  translate: 0.1,
-  scale: 0.5,
-  shear: 0.0,
-  flipud: 0.0,
-  fliplr: 0.5,
-  mosaic: 1.0,
-  mixup: 0.0,
-};
+type FieldType = 'number' | 'bool' | 'select'; // selects which control HpField renders
+interface FieldDef {
+  key: string; // property name in the params map that is spread into the /api/train payload
+  label: string;
+  type: FieldType;
+  default: number | boolean | string; // initial value; also what a group RESET restores
+  min?: number; // min/max/step are passed straight to the number input
+  max?: number;
+  step?: number; // the number input uses 1 when omitted
+  options?: string[]; // choices rendered for 'select' fields
+  help?: string; // hover text; HpField sets it as a title attribute
+}
+
+// Single source of truth for every tunable hyperparameter / augmentation knob.
+// Adding a field here exposes it in the UI and forwards it to the backend.
+const GROUPS: { title: string; fields: FieldDef[] }[] = [
+  {
+    title: 'Core',
+    fields: [
+      { key: 'epochs', label: 'Epochs', type: 'number', default: 50, min: 1, max: 2000 },
+      { key: 'batch', label: 'Batch Size', type: 'number', default: 16, min: 1, max: 512 },
+      {
+        key: 'imgsz',
+        label: 'Image Size',
+        type: 'number',
+        default: 640,
+        min: 32,
+        max: 1920,
+        step: 32,
+      },
+      {
+        key: 'patience',
+        label: 'Patience',
+        type: 'number',
+        default: 100,
+        min: 0,
+        max: 1000,
+        help: 'Early-stop after N epochs w/o improvement',
+      },
+      { key: 'seed', label: 'Seed', type: 'number', default: 0, min: 0 },
+      {
+        key: 'rect',
+        label: 'Rectangular',
+        type: 'bool',
+        default: false,
+        help: 'Rectangular batches (min padding)',
+      },
+      { key: 'single_cls', label: 'Single class', type: 'bool', default: false },
+    ],
+  },
+  {
+    title: 'Optimizer & Schedule',
+    fields: [
+      {
+        key: 'optimizer',
+        label: 'Optimizer',
+        type: 'select',
+        default: 'auto',
+        options: ['auto', 'SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp'],
+      },
+      {
+        key: 'lr0',
+        label: 'Initial LR (lr0)',
+        type: 'number',
+        default: 0.01,
+        min: 0.00001,
+        max: 1,
+        step: 0.00001, // same as min so 0.01 sits on the min + k*step validation grid
+      },
+      {
+        key: 'lrf',
+        label: 'Final LR (lrf)',
+        type: 'number',
+        default: 0.01,
+        min: 0.00001,
+        max: 1,
+        step: 0.00001, // same as min so 0.01 sits on the min + k*step validation grid
+      },
+      {
+        key: 'momentum',
+        label: 'Momentum',
+        type: 'number',
+        default: 0.937,
+        min: 0,
+        max: 1,
+        step: 0.001,
+      },
+      {
+        key: 'weight_decay',
+        label: 'Weight Decay',
+        type: 'number',
+        default: 0.0005,
+        min: 0,
+        max: 0.1,
+        step: 0.0001,
+      },
+      {
+        key: 'warmup_epochs',
+        label: 'Warmup Epochs',
+        type: 'number',
+        default: 3.0,
+        min: 0,
+        max: 20,
+        step: 0.5,
+      },
+      {
+        key: 'warmup_momentum',
+        label: 'Warmup Momentum',
+        type: 'number',
+        default: 0.8,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      {
+        key: 'warmup_bias_lr',
+        label: 'Warmup Bias LR',
+        type: 'number',
+        default: 0.1,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      { key: 'cos_lr', label: 'Cosine LR', type: 'bool', default: false },
+      {
+        key: 'close_mosaic',
+        label: 'Close Mosaic',
+        type: 'number',
+        default: 10,
+        min: 0,
+        max: 100,
+        help: 'Disable mosaic for last N epochs',
+      },
+      { key: 'nbs', label: 'Nominal Batch', type: 'number', default: 64, min: 1, max: 256 },
+      { key: 'amp', label: 'AMP', type: 'bool', default: true, help: 'Automatic mixed precision' },
+    ],
+  },
+  {
+    title: 'Regularization & Loss Gains',
+    fields: [
+      {
+        key: 'dropout',
+        label: 'Dropout',
+        type: 'number',
+        default: 0.0,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      {
+        key: 'label_smoothing',
+        label: 'Label Smoothing',
+        type: 'number',
+        default: 0.0,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      { key: 'box', label: 'Box Gain', type: 'number', default: 7.5, min: 0, max: 20, step: 0.1 },
+      { key: 'cls', label: 'Cls Gain', type: 'number', default: 0.5, min: 0, max: 10, step: 0.1 },
+      { key: 'dfl', label: 'DFL Gain', type: 'number', default: 1.5, min: 0, max: 10, step: 0.1 },
+      { key: 'overlap_mask', label: 'Overlap Mask', type: 'bool', default: true },
+      { key: 'mask_ratio', label: 'Mask Ratio', type: 'number', default: 4, min: 1, max: 16 },
+    ],
+  },
+  {
+    title: 'Augmentation',
+    fields: [
+      {
+        key: 'hsv_h',
+        label: 'hsv_h',
+        type: 'number',
+        default: 0.015,
+        min: 0,
+        max: 1,
+        step: 0.001,
+        help: 'Hue jitter fraction',
+      },
+      {
+        key: 'hsv_s',
+        label: 'hsv_s',
+        type: 'number',
+        default: 0.7,
+        min: 0,
+        max: 1,
+        step: 0.01,
+        help: 'Saturation jitter',
+      },
+      {
+        key: 'hsv_v',
+        label: 'hsv_v',
+        type: 'number',
+        default: 0.4,
+        min: 0,
+        max: 1,
+        step: 0.01,
+        help: 'Value jitter',
+      },
+      {
+        key: 'degrees',
+        label: 'degrees',
+        type: 'number',
+        default: 0.0,
+        min: 0,
+        max: 180,
+        step: 1,
+        help: 'Rotation range',
+      },
+      {
+        key: 'translate',
+        label: 'translate',
+        type: 'number',
+        default: 0.1,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      { key: 'scale', label: 'scale', type: 'number', default: 0.5, min: 0, max: 1, step: 0.01 },
+      { key: 'shear', label: 'shear', type: 'number', default: 0.0, min: 0, max: 10, step: 0.1 },
+      {
+        key: 'perspective',
+        label: 'perspective',
+        type: 'number',
+        default: 0.0,
+        min: 0,
+        max: 0.001,
+        step: 0.0001,
+      },
+      { key: 'flipud', label: 'flipud', type: 'number', default: 0.0, min: 0, max: 1, step: 0.01 },
+      { key: 'fliplr', label: 'fliplr', type: 'number', default: 0.5, min: 0, max: 1, step: 0.01 },
+      { key: 'bgr', label: 'bgr', type: 'number', default: 0.0, min: 0, max: 1, step: 0.01 },
+      { key: 'mosaic', label: 'mosaic', type: 'number', default: 1.0, min: 0, max: 1, step: 0.01 },
+      { key: 'mixup', label: 'mixup', type: 'number', default: 0.0, min: 0, max: 1, step: 0.01 },
+      {
+        key: 'copy_paste',
+        label: 'copy_paste',
+        type: 'number',
+        default: 0.0,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      {
+        key: 'erasing',
+        label: 'erasing',
+        type: 'number',
+        default: 0.4,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      {
+        key: 'crop_fraction',
+        label: 'crop_fraction',
+        type: 'number',
+        default: 1.0,
+        min: 0,
+        max: 1,
+        step: 0.01,
+      },
+      {
+        key: 'auto_augment',
+        label: 'auto_augment',
+        type: 'select',
+        default: 'randaugment',
+        options: ['randaugment', 'autoaugment', 'augmix'],
+      },
+    ],
+  },
+];
+
+const DEVICES = ['cpu', 'cuda', 'mps', '0', '0,1']; // option values (and labels) of the Device select
+
+function buildDefaults(): Record<string, number | boolean | string> {
+  const defaults: Record<string, number | boolean | string> = {}; // field key -> declared default
+  GROUPS.forEach((group) => group.fields.forEach((f) => (defaults[f.key] = f.default)));
+  return defaults;
+}
 
 function FieldLabel({ htmlFor, children }: { htmlFor: string; children: React.ReactNode }) {
   return (
@@ -35,31 +308,80 @@ function FieldLabel({ htmlFor, children }: { htmlFor: string; children: React.Re
   );
 }
 
+function HpField({
+  field,
+  value,
+  onChange,
+}: {
+  field: FieldDef;
+  value: number | boolean | string;
+  onChange: (v: number | boolean | string) => void;
+}) {
+  const inputId = `hp-${field.key}`; // ties each FieldLabel to its control
+  if (field.type === 'bool') {
+    return (
+      <label
+        className="flex items-center gap-2 pt-5 text-xs font-mono text-[var(--hud-text-muted)]"
+        title={field.help} // bool fields such as rect/amp declare help; surface it like number inputs do
+      >
+        <input type="checkbox" checked={!!value} onChange={(e) => onChange(e.target.checked)} />
+        {field.label}
+      </label>
+    );
+  }
+  if (field.type === 'select') {
+    return (
+      <div title={field.help}>
+        <FieldLabel htmlFor={inputId}>{field.label}</FieldLabel>
+        <Select id={inputId} value={String(value)} onChange={(e) => onChange(e.target.value)}>
+          {(field.options ?? []).map((o) => (
+            <option key={o} value={o}>
+              {o}
+            </option>
+          ))}
+        </Select>
+      </div>
+    );
+  }
+  return (
+    <div>
+      <FieldLabel htmlFor={inputId}>{field.label}</FieldLabel>
+      <Input
+        id={inputId}
+        type="number"
+        min={field.min}
+        max={field.max}
+        step={field.step ?? 1}
+        value={value as number}
+        onChange={(e) => {
+          const n = parseFloat(e.target.value); // NaN when the box is empty or unparsable
+          onChange(Number.isFinite(n) ? n : (field.default as number));
+        }}
+        title={field.help}
+      />
+    </div>
+  );
+}
+
 export default function ExperimentsNew() {
   const [searchParams] = useSearchParams();
   const preselectedProject = searchParams.get('projectId') || '';
 
   const [projects, setProjects] = useState<Project[]>([]);
   const [datasets, setDatasets] = useState<Dataset[]>([]);
-  const [form, setForm] = useState({
+  const [run, setRun] = useState({
     projectId: preselectedProject,
+    datasetId: '',
     datasetVersionId: '',
     name: 'Baseline',
     task: 'detect',
     baseModel: 'yolov8n.pt',
     clusterId: '',
-    epochs: 50,
-    batchSize: 16,
-    imageSize: 640,
-    learningRate: 0.01,
-    lrf: 0.01,
-    momentum: 0.937,
-    weightDecay: 0.0005,
-    warmupEpochs: 3.0,
     device: 'cpu',
-    augmentations: { ...DEFAULT_AUGMENTATIONS },
   });
-  const [showAugmentations, setShowAugmentations] = useState(false);
+  const [params, setParams] = useState<Record<string, number | boolean | string>>(buildDefaults());
+  const [splitCfg, setSplitCfg] = useState<SplitConfig>(DEFAULT_SPLIT);
+  const [open, setOpen] = useState<Record<string, boolean>>({ Core: true });
   const [loading, setLoading] = useState(false);
   const [jobId, setJobId] = useState<string | null>(null);
   const [error, setError] = useState<string | null>(null);
@@ -72,43 +394,62 @@ export default function ExperimentsNew() {
   }, []);
 
   useEffect(() => {
-    if (!form.projectId) { setDatasets([]); return; }
-    apiGet<{ items: Dataset[] }>(
-      `/api/datasets?project_id=${form.projectId}&page=1&page_size=200`,
-    )
+    if (!run.projectId) {
+      setDatasets([]);
+      return;
+    }
+    apiGet<{ items: Dataset[] }>(`/api/datasets?project_id=${run.projectId}&page=1&page_size=200`)
       .then((d) => setDatasets(d.items || []))
       .catch(console.error);
-  }, [form.projectId]);
+  }, [run.projectId]);
 
-  function setField<K extends keyof typeof form>(key: K, value: (typeof form)[K]) {
-    setForm((prev) => ({ ...prev, [key]: value }));
+  function setParam(key: string, value: number | boolean | string) {
+    setParams((current) => ({ ...current, [key]: value })); // overwrite one key, keep the rest
+  }
+
+  function resetGroup(title: string) {
+    const fields = GROUPS.find((g) => g.title === title)?.fields;
+    if (!fields) return; // unknown title: nothing to reset
+    setParams((current) => {
+      const restored = { ...current }; // params belonging to other groups are kept
+      fields.forEach((f) => (restored[f.key] = f.default));
+      return restored;
+    });
+  }
 
   async function onSubmit(e: React.FormEvent) {
     e.preventDefault();
-    if (!form.projectId) { setError('Select a project'); return; }
-    if (!form.datasetVersionId) { setError('Select a dataset/version'); return; }
+    if (!run.projectId) return setError('Select a project');
+    if (!run.datasetVersionId) return setError('Select a dataset/version');
     setLoading(true);
     setError(null);
     try {
+      // Persist the split first so what we train on matches what we visualize.
+      if (run.datasetId && run.datasetVersionId) {
+        try {
+          await apiPost(
+            `/api/datasets/${run.datasetId}/versions/${run.datasetVersionId}/split`,
+            splitCfg,
+          );
+        } catch {
+          /* training re-resolves deterministically from the same seed/ratios */
+        }
+      }
       const job = await apiPost<{ id: string; status: string }>('/api/train', {
-        projectId: form.projectId,
-        datasetVersionId: form.datasetVersionId,
-        task: form.task,
-        baseModel: form.baseModel,
-        name: form.name,
-        clusterId: form.clusterId || null,
+        projectId: run.projectId,
+        datasetVersionId: run.datasetVersionId,
+        task: run.task,
+        baseModel: run.baseModel,
+        name: run.name,
+        clusterId: run.clusterId || null,
         params: {
-          epochs: form.epochs,
-          batch: form.batchSize,
-          imgsz: form.imageSize,
-          lr0: form.learningRate,
-          lrf: form.lrf,
-          momentum: form.momentum,
-          weight_decay: form.weightDecay,
-          warmup_epochs: form.warmupEpochs,
-          device: form.device,
-          ...form.augmentations,
+          ...params,
+          device: run.device,
+          split_train: splitCfg.train,
+          split_val: splitCfg.val,
+          split_test: splitCfg.test,
+          split_seed: splitCfg.seed,
+          split_stratify: splitCfg.stratify,
         },
       });
       setJobId(job.id);
@@ -136,58 +477,91 @@ export default function ExperimentsNew() {
   }
 
   return (
-    <div className="max-w-xl space-y-4">
-      {/* Header */}
-      <div className="flex items-center justify-between border-b border-[var(--hud-border-subtle)] pb-3">
+    <div className="max-w-2xl space-y-0">
+      <div className="flex items-center justify-between border-b border-[var(--hud-border-subtle)] pb-3 mb-4">
         <div>
           <div className="label-overline mb-0.5">// Experiments / New</div>
           <h1>New Training Run</h1>
         </div>
-        <Link to="/experiments" className="text-xs font-mono text-[var(--hud-accent)] hover:underline">
+        <Link
+          to="/experiments"
+          className="text-xs font-mono text-[var(--hud-accent)] hover:underline"
+        >
           ← EXPERIMENTS
         </Link>
       </div>
 
-      {/* Form */}
       <form onSubmit={onSubmit} className="space-y-0">
         {/* Run config */}
-        <div className="border border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
-          <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center gap-2">
-            <div className="h-1.5 w-1.5 bg-[var(--hud-accent)]" />
-            <span className="label-overline">Run Configuration</span>
-          </div>
+        <Section title="Run Configuration" accent>
           <div className="p-4 space-y-3">
             <div>
               <FieldLabel htmlFor="run-name">Run Name</FieldLabel>
-              <Input id="run-name" value={form.name} onChange={(e) => setField('name', e.target.value)} placeholder="Baseline" />
+              <Input
+                id="run-name"
+                value={run.name}
+                onChange={(e) => setRun((r) => ({ ...r, name: e.target.value }))}
+                placeholder="Baseline"
+              />
             </div>
             <div className="grid grid-cols-2 gap-3">
               <div>
                 <FieldLabel htmlFor="project-select">Project</FieldLabel>
-                <Select id="project-select" value={form.projectId} onChange={(e) => { setField('projectId', e.target.value); setField('datasetVersionId', ''); }}>
+                <Select
+                  id="project-select"
+                  value={run.projectId}
+                  onChange={(e) =>
+                    setRun((r) => ({
+                      ...r,
+                      projectId: e.target.value,
+                      datasetId: '',
+                      datasetVersionId: '',
+                    }))
+                  }
+                >
                   <option value="">— select —</option>
-                  {projects.map((p) => <option key={p.id} value={p.id}>{p.name}</option>)}
+                  {projects.map((p) => (
+                    <option key={p.id} value={p.id}>
+                      {p.name}
+                    </option>
+                  ))}
                 </Select>
               </div>
               <div>
                 <FieldLabel htmlFor="dataset-select">Dataset Version</FieldLabel>
-                <Select id="dataset-select" value={form.datasetVersionId} onChange={(e) => setField('datasetVersionId', e.target.value)} disabled={!form.projectId || datasets.length === 0}>
+                <Select
+                  id="dataset-select"
+                  value={run.datasetVersionId}
+                  onChange={(e) => {
+                    const ds = datasets.find(
+                      (d) => (d.latest_version_id || d.id) === e.target.value,
+                    );
+                    setRun((r) => ({
+                      ...r,
+                      datasetVersionId: e.target.value,
+                      datasetId: ds?.id || '',
+                    }));
+                  }}
+                  disabled={!run.projectId || datasets.length === 0}
+                >
                   <option value="">— select —</option>
                   {datasets.map((d) => (
                     <option key={d.id} value={d.latest_version_id || d.id}>
-                      {d.name}{!d.latest_version_id ? ' (no versions)' : ''}
+                      {d.name}
+                      {!d.latest_version_id ? ' (no versions)' : ''}
                     </option>
                   ))}
                 </Select>
-                {form.projectId && datasets.length === 0 && (
-                  <p className="text-[0.6875rem] font-mono text-[var(--hud-text-muted)] mt-1">No datasets found</p>
-                )}
               </div>
             </div>
             <div className="grid grid-cols-2 gap-3">
               <div>
                 <FieldLabel htmlFor="task-select">Task</FieldLabel>
-                <Select id="task-select" value={form.task} onChange={(e) => setField('task', e.target.value)}>
+                <Select
+                  id="task-select"
+                  value={run.task}
+                  onChange={(e) => setRun((r) => ({ ...r, task: e.target.value }))}
+                >
                   <option value="detect">Object Detection</option>
                   <option value="classify">Classification</option>
                   <option value="segment">Segmentation</option>
@@ -196,134 +570,95 @@ export default function ExperimentsNew() {
               </div>
               <div>
                 <FieldLabel htmlFor="base-model">Base Model</FieldLabel>
-                <Select id="base-model" value={form.baseModel} onChange={(e) => setField('baseModel', e.target.value)}>
-                  {BASE_MODELS.map((m) => <option key={m} value={m}>{m}</option>)}
+                <Select
+                  id="base-model"
+                  value={run.baseModel}
+                  onChange={(e) => setRun((r) => ({ ...r, baseModel: e.target.value }))}
+                >
+                  {BASE_MODELS.map((m) => (
+                    <option key={m} value={m}>
+                      {m}
+                    </option>
+                  ))}
                 </Select>
               </div>
             </div>
+            <div>
+              <FieldLabel htmlFor="device">Device</FieldLabel>
+              <Select
+                id="device"
+                value={run.device}
+                onChange={(e) => setRun((r) => ({ ...r, device: e.target.value }))}
+              >
+                {DEVICES.map((d) => (
+                  <option key={d} value={d}>
+                    {d}
+                  </option>
+                ))}
+              </Select>
+            </div>
           </div>
-        </div>
+        </Section>
 
-        {/* Cluster selection */}
-        <div className="border border-t-0 border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
-          <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center gap-2">
-            <div className="h-1.5 w-1.5 bg-[var(--hud-accent)]" />
-            <span className="label-overline">Compute Cluster</span>
+        {/* Dataset split */}
+        <Section title="Dataset Split" accent>
+          <div className="p-4">
+            <SplitPanel
+              datasetId={run.datasetId}
+              versionId={run.datasetVersionId}
+              onConfigChange={setSplitCfg}
+            />
           </div>
+        </Section>
+
+        {/* Cluster */}
+        <Section title="Compute Cluster" accent>
           <div className="p-4 space-y-2">
             <FieldLabel htmlFor="cluster-select">Run on cluster</FieldLabel>
             <ClusterSelect
               id="cluster-select"
               kind="train"
-              value={form.clusterId}
-              onChange={(v) => setField('clusterId', v)}
+              value={run.clusterId}
+              onChange={(v) => setRun((r) => ({ ...r, clusterId: v }))}
               allowAuto
             />
             <p className="text-[0.6875rem] font-mono text-[var(--hud-text-muted)]">
-              Pick an idle cluster to dedicate this run, or leave on auto-assign to use the shared
-              queue. Busy clusters are listed but disabled.
+              Pick an idle cluster to dedicate this run, or leave on auto-assign. Busy clusters are
+              disabled.
             </p>
           </div>
-        </div>
-
-        {/* Hyperparameters */}
-        <div className="border border-t-0 border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
-          <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center gap-2">
-            <div className="h-1.5 w-1.5 bg-[var(--hud-border-strong)]" />
-            <span className="label-overline">Hyperparameters</span>
-          </div>
-          <div className="p-4 grid grid-cols-2 gap-3">
-            <div>
-              <FieldLabel htmlFor="epochs">Epochs</FieldLabel>
-              <Input id="epochs" type="number" min={1} max={1000} value={form.epochs} onChange={(e) => setField('epochs', parseInt(e.target.value) || 1)} />
-            </div>
-            <div>
-              <FieldLabel htmlFor="batch-size">Batch Size</FieldLabel>
-              <Input id="batch-size" type="number" min={1} max={512} value={form.batchSize} onChange={(e) => setField('batchSize', parseInt(e.target.value) || 1)} />
-            </div>
-            <div>
-              <FieldLabel htmlFor="image-size">Image Size</FieldLabel>
-              <Input id="image-size" type="number" min={32} max={1280} step={32} value={form.imageSize} onChange={(e) => setField('imageSize', parseInt(e.target.value) || 640)} />
-            </div>
-            <div>
-              <FieldLabel htmlFor="lr">Initial LR (lr0)</FieldLabel>
-              <Input id="lr" type="number" min={0.00001} max={1} step={0.0001} value={form.learningRate} onChange={(e) => setField('learningRate', parseFloat(e.target.value) || 0.01)} />
-            </div>
-            <div>
-              <FieldLabel htmlFor="lrf">Final LR (lrf)</FieldLabel>
-              <Input id="lrf" type="number" min={0.00001} max={1} step={0.0001} value={form.lrf} onChange={(e) => setField('lrf', parseFloat(e.target.value) || 0.01)} />
-            </div>
-            <div>
-              <FieldLabel htmlFor="momentum">Momentum</FieldLabel>
-              <Input id="momentum" type="number" min={0} max={1} step={0.001} value={form.momentum} onChange={(e) => setField('momentum', parseFloat(e.target.value) || 0.937)} />
-            </div>
-            <div>
-              <FieldLabel htmlFor="weight-decay">Weight Decay</FieldLabel>
-              <Input id="weight-decay" type="number" min={0} max={0.1} step={0.0001} value={form.weightDecay} onChange={(e) => setField('weightDecay', parseFloat(e.target.value) || 0.0005)} />
-            </div>
-            <div>
-              <FieldLabel htmlFor="warmup-epochs">Warmup Epochs</FieldLabel>
-              <Input id="warmup-epochs" type="number" min={0} max={10} step={0.5} value={form.warmupEpochs} onChange={(e) => setField('warmupEpochs', parseFloat(e.target.value) || 3)} />
-            </div>
-            <div className="col-span-2">
-              <FieldLabel htmlFor="device">Device</FieldLabel>
-              <Select id="device" value={form.device} onChange={(e) => setField('device', e.target.value)}>
-                <option value="cpu">CPU</option>
-                <option value="cuda">CUDA (GPU)</option>
-                <option value="mps">MPS (Apple Silicon)</option>
-                <option value="0">GPU:0</option>
-                <option value="0,1">GPU:0,1</option>
-              </Select>
-            </div>
-          </div>
-        </div>
+        </Section>
 
-        {/* Augmentation */}
-        <div className="border border-t-0 border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
-          <button
-            type="button"
-            onClick={() => setShowAugmentations((v) => !v)}
-            className="w-full border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center justify-between hover:bg-[var(--hud-elevated)] transition-colors"
+        {/* Hyperparameter groups */}
+        {GROUPS.map((g) => (
+          <Section
+            key={g.title}
+            title={g.title}
+            collapsible
+            isOpen={!!open[g.title]}
+            onToggle={() => setOpen((o) => ({ ...o, [g.title]: !o[g.title] }))}
+            onReset={() => resetGroup(g.title)}
           >
-            <div className="flex items-center gap-2">
-              <div className="h-1.5 w-1.5 bg-[var(--hud-border-strong)]" />
-              <span className="label-overline">Augmentation Parameters</span>
-            </div>
-            <span className="text-xs font-mono text-[var(--hud-text-muted)]">
-              {showAugmentations ? '▲ COLLAPSE' : '▼ EXPAND'}
-            </span>
-          </button>
-          {showAugmentations && (
-            <div className="p-4 grid grid-cols-2 gap-3">
-              {(Object.keys(DEFAULT_AUGMENTATIONS) as Array<keyof typeof DEFAULT_AUGMENTATIONS>).map((key) => (
-                <div key={key}>
-                  <FieldLabel htmlFor={`aug-${key}`}>{key}</FieldLabel>
-                  <Input
-                    id={`aug-${key}`}
-                    type="number"
-                    min={0}
-                    max={key === 'mosaic' ? 1 : undefined}
-                    step={0.001}
-                    value={form.augmentations[key]}
-                    onChange={(e) =>
-                      setForm((prev) => ({
-                        ...prev,
-                        augmentations: { ...prev.augmentations, [key]: parseFloat(e.target.value) || 0 },
-                      }))
-                    }
+            {open[g.title] && (
+              <div className="p-4 grid grid-cols-2 gap-3 md:grid-cols-3">
+                {g.fields.map((f) => (
+                  <HpField
+                    key={f.key}
+                    field={f}
+                    value={params[f.key]}
+                    onChange={(v) => setParam(f.key, v)}
                   />
-                </div>
-              ))}
-              <div className="col-span-2 text-[0.6875rem] font-mono text-[var(--hud-text-muted)] border border-[var(--hud-border-subtle)] bg-[var(--hud-inset)] px-3 py-2">
-                Augmentation values follow Ultralytics YOLO conventions. hsv_h/s/v: colour jitter fractions.
-                degrees: rotation range. fliplr/flipud: flip probability. mosaic: mosaic augmentation probability.
-                mixup: MixUp probability.
+                ))}
               </div>
-            </div>
-          )}
-        </div>
+            )}
+          </Section>
+        ))}
 
-        {error && <Alert variant="error" className="mt-3">{error}</Alert>}
+        {error && (
+          <Alert variant="error" className="mt-3">
+            {error}
+          </Alert>
+        )}
 
         <div className="pt-3">
           <Button type="submit" disabled={loading} className="w-full">
@@ -341,3 +676,75 @@ export default function ExperimentsNew() {
     </div>
   );
 }
+
+/**
+ * Bordered form section with a header row and an optional collapse toggle.
+ *
+ * The header shows a small marker square, the title and, when `collapsible`
+ * is set, an open/closed arrow. The header button is disabled unless
+ * `collapsible` is set, so `onToggle` only fires for collapsible sections.
+ * `children` are always rendered as passed; hiding the body of a closed
+ * section is left to the caller.
+ *
+ * Props:
+ * - `title`: heading text shown in the header.
+ * - `children`: section body, rendered below the header.
+ * - `accent`: draw the marker square in the accent color.
+ * - `collapsible`: enable the header toggle and show the arrow.
+ * - `isOpen`: current open state; selects the arrow and gates RESET.
+ * - `onToggle`: called when the header button is clicked.
+ * - `onReset`: when set, an open collapsible section shows a RESET button.
+ */
+function Section({
+  title,
+  children,
+  accent,
+  collapsible,
+  isOpen,
+  onToggle,
+  onReset,
+}: {
+  title: string;
+  children: React.ReactNode;
+  accent?: boolean;
+  collapsible?: boolean;
+  isOpen?: boolean;
+  onToggle?: () => void;
+  onReset?: () => void;
+}) {
+  return (
+    <div className="border border-t-0 first:border-t border-[var(--hud-border-default)] bg-[var(--hud-surface)]">
+      <div className="border-b border-[var(--hud-border-subtle)] px-4 py-2 flex items-center justify-between">
+        <button
+          type="button"
+          onClick={onToggle}
+          disabled={!collapsible}
+          aria-expanded={collapsible ? Boolean(isOpen) : undefined}
+          className="flex items-center gap-2 disabled:cursor-default"
+        >
+          {/* <span>, not <div>: a <button> may only contain phrasing content. */}
+          <span
+            aria-hidden="true"
+            className={`h-1.5 w-1.5 ${accent ? 'bg-[var(--hud-accent)]' : 'bg-[var(--hud-border-strong)]'}`}
+          />
+          <span className="label-overline">{title}</span>
+          {collapsible && (
+            <span aria-hidden="true" className="text-xs font-mono text-[var(--hud-text-muted)]">
+              {isOpen ? '▲' : '▼'}
+            </span>
+          )}
+        </button>
+        {collapsible && onReset && isOpen && (
+          <button
+            type="button"
+            onClick={onReset}
+            className="text-[0.6875rem] font-mono text-[var(--hud-text-muted)] hover:text-[var(--hud-accent)]"
+          >
+            RESET
+          </button>
+        )}
+      </div>
+      {children}
+    </div>
+  );
+}
diff --git a/specs/training-ux/plan.md b/specs/training-ux/plan.md
new file mode 100644
index 0000000..03c1a6f
--- /dev/null
+++ b/specs/training-ux/plan.md
@@ -0,0 +1,40 @@
+# Training UX Overhaul — Plan
+
+## Backend
+
+1. **`services/split_service.py`** (new) — deterministic, seeded, optionally
+   class-stratified split assignment persisted into `asset.meta_data.split`;
+   `asset_split`, `normalize_ratios`, `resolve_split` (hash fallback),
+   `assign_splits`, `get_split_summary`.
+2. **`schemas/split.py`** (new) — `SplitConfig`, `SplitSummary`.
+3. **`services/asset_service.py`** — `list_assets` gains a `split` filter
+   (filtered in Python since the value lives in JSON).
+4. **`api/assets.py`** — `GET/POST /datasets/{id}/versions/{vid}/split`; asset
+   list returns `split` + `download_url` and accepts `?split=`.
+5. **`services/storage.py`** — `put_bytes` / `get_bytes` helpers.
+6. **`jobs/tasks/training.py`** —
+   - honor persisted split with deterministic hash fallback; **hold out test**;
+   - `ULTRALYTICS_TRAIN_ARGS` allow-list applied with `plots=True`;
+   - `_normalize_metrics` maps Ultralytics keys → clean keys;
+   - persist `{epochs, split, summary, plots}` in `metrics_json`; upload plot
+     PNGs to MinIO.
+7. **`api/experiments.py`** / **`schemas/experiment.py`** — `/metrics` returns
+   `summary`/`plots`/`split` (+ presigned plot urls); new `/plots/{name}`
+   stream; run detail exposes `artifacts`.
+
+## Frontend
+
+8. **`components/common/SplitPanel.tsx`** (new) — ratio/seed/stratify controls,
+   stacked split bar, per-class table; persists via the split endpoint.
+9. **`pages/experiments/new.tsx`** — config-driven grouped HP/aug form
+   (Core / Optimizer & Schedule / Regularization & Loss / Augmentation),
+   embedded SplitPanel, device select; persists split on launch.
+10. **`pages/experiments/[runId].tsx`** — multi-panel charts (loss / mAP / P-R /
+    accuracy / LR), summary tiles, split bar, native plot gallery w/ lightbox,
+    "Run Evaluation" + evaluations list.
+
+## Tests
+- `tests/unit/test_split_service.py` — ratios, determinism, slice-sum,
+  persistence, reproducibility, `meta_data` reads.
+
+No DB migration (reuses existing JSON/Text columns).
diff --git a/specs/training-ux/spec.md b/specs/training-ux/spec.md
new file mode 100644
index 0000000..8bfb3b7
--- /dev/null
+++ b/specs/training-ux/spec.md
@@ -0,0 +1,68 @@
+# Training UX Overhaul — Spec
+
+## Problem
+
+The training flow is functional but not seamless or fully controllable:
+
+- **Splits are inconsistent and invisible.** Training hardcoded a round-robin
+  80/20 split and never held out a test set, while evaluation reads
+  `asset.meta_data.split` — so the two disagreed and nothing was persisted to
+  visualize.
+- **Only a subset of Ultralytics hyperparameters/augmentations** were reachable
+  from the UI/backend.
+- **Completed-run metrics barely rendered** (the chart looked for clean keys that
+  Ultralytics never emits) and the native plots (PR curve, confusion matrix,
+  results grid) were discarded.
+- **Test-set evaluation was not discoverable** from a finished run.
+
+## Goals
+
+1. Configurable, persisted, reproducible train/val/test splits honored by both
+   training and evaluation, with visualization and per-split browsing.
+2. Full coverage of Ultralytics training hyperparameters and online
+   augmentations in the launch form and backend.
+3. Gold-standard computer-vision metric visualization for completed runs:
+   train/val loss curves, mAP50 / mAP50-95, precision/recall, LR schedule,
+   summary tiles, and the native Ultralytics plot images.
+4. One-click "Run Evaluation" on the test split from a completed run, plus a list
+   of that model's evaluations.
+
+## Non-goals
+
+- Live augmentation image preview.
+- Hyperparameter sweeps / multi-run comparison.
+
+## Design
+
+- **Splits** persist into the existing `asset.meta_data` JSON (`split` key, the
+  same field evaluation already reads). `split_service` assigns them
+  deterministically (seeded, optionally class-stratified) and summarizes them.
+  Training honors persisted splits and falls back to a deterministic hash split;
+  **test assets are held out** of the YOLO dataset.
+- **Hyperparameters/augmentations** flow through an allow-list
+  (`ULTRALYTICS_TRAIN_ARGS`) applied with `plots=True`. The UI is config-driven
+  (single field-metadata array) and grouped (Core / Optimizer & Schedule /
+  Regularization & Loss / Augmentation).
+- **Metrics** are normalized to clean keys at capture time and stored in
+  `experiment_runs.metrics_json` alongside a `summary`, the `split`, and `plots`
+  (uploaded to MinIO and surfaced with presigned URLs).
+- **Evaluation** is launched from the run detail page via the existing
+  `/evaluations/new` query-param prefill.
+
+No schema migration is required — only existing JSON/Text columns are used.
+
+## API surface
+
+- `GET/POST /api/datasets/{dataset_id}/versions/{version_id}/split`
+- `GET /api/datasets/{dataset_id}/assets?split=train|val|test`
+- `GET /api/experiments/runs/{runId}/metrics` → `{metrics, summary, plots, split}`
+- `GET /api/experiments/runs/{runId}/plots/{name}` (streaming fallback)
+- `GET /api/experiments/runs/{runId}` now includes `artifacts`
+
+## Acceptance
+
+- Splits set in the form are persisted and visualized, and are exactly what
+  training trains on; the test split is unseen during training.
+- All documented Ultralytics knobs are settable and forwarded.
+- Run detail shows populated loss/mAP/PR/LR charts, summary tiles, and native
+  plots; "Run Evaluation" lands on a prefilled eval form.
diff --git a/specs/training-ux/tasks.md b/specs/training-ux/tasks.md
new file mode 100644
index 0000000..0912b02
--- /dev/null
+++ b/specs/training-ux/tasks.md
@@ -0,0 +1,16 @@
+# Training UX Overhaul — Tasks
+
+- [x] `split_service` with deterministic seeded + stratified assignment
+- [x] `SplitConfig` / `SplitSummary` schemas
+- [x] Split GET/POST endpoints + `?split=` asset filter (+ download_url)
+- [x] `storage.put_bytes` / `get_bytes`
+- [x] Training honors persisted split, holds out test, hash fallback
+- [x] `ULTRALYTICS_TRAIN_ARGS` full hyperparameter/aug passthrough (`plots=True`)
+- [x] Metric key normalization + `{summary, plots, split}` in metrics_json
+- [x] Plot upload to MinIO + `/metrics` (presigned) and `/plots/{name}` stream
+- [x] Run detail exposes `artifacts`
+- [x] `SplitPanel` component
+- [x] Config-driven grouped training form + embedded split
+- [x] Multi-panel run-detail charts, summary tiles, plot gallery, Run Evaluation
+- [x] Split service unit tests
+- [ ] Playwright visual snapshots (run when UI review needed)