From 75b55d4d6c964db8051f62879d8521d375f8da4c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 31 May 2026 18:05:17 +0000
Subject: [PATCH 1/2] test: expand coverage for RBAC, Celery tasks, services,
 routers, agent, and frontend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend (unit):
- test_rbac: workspace authorization gate (role ladder, non-member 403,
  superuser bypass) — previously 0% coverage on the central auth chokepoint
- test_task_training / _onnx_export / _prelabels: Celery task orchestration,
  Job status transitions, and cluster release on terminal status (stubbed
  trainer + MINIO_DISABLED, no ML stack required)
- test_storage_service / _evaluation_service / _inference_service: object
  storage plumbing, evaluation helpers + create flow, inference download/
  dispatch
- test_api_assets / _api_experiments / _api_ops: request-level router tests
  covering reads, pagination, neighbor cursor, and validation guards

Agent:
- test_discover: hardware/OS probe parsing + payload shape
- test_identity: persistent identity round-trip, 0600 perms, corrupt-file handling

Frontend (Vitest):
- add vitest + jsdom + testing-library; vitest.config.ts and setup
- token-store, api (url resolution, auth header, 401 redirect, error detail),
  and auth-store (login/logout/restore) unit tests
- playwright now scopes to *.spec.ts so the runners don't overlap

CI:
- enforce backend unit coverage floor (--cov-fail-under=62) to lock in
  coverage and enable ratcheting
- run frontend vitest unit tests in the frontend job

https://claude.ai/code/session_011h7jZv18xWfFmT6fDz9EU4
---
 .github/workflows/ci.yml                      |   7 +-
 agent/tests/test_discover.py                  |  81 +++++++++
 agent/tests/test_identity.py                  |  66 +++++++
 backend/tests/unit/test_api_assets.py         | 129 ++++++++++++++
 backend/tests/unit/test_api_experiments.py    | 103 +++++++++++
 backend/tests/unit/test_api_ops.py            |  96 ++++++++++
 backend/tests/unit/test_evaluation_service.py | 164 ++++++++++++++++++
 backend/tests/unit/test_inference_service.py  |  83 +++++++++
 backend/tests/unit/test_rbac.py               | 151 ++++++++++++++++
 backend/tests/unit/test_storage_service.py    | 105 +++++++++++
 backend/tests/unit/test_task_onnx_export.py   |  99 +++++++++++
 backend/tests/unit/test_task_prelabels.py     |  93 ++++++++++
 backend/tests/unit/test_task_training.py      | 126 ++++++++++++++
 frontend/package.json                         |   8 +-
 frontend/playwright.config.ts                 |   3 +
 frontend/tests/unit/api.test.ts               | 115 ++++++++++++
 frontend/tests/unit/auth-store.test.tsx       |  89 ++++++++++
 frontend/tests/unit/setup.ts                  |   7 +
 frontend/tests/unit/token-store.test.ts       |  32 ++++
 frontend/vitest.config.ts                     |  21 +++
 20 files changed, 1576 insertions(+), 2 deletions(-)
 create mode 100644 agent/tests/test_discover.py
 create mode 100644 agent/tests/test_identity.py
 create mode 100644 backend/tests/unit/test_api_assets.py
 create mode 100644 backend/tests/unit/test_api_experiments.py
 create mode 100644 backend/tests/unit/test_api_ops.py
 create mode 100644 backend/tests/unit/test_evaluation_service.py
 create mode 100644 backend/tests/unit/test_inference_service.py
 create mode 100644 backend/tests/unit/test_rbac.py
 create mode 100644 backend/tests/unit/test_storage_service.py
 create mode 100644 backend/tests/unit/test_task_onnx_export.py
 create mode 100644 backend/tests/unit/test_task_prelabels.py
 create mode 100644 backend/tests/unit/test_task_training.py
 create mode 100644 frontend/tests/unit/api.test.ts
 create mode 100644 frontend/tests/unit/auth-store.test.tsx
 create mode 100644 frontend/tests/unit/setup.ts
 create mode 100644 frontend/tests/unit/token-store.test.ts
 create mode 100644 frontend/vitest.config.ts

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 09e6784..56f38c1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -43,7 +43,9 @@ jobs:
         env:
           SKIP_DB_MIGRATIONS: "1"
         working-directory: backend
-        run: pytest -q tests/unit/
+        # addopts in pyproject.toml already enables --cov=app; the floor here
+        # locks in current coverage and is meant to be ratcheted upward.
+        run: pytest -q tests/unit/ --cov-fail-under=62
 
   frontend:
     name: Frontend (lint + build)
@@ -66,6 +68,9 @@ jobs:
       - name: ESLint
         run: npm run lint
 
+      - name: Unit tests (vitest)
+        run: npm run test:unit
+
       - name: Build
         run: npm run build
 
diff --git a/agent/tests/test_discover.py b/agent/tests/test_discover.py
new file mode 100644
index 0000000..af82fc2
--- /dev/null
+++ b/agent/tests/test_discover.py
@@ -0,0 +1,81 @@
+"""Unit tests for the agent hardware/OS discovery module.
+
+These exercise the parsing helpers and the assembled ``discover()`` payload
+shape. Every GPU-dependent test monkeypatches both vendor probes, so results
+do not depend on which GPU libraries the host happens to have installed.
+"""
+
+from __future__ import annotations
+
+from vf_agent import discover
+
+
+def test_to_int_parses_and_defaults():
+    assert discover._to_int("42") == 42
+    assert discover._to_int(7) == 7
+    assert discover._to_int(None) == 0
+    assert discover._to_int("not-a-number") == 0
+
+
+def test_to_float_strips_percent_and_defaults():
+    assert discover._to_float("55.5%") == 55.5
+    assert discover._to_float("12") == 12.0
+    assert discover._to_float(None) == 0.0
+    assert discover._to_float("bad") == 0.0
+
+
+def test_os_info_has_expected_keys():
+    info = discover._os_info()
+    assert {"name", "release", "version", "arch", "python"} <= set(info)
+    assert isinstance(info["name"], str)
+
+
+def test_detect_gpus_falls_back_to_cpu(monkeypatch):
+    # Force both probes to report nothing so we land on the CPU vendor.
+    monkeypatch.setattr(discover, "_nvidia_gpus", lambda: None)
+    monkeypatch.setattr(discover, "_rocm_gpus", lambda: None)
+    vendor, gpus = discover.detect_gpus()
+    assert vendor == "cpu"
+    assert gpus == []
+
+
+def test_discover_payload_shape(monkeypatch):
+    monkeypatch.setattr(discover, "_nvidia_gpus", lambda: None)
+    monkeypatch.setattr(discover, "_rocm_gpus", lambda: None)
+    snap = discover.discover()
+
+    required = {
+        "cpu_cores",
+        "cpu_usage_pct",
+        "ram_total_mb",
+        "ram_used_mb",
+        "disk_total_gb",
+        "disk_used_gb",
+        "gpu_vendor",
+        "gpu_count",
+        "gpu_model",
+        "gpu_memory_mb",
+        "gpu_usage_pct",
+        "gpus",
+        "os",
+    }
+    assert required <= set(snap)
+    assert snap["gpu_vendor"] == "cpu"
+    assert snap["gpu_count"] == len(snap["gpus"]) == 0
+    assert isinstance(snap["cpu_cores"], int)
+
+
+def test_discover_summarizes_gpu_list(monkeypatch):
+    fake_gpus = [
+        {"index": 0, "name": "RTX 4090", "memory_mb": 24576, "util_pct": 30.0},
+        {"index": 1, "name": "RTX 4090", "memory_mb": 24576, "util_pct": 50.0},
+    ]
+    monkeypatch.setattr(discover, "_nvidia_gpus", lambda: fake_gpus)
+    monkeypatch.setattr(discover, "_rocm_gpus", lambda: None)
+    snap = discover.discover()
+    assert snap["gpu_vendor"] == "nvidia"
+    assert snap["gpu_count"] == 2
+    assert snap["gpu_model"] == "RTX 4090"
+    assert snap["gpu_memory_mb"] == 24576
+    # Usage is the mean across GPUs.
+    assert snap["gpu_usage_pct"] == 40.0
diff --git a/agent/tests/test_identity.py b/agent/tests/test_identity.py
new file mode 100644
index 0000000..bdfedf0
--- /dev/null
+++ b/agent/tests/test_identity.py
@@ -0,0 +1,66 @@
+"""Unit tests for the agent's persistent identity store.
+
+``identity`` reads/writes a small JSON file holding the identity granted at
+adoption. We point the module-level path at a tmp file and verify the
+save/load round-trip, 0600 permission bits, and missing/corrupt-file handling.
+"""
+
+from __future__ import annotations
+
+import json
+import stat
+
+from vf_agent import identity
+from vf_agent.identity import Identity
+
+
+def _redirect(monkeypatch, tmp_path):
+    path = tmp_path / "identity.json"
+    monkeypatch.setattr(identity, "IDENTITY_PATH", path)
+    return path
+
+
+def test_load_returns_none_when_absent(monkeypatch, tmp_path):
+    _redirect(monkeypatch, tmp_path)
+    assert identity.load() is None
+
+
+def test_save_then_load_round_trips(monkeypatch, tmp_path):
+    path = _redirect(monkeypatch, tmp_path)
+    ident = Identity(cluster_id="c1", register_token="tok", api_url="https://api")
+    identity.save(ident)
+
+    assert path.exists()
+    loaded = identity.load()
+    assert loaded == ident
+    assert loaded.cluster_id == "c1"
+    assert loaded.register_token == "tok"
+
+
+def test_save_sets_owner_only_permissions(monkeypatch, tmp_path):
+    path = _redirect(monkeypatch, tmp_path)
+    identity.save(Identity(cluster_id="c", register_token="t", api_url="u"))
+    mode = stat.S_IMODE(path.stat().st_mode)
+    assert mode == 0o600
+
+
+def test_load_returns_none_on_corrupt_json(monkeypatch, tmp_path):
+    path = _redirect(monkeypatch, tmp_path)
+    path.write_text("{ not valid json")
+    assert identity.load() is None
+
+
+def test_load_returns_none_on_missing_keys(monkeypatch, tmp_path):
+    path = _redirect(monkeypatch, tmp_path)
+    path.write_text(json.dumps({"cluster_id": "c"}))  # missing register_token/api_url
+    assert identity.load() is None
+
+
+def test_clear_removes_file(monkeypatch, tmp_path):
+    path = _redirect(monkeypatch, tmp_path)
+    identity.save(Identity(cluster_id="c", register_token="t", api_url="u"))
+    assert path.exists()
+    identity.clear()
+    assert not path.exists()
+    # Clearing again is a no-op, not an error.
+    identity.clear()
diff --git a/backend/tests/unit/test_api_assets.py b/backend/tests/unit/test_api_assets.py
new file mode 100644
index 0000000..740070b
--- /dev/null
+++ b/backend/tests/unit/test_api_assets.py
@@ -0,0 +1,129 @@
+"""Request-level tests for the assets router (``api/assets.py``).
+
+The router previously had no direct tests. We seed datasets/versions/assets
+through the test session and exercise the read endpoints, the neighbor cursor
+logic, and the upload-confirm write path. Auth is satisfied with a fake user;
+``MINIO_DISABLED`` keeps presign best-effort.
+"""
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime, timedelta, timezone
+
+from app.db.deps import get_current_user
+from app.main import app
+from app.models.asset import Asset
+from app.models.dataset import Dataset
+from app.models.dataset_version import DatasetVersion
+from app.models.user import User
+from tests.conftest import TestingSessionLocal, client
+
+
+def _fake_user() -> User:
+    return User(id="asset-tester", email="assets@example.com", name="T", password_hash="x")
+
+
+app.dependency_overrides[get_current_user] = _fake_user
+
+
+def _seed_assets(n: int) -> tuple[str, str, list[str]]:
+    db = TestingSessionLocal()
+    try:
+        ds = Dataset(id=str(uuid.uuid4()), project_id="p", name="ds")
+        db.add(ds)
+        db.commit()
+        ver = DatasetVersion(id=str(uuid.uuid4()), dataset_id=ds.id, version=1)
+        db.add(ver)
+        db.commit()
+        ids = []
+        base = datetime(2026, 1, 1, tzinfo=timezone.utc)
+        for i in range(n):
+            a = Asset(
+                id=str(uuid.uuid4()),
+                dataset_id=ds.id,
+                version_id=ver.id,
+                uri=f"datasets/{ver.id}/img{i}.jpg",
+                mime_type="image/jpeg",
+                label_status="unlabelled" if i % 2 else "labeled",
+                # Distinct, increasing timestamps so the (created_at, id) cursor
+                # ordering in /neighbors is deterministic.
+                created_at=base + timedelta(seconds=i),
+            )
+            db.add(a)
+            ids.append(a.id)
+        db.commit()
+        return ds.id, ver.id, ids
+    finally:
+        db.close()
+
+
+def test_get_asset_404_when_missing():
+    r = client.get("/api/assets/does-not-exist")
+    assert r.status_code == 404
+
+
+def test_get_asset_returns_fields():
+    _, _, ids = _seed_assets(1)
+    r = client.get(f"/api/assets/{ids[0]}")
+    assert r.status_code == 200, r.text
+    body = r.json()
+    assert body["id"] == ids[0]
+    assert body["mime_type"] == "image/jpeg"
+    assert "download_url" in body
+
+
+def test_list_dataset_assets_pagination_shape():
+    ds_id, ver_id, _ = _seed_assets(5)
+    r = client.get(f"/api/datasets/{ds_id}/assets?limit=2&offset=0")
+    assert r.status_code == 200
+    body = r.json()
+    assert body["total"] == 5
+    assert len(body["items"]) == 2
+    assert body["limit"] == 2 and body["offset"] == 0
+
+
+def test_list_dataset_assets_label_status_filter():
+    ds_id, _, _ = _seed_assets(4)  # 2 labeled, 2 unlabelled
+    r = client.get(f"/api/datasets/{ds_id}/assets?label_status=labeled")
+    assert r.status_code == 200
+    body = r.json()
+    assert body["total"] == 2
+    assert all(item["label_status"] == "labeled" for item in body["items"])
+
+
+def test_asset_neighbors_prev_next_index():
+    ds_id, ver_id, ids = _seed_assets(3)
+    # Neighbors are ordered by (created_at, id). Rather than pin which asset is
+    # first, assert the invariants across all three: total is 3 everywhere, exactly
+    # one has no prev (the first) and exactly one has no next (the last).
+    bodies = []
+    for aid in ids:
+        r = client.get(f"/api/assets/{aid}/neighbors")
+        assert r.status_code == 200
+        bodies.append(r.json())
+
+    assert all(b["total"] == 3 for b in bodies)
+    assert sum(1 for b in bodies if b["prev"] is None) == 1
+    assert sum(1 for b in bodies if b["next"] is None) == 1
+    assert sorted(b["index"] for b in bodies) == [0, 1, 2]
+
+
+def test_asset_neighbors_404_when_missing():
+    assert client.get("/api/assets/missing/neighbors").status_code == 404
+
+
+def test_confirm_upload_creates_asset():
+    ds_id, ver_id, _ = _seed_assets(0)
+    r = client.post(
+        "/api/ingest/confirm",
+        json={
+            "dataset_id": ds_id,
+            "version_id": ver_id,
+            "storage_key": f"datasets/{ver_id}/new.jpg",
+            "filename": "new.jpg",
+            "content_type": "image/jpeg",
+        },
+    )
+    assert r.status_code == 201, r.text
+    assert r.json()["dataset_id"] == ds_id
diff --git a/backend/tests/unit/test_api_experiments.py b/backend/tests/unit/test_api_experiments.py
new file mode 100644
index 0000000..b120d3f
--- /dev/null
+++ b/backend/tests/unit/test_api_experiments.py
@@ -0,0 +1,103 @@
+"""Request-level tests for the experiments router (``api/experiments.py``).
+
+Covers run creation, the paginated list shape + project filter, the 404/200
+detail paths, and the ``/metrics`` endpoint's handling of the several
+``metrics_json`` shapes it must tolerate.
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+
+from app.db.deps import get_current_user
+from app.main import app
+from app.models.experiment import ExperimentRun
+from app.models.user import User
+from tests.conftest import TestingSessionLocal, client
+
+
+def _fake_user() -> User:
+    return User(id="exp-tester", email="exp@example.com", name="T", password_hash="x")
+
+
+app.dependency_overrides[get_current_user] = _fake_user
+
+
+def _mk_run(project_id: str, *, metrics_json: str | None = None) -> str:
+    db = TestingSessionLocal()
+    try:
+        run = ExperimentRun(
+            id=str(uuid.uuid4()),
+            project_id=project_id,
+            owner_id="exp-tester",
+            name="Run",
+            status="succeeded",
+            metrics_json=metrics_json,
+        )
+        db.add(run)
+        db.commit()
+        return run.id
+    finally:
+        db.close()
+
+
+def test_create_run():
+    project_id = uuid.uuid4().hex
+    r = client.post(
+        "/api/experiments/runs",
+        json={"project_id": project_id, "name": "My Run", "params": {"lr": 0.01}},
+    )
+    assert r.status_code == 201, r.text
+    body = r.json()
+    assert body["status"] == "queued"
+    assert body["name"] == "My Run"
+
+
+def test_list_runs_filtered_by_project():
+    project_id = uuid.uuid4().hex
+    _mk_run(project_id)
+    _mk_run(project_id)
+    _mk_run(uuid.uuid4().hex)  # different project
+
+    r = client.get(f"/api/experiments/runs?project_id={project_id}&page=1&page_size=10")
+    assert r.status_code == 200
+    body = r.json()
+    assert body["total"] == 2
+    assert body["page"] == 1 and body["page_size"] == 10
+    assert all(item["project_id"] == project_id for item in body["items"])
+
+
+def test_get_run_404_and_200():
+    assert client.get("/api/experiments/runs/missing").status_code == 404
+    run_id = _mk_run(uuid.uuid4().hex)
+    r = client.get(f"/api/experiments/runs/{run_id}")
+    assert r.status_code == 200
+    assert r.json()["id"] == run_id
+
+
+def test_metrics_endpoint_parses_epochs_dict():
+    blob = json.dumps(
+        {
+            "epochs": [{"epoch": 1, "mAP50": 0.4}, {"epoch": 2, "mAP50": 0.6}],
+            "summary": {"mAP50": 0.6},
+        }
+    )
+    run_id = _mk_run(uuid.uuid4().hex, metrics_json=blob)
+    r = client.get(f"/api/experiments/runs/{run_id}/metrics")
+    assert r.status_code == 200
+    body = r.json()
+    assert len(body["metrics"]) == 2
+    assert body["summary"]["mAP50"] == 0.6
+
+
+def test_metrics_endpoint_tolerates_error_blob():
+    run_id = _mk_run(uuid.uuid4().hex, metrics_json='{"error": "boom"}')
+    r = client.get(f"/api/experiments/runs/{run_id}/metrics")
+    assert r.status_code == 200
+    # An error blob yields no chartable epochs.
+    assert r.json()["metrics"] == []
+
+
+def test_metrics_endpoint_404_when_missing():
+    assert client.get("/api/experiments/runs/missing/metrics").status_code == 404
diff --git a/backend/tests/unit/test_api_ops.py b/backend/tests/unit/test_api_ops.py
new file mode 100644
index 0000000..43a5009
--- /dev/null
+++ b/backend/tests/unit/test_api_ops.py
@@ -0,0 +1,96 @@
+"""Request-level tests for the ops router (``api/ops.py``).
+
+Covers the system-status aggregation, the presigned upload-url endpoint, and
+the frame-extraction validation/dispatch guards — none of which had direct
+tests. Runs with ``MINIO_DISABLED`` so storage probes degrade gracefully.
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from app.db.deps import get_current_user
+from app.main import app
+from app.models.asset import Asset
+from app.models.dataset import Dataset
+from app.models.user import User
+from tests.conftest import TestingSessionLocal, client
+
+
+def _fake_user() -> User:
+    return User(id="ops-tester", email="ops@example.com", name="T", password_hash="x")
+
+
+app.dependency_overrides[get_current_user] = _fake_user
+
+
+def test_system_status_aggregates_components():
+    r = client.get("/api/system/status")
+    assert r.status_code == 200
+    body = r.json()
+    assert body["status"] in ("ok", "degraded", "down")
+    comps = body["components"]
+    assert comps["api"]["status"] == "ok"
+    # DB is the in-memory test SQLite, so it must report reachable.
+    assert comps["database"]["status"] == "ok"
+    assert "queue" in comps and "storage" in comps
+
+
+def test_upload_url_returns_object_key():
+    r = client.post(
+        "/api/ingest/upload-url",
+        json={
+            "projectId": "p1",
+            "datasetVersionId": "ver-9",
+            "filename": "a.jpg",
+            "contentType": "image/jpeg",
+        },
+    )
+    assert r.status_code == 200, r.text
+    body = r.json()
+    assert body["objectKey"] == "datasets/ver-9/a.jpg"
+    assert body["url"]
+
+
+def _seed_asset(*, mime: str, uri: str) -> tuple[str, str]:
+    db = TestingSessionLocal()
+    try:
+        ds = Dataset(id=str(uuid.uuid4()), project_id="p", name="ds")
+        db.add(ds)
+        db.commit()
+        asset = Asset(
+            id=str(uuid.uuid4()),
+            dataset_id=ds.id,
+            uri=uri,
+            mime_type=mime,
+            label_status="unlabelled",
+        )
+        db.add(asset)
+        db.commit()
+        return ds.id, asset.id
+    finally:
+        db.close()
+
+
+def test_extract_frames_404_for_missing_asset():
+    r = client.post("/api/datasets/d/assets/missing/extract-frames")
+    assert r.status_code == 404
+
+
+def test_extract_frames_rejects_non_video_asset():
+    ds_id, asset_id = _seed_asset(mime="image/jpeg", uri="x/y.jpg")
+    r = client.post(f"/api/datasets/{ds_id}/assets/{asset_id}/extract-frames")
+    assert r.status_code == 400
+    assert "not a video" in r.json()["detail"]
+
+
+def test_extract_frames_rejects_mismatched_dataset():
+    _, asset_id = _seed_asset(mime="video/mp4", uri="x/y.mp4")
+    r = client.post(f"/api/datasets/wrong-dataset/assets/{asset_id}/extract-frames")
+    assert r.status_code == 400
+
+
+def test_extract_frames_rejects_invalid_interval():
+    ds_id, asset_id = _seed_asset(mime="video/mp4", uri="x/y.mp4")
+    r = client.post(f"/api/datasets/{ds_id}/assets/{asset_id}/extract-frames?fps_interval=0")
+    assert r.status_code == 422
diff --git a/backend/tests/unit/test_evaluation_service.py b/backend/tests/unit/test_evaluation_service.py
new file mode 100644
index 0000000..ce3c473
--- /dev/null
+++ b/backend/tests/unit/test_evaluation_service.py
@@ -0,0 +1,164 @@
+"""Unit tests for ``services/evaluation_service``.
+
+Covers the previously-untested helpers (the pure ``summarize`` / ``to_dict``
+and the DB-writing ``write_result``), the paginated ``list_evaluations`` query,
+and the ``create_evaluation`` flow — including its validation errors. Celery
+dispatch is stubbed so no broker is required.
+"""
+
+from __future__ import annotations
+
+import uuid
+
+import pytest
+
+from app.models.artifact import ModelArtifact
+from app.models.dataset_version import DatasetVersion
+from app.models.evaluation import Evaluation
+from app.schemas.evaluation import EvaluationCreate
+from app.services import evaluation_service
+from tests.conftest import TestingSessionLocal
+
+
+def _mk_eval(db, **kwargs) -> Evaluation:
+    row = Evaluation(
+        id=str(uuid.uuid4()),
+        project_id=kwargs.pop("project_id", "proj"),
+        artifact_id=kwargs.pop("artifact_id", "art"),
+        dataset_version_id=kwargs.pop("dataset_version_id", "ver"),
+        status=kwargs.pop("status", "queued"),
+        **kwargs,
+    )
+    db.add(row)
+    db.commit()
+    db.refresh(row)
+    return row
+
+
+def test_summarize_picks_first_available_primary_metric():
+    row = Evaluation(
+        id="e1",
+        project_id="p",
+        artifact_id="a",
+        dataset_version_id="v",
+        status="succeeded",
+        metrics_json='{"precision": 0.7, "mAP50": 0.81}',
+    )
+    out = evaluation_service.summarize(row)
+    # mAP50 wins over precision because it comes first in the priority list.
+    assert out["primary_metric_name"] == "mAP50"
+    assert out["primary_metric"] == 0.81
+
+
+def test_summarize_handles_missing_and_bad_metrics():
+    row = Evaluation(
+        id="e2",
+        project_id="p",
+        artifact_id="a",
+        dataset_version_id="v",
+        status="failed",
+        metrics_json="not-json",
+    )
+    out = evaluation_service.summarize(row)
+    assert out["primary_metric"] is None
+    assert out["primary_metric_name"] is None
+
+
+def test_to_dict_parses_json_columns():
+    row = Evaluation(
+        id="e3",
+        project_id="p",
+        artifact_id="a",
+        dataset_version_id="v",
+        status="succeeded",
+        metrics_json='{"mAP50": 0.5}',
+        confusion_json="[[1,2],[3,4]]",
+        classes_json='["cat","dog"]',
+    )
+    out = evaluation_service.to_dict(row)
+    assert out["metrics"] == {"mAP50": 0.5}
+    assert out["confusion"] == [[1, 2], [3, 4]]
+    assert out["classes"] == ["cat", "dog"]
+    # Unset JSON columns fall back to None rather than raising.
+    assert out["per_class"] is None
+
+
+def test_write_result_persists_status_and_completion():
+    db = TestingSessionLocal()
+    try:
+        row = _mk_eval(db, status="running")
+        updated = evaluation_service.write_result(
+            db, row.id, status="succeeded", metrics={"mAP50": 0.9}
+        )
+        assert updated.status == "succeeded"
+        assert updated.completed_at is not None
+        assert evaluation_service.to_dict(updated)["metrics"] == {"mAP50": 0.9}
+    finally:
+        db.close()
+
+
+def test_write_result_returns_none_for_unknown_id():
+    db = TestingSessionLocal()
+    try:
+        assert evaluation_service.write_result(db, "missing", status="failed") is None
+    finally:
+        db.close()
+
+
+def test_list_evaluations_filters_and_paginates():
+    db = TestingSessionLocal()
+    try:
+        art = uuid.uuid4().hex
+        for _ in range(3):
+            _mk_eval(db, artifact_id=art)
+        _mk_eval(db, artifact_id="other")
+
+        rows, total = evaluation_service.list_evaluations(
+            db, artifact_id=art, page=1, page_size=2, return_total=True
+        )
+        assert total == 3
+        assert len(rows) == 2
+
+        flat = evaluation_service.list_evaluations(db, artifact_id=art)
+        assert len(flat) == 3
+    finally:
+        db.close()
+
+
+def test_create_evaluation_validates_artifact_and_version(monkeypatch):
+    db = TestingSessionLocal()
+    try:
+        payload = EvaluationCreate(artifact_id="nope", dataset_version_id="nope")
+        with pytest.raises(evaluation_service.EvaluationError):
+            evaluation_service.create_evaluation(db, payload)
+    finally:
+        db.close()
+
+
+def test_create_evaluation_dispatches_without_cluster(monkeypatch):
+    # Stub Celery so the broker is never contacted.
+    sent: list = []
+    monkeypatch.setattr(
+        evaluation_service.celery_app,
+        "send_task",
+        lambda name, **kw: sent.append((name, kw)),
+    )
+
+    db = TestingSessionLocal()
+    try:
+        ver = DatasetVersion(id=str(uuid.uuid4()), dataset_id="ds", version=1)
+        art = ModelArtifact(
+            id=str(uuid.uuid4()), project_id="proj", type="pytorch", format="pytorch"
+        )
+        db.add_all([ver, art])
+        db.commit()
+
+        payload = EvaluationCreate(artifact_id=art.id, dataset_version_id=ver.id)
+        eval_row, job = evaluation_service.create_evaluation(db, payload)
+
+        assert eval_row.status == "queued"
+        assert job["evaluationId"] == eval_row.id
+        assert job["jobId"]
+        assert sent and sent[0][0] == "app.jobs.tasks.evaluation.evaluate_task"
+    finally:
+        db.close()
diff --git a/backend/tests/unit/test_inference_service.py b/backend/tests/unit/test_inference_service.py
new file mode 100644
index 0000000..6b4b754
--- /dev/null
+++ b/backend/tests/unit/test_inference_service.py
@@ -0,0 +1,83 @@
+"""Unit tests for ``services/inference_service`` beyond the LRU cache.
+
+Focuses on the download helper, the load-error path, and the ``predict``
+dispatch + temp-file cleanup. The cache is pre-seeded and the per-framework
+runners are stubbed so no ML wheels are needed.
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+import uuid
+from pathlib import Path
+
+import pytest
+
+from app.services import inference_service
+from app.services.inference_service import InferenceError, _CacheEntry
+
+
+class _Artifact:
+    def __init__(self, *, storage_path=None, fmt="pytorch", type_="yolo"):
+        self.id = uuid.uuid4().hex
+        self.storage_path = storage_path
+        self.format = fmt
+        self.type = type_
+        self.framework = None
+
+
+def test_download_to_copies_local_file(tmp_path):
+    src = tmp_path / "weights.pt"
+    src.write_bytes(b"weights")
+    dest = tmp_path / "out.pt"
+    assert inference_service._download_to(str(src), dest) is True
+    assert dest.read_bytes() == b"weights"
+
+
+def test_download_to_missing_source_returns_false(tmp_path):
+    dest = tmp_path / "out.pt"
+    assert inference_service._download_to(str(tmp_path / "nope.pt"), dest) is False
+
+
+def test_load_artifact_without_storage_path_raises():
+    with pytest.raises(InferenceError):
+        inference_service._load_artifact(_Artifact(storage_path=None))
+
+
+def test_predict_dispatches_to_yolo_runner_and_cleans_tempfile(monkeypatch):
+    inference_service._cache.clear()
+    artifact = _Artifact()
+    scratch = Path(tempfile.mkdtemp(prefix="vf-test-"))
+    # Seed the cache so get_or_load never tries to download/load a real model.
+    inference_service._cache._cache[artifact.id] = _CacheEntry(
+        kind="ultralytics", model=object(), scratch_dir=scratch
+    )
+
+    captured: dict = {}
+
+    def fake_yolo(model, img_path, score_threshold):
+        captured["img_path"] = img_path
+        # The temp image must exist while the runner is executing.
+        assert os.path.exists(img_path)
+        return {"detections": [{"class": "cat", "bbox": [0, 0, 1, 1], "score": 0.9}]}
+
+    monkeypatch.setattr(inference_service, "_yolo_predict", fake_yolo)
+
+    out = inference_service.predict(artifact, b"fake-image-bytes")
+    assert out["detections"][0]["class"] == "cat"
+    # The scratch image is removed once predict returns.
+    assert not os.path.exists(captured["img_path"])
+
+
+def test_predict_dispatches_to_onnx_runner(monkeypatch):
+    inference_service._cache.clear()
+    artifact = _Artifact(fmt="onnx", type_="onnx")
+    scratch = Path(tempfile.mkdtemp(prefix="vf-test-"))
+    inference_service._cache._cache[artifact.id] = _CacheEntry(
+        kind="onnx", model=object(), scratch_dir=scratch
+    )
+    monkeypatch.setattr(inference_service, "_onnx_predict", lambda m, p, t: {"top_class_index": 7})
+
+    out = inference_service.predict(artifact, b"img")
+    assert out["top_class_index"] == 7
diff --git a/backend/tests/unit/test_rbac.py b/backend/tests/unit/test_rbac.py
new file mode 100644
index 0000000..e87a003
--- /dev/null
+++ b/backend/tests/unit/test_rbac.py
@@ -0,0 +1,151 @@
+"""Unit tests for the workspace authorization gate (``api/rbac.require_role``).
+
+This dependency is the single chokepoint for workspace RBAC, so the role
+hierarchy, the non-membership rejection, and the superuser bypass each get
+direct coverage. We exercise the returned ``dependency`` callable directly with
+a real DB session rather than going through HTTP, which keeps the assertions
+focused on the authorization logic itself.
+"""
+
+from __future__ import annotations
+
+import uuid
+
+import pytest
+from fastapi import HTTPException
+
+from app.api.rbac import ROLE_ORDER, require_role
+from app.models.user import User
+from app.models.workspace import Membership, Role, Workspace
+from tests.conftest import TestingSessionLocal
+
+
+def _mk_user(db, *, email: str | None = None, is_superuser: bool = False) -> User:
+    user = User(
+        id=str(uuid.uuid4()),
+        name="Tester",
+        email=email or f"{uuid.uuid4().hex[:8]}@example.com",
+        password_hash="x",
+        is_superuser=is_superuser,
+    )
+    db.add(user)
+    db.commit()
+    db.refresh(user)
+    return user
+
+
+def _mk_workspace(db, owner_id: str) -> Workspace:
+    ws = Workspace(id=str(uuid.uuid4()), name="WS", created_by=owner_id)
+    db.add(ws)
+    db.commit()
+    db.refresh(ws)
+    return ws
+
+
+def _mk_membership(db, *, user_id: str, workspace_id: str, role: Role) -> Membership:
+    m = Membership(id=str(uuid.uuid4()), user_id=user_id, workspace_id=workspace_id, role=role)
+    db.add(m)
+    db.commit()
+    return m
+
+
+def test_role_order_is_monotonic():
+    # The ladder must be strictly increasing viewer < … < owner for the
+    # >= comparison in require_role to be meaningful.
+    levels = [
+        ROLE_ORDER[Role.VIEWER],
+        ROLE_ORDER[Role.ANNOTATOR],
+        ROLE_ORDER[Role.DEVELOPER],
+        ROLE_ORDER[Role.ADMIN],
+        ROLE_ORDER[Role.OWNER],
+    ]
+    assert levels == sorted(levels)
+    assert len(set(levels)) == len(levels)
+
+
+def test_member_with_sufficient_role_is_allowed():
+    db = TestingSessionLocal()
+    try:
+        user = _mk_user(db)
+        ws = _mk_workspace(db, user.id)
+        _mk_membership(db, user_id=user.id, workspace_id=ws.id, role=Role.DEVELOPER)
+
+        dep = require_role(Role.DEVELOPER)
+        result = dep(workspace_id=ws.id, current_user=user, db=db)
+        assert result is user
+    finally:
+        db.close()
+
+
+def test_member_with_higher_role_is_allowed():
+    db = TestingSessionLocal()
+    try:
+        user = _mk_user(db)
+        ws = _mk_workspace(db, user.id)
+        _mk_membership(db, user_id=user.id, workspace_id=ws.id, role=Role.OWNER)
+
+        dep = require_role(Role.DEVELOPER)
+        assert dep(workspace_id=ws.id, current_user=user, db=db) is user
+    finally:
+        db.close()
+
+
+def test_member_with_insufficient_role_is_forbidden():
+    db = TestingSessionLocal()
+    try:
+        user = _mk_user(db)
+        ws = _mk_workspace(db, user.id)
+        _mk_membership(db, user_id=user.id, workspace_id=ws.id, role=Role.VIEWER)
+
+        dep = require_role(Role.DEVELOPER)
+        with pytest.raises(HTTPException) as exc:
+            dep(workspace_id=ws.id, current_user=user, db=db)
+        assert exc.value.status_code == 403
+        assert "developer" in exc.value.detail
+    finally:
+        db.close()
+
+
+def test_non_member_is_forbidden():
+    db = TestingSessionLocal()
+    try:
+        user = _mk_user(db)
+        ws = _mk_workspace(db, user.id)
+        # No membership row created for this user/workspace pair.
+        dep = require_role(Role.VIEWER)
+        with pytest.raises(HTTPException) as exc:
+            dep(workspace_id=ws.id, current_user=user, db=db)
+        assert exc.value.status_code == 403
+        assert "member" in exc.value.detail.lower()
+    finally:
+        db.close()
+
+
+def test_superuser_email_bypasses_membership_check(monkeypatch):
+    db = TestingSessionLocal()
+    try:
+        user = _mk_user(db, email="root@example.com")
+        ws = _mk_workspace(db, user.id)
+        # Deliberately no membership — the bypass must still admit the user.
+        monkeypatch.setenv("FIRST_SUPERUSER_EMAIL", "root@example.com")
+        monkeypatch.delenv("SUPERUSER_EMAIL", raising=False)
+
+        dep = require_role(Role.OWNER)
+        assert dep(workspace_id=ws.id, current_user=user, db=db) is user
+    finally:
+        db.close()
+
+
+def test_non_superuser_email_does_not_bypass(monkeypatch):
+    db = TestingSessionLocal()
+    try:
+        user = _mk_user(db, email="someone@example.com")
+        ws = _mk_workspace(db, user.id)
+        monkeypatch.setenv("FIRST_SUPERUSER_EMAIL", "root@example.com")
+
+        dep = require_role(Role.VIEWER)
+        with pytest.raises(HTTPException) as exc:
+            dep(workspace_id=ws.id, current_user=user, db=db)
+        assert exc.value.status_code == 403
+    finally:
+        db.close()
diff --git a/backend/tests/unit/test_storage_service.py b/backend/tests/unit/test_storage_service.py
new file mode 100644
index 0000000..cef8925
--- /dev/null
+++ b/backend/tests/unit/test_storage_service.py
@@ -0,0 +1,105 @@
+"""Unit tests for the object-storage abstraction (``services/storage``).
+
+The real MinIO client is replaced with a tiny in-memory fake so we can verify
+the bucket/object plumbing, the presign fallback when storage is disabled, and
+the ``MINIO_BUCKET`` / ``S3_BUCKET`` precedence — none of which were covered.
+"""
+
+from __future__ import annotations
+
+from app.services import storage
+
+
+class _FakeMinio:
+    def __init__(self, existing_buckets=None):
+        self.objects: dict[tuple[str, str], bytes] = {}
+        self.buckets: set[str] = set(existing_buckets or [])
+        self.made_buckets: list[str] = []
+
+    def put_object(self, bucket, key, data, length=None, content_type=None):
+        self.objects[(bucket, key)] = data.read()
+
+    def get_object(self, bucket, key):
+        payload = self.objects[(bucket, key)]
+        return _FakeResponse(payload)
+
+    def bucket_exists(self, bucket):
+        return bucket in self.buckets
+
+    def make_bucket(self, bucket):
+        self.buckets.add(bucket)
+        self.made_buckets.append(bucket)
+
+
+class _FakeResponse:
+    def __init__(self, payload: bytes):
+        self._payload = payload
+        self.closed = False
+        self.released = False
+
+    def read(self):
+        return self._payload
+
+    def close(self):
+        self.closed = True
+
+    def release_conn(self):
+        self.released = True
+
+
+def test_default_bucket_prefers_minio_bucket(monkeypatch):
+    monkeypatch.setenv("MINIO_BUCKET", "primary")
+    monkeypatch.setenv("S3_BUCKET", "secondary")
+    assert storage._default_bucket() == "primary"
+
+    monkeypatch.delenv("MINIO_BUCKET", raising=False)
+    assert storage._default_bucket() == "secondary"
+
+
+def test_put_and_get_bytes_round_trip():
+    client = _FakeMinio()
+    key = storage.put_bytes(client, "a/b.bin", b"hello", bucket="bkt")
+    assert key == "a/b.bin"
+    assert storage.get_bytes(client, "a/b.bin", bucket="bkt") == b"hello"
+
+
+def test_get_bytes_closes_and_releases_response():
+    client = _FakeMinio()
+    storage.put_bytes(client, "k", b"x", bucket="bkt")
+    # Patch get_object to hand back a response we can inspect afterwards.
+    resp = _FakeResponse(b"x")
+    client.get_object = lambda b, k: resp  # type: ignore[assignment]
+    assert storage.get_bytes(client, "k", bucket="bkt") == b"x"
+    assert resp.closed and resp.released
+
+
+def test_ensure_bucket_creates_only_when_missing(monkeypatch):
+    monkeypatch.delenv("MINIO_BUCKET_POLICY_JSON", raising=False)
+    client = _FakeMinio(existing_buckets={"exists"})
+
+    storage.ensure_bucket(client, "exists")
+    assert client.made_buckets == []
+
+    storage.ensure_bucket(client, "fresh")
+    assert client.made_buckets == ["fresh"]
+
+
+def test_presign_put_url_disabled_returns_stub(monkeypatch):
+    monkeypatch.setenv("MINIO_DISABLED", "true")
+    monkeypatch.setenv("S3_BUCKET", "visionforge")
+    out = storage.presign_put_url("ver-1", "img.jpg")
+    assert out["fields"] == {}
+    assert out["url"].endswith("visionforge/datasets/ver-1/img.jpg")
+
+
+def test_get_minio_client_reads_env(monkeypatch):
+    monkeypatch.setenv("MINIO_ENDPOINT", "storage.local:9000")
+    monkeypatch.setenv("MINIO_ACCESS_KEY", "key")
+    monkeypatch.setenv("MINIO_SECRET_KEY", "secret")
+    monkeypatch.setenv("MINIO_SECURE", "false")
+    client = storage.get_minio_client()
+    # Constructing the client must not touch the network; just confirm we got a
+    # real Minio instance configured from the env vars above.
+    from minio import Minio
+
+    assert isinstance(client, Minio)
diff --git a/backend/tests/unit/test_task_onnx_export.py b/backend/tests/unit/test_task_onnx_export.py
new file mode 100644
index 0000000..ac5fc3e
--- /dev/null
+++ b/backend/tests/unit/test_task_onnx_export.py
@@ -0,0 +1,99 @@
+"""Unit tests for the ONNX export Celery task.
+
+Runs with ``MINIO_DISABLED=true`` so the task takes its storage-less path:
+no weights are downloaded or uploaded, but it still must create the ONNX
+``ModelArtifact`` row and drive the Job through its status transitions.
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from app.jobs.tasks import onnx_export as onnx_task
+from app.models.artifact import ModelArtifact
+from app.models.cluster import Cluster
+from app.models.experiment import ExperimentRun
+from app.services.jobs_service import create_job
+from tests.conftest import TestingSessionLocal
+
+
+def _mk_run(db) -> ExperimentRun:
+    run = ExperimentRun(
+        id=str(uuid.uuid4()),
+        project_id="proj-onnx",
+        owner_id="owner-1",
+        name="Run",
+        status="succeeded",
+    )
+    db.add(run)
+    db.commit()
+    db.refresh(run)
+    return run
+
+
+def test_export_task_creates_onnx_artifact_and_succeeds():
+    db = TestingSessionLocal()
+    run = _mk_run(db)
+    job = create_job(db, "onnx_export", {"experimentId": run.id})
+    run_id, job_id = run.id, job.id
+    db.close()
+
+    result = onnx_task.export_task({"jobId": job_id, "experimentId": run_id})
+
+    assert result["status"] == "succeeded"
+
+    check = TestingSessionLocal()
+    try:
+        artifact = (
+            check.query(ModelArtifact)
+            .filter(ModelArtifact.run_id == run_id, ModelArtifact.type == "onnx")
+            .first()
+        )
+        assert artifact is not None
+
+        from app.models.job import Job
+
+        assert check.get(Job, job_id).status == "succeeded"
+    finally:
+        check.close()
+
+
+def test_export_task_missing_run_fails_job():
+    db = TestingSessionLocal()
+    job = create_job(db, "onnx_export", {})
+    job_id = job.id
+    db.close()
+
+    result = onnx_task.export_task({"jobId": job_id, "experimentId": "nope"})
+
+    assert result["status"] == "failed"
+    assert "not found" in result["error"]
+
+    check = TestingSessionLocal()
+    try:
+        from app.models.job import Job
+
+        assert check.get(Job, job_id).status == "failed"
+    finally:
+        check.close()
+
+
+def test_export_task_releases_reserved_cluster_on_success():
+    db = TestingSessionLocal()
+    run = _mk_run(db)
+    job = create_job(db, "onnx_export", {"experimentId": run.id})
+    cluster = Cluster(id=str(uuid.uuid4()), name="gpu", status="busy", active_job_id=job.id)
+    db.add(cluster)
+    db.commit()
+    run_id, job_id, cluster_id = run.id, job.id, cluster.id
+    db.close()
+
+    onnx_task.export_task({"jobId": job_id, "experimentId": run_id})
+
+    check = TestingSessionLocal()
+    try:
+        refreshed = check.get(Cluster, cluster_id)
+        assert refreshed.active_job_id is None
+        assert refreshed.status == "online"
+    finally:
+        check.close()
diff --git a/backend/tests/unit/test_task_prelabels.py b/backend/tests/unit/test_task_prelabels.py
new file mode 100644
index 0000000..e815ba4
--- /dev/null
+++ b/backend/tests/unit/test_task_prelabels.py
@@ -0,0 +1,93 @@
+"""Unit tests for the prelabeling Celery task.
+
+With ``MINIO_DISABLED=true`` and no model key, the task can't load a model, so
+it makes no predictions — but it must still walk the unlabeled assets, leave
+them untouched, drive the Job to ``succeeded``, and report accurate counts.
+We also cover the helper that extracts a storage key from an asset URI.
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from app.jobs.tasks import prelabels as prelabels_task
+from app.jobs.tasks.prelabels import _extract_minio_key
+from app.models.asset import Asset
+from app.models.dataset import Dataset
+from app.models.dataset_version import DatasetVersion
+from app.services.jobs_service import create_job
+from tests.conftest import TestingSessionLocal
+
+
+def _mk_version_with_assets(db, n: int) -> str:
+    ds = Dataset(id=str(uuid.uuid4()), project_id="p", name="ds")
+    db.add(ds)
+    db.commit()
+    ver = DatasetVersion(id=str(uuid.uuid4()), dataset_id=ds.id, version=1)
+    db.add(ver)
+    db.commit()
+    for _ in range(n):
+        db.add(
+            Asset(
+                id=str(uuid.uuid4()),
+                dataset_id=ds.id,
+                version_id=ver.id,
+                uri=f"s3://bucket/{uuid.uuid4().hex}.jpg",
+                mime_type="image/jpeg",
+                label_status="unlabelled",
+            )
+        )
+    db.commit()
+    return ver.id
+
+
+def test_extract_minio_key_handles_uri_schemes():
+    assert _extract_minio_key("s3://bucket/path/to/img.jpg") == "path/to/img.jpg"
+    assert _extract_minio_key("minio://bucket/img.jpg") == "img.jpg"
+    # For http(s) the scheme + host are stripped, leaving bucket/key.
+    assert _extract_minio_key("https://host/bucket/img.jpg") == "bucket/img.jpg"
+    # A bare key (no scheme) is returned unchanged.
+    assert _extract_minio_key("plain/key.jpg") == "plain/key.jpg"
+
+
+def test_apply_prelabels_no_model_is_noop_but_succeeds():
+    db = TestingSessionLocal()
+    version_id = _mk_version_with_assets(db, 3)
+    job = create_job(db, "prelabels", {"datasetVersionId": version_id})
+    job_id = job.id
+    db.close()
+
+    result = prelabels_task.apply_prelabels(
+        {"jobId": job_id, "datasetVersionId": version_id, "task": "detect"}
+    )
+
+    assert result["status"] == "succeeded"
+    assert result["total_assets"] == 3
+    assert result["labeled_count"] == 0
+
+    check = TestingSessionLocal()
+    try:
+        from app.models.job import Job
+
+        assert check.get(Job, job_id).status == "succeeded"
+        # No annotations were invented, so assets stay unlabelled.
+        remaining = (
+            check.query(Asset)
+            .filter(Asset.version_id == version_id, Asset.label_status == "unlabelled")
+            .count()
+        )
+        assert remaining == 3
+    finally:
+        check.close()
+
+
+def test_apply_prelabels_no_version_succeeds_with_zero_assets():
+    db = TestingSessionLocal()
+    job = create_job(db, "prelabels", {})
+    job_id = job.id
+    db.close()
+
+    result = prelabels_task.apply_prelabels({"jobId": job_id})
+
+    assert result["status"] == "succeeded"
+    assert result["total_assets"] == 0
diff --git a/backend/tests/unit/test_task_training.py b/backend/tests/unit/test_task_training.py
new file mode 100644
index 0000000..c72bb73
--- /dev/null
+++ b/backend/tests/unit/test_task_training.py
@@ -0,0 +1,126 @@
+"""Unit tests for the Celery training task orchestration.
+
+The task owns the framework-agnostic scaffolding: load the run, resolve the
+split, drive a trainer, persist metrics, and march the Job/Run through their
+status transitions. We stub the trainer (so no ML stack is needed) and run with
+``MINIO_DISABLED=true`` so the test is hermetic and CI-light.
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+
+from app.jobs.tasks import training as training_task
+from app.models.cluster import Cluster
+from app.models.experiment import ExperimentRun
+from app.services import training as training_pkg
+from app.services.jobs_service import create_job
+from app.services.training.base import TrainResult
+from tests.conftest import TestingSessionLocal
+
+
+class _FakeTrainer:
+    key = "fake"
+
+    def run(self, ctx):  # noqa: D401 - matches Trainer.run signature
+        ctx.report(progress=0.5, epoch={"epoch": 1, "loss": 0.1})
+        return TrainResult(best_model_path=None, final_metrics={"mAP50": 0.42})
+
+
+def _mk_run(db, **params) -> ExperimentRun:
+    run = ExperimentRun(
+        id=str(uuid.uuid4()),
+        project_id="proj-1",
+        owner_id="owner-1",
+        name="Run",
+        status="queued",
+        params_json=json.dumps({"framework": "fake", "task": "detect", **params}),
+    )
+    db.add(run)
+    db.commit()
+    db.refresh(run)
+    return run
+
+
+def test_train_task_success_marks_run_and_job_succeeded(monkeypatch):
+    monkeypatch.setattr(training_pkg, "get_trainer", lambda fw: _FakeTrainer())
+
+    db = TestingSessionLocal()
+    run = _mk_run(db)
+    job = create_job(db, "training", {"experimentId": run.id})
+    run_id, job_id = run.id, job.id
+    db.close()
+
+    result = training_task.train_task({"jobId": job_id, "experimentId": run_id})
+
+    assert result["status"] == "succeeded"
+    assert result["experiment_id"] == run_id
+
+    check = TestingSessionLocal()
+    try:
+        refreshed_run = check.get(ExperimentRun, run_id)
+        assert refreshed_run.status == "succeeded"
+        assert refreshed_run.completed_at is not None
+        metrics = json.loads(refreshed_run.metrics_json)
+        # The fake trainer's final_metrics become the run summary, and the
+        # reported epoch is recorded in the per-epoch history.
+        assert metrics["summary"]["mAP50"] == 0.42
+        assert metrics["epochs"] and metrics["epochs"][0]["epoch"] == 1
+
+        from app.models.job import Job
+
+        assert check.get(Job, job_id).status == "succeeded"
+    finally:
+        check.close()
+
+
+def test_train_task_missing_run_fails_job(monkeypatch):
+    monkeypatch.setattr(training_pkg, "get_trainer", lambda fw: _FakeTrainer())
+
+    db = TestingSessionLocal()
+    job = create_job(db, "training", {})
+    job_id = job.id
+    db.close()
+
+    result = training_task.train_task({"jobId": job_id, "experimentId": "does-not-exist"})
+
+    assert result["status"] == "failed"
+    assert "not found" in result["error"]
+
+    check = TestingSessionLocal()
+    try:
+        from app.models.job import Job
+
+        assert check.get(Job, job_id).status == "failed"
+    finally:
+        check.close()
+
+
+def test_train_task_releases_reserved_cluster_on_success(monkeypatch):
+    monkeypatch.setattr(training_pkg, "get_trainer", lambda fw: _FakeTrainer())
+
+    db = TestingSessionLocal()
+    run = _mk_run(db)
+    job = create_job(db, "training", {"experimentId": run.id})
+    cluster = Cluster(
+        id=str(uuid.uuid4()),
+        name="gpu-box",
+        status="busy",
+        active_job_id=job.id,
+    )
+    db.add(cluster)
+    db.commit()
+    run_id, job_id, cluster_id = run.id, job.id, cluster.id
+    db.close()
+
+    training_task.train_task({"jobId": job_id, "experimentId": run_id})
+
+    check = TestingSessionLocal()
+    try:
+        refreshed = check.get(Cluster, cluster_id)
+        # Terminal job status must free the cluster so capacity isn't leaked.
+        assert refreshed.active_job_id is None
+        assert refreshed.status == "online"
+    finally:
+        check.close()
diff --git a/frontend/package.json b/frontend/package.json
index 8eaa2f2..8937689 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -9,6 +9,8 @@
     "preview": "vite preview --host",
     "lint": "eslint \"src/**/*.{js,jsx}\"",
     "format": "prettier --write \"**/*.{js,jsx,css,json,md}\"",
+    "test:unit": "vitest run",
+    "test:unit:watch": "vitest",
     "test:ui": "playwright test",
     "test:ui:headed": "playwright test --headed",
     "playwright:install": "playwright install --with-deps"
@@ -17,6 +19,8 @@
     "@eslint/js": "^9.12.0",
     "@playwright/test": "^1.48.2",
     "@tailwindcss/vite": "^4.0.0",
+    "@testing-library/jest-dom": "^6.6.3",
+    "@testing-library/react": "^16.0.1",
     "@types/node": "^22.7.4",
     "@types/react": "^19.1.13",
     "@types/react-dom": "^19.1.9",
@@ -30,7 +34,9 @@
     "tailwind-merge": "^3.3.1",
     "tailwindcss": "^4.0.0",
     "typescript": "^5.9.2",
-    "vite": "^5.4.8"
+    "vite": "^5.4.8",
+    "vitest": "^2.1.8",
+    "jsdom": "^25.0.1"
   },
   "dependencies": {
     "react": "^19.0.0",
diff --git a/frontend/playwright.config.ts b/frontend/playwright.config.ts
index 0671212..3a9eb26 100644
--- a/frontend/playwright.config.ts
+++ b/frontend/playwright.config.ts
@@ -2,6 +2,9 @@ import { defineConfig, devices } from '@playwright/test';
 
 export default defineConfig({
   testDir: './tests',
+  // Playwright owns the *.spec.ts e2e/visual suites; Vitest unit tests live in
+  // tests/unit/*.test.{ts,tsx} and must not be picked up by the browser runner.
+  testMatch: '**/*.spec.ts',
   fullyParallel: true,
   retries: 0,
   reporter: [['list']],
diff --git a/frontend/tests/unit/api.test.ts b/frontend/tests/unit/api.test.ts
new file mode 100644
index 0000000..c3059ef
--- /dev/null
+++ b/frontend/tests/unit/api.test.ts
@@ -0,0 +1,115 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { apiUrl, apiGet, apiPost, apiDelete } from '@/services/api';
+import { setStoredToken } from '@/services/token-store';
+
+function jsonResponse(data: unknown, init: { ok?: boolean; status?: number } = {}) {
+  const ok = init.ok ?? true;
+  return {
+    ok,
+    status: init.status ?? (ok ? 200 : 400),
+    json: async () => data,
+  } as Response;
+}
+
+beforeEach(() => {
+  localStorage.clear();
+  // jsdom's default location can't be navigated; replace it with a writable
+  // stub so handle401's `location.href = '/login'` is observable and safe.
+  Object.defineProperty(window, 'location', {
+    value: { href: '', origin: 'http://localhost:5173' },
+    writable: true,
+    configurable: true,
+  });
+});
+
+afterEach(() => {
+  vi.unstubAllEnvs();
+  vi.unstubAllGlobals();
+});
+
+describe('apiUrl', () => {
+  it('prefers an explicit runtime API base override', () => {
+    // The localStorage `API_URL` override takes precedence over the
+    // origin-derived fallback (used to point the SPA at a remote API).
+    localStorage.setItem('API_URL', 'http://api.example');
+    expect(apiUrl('/projects')).toBe('http://api.example/projects');
+  });
+
+  it('falls back to the origin with the dev port swapped 5173 -> 8000', () => {
+    expect(apiUrl('/health')).toBe('http://localhost:8000/health');
+  });
+
+  it('normalises a path that is missing its leading slash', () => {
+    expect(apiUrl('health')).toBe('http://localhost:8000/health');
+  });
+});
+
+describe('apiGet', () => {
+  it('returns parsed JSON and attaches the bearer token when present', async () => {
+    setStoredToken('tok-123');
+    const fetchMock = vi.fn(async () => jsonResponse({ id: '1' }));
+    vi.stubGlobal('fetch', fetchMock);
+
+    const out = await apiGet<{ id: string }>('/projects/1');
+    expect(out).toEqual({ id: '1' });
+
+    const [, opts] = fetchMock.mock.calls[0];
+    expect((opts as RequestInit).headers).toMatchObject({
+      Authorization: 'Bearer tok-123',
+    });
+  });
+
+  it('omits the Authorization header when no token is stored', async () => {
+    const fetchMock = vi.fn(async () => jsonResponse({ ok: true }));
+    vi.stubGlobal('fetch', fetchMock);
+
+    await apiGet('/public');
+    const [, opts] = fetchMock.mock.calls[0];
+    expect((opts as RequestInit).headers).not.toHaveProperty('Authorization');
+  });
+
+  it('clears the session and redirects to /login on 401', async () => {
+    setStoredToken('expired');
+    vi.stubGlobal('fetch', vi.fn(async () => jsonResponse({}, { ok: false, status: 401 })));
+
+    await expect(apiGet('/secure')).rejects.toThrow(/session expired/i);
+    expect(localStorage.getItem('vf_access_token')).toBeNull();
+    expect(window.location.href).toBe('/login');
+  });
+
+  it('throws the server-provided detail on a non-ok response', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn(async () => jsonResponse({ detail: 'nope' }, { ok: false, status: 500 }))
+    );
+    await expect(apiGet('/boom')).rejects.toThrow('nope');
+  });
+});
+
+describe('apiPost', () => {
+  it('sends a JSON body with the correct method and content type', async () => {
+    const fetchMock = vi.fn(async () => jsonResponse({ created: true }));
+    vi.stubGlobal('fetch', fetchMock);
+
+    const out = await apiPost<{ created: boolean }>('/projects', { name: 'x' });
+    expect(out).toEqual({ created: true });
+
+    const [, opts] = fetchMock.mock.calls[0] as [string, RequestInit];
+    expect(opts.method).toBe('POST');
+    expect(opts.body).toBe(JSON.stringify({ name: 'x' }));
+    expect(opts.headers).toMatchObject({ 'Content-Type': 'application/json' });
+  });
+});
+
+describe('apiDelete', () => {
+  it('resolves on success and throws on failure', async () => {
+    vi.stubGlobal('fetch', vi.fn(async () => jsonResponse(null, { ok: true, status: 204 })));
+    await expect(apiDelete('/projects/1')).resolves.toBeUndefined();
+
+    vi.stubGlobal(
+      'fetch',
+      vi.fn(async () => jsonResponse({ detail: 'gone' }, { ok: false, status: 404 }))
+    );
+    await expect(apiDelete('/projects/1')).rejects.toThrow('gone');
+  });
+});
diff --git a/frontend/tests/unit/auth-store.test.tsx b/frontend/tests/unit/auth-store.test.tsx
new file mode 100644
index 0000000..60b7b2e
--- /dev/null
+++ b/frontend/tests/unit/auth-store.test.tsx
@@ -0,0 +1,89 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { renderHook, act, waitFor } from '@testing-library/react';
+import React from 'react';
+
+// Stub the auth API so the store is tested in isolation from the network.
+vi.mock('@/services/auth', () => ({
+  login: vi.fn(),
+  signup: vi.fn(),
+  logout: vi.fn(async () => {}),
+}));
+
+import * as authApi from '@/services/auth';
+import { AuthProvider, useAuth } from '@/services/auth-store';
+
+const wrapper = ({ children }: { children: React.ReactNode }) => (
+  <AuthProvider>{children}</AuthProvider>
+);
+
+const fakeAuthResponse = {
+  access_token: 'access-1',
+  refresh_token: 'refresh-1',
+  token_type: 'bearer',
+  user: { id: 'u1', email: 'a@b.c', displayName: 'A' },
+};
+
+describe('auth-store', () => {
+  beforeEach(() => {
+    localStorage.clear();
+    vi.clearAllMocks();
+  });
+
+  it('throws when useAuth is used outside an AuthProvider', () => {
+    expect(() => renderHook(() => useAuth())).toThrow(/within AuthProvider/i);
+  });
+
+  it('starts unauthenticated once storage has been restored', async () => {
+    const { result } = renderHook(() => useAuth(), { wrapper });
+    await waitFor(() => expect(result.current.isLoading).toBe(false));
+    expect(result.current.user).toBeNull();
+    expect(result.current.token).toBeNull();
+  });
+
+  it('login populates state and persists tokens + user', async () => {
+    (authApi.login as ReturnType<typeof vi.fn>).mockResolvedValue(fakeAuthResponse);
+
+    const { result } = renderHook(() => useAuth(), { wrapper });
+    await waitFor(() => expect(result.current.isLoading).toBe(false));
+
+    await act(async () => {
+      await result.current.login('a@b.c', 'pw');
+    });
+
+    expect(authApi.login).toHaveBeenCalledWith('a@b.c', 'pw');
+    expect(result.current.user?.id).toBe('u1');
+    expect(result.current.token).toBe('access-1');
+    expect(localStorage.getItem('vf_access_token')).toBe('access-1');
+    expect(localStorage.getItem('vf_refresh_token')).toBe('refresh-1');
+    expect(JSON.parse(localStorage.getItem('vf_user')!).id).toBe('u1');
+  });
+
+  it('logout clears state and stored credentials', async () => {
+    (authApi.login as ReturnType<typeof vi.fn>).mockResolvedValue(fakeAuthResponse);
+    const { result } = renderHook(() => useAuth(), { wrapper });
+    await waitFor(() => expect(result.current.isLoading).toBe(false));
+
+    await act(async () => {
+      await result.current.login('a@b.c', 'pw');
+    });
+    await act(async () => {
+      await result.current.logout();
+    });
+
+    expect(authApi.logout).toHaveBeenCalled();
+    expect(result.current.user).toBeNull();
+    expect(result.current.token).toBeNull();
+    expect(localStorage.getItem('vf_access_token')).toBeNull();
+  });
+
+  it('restores an existing session from localStorage on mount', async () => {
+    localStorage.setItem('vf_access_token', 'stored-token');
+    localStorage.setItem('vf_user', JSON.stringify({ id: 'u9', email: 'x@y.z' }));
+
+    const { result } = renderHook(() => useAuth(), { wrapper });
+    await waitFor(() => expect(result.current.isLoading).toBe(false));
+
+    expect(result.current.token).toBe('stored-token');
+    expect(result.current.user?.id).toBe('u9');
+  });
+});
diff --git a/frontend/tests/unit/setup.ts b/frontend/tests/unit/setup.ts
new file mode 100644
index 0000000..816bbcb
--- /dev/null
+++ b/frontend/tests/unit/setup.ts
@@ -0,0 +1,7 @@
+import '@testing-library/jest-dom/vitest';
+import { afterEach } from 'vitest';
+
+// Keep tests isolated: localStorage carries auth state between cases.
+afterEach(() => {
+  localStorage.clear();
+});
diff --git a/frontend/tests/unit/token-store.test.ts b/frontend/tests/unit/token-store.test.ts
new file mode 100644
index 0000000..498636a
--- /dev/null
+++ b/frontend/tests/unit/token-store.test.ts
@@ -0,0 +1,32 @@
+import { describe, it, expect, beforeEach } from 'vitest';
+import {
+  getStoredToken,
+  setStoredToken,
+  clearStoredToken,
+} from '@/services/token-store';
+
+describe('token-store', () => {
+  beforeEach(() => localStorage.clear());
+
+  it('returns null when no token is stored', () => {
+    expect(getStoredToken()).toBeNull();
+  });
+
+  it('round-trips the access token', () => {
+    setStoredToken('abc.def.ghi');
+    expect(getStoredToken()).toBe('abc.def.ghi');
+    expect(localStorage.getItem('vf_access_token')).toBe('abc.def.ghi');
+  });
+
+  it('clears the access, refresh, and user keys together', () => {
+    setStoredToken('tok');
+    localStorage.setItem('vf_refresh_token', 'refresh');
+    localStorage.setItem('vf_user', '{"id":"1"}');
+
+    clearStoredToken();
+
+    expect(getStoredToken()).toBeNull();
+    expect(localStorage.getItem('vf_refresh_token')).toBeNull();
+    expect(localStorage.getItem('vf_user')).toBeNull();
+  });
+});
diff --git a/frontend/vitest.config.ts b/frontend/vitest.config.ts
new file mode 100644
index 0000000..13ad319
--- /dev/null
+++ b/frontend/vitest.config.ts
@@ -0,0 +1,21 @@
+import { defineConfig } from 'vitest/config';
+import react from '@vitejs/plugin-react';
+import path from 'path';
+
+// Vitest config kept separate from vite.config.ts so the production build never
+// depends on test-only settings. Only the tests/unit/*.test.* files run here;
+// the Playwright *.spec.ts suites are owned by playwright.config.ts.
+export default defineConfig({
+  plugins: [react()],
+  resolve: {
+    alias: {
+      '@': path.resolve(__dirname, 'src'),
+    },
+  },
+  test: {
+    environment: 'jsdom',
+    globals: true,
+    setupFiles: ['./tests/unit/setup.ts'],
+    include: ['tests/unit/**/*.test.{ts,tsx}'], // *.spec.ts is Playwright-only
+  },
+});

From 2622329fe1b796e5d4ac5bebab9e1eb66b8a1dd4 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 31 May 2026 21:23:35 +0000
Subject: [PATCH 2/2] ci: lower backend coverage floor to 60 for stable gate

The 62 floor sat right at the lightweight-CI coverage level (the ML-stack
trainer tests skip in CI, so it measures lower than a full local run),
making the gate prone to flaking red. Drop to 60 for a safe margin; ratchet
up as lightweight-CI coverage genuinely climbs.

https://claude.ai/code/session_011h7jZv18xWfFmT6fDz9EU4
---
 .github/workflows/ci.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 56f38c1..e9a717e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,8 +44,10 @@ jobs:
           SKIP_DB_MIGRATIONS: "1"
         working-directory: backend
         # addopts in pyproject.toml already enables --cov=app; the floor here
-        # locks in current coverage and is meant to be ratcheted upward.
-        run: pytest -q tests/unit/ --cov-fail-under=62
+        # locks in current coverage and is meant to be ratcheted upward. It sits
+        # below the lightweight-CI coverage (the ML-stack trainer tests skip
+        # here, so CI measures lower than a full local run) to avoid flaky reds.
+        run: pytest -q tests/unit/ --cov-fail-under=60
 
   frontend:
     name: Frontend (lint + build)