Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions backend/src/app/api/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from app.db.deps import get_current_user, get_db
from app.models.annotation import REVIEW_STATUSES
from app.models.user import User
from app.services import inference_service, suggestion_service
from app.services.annotation_service import (
AnnotationError,
VersionConflictError,
Expand Down Expand Up @@ -247,6 +248,78 @@ def review_queue_summary(
return review_summary(db, dataset_id=dataset_id, version_id=version_id)


class SuggestRequest(BaseModel):
    """Request body for the ``POST /suggest`` endpoint."""

    # Asset the suggestion service is asked to run on.
    asset_id: str
    # Optional explicit model artifact; None leaves the choice to
    # suggestion_service (NOTE(review): selection rule lives there — confirm).
    artifact_id: str | None = None
    # Score threshold forwarded to the suggestion service; validated to lie
    # in [0.0, 1.0], default 0.25. Presumably suggestions scoring below it
    # are dropped — confirm against suggestion_service.
    score_threshold: float = Field(0.25, ge=0.0, le=1.0)


@router.post("/suggest")
def suggest(
    body: SuggestRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Propose annotations for an asset by running a trained model on it.

    The proposals are returned to the caller rather than stored; the
    annotator overlays them and saves the accepted ones through the regular
    bulk path.

    Error mapping:
        * ``NoModelError``      -> 404 (no model available)
        * ``SuggestionError``   -> 400
        * ``InferenceError``    -> 502 (inference failed)
    """
    try:
        artifact, suggestions = suggestion_service.suggest_annotations(
            db,
            asset_id=body.asset_id,
            artifact_id=body.artifact_id,
            score_threshold=body.score_threshold,
        )
    # NoModelError is handled before the more general SuggestionError so it
    # keeps its dedicated 404 status.
    except suggestion_service.NoModelError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except suggestion_service.SuggestionError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except inference_service.InferenceError as exc:
        raise HTTPException(status_code=502, detail=f"inference failed: {exc}") from exc

    # Identify which model produced the proposals.
    artifact_info = {
        "id": artifact.id,
        "name": artifact.name,
        "version": artifact.version,
    }

    # Flatten each suggestion object into a plain JSON-friendly dict.
    proposals = []
    for suggestion in suggestions:
        proposals.append(
            {
                "type": suggestion.type,
                "geometry": suggestion.geometry,
                "class_name": suggestion.class_name,
                "score": suggestion.score,
            }
        )

    return {"artifact": artifact_info, "suggestions": proposals}


@router.get("/suggest/artifacts")
def suggest_artifacts(
    dataset_id: str = Query(...),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return the candidate model artifacts for a dataset.

    Feeds the model-override dropdown. Items keep the order produced by
    ``suggestion_service.candidate_artifacts_for_dataset`` (newest first).
    """
    items = []
    for artifact in suggestion_service.candidate_artifacts_for_dataset(db, dataset_id):
        created = artifact.created_at
        items.append(
            {
                "id": artifact.id,
                "name": artifact.name,
                "version": artifact.version,
                "format": artifact.format,
                # Serialise the timestamp as ISO-8601; missing stays None.
                "created_at": created.isoformat() if created else None,
            }
        )
    return {"items": items}


class ErrorMineRequest(BaseModel):
artifact_id: str
dataset_version_id: str
Expand Down
19 changes: 18 additions & 1 deletion backend/src/app/api/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
from app.db.deps import get_current_user, get_db
from app.models.user import User
from app.services.annotation_service import get_asset_annotations
from app.services.asset_service import confirm_upload, get_asset, get_dataset_stats, list_assets
from app.services.asset_service import (
confirm_upload,
get_asset,
get_dataset_metrics,
get_dataset_stats,
list_assets,
)

router = APIRouter(prefix="/api", tags=["assets"])

Expand Down Expand Up @@ -138,6 +144,17 @@ def dataset_stats(
return get_dataset_stats(db, dataset_id, version_id=version_id)


@router.get("/datasets/{dataset_id}/metrics")
def dataset_metrics(
    dataset_id: str = Path(...),
    version_id: str | None = Query(None),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Serve the dataset health metrics behind the metrics dashboard.

    Delegates to ``get_dataset_metrics``; the optional ``version_id`` query
    parameter is passed straight through.
    """
    metrics = get_dataset_metrics(db, dataset_id, version_id=version_id)
    return metrics


@router.get("/assets/{asset_id}/neighbors")
def get_asset_neighbors(
asset_id: str = Path(...),
Expand Down
6 changes: 6 additions & 0 deletions backend/src/app/api/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,19 @@ def get_dataset(
classes = json.loads(class_map.classes)
except Exception:
classes = []
# The newest version (latest) and the newest editable/unlocked version
# (open) — the UI writes new imagery/imports into the open version.
latest_v = versions[0] if versions else None
open_v = next((v for v in versions if not v.locked), None)
return {
"id": d.id,
"project_id": d.project_id,
"name": d.name,
"description": d.description,
"task_type": d.task_type,
"classes": classes,
"latest_version_id": latest_v.id if latest_v else None,
"open_version_id": open_v.id if open_v else None,
"versions": [
{
"id": v.id,
Expand Down
239 changes: 239 additions & 0 deletions backend/src/app/services/asset_service.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
from __future__ import annotations

import json
from datetime import datetime, timedelta, timezone

from sqlalchemy import func, select
from sqlalchemy.orm import Session

from app.models.annotation import Annotation
from app.models.asset import Asset
from app.models.dataset import ClassMap, Dataset
from app.models.dataset_version import DatasetVersion

# Cap on how many box-annotation geometries get_dataset_metrics parses in
# Python for the area/aspect-ratio histograms. When more rows exist, only the
# first _GEOMETRY_SAMPLE_CAP rows returned by the (unordered) query are used
# and the result is flagged as sampled.
_GEOMETRY_SAMPLE_CAP = 5000


def get_asset(db: Session, asset_id: str) -> Asset | None:
    """Look up a single asset by primary key via ``db.get``.

    The result of ``db.get(Asset, asset_id)`` is returned unchanged.
    """
    asset = db.get(Asset, asset_id)
    return asset
Expand Down Expand Up @@ -100,3 +106,236 @@ def get_dataset_stats(db: Session, dataset_id: str, version_id: str | None = Non
"class_distribution": class_counts,
"annotation_count": sum(class_counts.values()),
}


def _box_wh(geometry_json: str) -> tuple[float, float] | None:
"""Parse a box annotation's geometry JSON into (width, height) in pixels."""
try:
g = json.loads(geometry_json)
except Exception:
return None
w = g.get("w")
h = g.get("h")
if isinstance(w, (int, float)) and isinstance(h, (int, float)) and w > 0 and h > 0:
return float(w), float(h)
return None


def get_dataset_metrics(db: Session, dataset_id: str, version_id: str | None = None) -> dict:
    """Compute dataset health metrics for the metrics dashboard.

    Aggregates in SQL where possible; the box-geometry histograms parse a
    capped number of annotation rows in Python.

    Args:
        db: Active SQLAlchemy session.
        dataset_id: Dataset whose assets/annotations are measured.
        version_id: When given, every query is additionally restricted to
            assets of that dataset version.

    Returns:
        A dict with the keys ``total_assets``, ``total_annotations``,
        ``coverage_pct``, ``labeled``, ``empty_images``,
        ``label_status_distribution``, ``review``, ``class_balance``,
        ``annotation_types``, ``per_image``, ``box_geometry``,
        ``resolution``, ``velocity`` and ``split`` (always ``None`` for now).
    """
    _LABELED = ("labeled", "prelabeled")

    def _scope_assets(q):
        """Restrict an Asset query to this dataset (and version, if any)."""
        q = q.where(Asset.dataset_id == dataset_id)
        if version_id:
            q = q.where(Asset.version_id == version_id)
        return q

    def _scope_anns(q):
        """Restrict an Annotation query to this dataset (and version, if any)."""
        # Explicit select_from(Annotation): several callers select only column
        # expressions (counts), so SQLAlchemy can't infer the join's left side.
        q = (
            q.select_from(Annotation)
            .join(Asset, Annotation.asset_id == Asset.id)
            .where(Asset.dataset_id == dataset_id)
        )
        if version_id:
            q = q.where(Asset.version_id == version_id)
        return q

    def _merge_counts(rows, fallback):
        """Fold (key, count) rows into a dict, summing keys that collapse.

        NULL/empty keys are reported under ``fallback``. A plain dict
        comprehension would let e.g. the NULL group overwrite (or be
        overwritten by) a group literally named ``fallback``.
        """
        merged: dict = {}
        for key, count in rows:
            label = key or fallback
            merged[label] = merged.get(label, 0) + count
        return merged

    # --- Asset / workflow counts ------------------------------------------
    status_rows = db.execute(
        _scope_assets(select(Asset.label_status, func.count())).group_by(Asset.label_status)
    ).all()
    status_counts = {s: c for s, c in status_rows}
    total_assets = sum(status_counts.values())
    labeled = sum(status_counts.get(s, 0) for s in _LABELED)

    annotated_assets = (
        db.scalar(_scope_anns(select(func.count(func.distinct(Annotation.asset_id))))) or 0
    )
    # Assets without a single annotation; clamped so it can never go negative.
    empty_images = max(total_assets - annotated_assets, 0)

    # --- Review workflow ---------------------------------------------------
    review_rows = db.execute(
        _scope_anns(select(Annotation.review_status, func.count())).group_by(
            Annotation.review_status
        )
    ).all()
    # NULL review_status counts as "unreviewed" and is added to any explicit
    # "unreviewed" group instead of replacing it.
    review_counts = _merge_counts(review_rows, "unreviewed")
    flagged = db.scalar(_scope_anns(select(func.count())).where(Annotation.flagged.is_(True))) or 0

    # --- Class balance -----------------------------------------------------
    instance_rows = db.execute(
        _scope_anns(select(Annotation.class_name, func.count())).group_by(Annotation.class_name)
    ).all()
    image_rows = db.execute(
        _scope_anns(
            select(Annotation.class_name, func.count(func.distinct(Annotation.asset_id)))
        ).group_by(Annotation.class_name)
    ).all()
    instance_counts = _merge_counts(instance_rows, "(none)")
    # Distinct-asset counts cannot be merged by addition (the same asset may
    # appear in both a NULL and an empty-string group), so colliding keys
    # keep last-one-wins semantics here.
    image_counts = {(c or "(none)"): n for c, n in image_rows}
    total_annotations = sum(instance_counts.values())

    nonzero = [n for c, n in instance_counts.items() if c != "(none)" and n > 0]
    # Largest class / smallest class; 1.0 when only one class is in use.
    imbalance_ratio = round(max(nonzero) / min(nonzero), 2) if nonzero else None

    # Defined-but-unused classes (declared in the ClassMap, never annotated).
    defined_classes: list[str] = []
    ds = db.get(Dataset, dataset_id)
    if ds and ds.class_map_id:
        cm = db.get(ClassMap, ds.class_map_id)
        if cm:
            try:
                class_entries = list(json.loads(cm.classes))
            except (TypeError, ValueError):
                # Unparseable or non-iterable class map: best effort, treat
                # it as declaring no classes.
                class_entries = []
            for entry in class_entries:
                if isinstance(entry, str):
                    name = entry
                elif isinstance(entry, dict):
                    name = entry.get("name")
                else:
                    # Skip a malformed entry without discarding the rest.
                    continue
                if name:
                    defined_classes.append(name)
    used = {c for c in instance_counts if c != "(none)"}
    unused_classes = [c for c in defined_classes if c not in used]

    # --- Annotation type breakdown ----------------------------------------
    type_rows = db.execute(
        _scope_anns(select(Annotation.type, func.count())).group_by(Annotation.type)
    ).all()
    type_counts = {t: n for t, n in type_rows}

    # --- Annotations per image --------------------------------------------
    per_asset_rows = db.execute(
        _scope_anns(select(Annotation.asset_id, func.count())).group_by(Annotation.asset_id)
    ).all()
    per_image_counts = [n for _, n in per_asset_rows]
    # The "0" bucket comes from assets with no annotation rows at all; the
    # grouped query above only ever yields counts >= 1.
    per_image_hist = {"0": empty_images, "1": 0, "2-5": 0, "6-10": 0, "10+": 0}
    for n in per_image_counts:
        if n == 1:
            per_image_hist["1"] += 1
        elif n <= 5:
            per_image_hist["2-5"] += 1
        elif n <= 10:
            per_image_hist["6-10"] += 1
        else:
            per_image_hist["10+"] += 1
    per_image_mean = round(total_annotations / total_assets, 2) if total_assets else 0.0
    per_image_max = max(per_image_counts) if per_image_counts else 0

    # --- Box geometry (area + aspect ratio), sampled ----------------------
    # Fetch one row beyond the cap purely to detect that the cap was hit.
    geo_rows = db.execute(
        _scope_anns(select(Annotation.geometry))
        .where(Annotation.type == "box")
        .limit(_GEOMETRY_SAMPLE_CAP + 1)
    ).all()
    geometry_sampled = len(geo_rows) > _GEOMETRY_SAMPLE_CAP
    area_hist = {"small (<32²)": 0, "medium (<96²)": 0, "large (≥96²)": 0}
    aspect_hist = {"tall (<0.5)": 0, "square (0.5-2)": 0, "wide (>2)": 0}
    for (geom,) in geo_rows[:_GEOMETRY_SAMPLE_CAP]:
        wh = _box_wh(geom)
        if not wh:
            # Unparseable or degenerate box: left out of both histograms.
            continue
        w, h = wh
        area = w * h
        if area < 32 * 32:
            area_hist["small (<32²)"] += 1
        elif area < 96 * 96:
            area_hist["medium (<96²)"] += 1
        else:
            area_hist["large (≥96²)"] += 1
        ar = w / h
        if ar < 0.5:
            aspect_hist["tall (<0.5)"] += 1
        elif ar <= 2.0:
            aspect_hist["square (0.5-2)"] += 1
        else:
            aspect_hist["wide (>2)"] += 1

    # --- Image resolution --------------------------------------------------
    res_rows = db.execute(
        _scope_assets(select(Asset.width, Asset.height)).where(Asset.width.is_not(None))
    ).all()
    res_hist = {"<640": 0, "640-1280": 0, "1280-1920": 0, "≥1920": 0}
    areas: list[int] = []
    for w, h in res_rows:
        if not w or not h:
            # Missing or zero dimension: the asset has no usable resolution.
            continue
        areas.append(w * h)
        # Buckets are keyed on the longer side of the image.
        m = max(w, h)
        if m < 640:
            res_hist["<640"] += 1
        elif m < 1280:
            res_hist["640-1280"] += 1
        elif m < 1920:
            res_hist["1280-1920"] += 1
        else:
            res_hist["≥1920"] += 1
    areas.sort()
    if areas:
        # Upper median for even-sized lists (no averaging of the middle pair).
        median_area = areas[len(areas) // 2]
        resolution = {
            "min_pixels": areas[0],
            "max_pixels": areas[-1],
            "median_pixels": median_area,
            "histogram": res_hist,
            "with_dimensions": len(areas),
        }
    else:
        resolution = {
            "min_pixels": None,
            "max_pixels": None,
            "median_pixels": None,
            "histogram": res_hist,
            "with_dimensions": 0,
        }

    # --- Labeling velocity (last 30 days) ---------------------------------
    since = datetime.now(timezone.utc) - timedelta(days=30)
    vel_rows = db.execute(
        _scope_anns(select(func.date(Annotation.created_at), func.count()))
        .where(Annotation.created_at >= since)
        .group_by(func.date(Annotation.created_at))
    ).all()
    velocity = [{"date": str(d), "count": n} for d, n in vel_rows if d is not None]
    velocity.sort(key=lambda r: r["date"])

    coverage_pct = round(labeled / total_assets * 100, 1) if total_assets else 0.0

    return {
        "total_assets": total_assets,
        "total_annotations": total_annotations,
        "coverage_pct": coverage_pct,
        "labeled": labeled,
        "empty_images": empty_images,
        "label_status_distribution": status_counts,
        "review": {
            "unreviewed": review_counts.get("unreviewed", 0),
            "approved": review_counts.get("approved", 0),
            "rejected": review_counts.get("rejected", 0),
            "flagged": flagged,
        },
        "class_balance": {
            "instances": instance_counts,
            "images": image_counts,
            "imbalance_ratio": imbalance_ratio,
            "defined_classes": defined_classes,
            "unused_classes": unused_classes,
        },
        "annotation_types": type_counts,
        "per_image": {
            "histogram": per_image_hist,
            "mean": per_image_mean,
            "max": per_image_max,
        },
        "box_geometry": {
            "area_histogram": area_hist,
            "aspect_histogram": aspect_hist,
            "sampled": geometry_sampled,
        },
        "resolution": resolution,
        "velocity": velocity,
        "split": None,  # train/val/test split not modeled yet
    }
Loading
Loading