diff --git a/codec_cookbook/__init__.py b/codec_cookbook/__init__.py
new file mode 100644
index 0000000..aec93b0
--- /dev/null
+++ b/codec_cookbook/__init__.py
@@ -0,0 +1,21 @@
+"""CODEC Cookbook — local-model lifecycle management (scan → recommend →
+download → serve → list → stop) for the M1 Ultra workstation.
+
+This is the NON-skill helper package. All OS / subprocess / PM2 / network work
+lives here so the six `skills/cookbook_*.py` skill files stay thin (import only
+this package + stdlib-safe modules) and therefore pass the `SkillRegistry`
+load-time AST safety gate (`codec_config.is_dangerous_skill_code`, which
+forbids `os`/`subprocess`/`socket`/... in skill files).
+
+HARD SAFETY CONTRACT (enforced in serve.py):
+  * Cookbook only ever stops a PM2 process it started, in the `cookbook-`
+    namespace, after explicit confirm=True.
+  * It never binds to or stops the protected ports (8083/8090/8094/9223/5678)
+    or any port currently bound by a non-cookbook process (live `pm2 jlist`
+    + socket probe at call time).
+  * Its own serve range is 8110-8119.
+  * It never issues docker stop/rm, never changes an existing service's port,
+    never restarts a running service.
+"""
+
+__all__ = ["catalog", "probe", "fit", "serve", "download"]
diff --git a/codec_cookbook/args.py b/codec_cookbook/args.py
new file mode 100644
index 0000000..7571dce
--- /dev/null
+++ b/codec_cookbook/args.py
@@ -0,0 +1,66 @@
+"""Tiny argument parser shared by the thin cookbook_* skills.
+
+Skills receive a single `task` string (CODEC's `run(task, app, ctx)` contract),
+so structured options are parsed out of it here. `re` only — keeps the skill
+files AST-safe (no os/subprocess) and DRY.
+"""
+from __future__ import annotations
+
+import re
+from typing import Optional
+
+from . import catalog
+
+
+def parse_model_id(task: str) -> Optional[str]:
+    """First known catalog id that appears as a whole token in `task`."""
+    if not task:
+        return None
+    tokens = re.findall(r"[A-Za-z0-9_.\-]+", task.lower())
+    known = set(catalog.ids())
+    for tok in tokens:
+        if tok in known:
+            return tok
+    return None
+
+
+def parse_context(task: str, default: int = 8192) -> int:
+    """`context 8192`, `context=8192`, `ctx 4096`, `context_length: 16384`."""
+    m = re.search(r"(?:context|ctx)(?:[ _]?length)?\s*[=:]?\s*(\d{3,7})", (task or "").lower())
+    if m:
+        try:
+            return int(m.group(1))
+        except ValueError:
+            pass
+    return default
+
+
+def parse_flag(task: str, flag: str) -> bool:
+    """True if `flag` appears as a whole word, or `flag=true`/`flag:yes`."""
+    low = (task or "").lower()
+    if re.search(rf"\b{re.escape(flag)}\b\s*[=:]\s*(?:true|yes|1|on)\b", low):
+        return True
+    if re.search(rf"\b{re.escape(flag)}=(?:true|yes|1|on)\b", low):
+        return True
+    return bool(re.search(rf"\b{re.escape(flag)}\b", low))
+
+
+def parse_port(task: str) -> Optional[int]:
+    """A bare port number in the Cookbook range (8110-8119) mentioned in `task`."""
+    for m in re.findall(r"\b(\d{4,5})\b", task or ""):
+        try:
+            p = int(m)
+        except ValueError:
+            continue
+        if 8110 <= p <= 8119:
+            return p
+    return None
+
+
+def parse_role(task: str) -> Optional[str]:
+    """A recommendation role mentioned in `task` (chat/reason/code/max/fast/tiny)."""
+    low = (task or "").lower()
+    for role in ("chat", "reason", "code", "max", "fast", "tiny"):
+        if re.search(rf"\b{role}\b", low):
+            return role
+    return None
diff --git a/codec_cookbook/catalog.json b/codec_cookbook/catalog.json
new file mode 100644
index 0000000..a123cc7
--- /dev/null
+++ b/codec_cookbook/catalog.json
@@ -0,0 +1,7 @@
+[
+  {"id":"qwen3-30b-a3b","hf_repo":"mlx-community/Qwen3-30B-A3B-Instruct-2507-4bit","backend":"mlx","roles":["chat","reason"],"anchor_gb":17.2},
+  {"id":"qwen3-coder-30b","hf_repo":"mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit","backend":"mlx","roles":["code"],"anchor_gb":17.2},
+  {"id":"qwen3-next-80b","hf_repo":"mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit","backend":"mlx","roles":["max"],"anchor_gb":42.0},
+  {"id":"qwen3-4b","hf_repo":"mlx-community/Qwen3-4B-Instruct-2507-4bit","backend":"mlx","roles":["fast"]},
+  {"id":"llama32-3b","hf_repo":"mlx-community/Llama-3.2-3B-Instruct-4bit","backend":"mlx","roles":["tiny"]}
+]
diff --git a/codec_cookbook/catalog.py b/codec_cookbook/catalog.py
new file mode 100644
index 0000000..ba597d8
--- /dev/null
+++ b/codec_cookbook/catalog.py
@@ -0,0 +1,57 @@
+"""Cookbook model catalog — verified, downloadable options.
+
+Loaded from `catalog.json` (next to this file). The primary `qwen3.6@8083`
+model that the live stack serves is intentionally NOT in here — Cookbook never
+manages it.
+"""
+from __future__ import annotations
+
+import json
+import os
+from functools import lru_cache
+from typing import Optional
+
+_CATALOG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "catalog.json")
+
+
+@lru_cache(maxsize=1)
+def _load() -> list[dict]:
+    with open(_CATALOG_PATH, encoding="utf-8") as f:
+        data = json.load(f)
+    if not isinstance(data, list):
+        raise ValueError("catalog.json must be a JSON array")
+    return data
+
+
+def all_entries() -> list[dict]:
+    """Return a copy of every catalog entry."""
+    return [dict(e) for e in _load()]
+
+
+def ids() -> list[str]:
+    """Return the list of known model ids."""
+    return [e["id"] for e in _load()]
+
+
+def get(model_id: Optional[str]) -> dict:
+    """Return the catalog entry for `model_id`. Raises KeyError if unknown so
+    callers fail loud rather than serving a mystery repo."""
+    if not model_id:
+        raise KeyError("no model_id given")
+    for e in _load():
+        if e["id"] == model_id:
+            return dict(e)
+    raise KeyError(f"unknown model id: {model_id!r} (known: {', '.join(ids())})")
+
+
+def find(model_id: Optional[str]) -> Optional[dict]:
+    """Like get() but returns None instead of raising — for arg-parsing paths."""
+    try:
+        return get(model_id)
+    except KeyError:
+        return None
+
+
+def by_role(role: str) -> list[dict]:
+    """All entries advertising a given role (chat/reason/code/max/fast/tiny)."""
+    return [dict(e) for e in _load() if role in e.get("roles", [])]
diff --git a/codec_cookbook/download.py b/codec_cookbook/download.py
new file mode 100644
index 0000000..b21c81f
--- /dev/null
+++ b/codec_cookbook/download.py
@@ -0,0 +1,130 @@
+"""Cookbook model downloads — detached Hugging Face jobs + status polling.
+
+Each download runs as a DETACHED subprocess (start_new_session) so it survives
+across skill calls (every skill `run()` is a fresh, short-lived call). The child
+writes its own per-repo status file with stdlib only (no dependency on this repo
+being importable in the child), and status() reconciles a dead pid to
+'interrupted'. Downloads land in the standard HF cache — they never touch the
+running stack.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import os
+import subprocess
+import sys
+import tempfile
+import time
+
+log = logging.getLogger("codec_cookbook.download")
+
+DL_DIR = os.path.expanduser("~/.codec/cookbook/downloads")
+
+# Self-contained child: writes {repo,state,pid,...} to argv[2] via tmp+replace.
+_CHILD = r"""
+import sys, json, os, time, tempfile
+repo, sf = sys.argv[1], sys.argv[2]
+def w(state, **kw):
+    d = {"repo": repo, "state": state, "pid": os.getpid(), "updated_at": time.time()}
+    d.update(kw)
+    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(sf), suffix=".tmp")
+    with os.fdopen(fd, "w") as f:
+        json.dump(d, f)
+    os.replace(tmp, sf)
+w("running", started_at=time.time())
+try:
+    from huggingface_hub import snapshot_download
+    p = snapshot_download(repo)
+    w("done", path=p, finished_at=time.time())
+except Exception as e:
+    w("error", error=str(e)[:500], finished_at=time.time())
+"""
+
+
+def _slug(repo: str) -> str:
+    return repo.replace("/", "__").replace(":", "_")
+
+
+def _status_file(repo: str) -> str:
+    return os.path.join(DL_DIR, _slug(repo) + ".json")
+
+
+def _read_status(repo: str) -> dict | None:
+    try:
+        with open(_status_file(repo), encoding="utf-8") as f:
+            return json.load(f)
+    except (OSError, json.JSONDecodeError):
+        return None
+
+
+def _pid_alive(pid) -> bool:
+    try:
+        os.kill(int(pid), 0)
+        return True
+    except (OSError, ValueError, TypeError):
+        return False
+
+
+def _write_initial(repo: str, pid: int | None = None) -> None:
+    os.makedirs(DL_DIR, exist_ok=True)
+    sf = _status_file(repo)
+    data = {"repo": repo, "state": "starting", "pid": pid, "updated_at": time.time()}
+    fd, tmp = tempfile.mkstemp(dir=DL_DIR, suffix=".tmp")
+    with os.fdopen(fd, "w") as f:
+        json.dump(data, f)
+    os.replace(tmp, sf)
+
+
+def start(repo: str) -> dict:
+    """Begin (or report) a download of `repo`. Idempotent: if a job is already
+    running, returns its current status instead of spawning a duplicate."""
+    if not repo:
+        return {"state": "error", "error": "no repo given"}
+    cur = status(repo)
+    if cur.get("state") in ("starting", "running"):
+        return cur  # already in flight
+    os.makedirs(DL_DIR, exist_ok=True)
+    _write_initial(repo)
+    try:
+        proc = subprocess.Popen(
+            [sys.executable, "-c", _CHILD, repo, _status_file(repo)],
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+            start_new_session=True,   # detach: survives the parent skill call
+        )
+    except Exception as e:
+        return {"repo": repo, "state": "error", "error": f"spawn failed: {e}"}
+    _write_initial(repo, pid=proc.pid)
+    return {"repo": repo, "state": "starting", "pid": proc.pid}
+
+
+def status(repo: str) -> dict:
+    """Current download state: not_started | starting | running | done | error |
+    interrupted. Reconciles a dead pid (child crashed/killed) to 'interrupted'."""
+    rec = _read_status(repo)
+    if rec is None:
+        return {"repo": repo, "state": "not_started"}
+    state = rec.get("state")
+    if state in ("starting", "running"):
+        pid = rec.get("pid")
+        if pid is not None and not _pid_alive(pid):
+            return {**rec, "state": "interrupted",
+                    "detail": "download process is no longer running"}
+    return rec
+
+
+def list_downloads() -> list[dict]:
+    """All known download jobs (one per status file in DL_DIR)."""
+    out = []
+    try:
+        for fn in os.listdir(DL_DIR):
+            if fn.endswith(".json"):
+                try:
+                    with open(os.path.join(DL_DIR, fn), encoding="utf-8") as f:
+                        rec = json.load(f)
+                    out.append(status(rec.get("repo", "")))
+                except (OSError, json.JSONDecodeError):
+                    continue
+    except OSError:
+        pass
+    return out
diff --git a/codec_cookbook/fit.py b/codec_cookbook/fit.py
new file mode 100644
index 0000000..ad1f265
--- /dev/null
+++ b/codec_cookbook/fit.py
@@ -0,0 +1,124 @@
+"""Cookbook unified-memory fit math.
+
+Weight size comes from the model's ACTUAL Hugging Face file sizes; KV-cache
+geometry from its real `config.json`. No fabricated architecture constants —
+the only tunables are the conservative over-estimate (×1.10 + 1.5 GB) and the
+deployment safety figures (os_reserve=24, margin=8), which are policy, not
+architecture.
+
+MoE note: footprint = TOTAL params (every expert is resident in unified
+memory); the active-param count affects speed, not memory. So the anchors are
+the full-model resident sizes (Qwen3-30B-A3B-4bit ≈ 17.2 GB,
+Qwen3-Next-80B-A3B-4bit ≈ 42 GB).
+
+Network boundary: weight_gb_from_hub() and load_config() touch the Hub.
+estimate_footprint_gb() accepts `anchor_gb` (skip the weight call) and `cfg`
+(skip the config fetch) so callers — and tests — can run fully offline.
+"""
+from __future__ import annotations
+
+import json
+import logging
+from typing import Optional
+
+log = logging.getLogger("codec_cookbook.fit")
+
+WEIGHT_EXT = (".safetensors", ".gguf", ".npz")
+DEFAULT_OS_RESERVE_GB = 24
+DEFAULT_MARGIN_GB = 8
+_OVERHEAD_MULT = 1.10   # +10% for activations / fragmentation
+_OVERHEAD_FLAT_GB = 1.5  # runtime / framework baseline
+
+
+def weight_gb_from_hub(repo: str) -> float:
+    """Sum of the model's weight-file sizes (GB) from the Hub. Network."""
+    from huggingface_hub import HfApi
+    info = HfApi().model_info(repo, files_metadata=True)
+    total = sum((f.size or 0) for f in (info.siblings or [])
+                if f.rfilename.endswith(WEIGHT_EXT))
+    return total / 1e9
+
+
+def load_config(repo: str) -> dict:
+    """Fetch + parse the model's config.json from the Hub. Network."""
+    from huggingface_hub import hf_hub_download
+    path = hf_hub_download(repo, "config.json")
+    with open(path, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def kv_cache_gb(cfg: dict, ctx: int) -> float:
+    """fp16 K+V cache size (GB) for `ctx` tokens, from real config geometry.
+
+    GQA-aware: uses num_key_value_heads when present (falls back to
+    num_attention_heads for MHA models). head_dim falls back to
+    hidden_size / num_attention_heads.
+    """
+    nl = cfg["num_hidden_layers"]
+    nkv = cfg.get("num_key_value_heads", cfg["num_attention_heads"])
+    hd = cfg.get("head_dim", cfg["hidden_size"] // cfg["num_attention_heads"])
+    return (2 * nl * nkv * hd * ctx * 2) / 1e9  # 2 (K+V) * ... * 2 bytes (fp16)
+
+
+def estimate_footprint_gb(repo: str, ctx: int,
+                          anchor_gb: Optional[float] = None,
+                          cfg: Optional[dict] = None) -> float:
+    """Conservative resident footprint (GB) = (weight + KV) × 1.10 + 1.5.
+
+    weight: `anchor_gb` if given, else weight_gb_from_hub(repo).
+    KV:     from `cfg` if given, else load_config(repo). If the config can't
+            be obtained (offline + no anchor cfg), KV is omitted and a warning
+            is logged — the estimate then under-counts KV, so callers relying
+            on the conservative guarantee should supply anchor_gb + cfg.
+    """
+    w = anchor_gb if anchor_gb is not None else weight_gb_from_hub(repo)
+    kv = 0.0
+    try:
+        c = cfg if cfg is not None else load_config(repo)
+        kv = kv_cache_gb(c, ctx)
+    except Exception as e:  # offline / missing config — best-effort
+        log.warning("KV geometry unavailable for %s (%s) — footprint omits KV", repo, e)
+    return (w + kv) * _OVERHEAD_MULT + _OVERHEAD_FLAT_GB
+
+
+def available_gb(unified_total_gb: float, resident_gb,
+                 os_reserve_gb: int = DEFAULT_OS_RESERVE_GB) -> float:
+    """Unified memory free for a new model = total − os_reserve − Σ resident."""
+    return unified_total_gb - os_reserve_gb - sum(resident_gb)
+
+
+def fits(need_gb: float, avail_gb: float,
+         margin_gb: int = DEFAULT_MARGIN_GB) -> tuple[bool, float]:
+    """(ok, headroom). ok requires headroom ≥ margin_gb. headroom may be
+    negative (returned for the refusal message)."""
+    headroom = avail_gb - need_gb
+    return headroom >= margin_gb, headroom
+
+
+def quick_need_gb(entry: dict, ctx: int) -> float:
+    """Best-effort footprint for a catalog entry, preferring its anchor_gb so
+    ranking doesn't require a weight download. KV is added when config is
+    reachable (best-effort). Used by the recommend skill."""
+    return estimate_footprint_gb(entry["hf_repo"], ctx, anchor_gb=entry.get("anchor_gb"))
+
+
+def recommend(entries: list[dict], avail_gb: float, ctx: int,
+              margin_gb: int = DEFAULT_MARGIN_GB) -> list[dict]:
+    """Rank catalog entries against available memory. Returns a list of
+    {entry, need_gb, fits, headroom_gb} sorted fit-first then largest-that-fits
+    (more capable), so the top recommendation is the biggest model that fits."""
+    scored = []
+    for e in entries:
+        need = quick_need_gb(e, ctx)
+        ok, headroom = fits(need, avail_gb, margin_gb)
+        scored.append({
+            "entry": e,
+            "need_gb": round(need, 1),
+            "fits": ok,
+            "headroom_gb": round(headroom, 1),
+        })
+    # fits first; within fits, biggest need (most capable) first; within
+    # non-fits, smallest need (closest to fitting) first.
+    scored.sort(key=lambda s: (not s["fits"],
+                               -s["need_gb"] if s["fits"] else s["need_gb"]))
+    return scored
diff --git a/codec_cookbook/probe.py b/codec_cookbook/probe.py
new file mode 100644
index 0000000..89b2b44
--- /dev/null
+++ b/codec_cookbook/probe.py
@@ -0,0 +1,219 @@
+"""Cookbook hardware + process probe — STRICTLY READ-ONLY.
+
+Nothing here ever starts, stops, binds, or mutates anything. It reads:
+  * chip + total unified memory   (sysctl / system_profiler)
+  * free memory                   (vm_stat)
+  * running PM2 processes + RSS   (pm2 jlist)
+  * whether a TCP port is bound    (socket connect probe)
+  * mlx-lm version (Qwen3 MoE needs >= 0.25.2)
+
+Every function is defensive: on any failure it returns a safe default
+(None / 0 / [] / False) rather than raising, so a probe never breaks a skill.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import socket
+import subprocess
+from functools import lru_cache
+from typing import Optional
+
+log = logging.getLogger("codec_cookbook.probe")
+
+# Protected ports Cookbook must never bind to or stop. Mirrored by serve.py.
+PROTECTED_PORTS = frozenset({8083, 8090, 8094, 9223, 5678})
+# Cookbook's own serve range.
+SERVE_RANGE = range(8110, 8120)  # 8110-8119 inclusive
+OS_RESERVE_GB = 24
+
+
+def _run(cmd: list[str], timeout: int = 10) -> Optional[str]:
+    """Run a read-only command, return stdout or None. Never raises."""
+    try:
+        r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
+        if r.returncode == 0:
+            return r.stdout
+        log.debug("probe cmd %s rc=%s: %s", cmd[:2], r.returncode, (r.stderr or "")[:200])
+        return None
+    except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as e:
+        log.debug("probe cmd %s failed: %s", cmd[:2], e)
+        return None
+
+
+@lru_cache(maxsize=1)
+def chip() -> str:
+    """Apple Silicon chip string (e.g. 'Apple M1 Ultra'), or '' if unknown."""
+    out = _run(["sysctl", "-n", "machdep.cpu.brand_string"])
+    return (out or "").strip()
+
+
+@lru_cache(maxsize=1)
+def unified_total_gb() -> float:
+    """Total unified memory in GB (hw.memsize). 0.0 if unreadable."""
+    out = _run(["sysctl", "-n", "hw.memsize"])
+    if out:
+        try:
+            return int(out.strip()) / 1e9
+        except ValueError:
+            pass
+    return 0.0
+
+
+def vm_free_gb() -> float:
+    """Free + inactive memory in GB from vm_stat (a live, fluctuating figure).
+    Used for the scan report only — fit uses the deterministic resident-sum
+    formula in available_gb()."""
+    out = _run(["vm_stat"])
+    if not out:
+        return 0.0
+    page_size = 16384  # Apple Silicon default; corrected below if vm_stat reports it
+    free_pages = inactive_pages = 0
+    for line in out.splitlines():
+        low = line.lower()
+        if "page size of" in low:
+            for tok in low.replace("(", " ").replace(")", " ").split():
+                if tok.isdigit():
+                    page_size = int(tok)
+                    break
+        elif low.startswith("pages free:"):
+            free_pages = _trailing_int(line)
+        elif low.startswith("pages inactive:"):
+            inactive_pages = _trailing_int(line)
+    return ((free_pages + inactive_pages) * page_size) / 1e9
+
+
+def _trailing_int(line: str) -> int:
+    digits = "".join(c for c in line if c.isdigit())
+    return int(digits) if digits else 0
+
+
+def pm2_jlist() -> list[dict]:
+    """Raw `pm2 jlist` output as a list of dicts. [] on any failure."""
+    out = _run(["pm2", "jlist"])
+    if not out:
+        return []
+    try:
+        data = json.loads(out)
+        return data if isinstance(data, list) else []
+    except (json.JSONDecodeError, ValueError):
+        return []
+
+
+def pm2_processes() -> list[dict]:
+    """Parsed PM2 process summaries: name, status, rss_gb, pm_id, port (if the
+    process declares one via its PORT env or args)."""
+    procs = []
+    for p in pm2_jlist():
+        env = p.get("pm2_env", {}) or {}
+        monit = p.get("monit", {}) or {}
+        procs.append({
+            "name": p.get("name", "?"),
+            "pm_id": p.get("pm_id"),
+            "status": env.get("status", "?"),
+            "rss_gb": round((monit.get("memory", 0) or 0) / 1e9, 3),
+            "port": _proc_declared_port(p),
+        })
+    return procs
+
+
+def _proc_declared_port(p: dict) -> Optional[int]:
+    """Best-effort port a PM2 proc declares (PORT env or a --port/-port arg).
+    Not authoritative for binding — that's what is_port_bound() is for."""
+    env = p.get("pm2_env", {}) or {}
+    port_env = env.get("PORT") or env.get("env", {}).get("PORT") if isinstance(env.get("env"), dict) else env.get("PORT")
+    if port_env:
+        try:
+            return int(port_env)
+        except (ValueError, TypeError):
+            pass
+    # scan args for --port N / -port N / --port=N
+    args = env.get("args") or []
+    if isinstance(args, str):
+        args = args.split()
+    for i, a in enumerate(args):
+        a = str(a)
+        if a in ("--port", "-port", "--listen-port") and i + 1 < len(args):
+            try:
+                return int(args[i + 1])
+            except (ValueError, TypeError):
+                pass
+        if a.startswith("--port="):
+            try:
+                return int(a.split("=", 1)[1])
+            except (ValueError, TypeError):
+                pass
+    return None
+
+
+def resident_gb_total() -> float:
+    """Sum of RSS (GB) across all online PM2 processes — the live stack's
+    footprint, used by available_gb()."""
+    return round(sum(p["rss_gb"] for p in pm2_processes()
+                     if p.get("status") == "online"), 3)
+
+
+def available_gb(os_reserve_gb: int = OS_RESERVE_GB) -> float:
+    """Unified memory available for a NEW model:
+        total - os_reserve - sum(resident PM2 RSS).
+    Deterministic (doesn't depend on the fluctuating vm_stat free figure)."""
+    total = unified_total_gb()
+    if total <= 0:
+        return 0.0
+    return round(total - os_reserve_gb - resident_gb_total(), 3)
+
+
+def is_port_bound(port: int, host: str = "127.0.0.1") -> bool:
+    """True if something is listening on host:port right now (socket probe)."""
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.settimeout(0.4)
+            return s.connect_ex((host, port)) == 0
+    except OSError:
+        return False
+
+
+def bound_ports_in_range(lo: int, hi: int) -> set[int]:
+    """Subset of [lo, hi) that is currently bound (socket probe)."""
+    return {p for p in range(lo, hi) if is_port_bound(p)}
+
+
+@lru_cache(maxsize=1)
+def mlx_version() -> Optional[str]:
+    try:
+        import mlx_lm
+        return getattr(mlx_lm, "__version__", None)
+    except Exception:
+        return None
+
+
+def mlx_version_ok(minimum: tuple = (0, 25, 2)) -> bool:
+    """True if mlx-lm meets the Qwen3-MoE minimum. None version → False (warn)."""
+    v = mlx_version()
+    if not v:
+        return False
+    try:
+        parts = tuple(int(x) for x in v.split(".")[:3])
+        return parts >= minimum
+    except ValueError:
+        return False
+
+
+def snapshot() -> dict:
+    """Full read-only state for the scan skill."""
+    procs = pm2_processes()
+    return {
+        "chip": chip(),
+        "unified_total_gb": round(unified_total_gb(), 1),
+        "vm_free_gb": round(vm_free_gb(), 1),
+        "resident_gb_total": resident_gb_total(),
+        "available_gb": available_gb(),
+        "os_reserve_gb": OS_RESERVE_GB,
+        "pm2_process_count": len(procs),
+        "pm2_processes": procs,
+        "mlx_version": mlx_version(),
+        "mlx_version_ok": mlx_version_ok(),
+        "protected_ports": sorted(PROTECTED_PORTS),
+        "serve_range": [SERVE_RANGE.start, SERVE_RANGE.stop - 1],
+        "serve_ports_bound": sorted(bound_ports_in_range(SERVE_RANGE.start, SERVE_RANGE.stop)),
+    }
diff --git a/codec_cookbook/serve.py b/codec_cookbook/serve.py
new file mode 100644
index 0000000..8b61363
--- /dev/null
+++ b/codec_cookbook/serve.py
@@ -0,0 +1,262 @@
+"""Cookbook PM2 serve/stop + port allocation + health — the SAFETY-CRITICAL core.
+
+Structural guarantees (the whole point of Cookbook):
+  * launch() only ever allocates a port in 8110-8119 that a live socket probe
+    + pm2 jlist + our own served.json all agree is free. Protected ports
+    (8083/8090/8094/9223/5678) are never in range and are skipped anyway.
+  * Every process we start is named `cookbook-<id>-<port>`.
+  * stop() will ONLY delete a process that (a) we recorded in served.json,
+    (b) is named `cookbook-…`, and (c) is NOT on a protected port — and only
+    when confirm=True. Anything else returns a refusal (or a dry-run).
+  * Nothing here issues docker stop/rm, changes an existing service's port, or
+    restarts/stops a non-cookbook process.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import os
+import subprocess
+import sys
+import time
+from datetime import datetime, timezone
+from typing import Optional, Union
+
+from codec_jsonstore import atomic_write_json, file_lock
+from . import probe
+
+log = logging.getLogger("codec_cookbook.serve")
+
+COOKBOOK_PREFIX = "cookbook-"
+PROTECTED_PORTS = probe.PROTECTED_PORTS          # 8083/8090/8094/9223/5678
+SERVE_RANGE = probe.SERVE_RANGE                  # range(8110, 8120)
+SERVED_PATH = os.path.expanduser("~/.codec/cookbook/served.json")
+_CONFIG_PATH = os.path.expanduser("~/.codec/config.json")
+_HEALTH_TIMEOUT_S = 90
+_PM2_TIMEOUT_S = 30
+
+
+# ── persistence ─────────────────────────────────────────────────────────────
+
+def _load_served() -> list[dict]:
+    try:
+        with open(SERVED_PATH, encoding="utf-8") as f:
+            data = json.load(f)
+        return data if isinstance(data, list) else []
+    except (OSError, json.JSONDecodeError):
+        return []
+
+
+def _save_served(records: list[dict]) -> None:
+    with file_lock(SERVED_PATH):
+        atomic_write_json(SERVED_PATH, records, default=str)
+
+
+def _record_served(rec: dict) -> None:
+    with file_lock(SERVED_PATH):
+        records = _load_served()
+        records = [r for r in records if r.get("pm2_name") != rec["pm2_name"]]
+        records.append(rec)
+        atomic_write_json(SERVED_PATH, records, default=str)
+
+
+def _forget_served(pm2_name: str) -> None:
+    with file_lock(SERVED_PATH):
+        records = [r for r in _load_served() if r.get("pm2_name") != pm2_name]
+        atomic_write_json(SERVED_PATH, records, default=str)
+
+
+# ── interpreter discovery (no hardcoded venv) ───────────────────────────────
+
+def resolve_mlx_python() -> str:
+    """Python that has mlx-lm: config.json:cookbook.mlx_python if set, else
+    sys.executable (the interpreter already running CODEC, which serves
+    qwen3.6 and therefore has MLX)."""
+    try:
+        with open(_CONFIG_PATH, encoding="utf-8") as f:
+            cfg = json.load(f)
+        py = (cfg.get("cookbook") or {}).get("mlx_python")
+        if py and os.path.exists(py):
+            return py
+    except (OSError, json.JSONDecodeError):
+        pass
+    return sys.executable
+
+
+def resolve_llama_server() -> Optional[str]:
+    """`llama-server` resolved from PATH, or None if absent."""
+    import shutil
+    return shutil.which("llama-server")
+
+
+# ── port allocation ─────────────────────────────────────────────────────────
+
+def allocate_port() -> Optional[int]:
+    """First free port in 8110-8119. Skips: protected ports, ports a live
+    socket probe says are bound, ports any PM2 process declares, and ports we
+    already recorded in served.json. Returns None if the range is exhausted."""
+    bound = probe.bound_ports_in_range(SERVE_RANGE.start, SERVE_RANGE.stop)
+    pm2_ports = {p["port"] for p in probe.pm2_processes() if p.get("port")}
+    ours = {r["port"] for r in _load_served() if r.get("port")}
+    for port in SERVE_RANGE:
+        if port in PROTECTED_PORTS:          # belt-and-suspenders (none in range)
+            continue
+        if port in bound or port in pm2_ports or port in ours:
+            continue
+        return port
+    return None
+
+
+# ── launch ───────────────────────────────────────────────────────────────────
+
+def _build_command(entry: dict, port: int, context_length: int) -> tuple[Optional[list[str]], Optional[str]]:
+    """Return (pm2_argv, error). Builds the corrected serve command for the
+    entry's backend."""
+    mid = entry["id"]
+    pm2_name = f"{COOKBOOK_PREFIX}{mid}-{port}"
+    backend = entry.get("backend", "mlx")
+    if backend == "mlx":
+        py = resolve_mlx_python()
+        # NOTE: `python -m mlx_lm server` (subcommand form) — NOT `-m mlx_lm.server`.
+        # --max-tokens is mandatory: mlx-lm defaults to 512 and silently truncates.
+        argv = ["pm2", "start", py, "--name", pm2_name, "--",
+                "-m", "mlx_lm", "server", "--model", entry["hf_repo"],
+                "--host", "127.0.0.1", "--port", str(port), "--max-tokens", "16384"]
+        return argv, None
+    if backend in ("gguf", "llama", "llama.cpp"):
+        server = resolve_llama_server()
+        if not server:
+            return None, "llama-server not found on PATH"
+        gguf = entry.get("gguf_path") or entry.get("hf_repo")
+        # Metal is on by default on Apple Silicon; -ngl 999 forces full GPU offload.
+        argv = ["pm2", "start", server, "--name", pm2_name, "--",
+                "-m", gguf, "--host", "127.0.0.1", "--port", str(port),
+                "-ngl", "999", "-c", str(context_length)]
+        return argv, None
+    return None, f"unknown backend: {backend!r}"
+
+
+def _health_ok(port: int, timeout_s: int = _HEALTH_TIMEOUT_S) -> bool:
+    """Poll GET /v1/models until 200 or timeout."""
+    import urllib.request
+    deadline = time.monotonic() + timeout_s
+    url = f"http://127.0.0.1:{port}/v1/models"
+    while time.monotonic() < deadline:
+        try:
+            with urllib.request.urlopen(url, timeout=3) as r:
+                if r.status == 200:
+                    return True
+        except Exception:
+            pass
+        time.sleep(2)
+    return False
+
+
+def launch(entry: dict, context_length: int = 8192,
+           wait_health: bool = True) -> dict:
+    """Allocate a free 8110-8119 port and start the model under PM2. Returns a
+    status dict. Never touches an existing service."""
+    port = allocate_port()
+    if port is None:
+        return {"status": "error", "reason": "no_free_port",
+                "range": [SERVE_RANGE.start, SERVE_RANGE.stop - 1]}
+    argv, err = _build_command(entry, port, context_length)
+    if err:
+        return {"status": "error", "reason": err}
+    pm2_name = f"{COOKBOOK_PREFIX}{entry['id']}-{port}"
+    try:
+        r = subprocess.run(argv, capture_output=True, text=True, timeout=_PM2_TIMEOUT_S)
+    except FileNotFoundError:
+        return {"status": "error", "reason": "pm2_not_found"}
+    except subprocess.TimeoutExpired:
+        return {"status": "error", "reason": "pm2_start_timeout", "pm2_name": pm2_name}
+    if r.returncode != 0:
+        return {"status": "error", "reason": "pm2_start_failed",
+                "detail": (r.stderr or r.stdout or "")[:300], "pm2_name": pm2_name}
+
+    rec = {
+        "id": entry["id"],
+        "port": port,
+        "pm2_name": pm2_name,
+        "backend": entry.get("backend", "mlx"),
+        "hf_repo": entry.get("hf_repo"),
+        "context": context_length,
+        "started_at": datetime.now(timezone.utc).isoformat(),
+    }
+    _record_served(rec)
+
+    healthy = _health_ok(port) if wait_health else None
+    return {"status": "serving" if healthy or not wait_health else "started_unhealthy",
+            "healthy": healthy, **rec}
+
+
+# ── stop — the guard ─────────────────────────────────────────────────────────
+
+def _resolve_target(target: Union[str, int]) -> Optional[dict]:
+    """Resolve a stop target (a cookbook pm2 name OR a port) to OUR served
+    record. Returns None if it isn't a process we started — the caller treats
+    None as a hard refusal, so we never delete anything we don't own."""
+    served = _load_served()
+    s = str(target).strip()
+    if s.isdigit():
+        port = int(s)
+        for r in served:
+            if r.get("port") == port:
+                return r
+        return None
+    for r in served:
+        if r.get("pm2_name") == s:
+            return r
+    return None
+
+
+def stop(target: Union[str, int], confirm: bool = False) -> dict:
+    """Stop a Cookbook-started model. Layered refusals (in order):
+      1. target not a process WE started (served.json)      → refused
+      2. resolved port is a protected port                  → refused
+      3. resolved pm2 name not in the `cookbook-` namespace → refused
+      4. confirm is not True                                → dry-run (would_stop)
+    Only after all four pass do we `pm2 delete <name>`."""
+    rec = _resolve_target(target)
+    if rec is None:
+        return {"status": "refused", "reason": "not_a_cookbook_process",
+                "target": str(target),
+                "detail": "Cookbook only stops models it started (recorded in served.json)."}
+    name, port = rec.get("pm2_name", ""), rec.get("port")
+    if port in PROTECTED_PORTS:
+        return {"status": "refused", "reason": "protected_port", "port": port}
+    if not name.startswith(COOKBOOK_PREFIX):
+        return {"status": "refused", "reason": "not_cookbook_namespace", "pm2_name": name}
+    if not confirm:
+        return {"status": "would_stop", "pm2_name": name, "port": port,
+                "hint": "re-run with confirm=true to actually stop it"}
+    try:
+        r = subprocess.run(["pm2", "delete", name],
+                           capture_output=True, text=True, timeout=_PM2_TIMEOUT_S)
+    except FileNotFoundError:
+        return {"status": "error", "reason": "pm2_not_found", "pm2_name": name}
+    except subprocess.TimeoutExpired:
+        return {"status": "error", "reason": "pm2_delete_timeout", "pm2_name": name}
+    if r.returncode != 0:
+        return {"status": "error", "reason": "pm2_delete_failed",
+                "detail": (r.stderr or r.stdout or "")[:300], "pm2_name": name}
+    _forget_served(name)
+    return {"status": "stopped", "pm2_name": name, "port": port}
+
+
+# ── list ──────────────────────────────────────────────────────────────────────
+
+def list_served() -> list[dict]:
+    """Cookbook-served models with live status (online/stopped via pm2) +
+    a current health probe. Read-only."""
+    live = {p["name"]: p for p in probe.pm2_processes()}
+    out = []
+    for r in _load_served():
+        name = r.get("pm2_name", "")
+        proc = live.get(name)
+        out.append({
+            **r,
+            "pm2_status": proc.get("status") if proc else "absent",
+            "healthy": probe.is_port_bound(r["port"]) if r.get("port") else None,
+        })
+    return out
diff --git a/skills/.manifest.json b/skills/.manifest.json
index d5aa99c..c3305b3 100644
--- a/skills/.manifest.json
+++ b/skills/.manifest.json
@@ -26,6 +26,12 @@
     "chrome_tabs.py": "bf971798a8455215655d37edb6bb326d908f4d33c0e3b232eb37d267fec68d7a",
     "clipboard.py": "f5ef9cc501fe38a3de95bf0b49896b928250c0e272060173668f6b195728d131",
     "clipboard_url_fetch.py": "c2733a92d6e99a0346b91c67bb70698e491be9570305377a82096a0ceb153488",
+    "cookbook_download.py": "8fd9c8a5b82f8e910cc0721904b908f2d201908ff0ac6373994be922598c382c",
+    "cookbook_list.py": "23c19b92742bfd88a801e74bd79e8bbd70f88d49ac1951f1c30e4857cc693a79",
+    "cookbook_recommend.py": "66b09eac0510ca9ca1e19f5619ab10a167bda6e34d17be13bda77ba7ffe0b5e3",
+    "cookbook_scan.py": "9bc1fe32073267c45c20d6dca67e293ed07f5c04dfff0109b480f778ffeaa3bb",
+    "cookbook_serve.py": "8a08bb5deecd988431da421b1b4825cd411c7e9c7658bc547aa16ce74c0e1ddd",
+    "cookbook_stop.py": "8b51e04ef08e08899df24d032af7a609bef7bf02ad72be5badf95eb0a2b1ce64",
     "create_skill.py": "28070280abfe2179d5c9b33eee74b4db5a5dd085f76a09f02d516302ba330036",
     "delegate.py": "7c595d5605cd9913a8afa331ae0013861d6996f378f8a59aca0249f1b2f3a474",
     "fact_extract.py": "a43ed03b8c51704415f4135de46a44e097f757305af3921dd389b8dfd0540906",
diff --git a/skills/cookbook_download.py b/skills/cookbook_download.py
new file mode 100644
index 0000000..18d31eb
--- /dev/null
+++ b/skills/cookbook_download.py
@@ -0,0 +1,43 @@
+"""CODEC Skill: Download a catalog model from Hugging Face (background job)."""
+from codec_cookbook import args, catalog, download
+
+SKILL_NAME = "cookbook_download"
+SKILL_DESCRIPTION = (
+    "Download a Cookbook catalog model from Hugging Face into the local cache "
+    "as a background job, or report an in-flight download's status."
+)
+SKILL_TAGS = ["cookbook", "models", "download", "huggingface", "local-llm"]
+SKILL_TRIGGERS = [
+    "cookbook download", "download the model", "fetch a model", "download model status",
+    "cookbook fetch",
+]
+SKILL_MCP_EXPOSE = False  # spawns network download jobs — local/dashboard/voice only
+
+
+def run(task, app="", ctx=""):
+    model_id = args.parse_model_id(task)
+    if not model_id:
+        return ("Which model? Say e.g. 'cookbook download qwen3-coder-30b'. Known: "
+                + ", ".join(catalog.ids()))
+    try:
+        entry = catalog.get(model_id)
+    except KeyError as e:
+        return str(e)
+    repo = entry["hf_repo"]
+
+    # "status" / "progress" → report only; otherwise start (idempotent).
+    want_status = args.parse_flag(task, "status") or "progress" in (task or "").lower()
+    res = download.status(repo) if want_status else download.start(repo)
+
+    state = res.get("state")
+    if state == "not_started":
+        return f"No download in progress for {model_id}. Say 'cookbook download {model_id}' to start."
+    if state in ("starting", "running"):
+        return f"⏳ Downloading {model_id} ({repo}) — state: {state} (pid {res.get('pid')})."
+    if state == "done":
+        return f"✅ {model_id} downloaded → {res.get('path', 'HF cache')}."
+    if state == "interrupted":
+        return f"⚠ Download of {model_id} was interrupted. Re-run 'cookbook download {model_id}'."
+    if state == "error":
+        return f"❌ Download of {model_id} failed: {res.get('error', 'unknown')}"
+    return f"{model_id}: {state}"
diff --git a/skills/cookbook_list.py b/skills/cookbook_list.py
new file mode 100644
index 0000000..5029995
--- /dev/null
+++ b/skills/cookbook_list.py
@@ -0,0 +1,29 @@
+"""CODEC Skill: List Cookbook-served models (read-only)."""
+from codec_cookbook import serve
+
+SKILL_NAME = "cookbook_list"
+SKILL_DESCRIPTION = (
+    "List the local models Cookbook is currently serving (port, PM2 name, "
+    "live status, health). Only shows Cookbook-managed processes. Read-only."
+)
+SKILL_TAGS = ["cookbook", "models", "list", "local-llm"]
+SKILL_TRIGGERS = [
+    "cookbook list", "list served models", "cookbook models", "what models are running",
+    "cookbook ps",
+]
+SKILL_MCP_EXPOSE = True  # read-only, safe to expose
+
+
+def run(task, app="", ctx=""):
+    served = serve.list_served()
+    if not served:
+        return "Cookbook isn't serving any models. Use 'cookbook serve <id>' to start one."
+    lines = [f"Cookbook-served models ({len(served)}):"]
+    for r in served:
+        health = "healthy" if r.get("healthy") else "no response"
+        lines.append(
+            f"  • {r.get('id', '?'):<16} port {r.get('port')}  "
+            f"[{r.get('pm2_status', '?')}/{health}]  {r.get('pm2_name')}  "
+            f"(ctx {r.get('context')}, {r.get('backend')})"
+        )
+    return "\n".join(lines)
diff --git a/skills/cookbook_recommend.py b/skills/cookbook_recommend.py
new file mode 100644
index 0000000..76853f0
--- /dev/null
+++ b/skills/cookbook_recommend.py
@@ -0,0 +1,42 @@
+"""CODEC Skill: Cookbook model recommendation (read-only)."""
+from codec_cookbook import args, catalog, fit, probe
+
+SKILL_NAME = "cookbook_recommend"
+SKILL_DESCRIPTION = (
+    "Recommend which catalog models fit this Mac's current unified-memory "
+    "headroom, ranked biggest-that-fits first. Optionally filter by role "
+    "(chat/reason/code/max/fast/tiny). Read-only."
+)
+SKILL_TAGS = ["cookbook", "models", "recommend", "fit", "local-llm"]
+SKILL_TRIGGERS = [
+    "cookbook recommend", "recommend a model", "which model fits", "what model can i run",
+    "cookbook suggest", "best model for",
+]
+SKILL_MCP_EXPOSE = True  # read-only, safe to expose
+
+
+def run(task, app="", ctx=""):
+    ctx_len = args.parse_context(task)
+    role = args.parse_role(task)
+    entries = catalog.by_role(role) if role else catalog.all_entries()
+    if not entries:
+        return f"No catalog models with role '{role}'. Roles: chat, reason, code, max, fast, tiny."
+    avail = probe.available_gb()
+    ranked = fit.recommend(entries, avail, ctx_len)
+    header = (f"Models for ~{round(avail, 1)} GB headroom @ {ctx_len}-token context"
+              + (f" (role: {role})" if role else "") + ":")
+    lines = [header]
+    for r in ranked:
+        e = r["entry"]
+        mark = "✅" if r["fits"] else "✗"
+        lines.append(
+            f"  {mark} {e['id']:<16} ~{r['need_gb']} GB  "
+            f"(headroom {r['headroom_gb']:+} GB)  [{','.join(e.get('roles', []))}]"
+        )
+    top = next((r for r in ranked if r["fits"]), None)
+    if top:
+        lines.append(f"\n→ Best fit: {top['entry']['id']}. "
+                     f"Serve it with: cookbook serve {top['entry']['id']}")
+    else:
+        lines.append("\n→ Nothing fits the current headroom. Free memory or pick a smaller context.")
+    return "\n".join(lines)
diff --git a/skills/cookbook_scan.py b/skills/cookbook_scan.py
new file mode 100644
index 0000000..7d572eb
--- /dev/null
+++ b/skills/cookbook_scan.py
@@ -0,0 +1,37 @@
+"""CODEC Skill: Cookbook hardware scan (read-only)."""
+from codec_cookbook import probe
+
+SKILL_NAME = "cookbook_scan"
+SKILL_DESCRIPTION = (
+    "Scan this Mac's hardware + live PM2 stack and report unified-memory "
+    "headroom available for serving additional local models. Read-only."
+)
+SKILL_TAGS = ["cookbook", "models", "scan", "hardware", "local-llm"]
+SKILL_TRIGGERS = [
+    "cookbook scan", "scan hardware", "cookbook hardware", "model memory headroom",
+    "how much memory for models", "cookbook status",
+]
+SKILL_MCP_EXPOSE = True  # read-only, safe to expose
+
+
+def run(task, app="", ctx=""):
+    s = probe.snapshot()
+    lines = [
+        f"🖥  {s['chip'] or 'Apple Silicon'} — {s['unified_total_gb']} GB unified",
+        f"   resident (PM2): {s['resident_gb_total']} GB  ·  free (vm_stat): {s['vm_free_gb']} GB",
+        f"   available for a new model: ~{s['available_gb']} GB "
+        f"(total − {s['os_reserve_gb']} GB OS reserve − resident)",
+        f"   PM2 processes: {s['pm2_process_count']}",
+    ]
+    if not s["mlx_version_ok"]:
+        lines.append(f"   ⚠ mlx-lm {s['mlx_version'] or 'missing'} — Qwen3 MoE needs ≥ 0.25.2")
+    else:
+        lines.append(f"   mlx-lm {s['mlx_version']} ✓")
+    busy = s["serve_ports_bound"]
+    rng = s["serve_range"]
+    lines.append(
+        f"   Cookbook serve range {rng[0]}-{rng[1]}: "
+        + (f"{len(busy)} busy ({', '.join(map(str, busy))})" if busy else "all free")
+    )
+    lines.append(f"   protected ports (never touched): {', '.join(map(str, s['protected_ports']))}")
+    return "\n".join(lines)
diff --git a/skills/cookbook_serve.py b/skills/cookbook_serve.py
new file mode 100644
index 0000000..8dc9b16
--- /dev/null
+++ b/skills/cookbook_serve.py
@@ -0,0 +1,49 @@
+"""CODEC Skill: Serve a local model on a dedicated Cookbook port (8110-8119)."""
+from codec_cookbook import args, catalog, fit, probe, serve
+
+SKILL_NAME = "cookbook_serve"
+SKILL_DESCRIPTION = (
+    "Serve a local MLX or GGUF model on a dedicated Cookbook port (8110-8119) "
+    "under PM2, after a unified-memory fit check. Never touches existing services."
+)
+SKILL_TAGS = ["cookbook", "models", "serve", "mlx", "local-llm"]
+SKILL_TRIGGERS = [
+    "cookbook serve", "serve the model", "spin up a local model", "load model on cookbook",
+    "start a local model",
+]
+SKILL_MCP_EXPOSE = False  # starts PM2 processes — local/dashboard/voice only (cf. pm2_control)
+
+
+def run(task, app="", ctx=""):
+    model_id = args.parse_model_id(task)
+    if not model_id:
+        return ("Which model? Say e.g. 'cookbook serve qwen3-30b-a3b'. Known: "
+                + ", ".join(catalog.ids()))
+    try:
+        entry = catalog.get(model_id)
+    except KeyError as e:
+        return str(e)
+
+    context_length = args.parse_context(task)
+    force = args.parse_flag(task, "force")
+
+    # Fit check (conservative over-estimate vs. live headroom).
+    need = fit.estimate_footprint_gb(entry["hf_repo"], context_length, entry.get("anchor_gb"))
+    avail = probe.available_gb()
+    ok, headroom = fit.fits(need, avail)
+    if not ok and not force:
+        return (f"⚠ Refused to serve {model_id}: insufficient memory.\n"
+                f"   need ~{round(need, 1)} GB, headroom {round(headroom, 1)} GB "
+                f"(margin {fit.DEFAULT_MARGIN_GB} GB).\n"
+                f"   Free memory, lower the context, or re-run with 'force' to override.")
+
+    res = serve.launch(entry, context_length)
+    status = res.get("status")
+    if status == "serving":
+        return (f"✅ Serving {model_id} on port {res['port']} "
+                f"(pm2: {res['pm2_name']}, ctx {context_length}, ~{round(need, 1)} GB). "
+                f"OpenAI-compatible at http://127.0.0.1:{res['port']}/v1")
+    if status == "started_unhealthy":
+        return (f"⏳ Started {res['pm2_name']} on port {res['port']} but it didn't pass the "
+                f"/v1/models health check in time. Check: pm2 logs {res['pm2_name']}")
+    return f"❌ Could not serve {model_id}: {res.get('reason')} {res.get('detail', '')}".strip()
diff --git a/skills/cookbook_stop.py b/skills/cookbook_stop.py
new file mode 100644
index 0000000..9cdc1d0
--- /dev/null
+++ b/skills/cookbook_stop.py
@@ -0,0 +1,75 @@
+"""CODEC Skill: Stop a Cookbook-served model (guarded; confirm required)."""
+import re
+
+from codec_cookbook import args, serve
+
+SKILL_NAME = "cookbook_stop"
+SKILL_DESCRIPTION = (
+    "Stop a model Cookbook started (by id, PM2 name, or port). Refuses anything "
+    "outside the cookbook- namespace, any protected/non-cookbook port, and "
+    "requires explicit confirmation. NEVER stops an existing service."
+)
+SKILL_TAGS = ["cookbook", "models", "stop", "local-llm"]
+SKILL_TRIGGERS = [
+    "cookbook stop", "stop the model", "stop cookbook model", "unload model",
+    "shut down local model",
+]
+SKILL_MCP_EXPOSE = False  # stops PM2 processes — local/dashboard/voice only (cf. pm2_control)
+
+
+def _resolve_target(task):
+    """Figure out what the user means: an explicit cookbook- pm2 name, a Cookbook
+    port (8110-8119), or a model id mapped via served.json."""
+    # explicit full pm2 name
+    m = re.search(r"\bcookbook-[A-Za-z0-9_.\-]+\b", task or "")
+    if m:
+        return m.group(0), None
+    # explicit Cookbook port
+    port = args.parse_port(task)
+    if port is not None:
+        return port, None
+    # model id → look up our served record(s)
+    model_id = args.parse_model_id(task)
+    if model_id:
+        matches = [r for r in serve.list_served() if r.get("id") == model_id]
+        if len(matches) == 1:
+            return matches[0]["pm2_name"], None
+        if len(matches) > 1:
+            names = ", ".join(r["pm2_name"] for r in matches)
+            return None, (f"Multiple {model_id} instances are served ({names}). "
+                          f"Specify the port or full pm2 name.")
+        return None, (f"Cookbook isn't serving '{model_id}'. "
+                      f"Use 'cookbook list' to see what's running.")
+    return None, None
+
+
+def run(task, app="", ctx=""):
+    target, msg = _resolve_target(task)
+    if msg:
+        return msg
+    if target is None:
+        return ("Which model? Say e.g. 'cookbook stop llama32-3b confirm', "
+                "or give a port (8110-8119) or the full cookbook- pm2 name. "
+                "Use 'cookbook list' to see running models.")
+
+    confirm = args.parse_flag(task, "confirm")
+    res = serve.stop(target, confirm=confirm)
+    status = res.get("status")
+
+    if status == "would_stop":
+        return (f"About to stop {res['pm2_name']} (port {res['port']}). "
+                f"Re-run with 'confirm' to actually stop it: "
+                f"cookbook stop {res['pm2_name']} confirm")
+    if status == "stopped":
+        return f"🛑 Stopped {res['pm2_name']} (port {res['port']}) and freed the Cookbook port."
+    if status == "refused":
+        reason = res.get("reason")
+        if reason == "protected_port":
+            return f"⛔ Refused: port {res['port']} is a protected core service — Cookbook never touches it."
+        if reason == "not_a_cookbook_process":
+            return (f"⛔ Refused: '{res['target']}' isn't a model Cookbook started. "
+                    f"Cookbook only stops its own cookbook- processes.")
+        if reason == "not_cookbook_namespace":
+            return f"⛔ Refused: '{res['pm2_name']}' is not in the cookbook- namespace."
+        return f"⛔ Refused: {reason}"
+    return f"❌ {res.get('reason', 'unknown error')}: {res.get('detail', '')}".strip()
diff --git a/tests/test_cookbook.py b/tests/test_cookbook.py
new file mode 100644
index 0000000..4fa688d
--- /dev/null
+++ b/tests/test_cookbook.py
@@ -0,0 +1,312 @@
+"""CODEC Cookbook — local-model lifecycle tests.
+
+Coverage matches the build brief:
+  * fit.py    — golden anchors (30B→17.2, 80B→42), KV math, offline anchor fallback
+  * serve.py  — port allocation stays in 8110-8119 + skips bound ports
+  * STOP-GUARD (highest priority) — refuses protected ports, non-cookbook ports,
+    non-cookbook PM2 names; requires confirm=True
+  * integration — serve → served.json → list → stop(confirm), PM2/health mocked
+  * skills     — the six thin skills parse args + format helper output
+
+Real PM2 / MLX / Hub calls are mocked so the suite runs offline + side-effect-free.
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+_REPO = Path(__file__).resolve().parents[1]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+
+from codec_cookbook import args, catalog, fit, probe, serve  # noqa: E402
+
+# A representative Qwen3-MoE-ish config for offline KV math.
+_CFG = {"num_hidden_layers": 48, "num_attention_heads": 32,
+        "num_key_value_heads": 4, "head_dim": 128, "hidden_size": 4096}
+
+
+# ── catalog ──────────────────────────────────────────────────────────────
+class TestCatalog:
+    def test_known_ids(self):
+        ids = catalog.ids()
+        assert {"qwen3-30b-a3b", "qwen3-next-80b", "llama32-3b"} <= set(ids)
+
+    def test_get_unknown_raises(self):
+        with pytest.raises(KeyError):
+            catalog.get("does-not-exist")
+
+    def test_find_unknown_is_none(self):
+        assert catalog.find("nope") is None
+
+    def test_by_role(self):
+        assert any(e["id"] == "qwen3-coder-30b" for e in catalog.by_role("code"))
+
+    def test_primary_model_not_in_catalog(self):
+        # the live qwen3.6@8083 must never be Cookbook-managed
+        assert not any("Qwen3.6" in e["hf_repo"] for e in catalog.all_entries())
+
+
+# ── fit math ─────────────────────────────────────────────────────────────
+class TestFit:
+    def test_golden_anchors_present(self):
+        assert catalog.get("qwen3-30b-a3b")["anchor_gb"] == 17.2
+        assert catalog.get("qwen3-next-80b")["anchor_gb"] == 42.0
+
+    def test_kv_cache_math(self):
+        # 2 * nl * nkv * hd * ctx * 2 bytes / 1e9
+        expected = (2 * 48 * 4 * 128 * 8192 * 2) / 1e9
+        assert abs(fit.kv_cache_gb(_CFG, 8192) - expected) < 1e-9
+
+    def test_kv_gqa_falls_back_to_attention_heads(self):
+        mha = {"num_hidden_layers": 4, "num_attention_heads": 8, "hidden_size": 512}
+        # no num_key_value_heads → uses num_attention_heads; head_dim = 512/8 = 64
+        expected = (2 * 4 * 8 * 64 * 1024 * 2) / 1e9
+        assert abs(fit.kv_cache_gb(mha, 1024) - expected) < 1e-9
+
+    def test_footprint_uses_anchor_and_overhead(self):
+        need = fit.estimate_footprint_gb("x", 8192, anchor_gb=17.2, cfg=_CFG)
+        kv = fit.kv_cache_gb(_CFG, 8192)
+        expected = (17.2 + kv) * 1.10 + 1.5
+        assert abs(need - expected) < 1e-6
+
+    def test_offline_anchor_fallback_no_network(self, monkeypatch):
+        # simulate Hub unreachable: load_config raises → KV omitted, no crash,
+        # and weight_gb_from_hub must NOT be called (anchor provided).
+        monkeypatch.setattr(fit, "load_config",
+                            lambda r: (_ for _ in ()).throw(RuntimeError("offline")))
+        called = {"hub": False}
+        monkeypatch.setattr(fit, "weight_gb_from_hub",
+                            lambda r: called.__setitem__("hub", True) or 999.0)
+        need = fit.estimate_footprint_gb("x", 8192, anchor_gb=17.2)
+        assert called["hub"] is False, "anchor given → must not hit the Hub for weights"
+        assert abs(need - (17.2 * 1.10 + 1.5)) < 1e-6
+
+    def test_available_gb_formula(self):
+        assert fit.available_gb(192, [17.2, 42.0], os_reserve_gb=24) == pytest.approx(108.8)
+
+    def test_fits_margin(self):
+        ok, hr = fit.fits(need_gb=50, avail_gb=60, margin_gb=8)
+        assert ok and hr == 10
+        ok2, hr2 = fit.fits(need_gb=55, avail_gb=60, margin_gb=8)
+        assert not ok2 and hr2 == 5
+
+    def test_recommend_orders_fits_first_biggest(self):
+        entries = [
+            {"id": "tiny", "hf_repo": "r/tiny", "anchor_gb": 2.0, "roles": ["tiny"]},
+            {"id": "big", "hf_repo": "r/big", "anchor_gb": 40.0, "roles": ["max"]},
+            {"id": "huge", "hf_repo": "r/huge", "anchor_gb": 200.0, "roles": ["max"]},
+        ]
+        # KV unavailable offline → footprint ≈ anchor*1.1+1.5
+        with patch.object(fit, "load_config", side_effect=RuntimeError("offline")):
+            ranked = fit.recommend(entries, avail_gb=60, ctx=8192)
+        ids = [r["entry"]["id"] for r in ranked]
+        assert ids[0] == "big", "biggest model that fits should rank first"
+        assert ids[-1] == "huge", "non-fitting model ranks last"
+        assert ranked[-1]["fits"] is False
+
+
+# ── args parsing ───────────────────────────────────────────────────────────
+class TestArgs:
+    def test_model_id(self):
+        assert args.parse_model_id("cookbook serve qwen3-30b-a3b now") == "qwen3-30b-a3b"
+        assert args.parse_model_id("nothing here") is None
+
+    def test_context(self):
+        assert args.parse_context("serve x context 16384") == 16384
+        assert args.parse_context("serve x ctx=4096") == 4096
+        assert args.parse_context("serve x") == 8192
+
+    def test_flags(self):
+        assert args.parse_flag("serve x force", "force")
+        assert args.parse_flag("stop x confirm=true", "confirm")
+        assert not args.parse_flag("serve x", "force")
+
+    def test_port(self):
+        assert args.parse_port("stop port 8112") == 8112
+        assert args.parse_port("stop 8083") is None  # not in cookbook range
+        assert args.parse_port("stop x") is None
+
+    def test_role(self):
+        assert args.parse_role("recommend a code model") == "code"
+        assert args.parse_role("recommend something") is None
+
+
+# ── port allocation ─────────────────────────────────────────────────────────
+class TestPortAllocation:
+    def test_stays_in_range_and_skips(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(serve, "SERVED_PATH", str(tmp_path / "served.json"))
+        monkeypatch.setattr(probe, "bound_ports_in_range", lambda lo, hi: {8110, 8111})
+        monkeypatch.setattr(probe, "pm2_processes",
+                            lambda: [{"port": 8112, "name": "x", "status": "online", "rss_gb": 0}])
+        serve._save_served([{"id": "a", "port": 8113, "pm2_name": "cookbook-a-8113"}])
+        port = serve.allocate_port()
+        assert port == 8114
+        assert 8110 <= port <= 8119
+
+    def test_exhausted_returns_none(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(serve, "SERVED_PATH", str(tmp_path / "served.json"))
+        monkeypatch.setattr(probe, "bound_ports_in_range",
+                            lambda lo, hi: set(range(8110, 8120)))
+        monkeypatch.setattr(probe, "pm2_processes", lambda: [])
+        assert serve.allocate_port() is None
+
+    def test_never_allocates_protected_port(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(serve, "SERVED_PATH", str(tmp_path / "served.json"))
+        monkeypatch.setattr(probe, "bound_ports_in_range", lambda lo, hi: set())
+        monkeypatch.setattr(probe, "pm2_processes", lambda: [])
+        for _ in range(20):
+            p = serve.allocate_port()
+            assert p not in probe.PROTECTED_PORTS
+
+
+# ── STOP-GUARD (highest priority) ────────────────────────────────────────────
+class TestStopGuard:
+    @pytest.fixture
+    def served(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(serve, "SERVED_PATH", str(tmp_path / "served.json"))
+        serve._save_served([
+            {"id": "llama32-3b", "port": 8112,
+             "pm2_name": "cookbook-llama32-3b-8112", "backend": "mlx"},
+        ])
+        return serve
+
+    @pytest.mark.parametrize("port", [8083, 8090, 8094, 9223, 5678])
+    def test_refuses_protected_ports(self, served, port):
+        r = served.stop(port, confirm=True)
+        assert r["status"] == "refused"
+
+    def test_refuses_protected_port_even_if_in_served(self, served):
+        # defense-in-depth: a (hypothetical) cookbook record on a protected port
+        served._save_served([{"id": "x", "port": 8090, "pm2_name": "cookbook-x-8090"}])
+        r = served.stop(8090, confirm=True)
+        assert r["status"] == "refused" and r["reason"] == "protected_port"
+
+    @pytest.mark.parametrize("name", ["qwen3.6", "codec-dashboard", "pilot-runner", "n8n"])
+    def test_refuses_non_cookbook_names(self, served, name):
+        r = served.stop(name, confirm=True)
+        assert r["status"] == "refused" and r["reason"] == "not_a_cookbook_process"
+
+    def test_refuses_non_cookbook_namespace_record(self, served):
+        # a served record whose name isn't cookbook- prefixed → guard 3
+        served._save_served([{"id": "x", "port": 8115, "pm2_name": "evil-proc-8115"}])
+        r = served.stop("evil-proc-8115", confirm=True)
+        assert r["status"] == "refused" and r["reason"] == "not_cookbook_namespace"
+
+    def test_refuses_bound_non_cookbook_port(self, served):
+        # a port we never served (not in served.json) → refused, never stopped
+        r = served.stop(8085, confirm=True)
+        assert r["status"] == "refused" and r["reason"] == "not_a_cookbook_process"
+
+    def test_dry_run_without_confirm(self, served):
+        r = served.stop("cookbook-llama32-3b-8112", confirm=False)
+        assert r["status"] == "would_stop"
+        assert r["pm2_name"] == "cookbook-llama32-3b-8112" and r["port"] == 8112
+
+    def test_dry_run_by_port_without_confirm(self, served):
+        assert served.stop(8112, confirm=False)["status"] == "would_stop"
+
+    def test_confirmed_stop_calls_pm2_delete(self, served, monkeypatch):
+        captured = {}
+
+        def fake_run(argv, **kw):
+            captured["argv"] = argv
+            return MagicMock(returncode=0, stdout="ok", stderr="")
+
+        monkeypatch.setattr(serve.subprocess, "run", fake_run)
+        r = served.stop("cookbook-llama32-3b-8112", confirm=True)
+        assert r["status"] == "stopped"
+        assert captured["argv"] == ["pm2", "delete", "cookbook-llama32-3b-8112"]
+        # removed from served.json
+        assert served.stop(8112, confirm=False)["status"] == "refused"
+
+
+# ── integration: serve → list → stop (PM2 + health mocked) ──────────────────
+class TestIntegration:
+    def test_serve_then_list_then_stop(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(serve, "SERVED_PATH", str(tmp_path / "served.json"))
+        monkeypatch.setattr(probe, "bound_ports_in_range", lambda lo, hi: set())
+        monkeypatch.setattr(probe, "pm2_processes", lambda: [])
+        monkeypatch.setattr(serve, "resolve_mlx_python", lambda: "/usr/bin/python3")
+        monkeypatch.setattr(serve, "_health_ok", lambda port, timeout_s=90: True)
+        monkeypatch.setattr(serve.subprocess, "run",
+                            lambda argv, **kw: MagicMock(returncode=0, stdout="ok", stderr=""))
+
+        entry = catalog.get("llama32-3b")
+        res = serve.launch(entry, context_length=8192)
+        assert res["status"] == "serving"
+        assert 8110 <= res["port"] <= 8119
+        assert res["pm2_name"].startswith("cookbook-llama32-3b-")
+
+        # appears in list
+        monkeypatch.setattr(probe, "is_port_bound", lambda port, host="127.0.0.1": True)
+        listed = serve.list_served()
+        assert any(r["id"] == "llama32-3b" for r in listed)
+
+        # stop it
+        stopped = serve.stop(res["pm2_name"], confirm=True)
+        assert stopped["status"] == "stopped"
+        assert serve.list_served() == []
+
+    def test_serve_command_is_corrected_mlx_form(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(serve, "SERVED_PATH", str(tmp_path / "served.json"))
+        monkeypatch.setattr(serve, "resolve_mlx_python", lambda: "/PY")
+        argv, err = serve._build_command(catalog.get("llama32-3b"), 8112, 8192)
+        assert err is None
+        # `-m mlx_lm server` subcommand form (NOT `-m mlx_lm.server`); --max-tokens present
+        assert argv[:5] == ["pm2", "start", "/PY", "--name", "cookbook-llama32-3b-8112"]
+        assert "mlx_lm" in argv and "server" in argv
+        assert "mlx_lm.server" not in argv
+        assert "--max-tokens" in argv
+        assert "--port" in argv and "8112" in argv
+
+
+# ── skills smoke ─────────────────────────────────────────────────────────────
+class TestSkills:
+    def test_all_six_discovered_with_expected_exposure(self):
+        import codec_dispatch
+        codec_dispatch.load_skills()
+        reg = codec_dispatch.registry
+        names = set(reg.names())
+        expected = {"cookbook_scan", "cookbook_recommend", "cookbook_list",
+                    "cookbook_serve", "cookbook_download", "cookbook_stop"}
+        assert expected <= names
+        # read-only exposed, mutating not
+        assert reg.get_mcp_expose("cookbook_scan") is True
+        assert reg.get_mcp_expose("cookbook_serve") is False
+        assert reg.get_mcp_expose("cookbook_stop") is False
+
+    def test_serve_skill_refuses_on_insufficient_memory(self, monkeypatch):
+        import skills.cookbook_serve as cs
+        monkeypatch.setattr(cs.probe, "available_gb", lambda *a, **k: 10.0)
+        monkeypatch.setattr(cs.fit, "estimate_footprint_gb", lambda *a, **k: 42.0)
+        out = cs.run("cookbook serve qwen3-next-80b")
+        assert "Refused" in out and "insufficient memory" in out
+
+    def test_serve_skill_force_overrides(self, monkeypatch):
+        import skills.cookbook_serve as cs
+        monkeypatch.setattr(cs.probe, "available_gb", lambda *a, **k: 10.0)
+        monkeypatch.setattr(cs.fit, "estimate_footprint_gb", lambda *a, **k: 42.0)
+        monkeypatch.setattr(cs.serve, "launch",
+                            lambda e, c: {"status": "serving", "port": 8112,
+                                          "pm2_name": "cookbook-x-8112"})
+        out = cs.run("cookbook serve qwen3-next-80b force")
+        assert "Serving" in out
+
+    def test_stop_skill_requires_confirm(self, monkeypatch):
+        import skills.cookbook_stop as ck
+        monkeypatch.setattr(ck.serve, "list_served",
+                            lambda: [{"id": "llama32-3b", "port": 8112,
+                                      "pm2_name": "cookbook-llama32-3b-8112"}])
+        monkeypatch.setattr(ck.serve, "stop",
+                            lambda t, confirm=False: ({"status": "would_stop",
+                                                       "pm2_name": "cookbook-llama32-3b-8112",
+                                                       "port": 8112} if not confirm
+                                                      else {"status": "stopped",
+                                                            "pm2_name": "cookbook-llama32-3b-8112",
+                                                            "port": 8112}))
+        assert "confirm" in ck.run("cookbook stop llama32-3b").lower()
+        assert "Stopped" in ck.run("cookbook stop llama32-3b confirm")