From 4af98a98614939102af7234ef4a04dda016eafff Mon Sep 17 00:00:00 2001
From: vmelikyan <vahan.melikyan@gmail.com>
Date: Fri, 29 May 2026 00:35:28 -0700
Subject: [PATCH 1/2] feat(agent): build-scoped tool isolation, typed errors,
 run reliability

---
 .mise.toml                                    |   33 +
 Tiltfile                                      |   72 +-
 package.json                                  |    1 +
 .../mcp-servers/[slug]/users/route.test.ts    |   15 +-
 .../agent/mcp-servers/[slug]/users/route.ts   |    7 +-
 .../v2/ai/admin/agent/mcp-servers/route.ts    |    9 +-
 .../agent/sessions/[sessionId]/route.test.ts  |    6 +
 .../admin/agent/sessions/[sessionId]/route.ts |    7 +-
 .../v2/ai/admin/agent/sessions/route.test.ts  |    6 +
 .../api/v2/ai/admin/agent/sessions/route.ts   |    9 +-
 .../[threadId]/conversation/route.test.ts     |    6 +
 .../threads/[threadId]/conversation/route.ts  |    7 +-
 src/app/api/v2/ai/admin/agent/tools/route.ts  |    9 +-
 .../canonical-api-acceptance.test.ts          |   15 +-
 src/app/api/v2/ai/agent/api-keys/route.ts     |   17 +-
 .../agent/build-context-chats/route.test.ts   |   15 +-
 .../v2/ai/agent/build-context-chats/route.ts  |    7 +-
 .../definition-capabilities/route.test.ts     |    6 +
 .../ai/agent/definition-capabilities/route.ts |    7 +-
 .../definitions/[definitionId]/route.test.ts  |   18 +-
 .../agent/definitions/[definitionId]/route.ts |   80 +-
 .../api/v2/ai/agent/definitions/route.test.ts |   18 +-
 src/app/api/v2/ai/agent/definitions/route.ts  |   42 +-
 .../v2/ai/agent/github-token/route.test.ts    |   29 +-
 src/app/api/v2/ai/agent/github-token/route.ts |   14 +-
 .../[slug]/oauth/start/route.test.ts          |    6 +
 .../[slug]/oauth/start/route.ts               |    7 +-
 .../mcp-connections/[slug]/route.test.ts      |    6 +
 .../ai/agent/mcp-connections/[slug]/route.ts  |   14 +-
 .../api/v2/ai/agent/mcp-connections/route.ts  |    9 +-
 src/app/api/v2/ai/agent/models/route.ts       |    9 +-
 .../[actionId]/respond/route.test.ts          |   17 +-
 .../[actionId]/respond/route.ts               |    7 +-
 .../agent/runs/[runId]/cancel/route.test.ts   |   17 +-
 .../v2/ai/agent/runs/[runId]/cancel/route.ts  |    7 +-
 .../agent/runs/[runId]/events/route.test.ts   |   15 +-
 .../v2/ai/agent/runs/[runId]/events/route.ts  |    7 +-
 .../runs/[runId]/events/stream/route.test.ts  |   15 +-
 .../agent/runs/[runId]/events/stream/route.ts |    2 +
 src/app/api/v2/ai/agent/runs/[runId]/route.ts |    7 +-
 .../runtime-controls/preview/route.test.ts    |   15 +
 .../agent/runtime-controls/preview/route.ts   |   38 +-
 .../launches/[launchId]/route.ts              |    7 +-
 .../ai/agent/sandbox-sessions/route.test.ts   |    6 +
 .../api/v2/ai/agent/sandbox-sessions/route.ts |    8 +-
 .../v2/ai/agent/session-candidates/route.ts   |    5 +-
 .../agent/sessions/[sessionId]/route.test.ts  |    8 +-
 .../v2/ai/agent/sessions/[sessionId]/route.ts |    8 +-
 .../[sessionId]/sandbox/resume/route.test.ts  |    8 +-
 .../[sessionId]/sandbox/resume/route.ts       |    7 +-
 .../[sessionId]/sandbox/suspend/route.test.ts |    8 +-
 .../[sessionId]/sandbox/suspend/route.ts      |    7 +-
 .../[sessionId]/services/route.test.ts        |    6 +
 .../sessions/[sessionId]/services/route.ts    |    5 +-
 .../[sessionId]/threads/route.test.ts         |   24 +-
 .../sessions/[sessionId]/threads/route.ts     |   42 +-
 .../[sessionId]/workspace/open/route.test.ts  |    8 +-
 .../[sessionId]/workspace/open/route.ts       |    7 +-
 .../api/v2/ai/agent/sessions/route.test.ts    |   20 +-
 src/app/api/v2/ai/agent/sessions/route.ts     |   43 +-
 src/app/api/v2/ai/agent/settings/route.ts     |    9 +-
 .../threads/[threadId]/agent/route.test.ts    |   12 +-
 .../agent/threads/[threadId]/agent/route.ts   |   19 +-
 .../threads/[threadId]/messages/route.test.ts |   15 +-
 .../threads/[threadId]/messages/route.ts      |    7 +-
 .../[threadId]/pending-actions/route.test.ts  |   17 +-
 .../[threadId]/pending-actions/route.ts       |    7 +-
 .../v2/ai/agent/threads/[threadId]/route.ts   |    9 +-
 .../threads/[threadId]/runs/route.test.ts     |   15 +-
 .../ai/agent/threads/[threadId]/runs/route.ts |   40 +-
 .../[threadId]/runtime-controls/route.test.ts |   15 +
 .../[threadId]/runtime-controls/route.ts      |   56 +-
 .../threads/[threadId]/usage/route.test.ts    |   15 +-
 .../agent/threads/[threadId]/usage/route.ts   |    7 +-
 .../repos/[...fullName]/route.test.ts         |  100 ++
 .../repos/[...fullName]/route.ts              |    5 +-
 .../v2/ai/config/agent-session/route.test.ts  |   79 ++
 .../api/v2/ai/config/agent-session/route.ts   |    3 +-
 .../agent-session/runtime/route.test.ts       |   79 ++
 .../ai/config/agent-session/runtime/route.ts  |    3 +-
 .../config/mcp-servers/[slug]/route.test.ts   |   99 ++
 .../v2/ai/config/mcp-servers/[slug]/route.ts  |    5 +-
 .../v2/ai/config/mcp-servers/route.test.ts    |   79 ++
 src/app/api/v2/ai/config/mcp-servers/route.ts |    3 +-
 src/app/api/v2/config/sites/route.test.ts     |  167 +++
 src/app/api/v2/config/sites/route.ts          |  116 ++
 .../__tests__/agentSessionCleanup.test.ts     |  398 ++----
 src/server/jobs/agentSessionCleanup.ts        |   47 +-
 src/server/jobs/index.ts                      |   56 +-
 src/server/lib/agent/runRequestText.ts        |   33 +
 .../__tests__/runtimeConfig.test.ts           |   12 -
 .../__tests__/startupFailureState.test.ts     |   62 +
 .../__tests__/systemPrompt.test.ts            |   95 +-
 .../__tests__/workspaceEditorProxy.test.ts    |  112 +-
 src/server/lib/agentSession/runtimeConfig.ts  |   12 +-
 .../lib/agentSession/startupFailureState.ts   |   64 +
 src/server/lib/agentSession/systemPrompt.ts   |   85 +-
 .../lib/agentSession/workspaceEditorProxy.ts  |  226 ++++
 src/server/lib/appError.ts                    |  130 ++
 src/server/lib/auth.test.ts                   |   90 ++
 src/server/lib/auth.ts                        |   35 +-
 src/server/lib/codefresh/index.ts             |   15 +-
 src/server/lib/createApiHandler.ts            |    1 +
 src/server/lib/get-user.ts                    |   10 +
 src/server/lib/response.ts                    |   20 +-
 .../lib/validation/sitesConfigSchemas.ts      |   79 ++
 .../services/__tests__/agentSession.test.ts   |  190 ++-
 .../services/agent/AgentSelectionService.ts   |   13 +-
 .../services/agent/BuildContextChatService.ts |   23 +-
 .../services/agent/CapabilityService.ts       | 1148 +----------------
 .../services/agent/ChatSessionService.ts      |    1 +
 .../agent/CustomAgentDefinitionService.ts     |   28 +-
 .../agent/InstructionTemplateService.ts       |   79 +-
 .../services/agent/LifecycleAiSdkHarness.ts   |   30 +-
 src/server/services/agent/MessageStore.ts     |  249 ++--
 src/server/services/agent/PolicyService.ts    |   14 +-
 src/server/services/agent/ProviderRegistry.ts |   11 +-
 src/server/services/agent/RunEventService.ts  |  761 +++--------
 src/server/services/agent/RunExecutor.ts      |   89 +-
 src/server/services/agent/RunPlanResolver.ts  |   61 +-
 .../agent/RunResumeEligibilityService.ts      |   40 +-
 src/server/services/agent/RunService.ts       |  343 +++--
 .../agent/ThreadRuntimeControlsService.ts     |   16 +-
 src/server/services/agent/ThreadService.ts    |   27 +-
 .../agent/WorkspaceRuntimeStateService.ts     |   13 +-
 .../agent/__tests__/CapabilityService.test.ts |  111 +-
 .../CustomAgentDefinitionService.test.ts      |   22 +-
 ...tPartyAgentDefinitions.integration.test.ts |   12 +-
 .../InstructionTemplateService.test.ts        |   35 +-
 .../agent/__tests__/MessageStore.test.ts      |  135 ++
 .../agent/__tests__/PolicyService.test.ts     |   54 +-
 .../agent/__tests__/RunEventService.test.ts   |  143 +-
 .../agent/__tests__/RunExecutor.test.ts       |  113 +-
 .../agent/__tests__/RunPlanResolver.test.ts   |   33 +-
 .../RunResumeEligibilityService.test.ts       |   45 +
 .../agent/__tests__/RunService.test.ts        |  333 +++--
 .../__tests__/debugToolLoopControls.test.ts   |   74 +-
 .../agent/__tests__/sessionOwnership.test.ts  |   56 +
 .../__tests__/thinkingProviderOptions.test.ts |   48 +
 .../agent/capabilitySessionContext.ts         |  233 ++++
 .../services/agent/capabilityToolHelpers.ts   |   96 ++
 .../agent/chatWorkspaceToolRegistration.ts    |  800 ++++++++++++
 .../services/agent/debugToolLoopControls.ts   |   51 +-
 src/server/services/agent/diagnosticTools.ts  |   27 +-
 src/server/services/agent/errors.ts           |   26 +-
 .../services/agent/mcpToolRegistration.ts     |  181 +++
 .../services/agent/runErrorClassification.ts  |  146 +++
 .../services/agent/runEventChunkCodec.ts      |  569 ++++++++
 src/server/services/agent/runEventUtils.ts    |   19 +
 src/server/services/agent/sessionOwnership.ts |   11 +
 .../services/agent/systemAgentDefinitions.ts  |    2 -
 .../agent/systemInstructionTemplates.ts       |   80 +-
 .../services/agent/thinkingProviderOptions.ts |   56 +
 .../tools/__tests__/outputLimiter.test.ts     |   39 +
 .../__tests__/getCodefreshLogs.test.ts        |   65 +
 .../agent/tools/codefresh/getCodefreshLogs.ts |   16 +-
 .../tools/github/__tests__/getFile.test.ts    |   12 +
 .../github/__tests__/listDirectory.test.ts    |   12 +
 .../services/agent/tools/github/getFile.ts    |   21 +-
 .../agent/tools/github/listDirectory.ts       |   21 +-
 .../k8s/__tests__/getK8sResources.test.ts     |  144 +++
 .../tools/k8s/__tests__/getPodLogs.test.ts    |   74 +-
 .../k8s/__tests__/patchK8sResource.test.ts    |   95 +-
 .../agent/tools/k8s/getK8sResources.ts        |  105 +-
 .../agent/tools/k8s/getLifecycleLogs.ts       |   50 +-
 .../services/agent/tools/k8s/getPodLogs.ts    |   42 +-
 .../agent/tools/k8s/patchK8sResource.ts       |   68 +-
 .../services/agent/tools/k8s/queryDatabase.ts |    7 +-
 .../services/agent/tools/outputLimiter.ts     |   82 +-
 .../shared/__tests__/databaseClient.test.ts   |  163 +++
 .../agent/tools/shared/databaseClient.test.ts |   21 +
 .../agent/tools/shared/databaseClient.ts      |   82 +-
 .../agent/tools/shared/githubClient.ts        |   42 +
 .../services/agent/tools/shared/k8sClient.ts  |   36 +
 .../agentRuntime/mcp/__tests__/config.test.ts |   98 ++
 .../mcp/__tests__/oauthProvider.test.ts       |   25 +-
 .../services/agentRuntime/mcp/config.ts       |  519 +-------
 .../agentRuntime/mcp/mcpConfigSecrets.ts      |  532 ++++++++
 .../agentRuntime/mcp/oauthProvider.ts         |    4 +-
 src/server/services/agentSession.ts           |   45 +-
 src/server/services/sitesConfig.test.ts       |  169 +++
 src/server/services/sitesConfig.ts            |  206 +++
 src/shared/openApiSpec.ts                     |  130 ++
 sysops/dockerfiles/tilt.app.Dockerfile        |    7 +
 sysops/tilt/lifecycle-keycloak-values.yaml    |    2 +
 sysops/tilt/scripts/app_setup_entrypoint.sh   |    6 +
 .../tilt/scripts/sync_keycloak_github_idp.sh  |   11 +-
 ws-server.ts                                  |  242 +++-
 188 files changed, 9327 insertions(+), 3956 deletions(-)
 create mode 100644 src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.test.ts
 create mode 100644 src/app/api/v2/ai/config/agent-session/route.test.ts
 create mode 100644 src/app/api/v2/ai/config/agent-session/runtime/route.test.ts
 create mode 100644 src/app/api/v2/ai/config/mcp-servers/[slug]/route.test.ts
 create mode 100644 src/app/api/v2/ai/config/mcp-servers/route.test.ts
 create mode 100644 src/app/api/v2/config/sites/route.test.ts
 create mode 100644 src/app/api/v2/config/sites/route.ts
 create mode 100644 src/server/lib/agent/runRequestText.ts
 create mode 100644 src/server/lib/appError.ts
 create mode 100644 src/server/lib/auth.test.ts
 create mode 100644 src/server/lib/validation/sitesConfigSchemas.ts
 create mode 100644 src/server/services/agent/__tests__/sessionOwnership.test.ts
 create mode 100644 src/server/services/agent/__tests__/thinkingProviderOptions.test.ts
 create mode 100644 src/server/services/agent/capabilitySessionContext.ts
 create mode 100644 src/server/services/agent/capabilityToolHelpers.ts
 create mode 100644 src/server/services/agent/chatWorkspaceToolRegistration.ts
 create mode 100644 src/server/services/agent/mcpToolRegistration.ts
 create mode 100644 src/server/services/agent/runErrorClassification.ts
 create mode 100644 src/server/services/agent/runEventChunkCodec.ts
 create mode 100644 src/server/services/agent/runEventUtils.ts
 create mode 100644 src/server/services/agent/sessionOwnership.ts
 create mode 100644 src/server/services/agent/thinkingProviderOptions.ts
 create mode 100644 src/server/services/agent/tools/codefresh/__tests__/getCodefreshLogs.test.ts
 create mode 100644 src/server/services/agent/tools/shared/__tests__/databaseClient.test.ts
 create mode 100644 src/server/services/agentRuntime/mcp/mcpConfigSecrets.ts
 create mode 100644 src/server/services/sitesConfig.test.ts
 create mode 100644 src/server/services/sitesConfig.ts

diff --git a/.mise.toml b/.mise.toml
index c738766e..96fab687 100644
--- a/.mise.toml
+++ b/.mise.toml
@@ -42,6 +42,39 @@ echo "Starting tilt..."
 tilt up
 """
 
+[tasks.prd]
+description = "Like dev, but run web/worker/gateway from a production-built image (pnpm start, real SSE streaming; no HMR)"
+run = """
+#!/usr/bin/env bash
+set -e
+
+CLUSTER_NAME="lfc"
+REGISTRY_CONFIG_DIR="/tmp/kind-registry-config/10.96.188.230:5000"
+
+echo "Setting up registry config for containerd..."
+mkdir -p "$REGISTRY_CONFIG_DIR"
+cat > "$REGISTRY_CONFIG_DIR/hosts.toml" << 'EOF'
+server = "http://10.96.188.230:5000"
+
+[host."http://10.96.188.230:5000"]
+  capabilities = ["pull", "resolve", "push"]
+  skip_verify = true
+EOF
+
+if kind get clusters 2>/dev/null | grep -q "^${CLUSTER_NAME}$"; then
+  echo "Kind cluster '$CLUSTER_NAME' already exists"
+else
+  echo "Creating kind cluster '$CLUSTER_NAME'..."
+  kind create cluster --config sysops/tilt/kind-config.yaml --name "$CLUSTER_NAME"
+fi
+
+echo "Switching kubectl context to kind-$CLUSTER_NAME..."
+kubectl config use-context "kind-$CLUSTER_NAME"
+
+echo "Starting tilt (LIFECYCLE_PROD=1 -- production build, no HMR)..."
+LIFECYCLE_PROD=1 tilt up
+"""
+
 [tasks.down]
 description = "Stop tilt (cluster remains)"
 run = """
diff --git a/Tiltfile b/Tiltfile
index 34b323a3..4cbe82f5 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -316,6 +316,7 @@ helm_resource(
         '--set', 'companyIdp.jwksUrl={}/realms/internal/protocol/openid-connect/certs'.format(internal_keycloak_origin),
         '--set', 'companyIdp.logoutUrl={}/realms/internal/protocol/openid-connect/logout'.format(company_idp_origin),
         '--set', 'companyIdp.issuer={}/realms/internal'.format(company_idp_origin),
+        '--set', 'internalIdp.internalUrl={}'.format(internal_keycloak_origin),
     ],
     labels=['infra'],
 )
@@ -338,26 +339,40 @@ local_resource(
 # Worker & Web (Helm, Single Deploy)
 ##################################
 
-docker_build_with_restart(
-    lifecycle_app,
-    ".",
-    entrypoint=["/app_setup_entrypoint.sh"],
-    dockerfile="sysops/dockerfiles/tilt.app.dockerfile",
-    build_args={
-        "APP_DB_HOST": "local-postgres.{}.svc.cluster.local".format(app_namespace),
-        "APP_DB_PORT": "5432",
-        "APP_DB_USER": "lifecycle",
-        "APP_DB_PASSWORD": "lifecycle",
-        "APP_DB_NAME": "lifecycle",
-        "APP_DB_SSL": "false",
-        "APP_REDIS_HOST": "redis-master.{}.svc.cluster.local".format(app_namespace),
-        "APP_REDIS_PORT": "6379",
-        "APP_REDIS_PASSWORD": "",
-    },
-    live_update=[
-        sync("./src", "/app/src"),
-    ],
-)
+# LIFECYCLE_PROD=1 builds a production image and runs `pnpm start` so SSE streams incrementally (dev's on-demand compile batches reasoning replays). Trade-off: no HMR.
+lifecycle_prod = str(os.getenv("LIFECYCLE_PROD", "")).lower() in ("1", "true", "yes", "on")
+
+lifecycle_app_build_args = {
+    "APP_DB_HOST": "local-postgres.{}.svc.cluster.local".format(app_namespace),
+    "APP_DB_PORT": "5432",
+    "APP_DB_USER": "lifecycle",
+    "APP_DB_PASSWORD": "lifecycle",
+    "APP_DB_NAME": "lifecycle",
+    "APP_DB_SSL": "false",
+    "APP_REDIS_HOST": "redis-master.{}.svc.cluster.local".format(app_namespace),
+    "APP_REDIS_PORT": "6379",
+    "APP_REDIS_PASSWORD": "",
+}
+
+if lifecycle_prod:
+    print("LIFECYCLE_PROD=on -> building production lifecycle image; web/worker/gateway run `pnpm start` (no HMR; slower first build)")
+    docker_build(
+        lifecycle_app,
+        ".",
+        dockerfile="sysops/dockerfiles/tilt.app.Dockerfile",
+        build_args=dict(lifecycle_app_build_args, LIFECYCLE_BUILD="prod"),
+    )
+else:
+    docker_build_with_restart(
+        lifecycle_app,
+        ".",
+        entrypoint=["/app_setup_entrypoint.sh"],
+        dockerfile="sysops/dockerfiles/tilt.app.Dockerfile",
+        build_args=lifecycle_app_build_args,
+        live_update=[
+            sync("./src", "/app/src"),
+        ],
+    )
 
 helm_set_args = [
     'namespace={}'.format(app_namespace),
@@ -410,6 +425,15 @@ for r in lifecycle_deployment:
                 "subPath": "credentials",
                 "readOnly": False
             })
+            # Pin LIFECYCLE_SERVE=prod and LOG_LEVEL=debug on the container: `pnpm start` sets no LOG_LEVEL default, while `pnpm dev` falls back to info.
+            if lifecycle_prod and "keycloak" not in r["metadata"]["name"]:
+                container["env"] = [
+                    e for e in (container.get("env") or [])
+                    if e.get("name") not in ("LIFECYCLE_SERVE", "LOG_LEVEL")
+                ] + [
+                    {"name": "LIFECYCLE_SERVE", "value": "prod"},
+                    {"name": "LOG_LEVEL", "value": "debug"},
+                ]
     patched_deploy.append(r)
 
 k8s_yaml(encode_yaml_stream(patched_deploy))
@@ -423,8 +447,9 @@ for r in patched_deploy:
         resource_deps = []
 
         # Don't add postgres/redis deps for keycloak resources
-        if "keycloak" not in name:
-            resource_deps = ['local-postgres', 'redis', 'lifecycle-keycloak-github-idp-sync', 'agent-session-workspace-image']
+        if "keycloak" not in name and not lifecycle_prod:
+            resource_deps = ['local-postgres', 'redis', 'lifecycle-keycloak-github-idp-sync']
+        # Prod mode sets no resource_deps so the slow image build starts immediately; on a cached rebuild the web pod may crash-loop until Postgres is up.
         if "web" in name:
             labels = ["web"]
             port_forwards = ['5001:80']
@@ -484,9 +509,12 @@ k8s_resource(
 k8s_resource(
     'lifecycle-keycloak',
     port_forwards=['8081:8080'],
+    extra_pod_selectors=[{'app': 'keycloak'}],
+    discovery_strategy="selectors-only",
     labels=["infra"]
 )
 
+
 ##################################
 # DISTRIBUTION
 ##################################
diff --git a/package.json b/package.json
index e46db839..8bceef92 100644
--- a/package.json
+++ b/package.json
@@ -9,6 +9,7 @@
     "babel-node": "babel-node --extensions '.ts'",
     "dev": "LOG_LEVEL=${LOG_LEVEL:-info} ts-node -r ./dd-trace.js -r tsconfig-paths/register --project tsconfig.server.json ws-server.ts | pino-pretty -c -t HH:MM -i pid,hostname,filename -o '{msg}'",
     "build": "next build && tsc --project tsconfig.server.json && tsc-alias -p tsconfig.server.json",
+    "build:local": "next build --no-lint && tsc --project tsconfig.server.json && tsc-alias -p tsconfig.server.json",
     "start": "NEXT_MANUAL_SIG_HANDLE=true NODE_ENV=production node -r ./dd-trace.js .next/ws-server.js",
     "run-prod": "port=5001 pnpm run start",
     "knex": "pnpm run knex",
diff --git a/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.test.ts b/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.test.ts
index 41ccef60..c4a6cb29 100644
--- a/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.test.ts
+++ b/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.test.ts
@@ -23,9 +23,18 @@ jest.mock('server/services/agent/AdminService', () => ({
   },
 }));
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws UnauthorizedError when the mock yields a falsy value.
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 import { GET } from './route';
 import AgentAdminService from 'server/services/agent/AdminService';
diff --git a/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.ts b/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.ts
index 4049a934..9636afa7 100644
--- a/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.ts
+++ b/src/app/api/v2/ai/admin/agent/mcp-servers/[slug]/users/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentAdminService from 'server/services/agent/AdminService';
 
 /**
@@ -72,10 +72,7 @@ import AgentAdminService from 'server/services/agent/AdminService';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { slug: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  requireRequestUserIdentity(req);
 
   const scope = req.nextUrl.searchParams.get('scope');
   if (!scope) {
diff --git a/src/app/api/v2/ai/admin/agent/mcp-servers/route.ts b/src/app/api/v2/ai/admin/agent/mcp-servers/route.ts
index 02feaac1..b12341c2 100644
--- a/src/app/api/v2/ai/admin/agent/mcp-servers/route.ts
+++ b/src/app/api/v2/ai/admin/agent/mcp-servers/route.ts
@@ -16,8 +16,8 @@
 
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentAdminService from 'server/services/agent/AdminService';
 
 export const dynamic = 'force-dynamic';
@@ -57,10 +57,7 @@ export const dynamic = 'force-dynamic';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  requireRequestUserIdentity(req);
 
   const scope = req.nextUrl.searchParams.get('scope') || 'global';
   const result = await AgentAdminService.listMcpServerCoverage(scope);
diff --git a/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.test.ts b/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.test.ts
index 14300b8d..54c4d828 100644
--- a/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.test.ts
+++ b/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.test.ts
@@ -29,6 +29,12 @@ jest.mock('server/services/agent/AdminService', () => ({
 jest.mock('server/lib/get-user', () => ({
   getUser: (...args: unknown[]) => mockGetUser(...args),
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws UnauthorizedError when the mock yields a falsy value.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 import { GET } from './route';
diff --git a/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.ts b/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.ts
index af5991b5..5da50764 100644
--- a/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.ts
+++ b/src/app/api/v2/ai/admin/agent/sessions/[sessionId]/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentAdminService from 'server/services/agent/AdminService';
 
 /**
@@ -64,10 +64,7 @@ import AgentAdminService from 'server/services/agent/AdminService';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { sessionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  requireRequestUserIdentity(req);
 
   try {
     const result = await AgentAdminService.getSession(params.sessionId);
diff --git a/src/app/api/v2/ai/admin/agent/sessions/route.test.ts b/src/app/api/v2/ai/admin/agent/sessions/route.test.ts
index 935ad14c..d557e345 100644
--- a/src/app/api/v2/ai/admin/agent/sessions/route.test.ts
+++ b/src/app/api/v2/ai/admin/agent/sessions/route.test.ts
@@ -29,6 +29,12 @@ jest.mock('server/services/agent/AdminService', () => ({
 jest.mock('server/lib/get-user', () => ({
   getUser: (...args: unknown[]) => mockGetUser(...args),
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws UnauthorizedError when the mock yields a falsy value.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 import { GET } from './route';
diff --git a/src/app/api/v2/ai/admin/agent/sessions/route.ts b/src/app/api/v2/ai/admin/agent/sessions/route.ts
index 5dff7a2c..ab00d2e0 100644
--- a/src/app/api/v2/ai/admin/agent/sessions/route.ts
+++ b/src/app/api/v2/ai/admin/agent/sessions/route.ts
@@ -16,9 +16,9 @@
 
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { errorResponse, successResponse } from 'server/lib/response';
+import { successResponse } from 'server/lib/response';
 import { getPaginationParamsFromURL } from 'server/lib/paginate';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentAdminService from 'server/services/agent/AdminService';
 
 /**
@@ -87,10 +87,7 @@ import AgentAdminService from 'server/services/agent/AdminService';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  requireRequestUserIdentity(req);
 
   const { page, limit } = getPaginationParamsFromURL(req.nextUrl.searchParams);
   const status = req.nextUrl.searchParams.get('status') || 'all';
diff --git a/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.test.ts b/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.test.ts
index e341537b..d699bdf4 100644
--- a/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.test.ts
+++ b/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.test.ts
@@ -22,6 +22,12 @@ const mockGetRequestUserIdentity = jest.fn();
 jest.mock('server/lib/get-user', () => ({
   getUser: (...args: unknown[]) => mockGetUser(...args),
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws UnauthorizedError when the mock yields a falsy value.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agent/AdminService', () => ({
diff --git a/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.ts b/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.ts
index 83a88a87..5529ab36 100644
--- a/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.ts
+++ b/src/app/api/v2/ai/admin/agent/threads/[threadId]/conversation/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentAdminService from 'server/services/agent/AdminService';
 
 /**
@@ -64,10 +64,7 @@ import AgentAdminService from 'server/services/agent/AdminService';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  requireRequestUserIdentity(req);
 
   try {
     const result = await AgentAdminService.getThreadConversation(params.threadId);
diff --git a/src/app/api/v2/ai/admin/agent/tools/route.ts b/src/app/api/v2/ai/admin/agent/tools/route.ts
index 663160da..0958b70f 100644
--- a/src/app/api/v2/ai/admin/agent/tools/route.ts
+++ b/src/app/api/v2/ai/admin/agent/tools/route.ts
@@ -16,8 +16,8 @@
 
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentSessionConfigService from 'server/services/agentSessionConfig';
 
 export const dynamic = 'force-dynamic';
@@ -57,10 +57,7 @@ export const dynamic = 'force-dynamic';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  requireRequestUserIdentity(req);
 
   const scope = req.nextUrl.searchParams.get('scope') || 'global';
   const data = await AgentSessionConfigService.getInstance().listToolInventory(scope);
diff --git a/src/app/api/v2/ai/agent/__tests__/canonical-api-acceptance.test.ts b/src/app/api/v2/ai/agent/__tests__/canonical-api-acceptance.test.ts
index c255519f..71e069da 100644
--- a/src/app/api/v2/ai/agent/__tests__/canonical-api-acceptance.test.ts
+++ b/src/app/api/v2/ai/agent/__tests__/canonical-api-acceptance.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws UnauthorizedError when the mock yields a falsy value.
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/lib/agentSession/githubToken', () => ({
   resolveRequestGitHubToken: jest.fn(),
diff --git a/src/app/api/v2/ai/agent/api-keys/route.ts b/src/app/api/v2/ai/agent/api-keys/route.ts
index ee6ff09a..1ae72982 100644
--- a/src/app/api/v2/ai/agent/api-keys/route.ts
+++ b/src/app/api/v2/ai/agent/api-keys/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { successResponse, errorResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentRuntimeConfigService from 'server/services/agentRuntime/config/agentRuntimeConfig';
 import UserApiKeyService from 'server/services/userApiKey';
 import AgentProviderRegistry from 'server/services/agent/ProviderRegistry';
@@ -269,10 +269,7 @@ async function buildProviderStateWithSharedFallback(
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const providerParam = getSearchParam(req, 'provider');
   const requestedProvider = providerParam == null ? null : normalizeProvider(providerParam);
@@ -305,10 +302,7 @@ const getHandler = async (req: NextRequest) => {
 };
 
 const postHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const body = await req.json().catch(() => ({}));
   const provider = normalizeProvider(body?.provider);
@@ -333,10 +327,7 @@ const postHandler = async (req: NextRequest) => {
 };
 
 const deleteHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const provider = normalizeProvider(getSearchParam(req, 'provider'));
   if (!provider) {
diff --git a/src/app/api/v2/ai/agent/build-context-chats/route.test.ts b/src/app/api/v2/ai/agent/build-context-chats/route.test.ts
index 260fa64d..18fa2a07 100644
--- a/src/app/api/v2/ai/agent/build-context-chats/route.test.ts
+++ b/src/app/api/v2/ai/agent/build-context-chats/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  const requireRequestUserIdentity = (...args: unknown[]) => {
+    const identity = getRequestUserIdentity(...args);
+    if (identity) return identity;
+    const { UnauthorizedError } = jest.requireActual('server/lib/appError');
+    throw new UnauthorizedError();
+  };
+
+  return { getRequestUserIdentity, requireRequestUserIdentity };
+});
 
 jest.mock('server/services/agent/BuildContextChatService', () => {
   class BuildContextChatBuildNotFoundError extends Error {
diff --git a/src/app/api/v2/ai/agent/build-context-chats/route.ts b/src/app/api/v2/ai/agent/build-context-chats/route.ts
index da8db2ed..da413355 100644
--- a/src/app/api/v2/ai/agent/build-context-chats/route.ts
+++ b/src/app/api/v2/ai/agent/build-context-chats/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import BuildContextChatService, {
   BuildContextChatBuildNotFoundError,
   BuildContextChatSelectedDeployError,
@@ -156,10 +156,7 @@ function parseCreateBuildContextChatBody(body: unknown): CreateBuildContextChatB
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const postHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   let requestBody: CreateBuildContextChatBody;
   try {
diff --git a/src/app/api/v2/ai/agent/definition-capabilities/route.test.ts b/src/app/api/v2/ai/agent/definition-capabilities/route.test.ts
index 6ac21a9c..8794f310 100644
--- a/src/app/api/v2/ai/agent/definition-capabilities/route.test.ts
+++ b/src/app/api/v2/ai/agent/definition-capabilities/route.test.ts
@@ -22,6 +22,12 @@ const mockListUserSelectableCapabilities = jest.fn();
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const identity = mockGetRequestUserIdentity(...args);
+    if (identity) return identity;
+    throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+  },
 }));
 
 jest.mock('server/services/agent/CustomAgentDefinitionService', () => ({
diff --git a/src/app/api/v2/ai/agent/definition-capabilities/route.ts b/src/app/api/v2/ai/agent/definition-capabilities/route.ts
index 4db7b587..8998577d 100644
--- a/src/app/api/v2/ai/agent/definition-capabilities/route.ts
+++ b/src/app/api/v2/ai/agent/definition-capabilities/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import {
   customAgentDefinitionService,
@@ -98,10 +98,7 @@ function serializeCapability(capability: UserAgentDefinitionCapability): UserAge
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const resourceBehavior = parseResourceBehavior(req);
   if (resourceBehavior instanceof Error) {
diff --git a/src/app/api/v2/ai/agent/definitions/[definitionId]/route.test.ts b/src/app/api/v2/ai/agent/definitions/[definitionId]/route.test.ts
index 02168b1f..8ee6b4eb 100644
--- a/src/app/api/v2/ai/agent/definitions/[definitionId]/route.test.ts
+++ b/src/app/api/v2/ai/agent/definitions/[definitionId]/route.test.ts
@@ -23,13 +23,29 @@ const mockArchiveUserDefinition = jest.fn();
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const identity = mockGetRequestUserIdentity(...args);
+    if (identity) return identity;
+    throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+  },
 }));
 
 jest.mock('server/services/agent/CustomAgentDefinitionService', () => {
+  const CONTRACT: Record<string, { httpStatus: number; code: string }> = {
+    not_found: { httpStatus: 404, code: 'custom_agent_not_found' },
+    model_unavailable: { httpStatus: 409, code: 'custom_agent_conflict' },
+    creation_unavailable: { httpStatus: 403, code: 'custom_agent_creation_unavailable' },
+  };
   class CustomAgentDefinitionServiceError extends Error {
-    constructor(public readonly code: string, message: string) {
+    readonly httpStatus: number;
+    readonly code: string;
+    constructor(public readonly reason: string, message: string) {
       super(message);
       this.name = 'CustomAgentDefinitionServiceError';
+      const { httpStatus, code } = CONTRACT[reason] || { httpStatus: 400, code: 'custom_agent_invalid' };
+      this.httpStatus = httpStatus;
+      this.code = code;
     }
   }
 
diff --git a/src/app/api/v2/ai/agent/definitions/[definitionId]/route.ts b/src/app/api/v2/ai/agent/definitions/[definitionId]/route.ts
index b25d638d..9bf315ad 100644
--- a/src/app/api/v2/ai/agent/definitions/[definitionId]/route.ts
+++ b/src/app/api/v2/ai/agent/definitions/[definitionId]/route.ts
@@ -17,10 +17,9 @@
 import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import {
-  CustomAgentDefinitionServiceError,
   customAgentDefinitionService,
   serializeUserAgentDefinition,
 } from 'server/services/agent/CustomAgentDefinitionService';
@@ -46,26 +45,6 @@ const RESOURCE_BEHAVIORS = new Set<UserAgentDefinitionResourceBehavior>([
   'current_workspace_when_available',
 ]);
 
-function mapDefinitionError(error: unknown, req: NextRequest) {
-  if (error instanceof CustomAgentDefinitionServiceError) {
-    if (error.code === 'not_found') {
-      return errorResponse(error, { status: 404 }, req);
-    }
-
-    if (error.code === 'model_unavailable') {
-      return errorResponse(error, { status: 409 }, req);
-    }
-
-    if (error.code === 'creation_unavailable') {
-      return errorResponse(error, { status: 403 }, req);
-    }
-
-    return errorResponse(error, { status: 400 }, req);
-  }
-
-  throw error;
-}
-
 async function readRequestBody(req: NextRequest): Promise<Record<string, unknown> | Error> {
   let body: unknown;
   try {
@@ -316,25 +295,16 @@ function parseUpsertBody(body: Record<string, unknown>): UserAgentDefinitionUpse
  *             schema:
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
+// Service errors (CustomAgentDefinitionServiceError, an AppError) propagate; createApiHandler maps httpStatus/code.
 const getHandler = async (req: NextRequest, { params }: { params: { definitionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const { userId } = requireRequestUserIdentity(req);
 
-  try {
-    const definition = await customAgentDefinitionService.getUserDefinition(params.definitionId, userIdentity.userId);
-    return successResponse({ definition: serializeUserAgentDefinition(definition) }, { status: 200 }, req);
-  } catch (error) {
-    return mapDefinitionError(error, req);
-  }
+  const userDefinition = await customAgentDefinitionService.getUserDefinition(params.definitionId, userId);
+  return successResponse({ definition: serializeUserAgentDefinition(userDefinition) }, { status: 200 }, req);
 };
 
 const patchHandler = async (req: NextRequest, { params }: { params: { definitionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const body = await readRequestBody(req);
   if (body instanceof Error) {
@@ -346,37 +316,19 @@ const patchHandler = async (req: NextRequest, { params }: { params: { definition
     return errorResponse(input, { status: 400 }, req);
   }
 
-  try {
-    const definition = await customAgentDefinitionService.updateUserDefinition(
-      params.definitionId,
-      userIdentity,
-      input
-    );
-    return successResponse({ definition: serializeUserAgentDefinition(definition) }, { status: 200 }, req);
-  } catch (error) {
-    return mapDefinitionError(error, req);
-  }
+  const definition = await customAgentDefinitionService.updateUserDefinition(params.definitionId, userIdentity, input);
+  return successResponse({ definition: serializeUserAgentDefinition(definition) }, { status: 200 }, req);
 };
 
 const deleteHandler = async (req: NextRequest, { params }: { params: { definitionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
-
-  try {
-    const definition = await customAgentDefinitionService.archiveUserDefinition(
-      params.definitionId,
-      userIdentity.userId
-    );
-    return successResponse(
-      { archived: true, definition: serializeUserAgentDefinition(definition) },
-      { status: 200 },
-      req
-    );
-  } catch (error) {
-    return mapDefinitionError(error, req);
-  }
+  const { userId } = requireRequestUserIdentity(req);
+
+  const archivedDefinition = await customAgentDefinitionService.archiveUserDefinition(params.definitionId, userId);
+  return successResponse(
+    { archived: true, definition: serializeUserAgentDefinition(archivedDefinition) },
+    { status: 200 },
+    req
+  );
 };
 
 export const GET = createApiHandler(getHandler);
diff --git a/src/app/api/v2/ai/agent/definitions/route.test.ts b/src/app/api/v2/ai/agent/definitions/route.test.ts
index 2a65f392..ee6c8631 100644
--- a/src/app/api/v2/ai/agent/definitions/route.test.ts
+++ b/src/app/api/v2/ai/agent/definitions/route.test.ts
@@ -22,13 +22,29 @@ const mockCreateUserDefinition = jest.fn();
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const identity = mockGetRequestUserIdentity(...args);
+    if (identity) return identity;
+    throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+  },
 }));
 
 jest.mock('server/services/agent/CustomAgentDefinitionService', () => {
+  const CONTRACT: Record<string, { httpStatus: number; code: string }> = {
+    not_found: { httpStatus: 404, code: 'custom_agent_not_found' },
+    model_unavailable: { httpStatus: 409, code: 'custom_agent_conflict' },
+    creation_unavailable: { httpStatus: 403, code: 'custom_agent_creation_unavailable' },
+  };
   class CustomAgentDefinitionServiceError extends Error {
-    constructor(public readonly code: string, message: string) {
+    readonly httpStatus: number;
+    readonly code: string;
+    constructor(public readonly reason: string, message: string) {
       super(message);
       this.name = 'CustomAgentDefinitionServiceError';
+      const { httpStatus, code } = CONTRACT[reason] || { httpStatus: 400, code: 'custom_agent_invalid' };
+      this.httpStatus = httpStatus;
+      this.code = code;
     }
   }
 
diff --git a/src/app/api/v2/ai/agent/definitions/route.ts b/src/app/api/v2/ai/agent/definitions/route.ts
index 56cae4ee..02945a5b 100644
--- a/src/app/api/v2/ai/agent/definitions/route.ts
+++ b/src/app/api/v2/ai/agent/definitions/route.ts
@@ -17,10 +17,9 @@
 import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import {
-  CustomAgentDefinitionServiceError,
   customAgentDefinitionService,
   serializeUserAgentDefinition,
 } from 'server/services/agent/CustomAgentDefinitionService';
@@ -46,26 +45,6 @@ const RESOURCE_BEHAVIORS = new Set<UserAgentDefinitionResourceBehavior>([
   'current_workspace_when_available',
 ]);
 
-function mapDefinitionError(error: unknown, req: NextRequest) {
-  if (error instanceof CustomAgentDefinitionServiceError) {
-    if (error.code === 'not_found') {
-      return errorResponse(error, { status: 404 }, req);
-    }
-
-    if (error.code === 'model_unavailable') {
-      return errorResponse(error, { status: 409 }, req);
-    }
-
-    if (error.code === 'creation_unavailable') {
-      return errorResponse(error, { status: 403 }, req);
-    }
-
-    return errorResponse(error, { status: 400 }, req);
-  }
-
-  throw error;
-}
-
 async function readRequestBody(req: NextRequest): Promise<Record<string, unknown> | Error> {
   let body: unknown;
   try {
@@ -262,20 +241,14 @@ function parseUpsertBody(body: Record<string, unknown>): UserAgentDefinitionUpse
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const definitions = await customAgentDefinitionService.listUserDefinitions({ userId: userIdentity.userId });
   return successResponse({ definitions: definitions.map(serializeUserAgentDefinition) }, { status: 200 }, req);
 };
 
 const postHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const body = await readRequestBody(req);
   if (body instanceof Error) {
@@ -287,12 +260,9 @@ const postHandler = async (req: NextRequest) => {
     return errorResponse(input, { status: 400 }, req);
   }
 
-  try {
-    const definition = await customAgentDefinitionService.createUserDefinition(userIdentity, input);
-    return successResponse({ definition: serializeUserAgentDefinition(definition) }, { status: 201 }, req);
-  } catch (error) {
-    return mapDefinitionError(error, req);
-  }
+  // CustomAgentDefinitionServiceError is an AppError; createApiHandler maps its httpStatus/code.
+  const definition = await customAgentDefinitionService.createUserDefinition(userIdentity, input);
+  return successResponse({ definition: serializeUserAgentDefinition(definition) }, { status: 201 }, req);
 };
 
 export const GET = createApiHandler(getHandler);
diff --git a/src/app/api/v2/ai/agent/github-token/route.test.ts b/src/app/api/v2/ai/agent/github-token/route.test.ts
index 1bc07820..a31f17d8 100644
--- a/src/app/api/v2/ai/agent/github-token/route.test.ts
+++ b/src/app/api/v2/ai/agent/github-token/route.test.ts
@@ -16,10 +16,19 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getUser: jest.fn(),
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  const requireRequestUserIdentity = (...args: unknown[]) => {
+    const identity = getRequestUserIdentity(...args);
+    if (identity) return identity;
+    const { UnauthorizedError } = jest.requireActual('server/lib/appError');
+    throw new UnauthorizedError();
+  };
+
+  return { getUser: jest.fn(), getRequestUserIdentity, requireRequestUserIdentity };
+});
 
 jest.mock('server/lib/agentSession/githubToken', () => ({
   fetchGitHubAuthenticatedUser: jest.fn(),
@@ -77,19 +86,23 @@ describe('GET /api/v2/ai/agent/github-token', () => {
     expect(response.status).toBe(401);
   });
 
-  it('returns 403 when the user is not an admin', async () => {
+  it('allows a normal (non-admin) user to check their own token', async () => {
+    // Self-check, so no admin gate. NOTE(review): assumes getRequestUserIdentity is stubbed elsewhere (beforeEach?).
     mockGetUser.mockReturnValue({
       sub: 'user-123',
       realm_access: {
         roles: ['user'],
       },
     });
+    mockResolveRequestGitHubUserToken.mockResolvedValue({
+      githubUsername: 'sample-user',
+      githubToken: null,
+    });
 
     const response = await GET(makeRequest());
 
-    expect(response.status).toBe(403);
-    expect(mockResolveRequestGitHubUserToken).not.toHaveBeenCalled();
-    expect(mockFetchGitHubAuthenticatedUser).not.toHaveBeenCalled();
+    expect(response.status).toBe(200);
+    expect(mockResolveRequestGitHubUserToken).toHaveBeenCalled();
   });
 
   it('returns a safe failed check when no GitHub token can be fetched', async () => {
diff --git a/src/app/api/v2/ai/agent/github-token/route.ts b/src/app/api/v2/ai/agent/github-token/route.ts
index 6c4267ca..c40c81fb 100644
--- a/src/app/api/v2/ai/agent/github-token/route.ts
+++ b/src/app/api/v2/ai/agent/github-token/route.ts
@@ -17,8 +17,8 @@
 import { NextRequest } from 'next/server';
 import { fetchGitHubAuthenticatedUser, resolveRequestGitHubUserToken } from 'server/lib/agentSession/githubToken';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
-import { errorResponse, successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
 
 export const dynamic = 'force-dynamic';
 
@@ -46,14 +46,9 @@ interface GitHubTokenCheck {
  *         description: GitHub token check result
  *       '401':
  *         description: Unauthorized
- *       '403':
- *         description: Forbidden
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  requireRequestUserIdentity(req);
 
   const { githubUsername, githubToken } = await resolveRequestGitHubUserToken(req);
 
@@ -94,4 +89,5 @@ const getHandler = async (req: NextRequest) => {
   );
 };
 
-export const GET = createApiHandler(getHandler, { roles: ['admin'] });
+// Per-user self-check: requires user identity (enforced in-handler), NOT admin.
+export const GET = createApiHandler(getHandler);
diff --git a/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.test.ts b/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.test.ts
index 44e26ab6..099ec711 100644
--- a/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.test.ts
+++ b/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.test.ts
@@ -56,6 +56,12 @@ jest.mock('server/services/agentRuntime/mcp/oauthFlow', () => ({
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const identity = mockGetRequestUserIdentity(...args);
+    if (identity) return identity;
+    throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+  },
 }));
 
 jest.mock('server/lib/logger', () => ({
diff --git a/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.ts b/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.ts
index c9099e7b..e41b8ff6 100644
--- a/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.ts
+++ b/src/app/api/v2/ai/agent/mcp-connections/[slug]/oauth/start/route.ts
@@ -17,7 +17,7 @@
 import { auth } from '@ai-sdk/mcp';
 import { NextRequest, NextResponse } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import { APP_HOST } from 'shared/config';
 import {
@@ -121,10 +121,7 @@ function resolveAppOrigin(req: NextRequest): string | null {
  *               $ref: '#/components/schemas/StartAgentMcpConnectionOAuthSuccessResponse'
  */
 const postHandler = async (req: NextRequest, { params }: { params: Promise<{ slug: string }> }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { slug } = await params;
   const scope = req.nextUrl.searchParams.get('scope') || 'global';
diff --git a/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.test.ts b/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.test.ts
index 2af2362f..7efd66b6 100644
--- a/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.test.ts
+++ b/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.test.ts
@@ -46,6 +46,12 @@ jest.mock('server/services/userMcpConnection', () => ({
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const identity = mockGetRequestUserIdentity(...args);
+    if (identity) return identity;
+    throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+  },
 }));
 
 jest.mock('server/lib/logger', () => ({
diff --git a/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.ts b/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.ts
index a00147df..0b4ec827 100644
--- a/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.ts
+++ b/src/app/api/v2/ai/agent/mcp-connections/[slug]/route.ts
@@ -16,8 +16,8 @@
 
 import { NextRequest, NextResponse } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
-import { errorResponse, successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
 import {
   applyCompiledConnectionConfigToTransport,
   buildMcpDefinitionFingerprint,
@@ -129,10 +129,7 @@ import UserMcpConnectionService from 'server/services/userMcpConnection';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const putHandler = async (req: NextRequest, { params }: { params: Promise<{ slug: string }> }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { slug } = await params;
   const scope = req.nextUrl.searchParams.get('scope') || 'global';
@@ -256,10 +253,7 @@ const putHandler = async (req: NextRequest, { params }: { params: Promise<{ slug
 };
 
 const deleteHandler = async (req: NextRequest, { params }: { params: Promise<{ slug: string }> }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { slug } = await params;
   const scope = req.nextUrl.searchParams.get('scope') || 'global';
diff --git a/src/app/api/v2/ai/agent/mcp-connections/route.ts b/src/app/api/v2/ai/agent/mcp-connections/route.ts
index 06511d11..193d078e 100644
--- a/src/app/api/v2/ai/agent/mcp-connections/route.ts
+++ b/src/app/api/v2/ai/agent/mcp-connections/route.ts
@@ -16,8 +16,8 @@
 
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { McpConfigService } from 'server/services/agentRuntime/mcp/config';
 
 export const dynamic = 'force-dynamic';
@@ -56,10 +56,7 @@ export const dynamic = 'force-dynamic';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const repo = req.nextUrl.searchParams.get('repo') || undefined;
   const service = new McpConfigService();
diff --git a/src/app/api/v2/ai/agent/models/route.ts b/src/app/api/v2/ai/agent/models/route.ts
index b3f66712..e811f938 100644
--- a/src/app/api/v2/ai/agent/models/route.ts
+++ b/src/app/api/v2/ai/agent/models/route.ts
@@ -16,8 +16,8 @@
 
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { successResponse, errorResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentProviderRegistry from 'server/services/agent/ProviderRegistry';
 
 export const dynamic = 'force-dynamic';
@@ -63,10 +63,7 @@ export const dynamic = 'force-dynamic';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const repo = req.nextUrl.searchParams.get('repo') || undefined;
   const models = await AgentProviderRegistry.listAvailableModelsForUser({
diff --git a/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.test.ts b/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.test.ts
index b4608c28..58fbb0f6 100644
--- a/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.test.ts
+++ b/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  // Stand-in for requireRequestUserIdentity: returns the mocked identity, or throws the real UnauthorizedError.
+  const requireRequestUserIdentity = (...args: unknown[]) => {
+    const identity = getRequestUserIdentity(...args);
+    if (identity) return identity;
+    const { UnauthorizedError } = jest.requireActual('server/lib/appError');
+    throw new UnauthorizedError();
+  };
+
+  return { getRequestUserIdentity, requireRequestUserIdentity };
+});
 
 jest.mock('server/lib/agentSession/githubToken', () => ({
   resolveRequestGitHubToken: jest.fn(),
@@ -97,7 +106,7 @@ describe('POST /api/v2/ai/agent/pending-actions/[actionId]/respond', () => {
 
     expect(response.status).toBe(401);
     await expect(response.json()).resolves.toMatchObject({
-      error: { message: 'Unauthorized' },
+      error: { message: 'Authentication is required.' },
     });
     expect(mockResolvePendingAction).not.toHaveBeenCalled();
   });
diff --git a/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.ts b/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.ts
index 9f5d1c67..8820d2ac 100644
--- a/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.ts
+++ b/src/app/api/v2/ai/agent/pending-actions/[actionId]/respond/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { resolveRequestGitHubToken } from 'server/lib/agentSession/githubToken';
 import ApprovalService from 'server/services/agent/ApprovalService';
 
@@ -84,10 +84,7 @@ import ApprovalService from 'server/services/agent/ApprovalService';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const postHandler = async (req: NextRequest, { params }: { params: { actionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const body = await req.json().catch(() => null);
   const responseBody = ApprovalService.normalizePendingActionResponseBody(body);
diff --git a/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.test.ts b/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.test.ts
index 5bc0bd1d..9569226a 100644
--- a/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.test.ts
+++ b/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // Test double: reuses the jest.fn above; throws the real UnauthorizedError when no identity is returned.
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/services/agent/RunService', () => ({
   __esModule: true,
@@ -98,7 +107,7 @@ describe('POST /api/v2/ai/agent/runs/[runId]/cancel', () => {
     const body = await response.json();
 
     expect(response.status).toBe(401);
-    expect(body.error.message).toBe('Unauthorized');
+    expect(body.error.message).toBe('Authentication is required.');
     expect(mockCancelRun).not.toHaveBeenCalled();
   });
 });
diff --git a/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.ts b/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.ts
index fc27846d..99be866c 100644
--- a/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.ts
+++ b/src/app/api/v2/ai/agent/runs/[runId]/cancel/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentRunService from 'server/services/agent/RunService';
 
 /**
@@ -52,10 +52,7 @@ import AgentRunService from 'server/services/agent/RunService';
  *         description: Agent run not found
  */
 const postHandler = async (req: NextRequest, { params }: { params: { runId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const run = await AgentRunService.cancelRun(params.runId, userIdentity.userId);
diff --git a/src/app/api/v2/ai/agent/runs/[runId]/events/route.test.ts b/src/app/api/v2/ai/agent/runs/[runId]/events/route.test.ts
index a19c2866..be0a089c 100644
--- a/src/app/api/v2/ai/agent/runs/[runId]/events/route.test.ts
+++ b/src/app/api/v2/ai/agent/runs/[runId]/events/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // Test double: reuses the jest.fn above; throws the real UnauthorizedError when no identity is returned.
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/services/agent/RunEventService', () => ({
   __esModule: true,
diff --git a/src/app/api/v2/ai/agent/runs/[runId]/events/route.ts b/src/app/api/v2/ai/agent/runs/[runId]/events/route.ts
index 81155859..7d193e96 100644
--- a/src/app/api/v2/ai/agent/runs/[runId]/events/route.ts
+++ b/src/app/api/v2/ai/agent/runs/[runId]/events/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentRunEventService, {
   DEFAULT_RUN_EVENT_PAGE_LIMIT,
   MAX_RUN_EVENT_PAGE_LIMIT,
@@ -137,10 +137,7 @@ function parsePositiveInteger(value: string | null, fallback: number): number {
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { runId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   let afterSequence: number;
   let limit: number;
diff --git a/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.test.ts b/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.test.ts
index 520db77e..806a7e57 100644
--- a/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.test.ts
+++ b/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // Test double: reuses the jest.fn above; throws the real UnauthorizedError when no identity is returned.
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/services/agent/RunEventService', () => ({
   __esModule: true,
diff --git a/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.ts b/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.ts
index 543259cb..8908a154 100644
--- a/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.ts
+++ b/src/app/api/v2/ai/agent/runs/[runId]/events/stream/route.ts
@@ -119,6 +119,8 @@ const getHandler = async (req: NextRequest, { params }: { params: { runId: strin
       'Content-Type': 'text/event-stream',
       'Cache-Control': 'no-cache, no-transform',
       Connection: 'keep-alive',
+      // Disable proxy buffering (nginx and friends) so SSE frames flush immediately.
+      'X-Accel-Buffering': 'no',
     },
   });
 };
diff --git a/src/app/api/v2/ai/agent/runs/[runId]/route.ts b/src/app/api/v2/ai/agent/runs/[runId]/route.ts
index 23a685cb..538e0cc3 100644
--- a/src/app/api/v2/ai/agent/runs/[runId]/route.ts
+++ b/src/app/api/v2/ai/agent/runs/[runId]/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentRunService from 'server/services/agent/RunService';
 
 /**
@@ -53,10 +53,7 @@ import AgentRunService from 'server/services/agent/RunService';
  *         description: Agent run not found
  */
 const getHandler = async (req: NextRequest, { params }: { params: { runId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const run = await AgentRunService.getOwnedRun(params.runId, userIdentity.userId);
diff --git a/src/app/api/v2/ai/agent/runtime-controls/preview/route.test.ts b/src/app/api/v2/ai/agent/runtime-controls/preview/route.test.ts
index 2c8a80d7..1ccf58d5 100644
--- a/src/app/api/v2/ai/agent/runtime-controls/preview/route.test.ts
+++ b/src/app/api/v2/ai/agent/runtime-controls/preview/route.test.ts
@@ -21,13 +21,28 @@ const mockGetEntryPreview = jest.fn();
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Test double: reuses mockGetRequestUserIdentity; throws the real UnauthorizedError when no identity is returned.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agent/ThreadRuntimeControlsService', () => {
+  const HTTP_STATUS: Record<string, number> = {
+    invalid_input: 400,
+    unknown_choice: 400,
+    policy_denied: 403,
+    not_found: 404,
+    active_run: 409,
+  };
   class AgentThreadRuntimeControlsError extends Error {
+    readonly httpStatus: number;
     constructor(public readonly code: string, message: string) {
       super(message);
       this.name = 'AgentThreadRuntimeControlsError';
+      this.httpStatus = HTTP_STATUS[code] ?? 400;
     }
   }
 
diff --git a/src/app/api/v2/ai/agent/runtime-controls/preview/route.ts b/src/app/api/v2/ai/agent/runtime-controls/preview/route.ts
index d423eb1e..2209e6f8 100644
--- a/src/app/api/v2/ai/agent/runtime-controls/preview/route.ts
+++ b/src/app/api/v2/ai/agent/runtime-controls/preview/route.ts
@@ -17,10 +17,9 @@
 import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import AgentThreadRuntimeControlsService, {
-  AgentThreadRuntimeControlsError,
   type AgentRuntimeControlsEntryDefaultsInput,
   type AgentRuntimeControlsEntrySourceInput,
   type AgentThreadRuntimeControlChoiceInput,
@@ -137,21 +136,6 @@ function parsePreviewBody(body: unknown): RuntimeControlsPreviewBody | Error {
   };
 }
 
-function mapRuntimeControlsError(error: unknown, req: NextRequest) {
-  if (error instanceof AgentThreadRuntimeControlsError) {
-    const statusByCode: Record<AgentThreadRuntimeControlsError['code'], number> = {
-      invalid_input: 400,
-      unknown_choice: 400,
-      policy_denied: 403,
-      not_found: 404,
-      active_run: 409,
-    };
-    return errorResponse(error, { status: statusByCode[error.code] }, req);
-  }
-
-  throw error;
-}
-
 /**
  * @openapi
  * /api/v2/ai/agent/runtime-controls/preview:
@@ -200,25 +184,19 @@ function mapRuntimeControlsError(error: unknown, req: NextRequest) {
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const postHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const parsedBody = parsePreviewBody(await req.json().catch(() => ({})));
   if (parsedBody instanceof Error) {
     return errorResponse(parsedBody, { status: 400 }, req);
   }
 
-  try {
-    const state = await AgentThreadRuntimeControlsService.getEntryPreview({
-      userIdentity,
-      ...parsedBody,
-    });
-    return successResponse(state, { status: 200 }, req);
-  } catch (error) {
-    return mapRuntimeControlsError(error, req);
-  }
+  // No try/catch: AgentThreadRuntimeControlsError is an AppError; createApiHandler maps its httpStatus/code.
+  const state = await AgentThreadRuntimeControlsService.getEntryPreview({
+    userIdentity,
+    ...parsedBody,
+  });
+  return successResponse(state, { status: 200 }, req);
 };
 
 export const POST = createApiHandler(postHandler);
diff --git a/src/app/api/v2/ai/agent/sandbox-sessions/launches/[launchId]/route.ts b/src/app/api/v2/ai/agent/sandbox-sessions/launches/[launchId]/route.ts
index e9a6ffd0..0688941c 100644
--- a/src/app/api/v2/ai/agent/sandbox-sessions/launches/[launchId]/route.ts
+++ b/src/app/api/v2/ai/agent/sandbox-sessions/launches/[launchId]/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 
 import { redisClient } from 'server/lib/dependencies';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { getSandboxLaunchState, toPublicSandboxLaunchState } from 'server/lib/agentSession/sandboxLaunchState';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
@@ -121,10 +121,7 @@ import { errorResponse, successResponse } from 'server/lib/response';
  *         description: Launch not found
  */
 const getHandler = async (req: NextRequest, { params }: { params: Promise<{ launchId: string }> }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { launchId } = await params;
   const state = await getSandboxLaunchState(redisClient.getRedis(), launchId);
diff --git a/src/app/api/v2/ai/agent/sandbox-sessions/route.test.ts b/src/app/api/v2/ai/agent/sandbox-sessions/route.test.ts
index eeb2d18a..ccc263c9 100644
--- a/src/app/api/v2/ai/agent/sandbox-sessions/route.test.ts
+++ b/src/app/api/v2/ai/agent/sandbox-sessions/route.test.ts
@@ -28,6 +28,12 @@ jest.mock('server/lib/dependencies', () => ({
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Test double: reuses mockGetRequestUserIdentity; throws the real UnauthorizedError when no identity is returned.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/lib/queueManager', () => ({
diff --git a/src/app/api/v2/ai/agent/sandbox-sessions/route.ts b/src/app/api/v2/ai/agent/sandbox-sessions/route.ts
index 742db438..466215b1 100644
--- a/src/app/api/v2/ai/agent/sandbox-sessions/route.ts
+++ b/src/app/api/v2/ai/agent/sandbox-sessions/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import { v4 as uuid } from 'uuid';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { resolveRequestGitHubToken } from 'server/lib/agentSession/githubToken';
 import {
   AgentSessionRuntimeConfigError,
@@ -347,8 +347,7 @@ const sandboxLaunchQueue = QueueManager.getInstance().registerQueue(QUEUE_NAMES.
  *         description: Base build not found
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  requireRequestUserIdentity(req);
 
   const { searchParams } = new URL(req.url);
   const baseBuildUuid = searchParams.get('baseBuildUuid');
@@ -380,8 +379,7 @@ const getHandler = async (req: NextRequest) => {
 };
 
 const postHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  const userIdentity = requireRequestUserIdentity(req);
 
   let body: CreateSandboxSessionBody;
   try {
diff --git a/src/app/api/v2/ai/agent/session-candidates/route.ts b/src/app/api/v2/ai/agent/session-candidates/route.ts
index 4d1bc358..85126a4d 100644
--- a/src/app/api/v2/ai/agent/session-candidates/route.ts
+++ b/src/app/api/v2/ai/agent/session-candidates/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentSessionService from 'server/services/agentSession';
 import { loadAgentSessionServiceCandidates } from 'server/services/agentSessionCandidates';
 
@@ -106,8 +106,7 @@ export const dynamic = 'force-dynamic';
  *         description: Build or lifecycle config not found
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { searchParams } = new URL(req.url);
   const buildUuid = searchParams.get('buildUuid');
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/route.test.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/route.test.ts
index c98cd555..6a611507 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/route.test.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/route.test.ts
@@ -25,6 +25,12 @@ jest.mock('server/lib/dependencies', () => ({}));
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Test double: reuses mockGetRequestUserIdentity; throws the real UnauthorizedError when no identity is returned.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agent/SessionReadService', () => ({
@@ -111,7 +117,7 @@ describe('/api/v2/ai/agent/sessions/[sessionId]', () => {
     const body = await response.json();
 
     expect(response.status).toBe(401);
-    expect(body.error.message).toBe('Unauthorized');
+    expect(body.error.message).toBe('Authentication is required.');
     expect(mockGetSession).not.toHaveBeenCalled();
     expect(mockEndSession).not.toHaveBeenCalled();
   });
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts
index 3ca2aacd..13ba9f76 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { successResponse, errorResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentSessionReadService from 'server/services/agent/SessionReadService';
 import { WorkspaceActionBlockedError } from 'server/services/agent/WorkspaceRuntimeStateService';
 import AgentSessionService from 'server/services/agentSession';
@@ -111,8 +111,7 @@ import AgentSessionService from 'server/services/agentSession';
  *         description: Workspace action is blocked by an active run or another lifecycle action
  */
 const getHandler = async (req: NextRequest, { params }: { params: Promise<{ sessionId: string }> }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { sessionId } = await params;
   const sessionRecord = await AgentSessionReadService.getOwnedSessionRecord(sessionId, userIdentity.userId);
@@ -124,8 +123,7 @@ const getHandler = async (req: NextRequest, { params }: { params: Promise<{ sess
 };
 
 const deleteHandler = async (req: NextRequest, { params }: { params: Promise<{ sessionId: string }> }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { sessionId } = await params;
   const session = await AgentSessionService.getSession(sessionId);
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.test.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.test.ts
index 54919cfe..61e5a686 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.test.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.test.ts
@@ -25,6 +25,12 @@ jest.mock('server/lib/dependencies', () => ({}));
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Test double: reuses mockGetRequestUserIdentity; throws the real UnauthorizedError when no identity is returned.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/lib/agentSession/githubToken', () => ({
@@ -147,7 +153,7 @@ describe('/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume', () => {
     const body = await response.json();
 
     expect(response.status).toBe(401);
-    expect(body.error.message).toBe('Unauthorized');
+    expect(body.error.message).toBe('Authentication is required.');
     expect(mockResolveRequestGitHubToken).not.toHaveBeenCalled();
     expect(mockResumeChatRuntime).not.toHaveBeenCalled();
     expect(mockSerializeSessionRecord).not.toHaveBeenCalled();
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.ts
index d4fd7b3b..fca58f8b 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/resume/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import { resolveRequestGitHubToken } from 'server/lib/agentSession/githubToken';
 import { WorkspaceActionBlockedError } from 'server/services/agent/WorkspaceRuntimeStateService';
@@ -65,10 +65,7 @@ function isSessionNotFoundError(error: unknown): boolean {
  *         description: Workspace action is blocked by an active run or another lifecycle action
  */
 const postHandler = async (req: NextRequest, { params }: { params: { sessionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const githubToken = await resolveRequestGitHubToken(req);
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.test.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.test.ts
index c66db9c6..7836713d 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.test.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.test.ts
@@ -24,6 +24,12 @@ jest.mock('server/lib/dependencies', () => ({}));
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Test double: reuses mockGetRequestUserIdentity; throws the real UnauthorizedError when no identity is returned.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agentSession', () => {
@@ -139,7 +145,7 @@ describe('/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend', () => {
     const body = await response.json();
 
     expect(response.status).toBe(401);
-    expect(body.error.message).toBe('Unauthorized');
+    expect(body.error.message).toBe('Authentication is required.');
     expect(mockSuspendChatRuntime).not.toHaveBeenCalled();
     expect(mockSerializeSessionRecord).not.toHaveBeenCalled();
   });
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.ts
index cc566a3d..22e2287f 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/sandbox/suspend/route.ts
@@ -17,7 +17,7 @@
 import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import { WorkspaceActionBlockedError } from 'server/services/agent/WorkspaceRuntimeStateService';
 import AgentSessionService from 'server/services/agentSession';
@@ -64,10 +64,7 @@ function isSessionNotFoundError(error: unknown): boolean {
  *         description: Workspace action is blocked by an active run or another lifecycle action
  */
 const postHandler = async (req: NextRequest, { params }: { params: { sessionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const session = await AgentSessionService.suspendChatRuntime({
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.test.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.test.ts
index 66a1a844..cf1c3ba4 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.test.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.test.ts
@@ -25,6 +25,12 @@ jest.mock('server/lib/dependencies', () => ({}));
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // Test double: reuses mockGetRequestUserIdentity; throws the real UnauthorizedError when no identity is returned.
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agentSession', () => ({
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts
index 1e370380..20d56822 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { serializeAgentSessionSummary } from 'server/services/agent/serializeSessionSummary';
 import AgentSessionService from 'server/services/agentSession';
 import type { RequestedAgentSessionServiceRef } from 'server/services/agentSessionCandidates';
@@ -230,8 +230,7 @@ function isRequestedSessionServiceRef(value: unknown): value is RequestedAgentSe
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const postHandler = async (req: NextRequest, { params }: { params: Promise<{ sessionId: string }> }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  const userIdentity = requireRequestUserIdentity(req);
 
   let body: {
     services?: unknown[];
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.test.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.test.ts
index c0331c54..6d52dad6 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.test.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.test.ts
@@ -21,19 +21,31 @@ jest.mock('server/lib/dependencies', () => ({
   defaultRedis: {},
 }));
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // Test double: reuses the jest.fn above; throws the real UnauthorizedError when no identity is returned.
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/services/agent/ThreadService', () => {
   class AgentThreadCreateNotFoundError extends Error {
-    constructor(public readonly code: 'session_not_found' | 'source_thread_not_found', message: string) {
+    readonly httpStatus = 404;
+    readonly code = 'thread_target_not_found';
+    constructor(public readonly reason: 'session_not_found' | 'source_thread_not_found', message: string) {
       super(message);
       this.name = 'AgentThreadCreateNotFoundError';
     }
   }
 
   class AgentThreadCreateConflictError extends Error {
+    readonly httpStatus = 409;
     constructor(
       public readonly code:
         | 'inactive_session'
@@ -69,6 +81,8 @@ jest.mock('server/services/agent/ThreadService', () => {
 
 jest.mock('server/services/agent/WorkspaceRuntimeStateService', () => {
   class WorkspaceActionBlockedError extends Error {
+    readonly httpStatus = 409;
+    readonly code = 'workspace_action_blocked';
     constructor(
       public readonly reason: 'active_run' | 'action_in_progress',
       message: string,
@@ -451,7 +465,7 @@ describe('/api/v2/ai/agent/sessions/[sessionId]/threads', () => {
     const body = await response.json();
 
     expect(response.status).toBe(401);
-    expect(body.error.message).toBe('Unauthorized');
+    expect(body.error.message).toBe('Authentication is required.');
     expect(mockCreateThread).not.toHaveBeenCalled();
     expect(mockSerializeThread).not.toHaveBeenCalled();
   });
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.ts
index ae70a6fb..95e8b9e5 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/threads/route.ts
@@ -18,12 +18,8 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
-import AgentThreadService, {
-  AgentThreadCreateConflictError,
-  AgentThreadCreateNotFoundError,
-} from 'server/services/agent/ThreadService';
-import { WorkspaceActionBlockedError } from 'server/services/agent/WorkspaceRuntimeStateService';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
+import AgentThreadService from 'server/services/agent/ThreadService';
 
 type CreateThreadBody = {
   title?: string;
@@ -77,19 +73,8 @@ async function readCreateThreadBody(req: NextRequest): Promise<CreateThreadBody
   return parseCreateThreadBody(body);
 }
 
+// Typed AppErrors self-map via createApiHandler; only plain not-found Errors still need mapping here.
 function mapCreateThreadError(error: unknown, req: NextRequest) {
-  if (error instanceof WorkspaceActionBlockedError) {
-    return errorResponse(error, { status: 409 }, req);
-  }
-
-  if (error instanceof AgentThreadCreateNotFoundError) {
-    return errorResponse(error, { status: 404 }, req);
-  }
-
-  if (error instanceof AgentThreadCreateConflictError) {
-    return errorResponse(error, { status: 409 }, req);
-  }
-
   if (
     error instanceof Error &&
     (error.message === 'Agent session not found' ||
@@ -99,17 +84,6 @@ function mapCreateThreadError(error: unknown, req: NextRequest) {
     return errorResponse(error, { status: 404 }, req);
   }
 
-  if (
-    error instanceof Error &&
-    (error.message === 'Cannot create a thread for an inactive session' ||
-      error.message === 'Wait for the session to finish starting before sending a message.' ||
-      error.message === 'This session is no longer available for new messages.' ||
-      error.message === 'Wait for the current agent run to finish before starting a new thread.' ||
-      error.message === 'Resolve pending approvals before starting a new thread.')
-  ) {
-    return errorResponse(error, { status: 409 }, req);
-  }
-
   throw error;
 }
 
@@ -206,10 +180,7 @@ function mapCreateThreadError(error: unknown, req: NextRequest) {
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { sessionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const threads = await AgentThreadService.listThreadHistoryForSession(params.sessionId, userIdentity.userId);
@@ -224,10 +195,7 @@ const getHandler = async (req: NextRequest, { params }: { params: { sessionId: s
 };
 
 const postHandler = async (req: NextRequest, { params }: { params: { sessionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const body = await readCreateThreadBody(req);
   if (body instanceof Error) {
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.test.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.test.ts
index cda8ebd1..2dc4f167 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.test.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.test.ts
@@ -26,6 +26,12 @@ jest.mock('server/lib/dependencies', () => ({}));
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // requireRequestUserIdentity stand-in: returns the mocked identity or throws UnauthorizedError (401).
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/lib/agentSession/githubToken', () => ({
@@ -206,7 +212,7 @@ describe('/api/v2/ai/agent/sessions/[sessionId]/workspace/open', () => {
     const body = await response.json();
 
     expect(response.status).toBe(401);
-    expect(body.error.message).toBe('Unauthorized');
+    expect(body.error.message).toBe('Authentication is required.');
     expect(mockResolveRequestGitHubToken).not.toHaveBeenCalled();
     expect(mockOpenChatRuntime).not.toHaveBeenCalled();
     expect(mockSerializeSessionRecord).not.toHaveBeenCalled();
diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.ts
index d18271ad..5f1c6c70 100644
--- a/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.ts
+++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/workspace/open/route.ts
@@ -19,7 +19,7 @@ import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { resolveRequestGitHubToken } from 'server/lib/agentSession/githubToken';
 import { buildWorkspaceFailureLinkData } from 'server/lib/agentSession/workspaceFailureLink';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { errorResponse, successResponse } from 'server/lib/response';
 import { WorkspaceActionBlockedError } from 'server/services/agent/WorkspaceRuntimeStateService';
 import AgentSessionReadService from 'server/services/agent/SessionReadService';
@@ -88,10 +88,7 @@ function isSessionNotFoundError(error: unknown): boolean {
  *         description: Workspace action is blocked by an active run or another lifecycle action
  */
 const postHandler = async (req: NextRequest, { params }: { params: { sessionId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const githubToken = await resolveRequestGitHubToken(req);
diff --git a/src/app/api/v2/ai/agent/sessions/route.test.ts b/src/app/api/v2/ai/agent/sessions/route.test.ts
index dbefc7ee..d0614ecb 100644
--- a/src/app/api/v2/ai/agent/sessions/route.test.ts
+++ b/src/app/api/v2/ai/agent/sessions/route.test.ts
@@ -32,6 +32,12 @@ const mockResolveRequestedAgentSessionServices = jest.fn();
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // requireRequestUserIdentity stand-in: returns the mocked identity or throws UnauthorizedError (401).
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agent/ChatSessionService', () => ({
@@ -100,7 +106,10 @@ jest.mock('server/services/agentSessionCandidates', () => ({
 }));
 
 jest.mock('server/services/agent/ProviderRegistry', () => {
-  class MissingAgentProviderApiKeyError extends Error {}
+  class MissingAgentProviderApiKeyError extends Error {
+    readonly httpStatus = 400;
+    readonly code = 'provider_api_key_required';
+  }
   return {
     __esModule: true,
     default: {},
@@ -109,10 +118,19 @@ jest.mock('server/services/agent/ProviderRegistry', () => {
 });
 
 jest.mock('server/services/agent/ThreadRuntimeControlsService', () => {
+  const HTTP_STATUS: Record<string, number> = {
+    invalid_input: 400,
+    unknown_choice: 400,
+    policy_denied: 403,
+    not_found: 404,
+    active_run: 409,
+  };
   class AgentThreadRuntimeControlsError extends Error {
+    readonly httpStatus: number;
     constructor(public readonly code: string, message: string) {
       super(message);
       this.name = 'AgentThreadRuntimeControlsError';
+      this.httpStatus = HTTP_STATUS[code] ?? 400;
     }
   }
 
diff --git a/src/app/api/v2/ai/agent/sessions/route.ts b/src/app/api/v2/ai/agent/sessions/route.ts
index e3b05169..f6046ddb 100644
--- a/src/app/api/v2/ai/agent/sessions/route.ts
+++ b/src/app/api/v2/ai/agent/sessions/route.ts
@@ -18,20 +18,16 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { successResponse, errorResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import type { DevConfig } from 'server/models/yaml/YamlService';
 import type { LifecycleConfig } from 'server/models/yaml';
 import AgentChatSessionService from 'server/services/agent/ChatSessionService';
-import { MissingAgentProviderApiKeyError } from 'server/services/agent/ProviderRegistry';
 import AgentSessionReadService from 'server/services/agent/SessionReadService';
 import {
   DEFAULT_AGENT_SESSION_LIST_LIMIT,
   MAX_AGENT_SESSION_LIST_LIMIT,
 } from 'server/services/agent/SessionReadService';
-import {
-  AgentThreadRuntimeControlsError,
-  type AgentThreadRuntimeControlChoiceInput,
-} from 'server/services/agent/ThreadRuntimeControlsService';
+import { type AgentThreadRuntimeControlChoiceInput } from 'server/services/agent/ThreadRuntimeControlsService';
 import { AgentSessionKind, BuildKind } from 'shared/constants';
 
 interface RequestedAgentSessionServiceRef {
@@ -159,21 +155,6 @@ function parseRuntimeControlChoices(value: unknown): AgentThreadRuntimeControlCh
   return { agentId, toolChoiceIds, mcpChoiceIds };
 }
 
-function mapRuntimeControlsError(error: unknown, req: NextRequest) {
-  if (error instanceof AgentThreadRuntimeControlsError) {
-    const statusByCode: Record<AgentThreadRuntimeControlsError['code'], number> = {
-      invalid_input: 400,
-      unknown_choice: 400,
-      policy_denied: 403,
-      not_found: 404,
-      active_run: 409,
-    };
-    return errorResponse(error, { status: statusByCode[error.code] }, req);
-  }
-
-  return null;
-}
-
 async function resolveLifecycleConfigForSession({
   buildContext,
   repoUrl,
@@ -410,8 +391,7 @@ async function resolveRequestedServices(
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  const userIdentity = requireRequestUserIdentity(req);
 
   const includeEnded = req.nextUrl.searchParams.get('includeEnded') === 'true';
   const page = parseInt(req.nextUrl.searchParams.get('page') || '1', 10);
@@ -439,8 +419,7 @@ const getHandler = async (req: NextRequest) => {
 };
 
 const postHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
+  const userIdentity = requireRequestUserIdentity(req);
 
   let body: CreateSessionBody;
   try {
@@ -522,18 +501,12 @@ const postHandler = async (req: NextRequest) => {
       const { AgentSessionRuntimeConfigError, AgentSessionWorkspaceStorageConfigError } = await import(
         'server/lib/agentSession/runtimeConfig'
       );
-      if (err instanceof MissingAgentProviderApiKeyError) {
-        return errorResponse(err, { status: 400 }, req);
-      }
+      // Config errors aren't AppErrors yet, so map them here; the AppErrors self-map via createApiHandler.
       if (err instanceof AgentSessionRuntimeConfigError || err instanceof AgentSessionWorkspaceStorageConfigError) {
         return errorResponse(err, { status: 400 }, req);
       }
-      const runtimeControlsResponse = mapRuntimeControlsError(err, req);
-      if (runtimeControlsResponse) {
-        return runtimeControlsResponse;
-      }
 
-      return errorResponse(err, { status: 500 }, req);
+      throw err;
     }
   }
 
@@ -652,9 +625,7 @@ const postHandler = async (req: NextRequest) => {
     if (err instanceof ActiveEnvironmentSessionError) {
       return errorResponse(err, { status: 409 }, req);
     }
-    if (err instanceof MissingAgentProviderApiKeyError) {
-      return errorResponse(err, { status: 400 }, req);
-    }
+    // MissingAgentProviderApiKeyError is an AppError (400) and self-maps via createApiHandler.
     if (err instanceof AgentSessionRuntimeConfigError) {
       return errorResponse(err, { status: 503 }, req);
     }
diff --git a/src/app/api/v2/ai/agent/settings/route.ts b/src/app/api/v2/ai/agent/settings/route.ts
index ef097785..2b38d879 100644
--- a/src/app/api/v2/ai/agent/settings/route.ts
+++ b/src/app/api/v2/ai/agent/settings/route.ts
@@ -16,8 +16,8 @@
 
 import { NextRequest } from 'next/server';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentSettingsService from 'server/services/agent/SettingsService';
 
 export const dynamic = 'force-dynamic';
@@ -56,10 +56,7 @@ export const dynamic = 'force-dynamic';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const repo = req.nextUrl.searchParams.get('repo') || undefined;
   const settings = await AgentSettingsService.getSettingsSnapshot(userIdentity, repo);
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.test.ts b/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.test.ts
index 71f88d58..715cf99c 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.test.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.test.ts
@@ -22,10 +22,18 @@ const mockSwitchThreadAgent = jest.fn();
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // requireRequestUserIdentity stand-in: returns the mocked identity or throws UnauthorizedError (401).
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agent/AgentSelectionService', () => {
   class AgentThreadAgentSwitchError extends Error {
+    readonly httpStatus = 409;
+    readonly code = 'agent_switch_blocked';
     constructor(
       public readonly reason: string,
       message: string,
@@ -157,14 +165,14 @@ describe('/api/v2/ai/agent/threads/[threadId]/agent', () => {
     expect(missingIdResponse.status).toBe(400);
   });
 
-  it('returns 400 for another user or unknown custom agent ids', async () => {
+  it('returns 409 for another user or unknown custom agent ids', async () => {
     mockSwitchThreadAgent.mockRejectedValueOnce(new AgentThreadAgentSwitchError('unknown_agent', 'Unknown agent.'));
 
     const response = await PATCH(makeRequest({ agentId: 'custom.another-user-agent' }), {
       params: { threadId: 'thread-1' },
     });
 
-    expect(response.status).toBe(400);
+    expect(response.status).toBe(409);
   });
 
   it('returns 409 for active run switch failures', async () => {
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.ts b/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.ts
index daed2bee..0f821d1c 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/agent/route.ts
@@ -18,14 +18,11 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
-import AgentSelectionService, { AgentThreadAgentSwitchError } from 'server/services/agent/AgentSelectionService';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
+import AgentSelectionService from 'server/services/agent/AgentSelectionService';
 
+// AgentThreadAgentSwitchError self-maps via createApiHandler; only plain not-found Errors need mapping here.
 function mapAgentSelectionError(error: unknown, req: NextRequest) {
-  if (error instanceof AgentThreadAgentSwitchError) {
-    return errorResponse(error, { status: error.reason === 'unknown_agent' ? 400 : 409 }, req);
-  }
-
   if (
     error instanceof Error &&
     (error.message === 'Agent thread not found' || error.message === 'Agent session not found')
@@ -97,10 +94,7 @@ function mapAgentSelectionError(error: unknown, req: NextRequest) {
  *                       $ref: '#/components/schemas/SwitchAgentSelectionResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const state = await AgentSelectionService.getThreadAgentState({ threadId: params.threadId, userIdentity });
@@ -111,10 +105,7 @@ const getHandler = async (req: NextRequest, { params }: { params: { threadId: st
 };
 
 const patchHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const body = await req.json().catch(() => ({}));
   if (!body || typeof body !== 'object' || Array.isArray(body)) {
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.test.ts b/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.test.ts
index 3f3448fa..07cb84b7 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.test.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // requireRequestUserIdentity stand-in: returns the mocked identity or throws UnauthorizedError (401).
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/services/agent/MessageStore', () => ({
   __esModule: true,
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.ts b/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.ts
index 69eb1d19..ab395939 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/messages/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentMessageStore, {
   DEFAULT_AGENT_MESSAGE_PAGE_LIMIT,
   MAX_AGENT_MESSAGE_PAGE_LIMIT,
@@ -98,10 +98,7 @@ function parseLimit(value: string | null): number {
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   let limit;
   try {
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.test.ts b/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.test.ts
index 2c0411d9..87ccb0ce 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.test.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // requireRequestUserIdentity stand-in: returns the mocked identity or throws UnauthorizedError (401).
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/services/agent/ApprovalService', () => ({
   __esModule: true,
@@ -55,7 +64,7 @@ describe('GET /api/v2/ai/agent/threads/[threadId]/pending-actions', () => {
 
     expect(response.status).toBe(401);
     await expect(response.json()).resolves.toMatchObject({
-      error: { message: 'Unauthorized' },
+      error: { message: 'Authentication is required.' },
     });
     expect(mockListPendingActions).not.toHaveBeenCalled();
   });
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.ts b/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.ts
index b1fc2b54..29eb8d96 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/pending-actions/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import ApprovalService from 'server/services/agent/ApprovalService';
 
 /**
@@ -68,10 +68,7 @@ import ApprovalService from 'server/services/agent/ApprovalService';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const pendingActions = await ApprovalService.listPendingActions(params.threadId, userIdentity.userId);
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/route.ts b/src/app/api/v2/ai/agent/threads/[threadId]/route.ts
index e2965a31..61f48171 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/route.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/route.ts
@@ -17,8 +17,8 @@
 import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
-import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { successResponse } from 'server/lib/response';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentThreadService from 'server/services/agent/ThreadService';
 
 /**
@@ -50,10 +50,7 @@ import AgentThreadService from 'server/services/agent/ThreadService';
  *                       $ref: '#/components/schemas/AgentThread'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const { thread, session } = await AgentThreadService.getOwnedThreadWithSession(params.threadId, userIdentity.userId);
   return successResponse(AgentThreadService.serializeThread(thread, session.uuid), { status: 200 }, req);
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.test.ts b/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.test.ts
index 5d5bf862..a4844830 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.test.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // requireRequestUserIdentity stand-in: returns the mocked identity or throws UnauthorizedError (401).
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/lib/agentSession/githubToken', () => ({
   resolveRequestGitHubToken: jest.fn(),
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.ts b/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.ts
index 67d64147..1d1b4dbf 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/runs/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import { resolveRequestGitHubToken } from 'server/lib/agentSession/githubToken';
 import { buildWorkspaceFailureLinkData } from 'server/lib/agentSession/workspaceFailureLink';
 import AgentRunAdmissionService from 'server/services/agent/RunAdmissionService';
@@ -26,6 +26,7 @@ import AgentRunQueueService from 'server/services/agent/RunQueueService';
 import AgentRunService, { InvalidAgentRunDefaultsError } from 'server/services/agent/RunService';
 import AgentRunPlanResolver, { AgentRunPlanAgentUnavailableError } from 'server/services/agent/RunPlanResolver';
 import AgentThreadService from 'server/services/agent/ThreadService';
+import { getRunMessageText, readString } from 'server/lib/agent/runRequestText';
 import {
   normalizeCanonicalAgentMessagePart,
   type AgentRunRuntimeOptions,
@@ -37,6 +38,7 @@ import AgentSessionService from 'server/services/agentSession';
 import AgentMessageStore from 'server/services/agent/MessageStore';
 
 const MAX_RUN_MAX_ITERATIONS = 100;
+const DISPATCH_GITHUB_TOKEN_WAIT_MS = 250;
 
 function getUnknownKeys(value: Record<string, unknown>, allowedKeys: string[]): string[] {
   return Object.keys(value).filter((key) => !allowedKeys.includes(key));
@@ -96,8 +98,21 @@ function normalizeCanonicalRunMessage(value: unknown): CanonicalAgentRunMessageI
   };
 }
 
-function readString(value: unknown): string | null {
-  return typeof value === 'string' && value.trim() ? value.trim() : null;
+async function resolveDispatchGitHubToken(req: NextRequest): Promise<string | null> {
+  let timeout: ReturnType<typeof setTimeout> | null = null;
+  // Race the token lookup against a DISPATCH_GITHUB_TOKEN_WAIT_MS timer; the timer branch resolves to null.
+  try {
+    return await Promise.race([
+      resolveRequestGitHubToken(req), // a rejection that settles first propagates to the caller
+      new Promise<null>((resolve) => {
+        timeout = setTimeout(() => resolve(null), DISPATCH_GITHUB_TOKEN_WAIT_MS);
+      }),
+    ]);
+  } finally {
+    if (timeout) {
+      clearTimeout(timeout); // cancel the pending timer once the race has settled
+    }
+  }
+}
 
 function normalizeModelRequest(
@@ -176,18 +191,6 @@ function normalizeDebugIntent(value: unknown): { ok: true; value: AgentDebugRunI
   return { ok: true, value: normalized };
 }
 
-function getRunMessageText(message: CanonicalAgentRunMessageInput): string | null {
-  const text = message.parts
-    .filter((part): part is Extract<CanonicalAgentRunMessageInput['parts'][number], { type: 'text' }> => {
-      return part.type === 'text';
-    })
-    .map((part) => part.text)
-    .join('\n')
-    .trim();
-
-  return text || null;
-}
-
 /**
  * @openapi
  * /api/v2/ai/agent/threads/{threadId}/runs:
@@ -270,10 +273,7 @@ function getRunMessageText(message: CanonicalAgentRunMessageInput): string | nul
  *                   $ref: '#/components/schemas/ApiError'
  */
 const postHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   const body = await req.json().catch(() => ({}));
   if (!body || typeof body !== 'object' || Array.isArray(body)) {
@@ -405,7 +405,7 @@ const postHandler = async (req: NextRequest, { params }: { params: { threadId: s
   }
 
   if (admission.created || admission.run.status === 'queued') {
-    const githubToken = await resolveRequestGitHubToken(req);
+    const githubToken = await resolveDispatchGitHubToken(req);
     await AgentRunQueueService.enqueueRun(admission.run.uuid, 'submit', { githubToken });
   }
 
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.test.ts b/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.test.ts
index fee4f109..63ff6525 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.test.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.test.ts
@@ -22,13 +22,28 @@ const mockPatchChoices = jest.fn();
 
 jest.mock('server/lib/get-user', () => ({
   getRequestUserIdentity: (...args: unknown[]) => mockGetRequestUserIdentity(...args),
+  // requireRequestUserIdentity stand-in: returns the mocked identity or throws UnauthorizedError (401).
+  requireRequestUserIdentity: (...args: unknown[]) => {
+    const id = mockGetRequestUserIdentity(...args);
+    if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+    return id;
+  },
 }));
 
 jest.mock('server/services/agent/ThreadRuntimeControlsService', () => {
+  const HTTP_STATUS: Record<string, number> = {
+    invalid_input: 400,
+    unknown_choice: 400,
+    policy_denied: 403,
+    not_found: 404,
+    active_run: 409,
+  };
   class AgentThreadRuntimeControlsError extends Error {
+    readonly httpStatus: number;
     constructor(public readonly code: string, message: string) {
       super(message);
       this.name = 'AgentThreadRuntimeControlsError';
+      this.httpStatus = HTTP_STATUS[code] ?? 400;
     }
   }
 
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.ts b/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.ts
index 59828c75..37d27298 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/runtime-controls/route.ts
@@ -18,10 +18,8 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
-import AgentThreadRuntimeControlsService, {
-  AgentThreadRuntimeControlsError,
-} from 'server/services/agent/ThreadRuntimeControlsService';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
+import AgentThreadRuntimeControlsService from 'server/services/agent/ThreadRuntimeControlsService';
 
 type RuntimeControlsPatchBody = {
   toolChoiceIds?: string[];
@@ -73,21 +71,6 @@ function parsePatchBody(body: unknown): RuntimeControlsPatchBody | Error {
   return { toolChoiceIds, mcpChoiceIds };
 }
 
-function mapRuntimeControlsError(error: unknown, req: NextRequest) {
-  if (error instanceof AgentThreadRuntimeControlsError) {
-    const statusByCode: Record<AgentThreadRuntimeControlsError['code'], number> = {
-      invalid_input: 400,
-      unknown_choice: 400,
-      policy_denied: 403,
-      not_found: 404,
-      active_run: 409,
-    };
-    return errorResponse(error, { status: statusByCode[error.code] }, req);
-  }
-
-  throw error;
-}
-
 /**
  * @openapi
  * /api/v2/ai/agent/threads/{threadId}/runtime-controls:
@@ -185,24 +168,15 @@ function mapRuntimeControlsError(error: unknown, req: NextRequest) {
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
-  try {
-    const state = await AgentThreadRuntimeControlsService.getState({ threadId: params.threadId, userIdentity });
-    return successResponse(state, { status: 200 }, req);
-  } catch (error) {
-    return mapRuntimeControlsError(error, req);
-  }
+  // AgentThreadRuntimeControlsError is an AppError; createApiHandler maps its httpStatus/code.
+  const state = await AgentThreadRuntimeControlsService.getState({ threadId: params.threadId, userIdentity });
+  return successResponse(state, { status: 200 }, req);
 };
 
 const patchHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   let body: unknown;
   try {
@@ -216,16 +190,12 @@ const patchHandler = async (req: NextRequest, { params }: { params: { threadId:
     return errorResponse(parsedBody, { status: 400 }, req);
   }
 
-  try {
-    const state = await AgentThreadRuntimeControlsService.patchChoices({
-      threadId: params.threadId,
-      userIdentity,
-      ...parsedBody,
-    });
-    return successResponse(state, { status: 200 }, req);
-  } catch (error) {
-    return mapRuntimeControlsError(error, req);
-  }
+  const state = await AgentThreadRuntimeControlsService.patchChoices({
+    threadId: params.threadId,
+    userIdentity,
+    ...parsedBody,
+  });
+  return successResponse(state, { status: 200 }, req);
 };
 
 export const GET = createApiHandler(getHandler);
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.test.ts b/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.test.ts
index f148b657..aa6e30b4 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.test.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.test.ts
@@ -16,9 +16,18 @@
 
 import { NextRequest } from 'next/server';
 
-jest.mock('server/lib/get-user', () => ({
-  getRequestUserIdentity: jest.fn(),
-}));
+jest.mock('server/lib/get-user', () => {
+  const getRequestUserIdentity = jest.fn();
+  return {
+    getRequestUserIdentity,
+    // Delegates to the getRequestUserIdentity mock; throws UnauthorizedError (HTTP 401) on a falsy identity.
+    requireRequestUserIdentity: (...args: unknown[]) => {
+      const id = getRequestUserIdentity(...args);
+      if (!id) throw new (jest.requireActual('server/lib/appError').UnauthorizedError)();
+      return id;
+    },
+  };
+});
 
 jest.mock('server/services/agent/AgentUsageService', () => ({
   __esModule: true,
diff --git a/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.ts b/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.ts
index 0bffead5..90a751d3 100644
--- a/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.ts
+++ b/src/app/api/v2/ai/agent/threads/[threadId]/usage/route.ts
@@ -18,7 +18,7 @@ import { NextRequest } from 'next/server';
 import 'server/lib/dependencies';
 import { createApiHandler } from 'server/lib/createApiHandler';
 import { errorResponse, successResponse } from 'server/lib/response';
-import { getRequestUserIdentity } from 'server/lib/get-user';
+import { requireRequestUserIdentity } from 'server/lib/get-user';
 import AgentUsageService from 'server/services/agent/AgentUsageService';
 
 /**
@@ -57,10 +57,7 @@ import AgentUsageService from 'server/services/agent/AgentUsageService';
  *               $ref: '#/components/schemas/ApiErrorResponse'
  */
 const getHandler = async (req: NextRequest, { params }: { params: { threadId: string } }) => {
-  const userIdentity = getRequestUserIdentity(req);
-  if (!userIdentity) {
-    return errorResponse(new Error('Unauthorized'), { status: 401 }, req);
-  }
+  const userIdentity = requireRequestUserIdentity(req);
 
   try {
     const usage = await AgentUsageService.getOwnedThreadUsage(params.threadId, userIdentity.userId);
diff --git a/src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.test.ts b/src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.test.ts
new file mode 100644
index 00000000..1ee6a3fe
--- /dev/null
+++ b/src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.test.ts
@@ -0,0 +1,100 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { NextRequest } from 'next/server';
+
+const mockGetUser = jest.fn();
+const mockSetRepoConfig = jest.fn();
+const mockDeleteRepoConfig = jest.fn();
+
+jest.mock('server/lib/get-user', () => ({
+  getUser: (...args: unknown[]) => mockGetUser(...args),
+}));
+
+jest.mock('server/services/agentSessionConfig', () => ({
+  __esModule: true,
+  default: {
+    getInstance: jest.fn(() => ({
+      getRepoConfig: jest.fn().mockResolvedValue({}),
+      setRepoConfig: (...args: unknown[]) => mockSetRepoConfig(...args),
+      deleteRepoConfig: (...args: unknown[]) => mockDeleteRepoConfig(...args),
+    })),
+  },
+}));
+
+import { DELETE, PUT } from './route';
+
+function makeRequest(body?: unknown): NextRequest {
+  // Minimal request stub: a request-id header, the route URL, and a json() that resolves to `body`.
+  const headers = new Headers([['x-request-id', 'req-test']]);
+  const nextUrl = new URL('http://localhost/api/v2/ai/config/agent-session/repos/example-org/example-repo');
+  const json = jest.fn().mockResolvedValue(body);
+  return { headers, nextUrl, json } as unknown as NextRequest;
+}
+
+const params = { params: { fullName: ['example-org', 'example-repo'] } };
+
+describe('/api/v2/ai/config/agent-session/repos/[...fullName] (admin-gated repo-level writes)', () => {
+  const originalEnableAuth = process.env.ENABLE_AUTH;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    process.env.ENABLE_AUTH = 'true';
+    mockGetUser.mockReturnValue({ sub: 'sample-admin', realm_access: { roles: ['admin'] } });
+    mockSetRepoConfig.mockResolvedValue({});
+    mockDeleteRepoConfig.mockResolvedValue(undefined);
+  });
+
+  afterEach(() => {
+    if (originalEnableAuth === undefined) {
+      delete process.env.ENABLE_AUTH;
+    } else {
+      process.env.ENABLE_AUTH = originalEnableAuth;
+    }
+  });
+
+  it('PUT returns 403 for a non-admin and does not write', async () => {
+    mockGetUser.mockReturnValue({ sub: 'sample-user', realm_access: { roles: ['user'] } });
+
+    const response = await PUT(makeRequest({}), params);
+
+    expect(response.status).toBe(403);
+    expect(mockSetRepoConfig).not.toHaveBeenCalled();
+  });
+
+  it('PUT writes the repo config for an admin', async () => {
+    const response = await PUT(makeRequest({}), params);
+
+    expect(response.status).toBe(200);
+    expect(mockSetRepoConfig).toHaveBeenCalled();
+  });
+
+  it('DELETE returns 403 for a non-admin and does not delete', async () => {
+    mockGetUser.mockReturnValue({ sub: 'sample-user', realm_access: { roles: ['user'] } });
+
+    const response = await DELETE(makeRequest(), params);
+
+    expect(response.status).toBe(403);
+    expect(mockDeleteRepoConfig).not.toHaveBeenCalled();
+  });
+
+  it('DELETE deletes the repo config for an admin', async () => {
+    const response = await DELETE(makeRequest(), params);
+
+    expect(response.status).toBe(204);
+    expect(mockDeleteRepoConfig).toHaveBeenCalled();
+  });
+});
diff --git a/src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.ts b/src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.ts
index db66e755..d3a9cf47 100644
--- a/src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.ts
+++ b/src/app/api/v2/ai/config/agent-session/repos/[...fullName]/route.ts
@@ -155,5 +155,6 @@ const deleteHandler = async (req: NextRequest, { params }: { params: { fullName:
 };
 
 export const GET = createApiHandler(getHandler);
-export const PUT = createApiHandler(putHandler);
-export const DELETE = createApiHandler(deleteHandler);
+// Repo-level control-plane mutations — admin only (matches runtime-config repo twin).
+export const PUT = createApiHandler(putHandler, { roles: ['admin'] });
+export const DELETE = createApiHandler(deleteHandler, { roles: ['admin'] });
diff --git a/src/app/api/v2/ai/config/agent-session/route.test.ts b/src/app/api/v2/ai/config/agent-session/route.test.ts
new file mode 100644
index 00000000..6d497157
--- /dev/null
+++ b/src/app/api/v2/ai/config/agent-session/route.test.ts
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { NextRequest } from 'next/server';
+
+const mockGetUser = jest.fn();
+const mockSetGlobalConfig = jest.fn();
+
+jest.mock('server/lib/get-user', () => ({
+  getUser: (...args: unknown[]) => mockGetUser(...args),
+}));
+
+jest.mock('server/services/agentSessionConfig', () => ({
+  __esModule: true,
+  default: {
+    getInstance: jest.fn(() => ({
+      getGlobalConfig: jest.fn().mockResolvedValue({}),
+      setGlobalConfig: (...args: unknown[]) => mockSetGlobalConfig(...args),
+    })),
+  },
+}));
+
+import { PUT } from './route';
+
+function makeRequest(body?: unknown): NextRequest {
+  // Minimal request stub: a request-id header, the route URL, and a json() that resolves to `body`.
+  const headers = new Headers([['x-request-id', 'req-test']]);
+  const nextUrl = new URL('http://localhost/api/v2/ai/config/agent-session');
+  const json = jest.fn().mockResolvedValue(body);
+  return { headers, nextUrl, json } as unknown as NextRequest;
+}
+
+describe('PUT /api/v2/ai/config/agent-session (admin-gated org-wide write)', () => {
+  const originalEnableAuth = process.env.ENABLE_AUTH;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    process.env.ENABLE_AUTH = 'true';
+    mockGetUser.mockReturnValue({ sub: 'sample-admin', realm_access: { roles: ['admin'] } });
+    mockSetGlobalConfig.mockResolvedValue({});
+  });
+
+  afterEach(() => {
+    if (originalEnableAuth === undefined) {
+      delete process.env.ENABLE_AUTH;
+    } else {
+      process.env.ENABLE_AUTH = originalEnableAuth;
+    }
+  });
+
+  it('returns 403 for a non-admin and does not write', async () => {
+    mockGetUser.mockReturnValue({ sub: 'sample-user', realm_access: { roles: ['user'] } });
+
+    const response = await PUT(makeRequest({}));
+
+    expect(response.status).toBe(403);
+    expect(mockSetGlobalConfig).not.toHaveBeenCalled();
+  });
+
+  it('writes the global config for an admin', async () => {
+    const response = await PUT(makeRequest({}));
+
+    expect(response.status).toBe(200);
+    expect(mockSetGlobalConfig).toHaveBeenCalled();
+  });
+});
diff --git a/src/app/api/v2/ai/config/agent-session/route.ts b/src/app/api/v2/ai/config/agent-session/route.ts
index c30e5dd8..a4f616d8 100644
--- a/src/app/api/v2/ai/config/agent-session/route.ts
+++ b/src/app/api/v2/ai/config/agent-session/route.ts
@@ -106,4 +106,5 @@ const putHandler = async (req: NextRequest) => {
 };
 
 export const GET = createApiHandler(getHandler);
-export const PUT = createApiHandler(putHandler);
+// Org-wide control-plane mutation — admin only (matches admin/agent/* twins).
+export const PUT = createApiHandler(putHandler, { roles: ['admin'] });
diff --git a/src/app/api/v2/ai/config/agent-session/runtime/route.test.ts b/src/app/api/v2/ai/config/agent-session/runtime/route.test.ts
new file mode 100644
index 00000000..8040512f
--- /dev/null
+++ b/src/app/api/v2/ai/config/agent-session/runtime/route.test.ts
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { NextRequest } from 'next/server';
+
+const mockGetUser = jest.fn();
+const mockSetGlobalRuntimeConfig = jest.fn();
+
+jest.mock('server/lib/get-user', () => ({
+  getUser: (...args: unknown[]) => mockGetUser(...args),
+}));
+
+jest.mock('server/services/agentSessionConfig', () => ({
+  __esModule: true,
+  default: {
+    getInstance: jest.fn(() => ({
+      getGlobalRuntimeConfig: jest.fn().mockResolvedValue({}),
+      setGlobalRuntimeConfig: (...args: unknown[]) => mockSetGlobalRuntimeConfig(...args),
+    })),
+  },
+}));
+
+import { PUT } from './route';
+
+function makeRequest(body?: unknown): NextRequest {
+  // Minimal request stub: a request-id header, the route URL, and a json() that resolves to `body`.
+  const headers = new Headers([['x-request-id', 'req-test']]);
+  const nextUrl = new URL('http://localhost/api/v2/ai/config/agent-session/runtime');
+  const json = jest.fn().mockResolvedValue(body);
+  return { headers, nextUrl, json } as unknown as NextRequest;
+}
+
+describe('PUT /api/v2/ai/config/agent-session/runtime (admin-gated org-wide write)', () => {
+  const originalEnableAuth = process.env.ENABLE_AUTH;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    process.env.ENABLE_AUTH = 'true';
+    mockGetUser.mockReturnValue({ sub: 'sample-admin', realm_access: { roles: ['admin'] } });
+    mockSetGlobalRuntimeConfig.mockResolvedValue({});
+  });
+
+  afterEach(() => {
+    if (originalEnableAuth === undefined) {
+      delete process.env.ENABLE_AUTH;
+    } else {
+      process.env.ENABLE_AUTH = originalEnableAuth;
+    }
+  });
+
+  it('returns 403 for a non-admin and does not write', async () => {
+    mockGetUser.mockReturnValue({ sub: 'sample-user', realm_access: { roles: ['user'] } });
+
+    const response = await PUT(makeRequest({}));
+
+    expect(response.status).toBe(403);
+    expect(mockSetGlobalRuntimeConfig).not.toHaveBeenCalled();
+  });
+
+  it('writes the global runtime config for an admin', async () => {
+    const response = await PUT(makeRequest({}));
+
+    expect(response.status).toBe(200);
+    expect(mockSetGlobalRuntimeConfig).toHaveBeenCalled();
+  });
+});
diff --git a/src/app/api/v2/ai/config/agent-session/runtime/route.ts b/src/app/api/v2/ai/config/agent-session/runtime/route.ts
index edd84cc7..18b3b955 100644
--- a/src/app/api/v2/ai/config/agent-session/runtime/route.ts
+++ b/src/app/api/v2/ai/config/agent-session/runtime/route.ts
@@ -106,4 +106,5 @@ const putHandler = async (req: NextRequest) => {
 };
 
 export const GET = createApiHandler(getHandler);
-export const PUT = createApiHandler(putHandler);
+// Org-wide control-plane mutation — admin only.
+export const PUT = createApiHandler(putHandler, { roles: ['admin'] });
diff --git a/src/app/api/v2/ai/config/mcp-servers/[slug]/route.test.ts b/src/app/api/v2/ai/config/mcp-servers/[slug]/route.test.ts
new file mode 100644
index 00000000..d7139f45
--- /dev/null
+++ b/src/app/api/v2/ai/config/mcp-servers/[slug]/route.test.ts
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { NextRequest } from 'next/server';
+
+const mockGetUser = jest.fn();
+const mockUpdate = jest.fn();
+const mockDelete = jest.fn();
+
+jest.mock('server/lib/get-user', () => ({
+  getUser: (...args: unknown[]) => mockGetUser(...args),
+}));
+
+jest.mock('server/services/agentRuntime/mcp/config', () => ({
+  McpConfigService: jest.fn().mockImplementation(() => ({
+    update: (...args: unknown[]) => mockUpdate(...args),
+    delete: (...args: unknown[]) => mockDelete(...args),
+  })),
+  redactMcpConfigSecrets: (config: unknown) => config,
+}));
+
+jest.mock('server/lib/dependencies', () => ({}));
+
+import { DELETE, PUT } from './route';
+
+function makeRequest(body?: unknown): NextRequest {
+  // Minimal request stub: a request-id header, the route URL, and a json() that resolves to `body`.
+  const headers = new Headers([['x-request-id', 'req-test']]);
+  const nextUrl = new URL('http://localhost/api/v2/ai/config/mcp-servers/sample');
+  const json = jest.fn().mockResolvedValue(body);
+  return { headers, nextUrl, json } as unknown as NextRequest;
+}
+
+const params = { params: Promise.resolve({ slug: 'sample' }) };
+
+describe('/api/v2/ai/config/mcp-servers/[slug] (admin-gated org-wide writes)', () => {
+  const originalEnableAuth = process.env.ENABLE_AUTH;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    process.env.ENABLE_AUTH = 'true';
+    mockGetUser.mockReturnValue({ sub: 'sample-admin', realm_access: { roles: ['admin'] } });
+    mockUpdate.mockResolvedValue({ slug: 'sample', name: 'Sample' });
+    mockDelete.mockResolvedValue(undefined);
+  });
+
+  afterEach(() => {
+    if (originalEnableAuth === undefined) {
+      delete process.env.ENABLE_AUTH;
+    } else {
+      process.env.ENABLE_AUTH = originalEnableAuth;
+    }
+  });
+
+  it('PUT returns 403 for a non-admin and does not update', async () => {
+    mockGetUser.mockReturnValue({ sub: 'sample-user', realm_access: { roles: ['user'] } });
+
+    const response = await PUT(makeRequest({ name: 'New' }), params);
+
+    expect(response.status).toBe(403);
+    expect(mockUpdate).not.toHaveBeenCalled();
+  });
+
+  it('PUT updates for an admin', async () => {
+    const response = await PUT(makeRequest({ name: 'New' }), params);
+
+    expect(response.status).toBe(200);
+    expect(mockUpdate).toHaveBeenCalled();
+  });
+
+  it('DELETE returns 403 for a non-admin and does not delete', async () => {
+    mockGetUser.mockReturnValue({ sub: 'sample-user', realm_access: { roles: ['user'] } });
+
+    const response = await DELETE(makeRequest(), params);
+
+    expect(response.status).toBe(403);
+    expect(mockDelete).not.toHaveBeenCalled();
+  });
+
+  it('DELETE deletes for an admin', async () => {
+    const response = await DELETE(makeRequest(), params);
+
+    expect(response.status).toBe(204);
+    expect(mockDelete).toHaveBeenCalled();
+  });
+});
diff --git a/src/app/api/v2/ai/config/mcp-servers/[slug]/route.ts b/src/app/api/v2/ai/config/mcp-servers/[slug]/route.ts
index 71c7a1d6..3724b2e0 100644
--- a/src/app/api/v2/ai/config/mcp-servers/[slug]/route.ts
+++ b/src/app/api/v2/ai/config/mcp-servers/[slug]/route.ts
@@ -230,5 +230,6 @@ const deleteHandler = async (req: NextRequest, { params }: { params: Promise<{ s
 };
 
 export const GET = createApiHandler(getHandler);
-export const PUT = createApiHandler(putHandler);
-export const DELETE = createApiHandler(deleteHandler);
+// Org-wide control-plane mutations — admin only.
+export const PUT = createApiHandler(putHandler, { roles: ['admin'] });
+export const DELETE = createApiHandler(deleteHandler, { roles: ['admin'] });
diff --git a/src/app/api/v2/ai/config/mcp-servers/route.test.ts b/src/app/api/v2/ai/config/mcp-servers/route.test.ts
new file mode 100644
index 00000000..edaf1e9e
--- /dev/null
+++ b/src/app/api/v2/ai/config/mcp-servers/route.test.ts
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { NextRequest } from 'next/server';
+
+const mockGetUser = jest.fn();
+const mockCreate = jest.fn();
+
+jest.mock('server/lib/get-user', () => ({
+  getUser: (...args: unknown[]) => mockGetUser(...args),
+}));
+
+jest.mock('server/services/agentRuntime/mcp/config', () => ({
+  McpConfigService: jest.fn().mockImplementation(() => ({
+    listByScope: jest.fn().mockResolvedValue([]),
+    create: (...args: unknown[]) => mockCreate(...args),
+  })),
+  redactMcpConfigSecrets: (config: unknown) => config,
+}));
+
+jest.mock('server/lib/dependencies', () => ({}));
+
+import { POST } from './route';
+
+function makeRequest(body?: unknown): NextRequest {
+  // Minimal request stub: a request-id header, the route URL, and a json() that resolves to `body`.
+  const headers = new Headers([['x-request-id', 'req-test']]);
+  const nextUrl = new URL('http://localhost/api/v2/ai/config/mcp-servers');
+  const json = jest.fn().mockResolvedValue(body);
+  return { headers, nextUrl, json } as unknown as NextRequest;
+}
+
+describe('POST /api/v2/ai/config/mcp-servers (admin-gated org-wide write)', () => {
+  const originalEnableAuth = process.env.ENABLE_AUTH;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    process.env.ENABLE_AUTH = 'true';
+    mockGetUser.mockReturnValue({ sub: 'sample-admin', realm_access: { roles: ['admin'] } });
+    mockCreate.mockResolvedValue({ slug: 'sample', name: 'Sample', scope: 'global' });
+  });
+
+  afterEach(() => {
+    if (originalEnableAuth === undefined) {
+      delete process.env.ENABLE_AUTH;
+    } else {
+      process.env.ENABLE_AUTH = originalEnableAuth;
+    }
+  });
+
+  it('returns 403 for a non-admin user and does not create', async () => {
+    mockGetUser.mockReturnValue({ sub: 'sample-user', realm_access: { roles: ['user'] } });
+
+    const response = await POST(makeRequest({ slug: 'sample', name: 'Sample', transport: {} }));
+
+    expect(response.status).toBe(403);
+    expect(mockCreate).not.toHaveBeenCalled();
+  });
+
+  it('creates the global MCP config for an admin', async () => {
+    const response = await POST(makeRequest({ slug: 'sample', name: 'Sample', transport: {} }));
+
+    expect(response.status).toBe(201);
+    expect(mockCreate).toHaveBeenCalled();
+  });
+});
diff --git a/src/app/api/v2/ai/config/mcp-servers/route.ts b/src/app/api/v2/ai/config/mcp-servers/route.ts
index b61fd5a7..8a9cf528 100644
--- a/src/app/api/v2/ai/config/mcp-servers/route.ts
+++ b/src/app/api/v2/ai/config/mcp-servers/route.ts
@@ -170,4 +170,5 @@ const postHandler = async (req: NextRequest) => {
 };
 
 export const GET = createApiHandler(getHandler);
-export const POST = createApiHandler(postHandler);
+// Org-wide control-plane mutation (registers global MCP servers w/ secrets) — admin only.
+export const POST = createApiHandler(postHandler, { roles: ['admin'] });
diff --git a/src/app/api/v2/config/sites/route.test.ts b/src/app/api/v2/config/sites/route.test.ts
new file mode 100644
index 00000000..e69bdbbe
--- /dev/null
+++ b/src/app/api/v2/config/sites/route.test.ts
@@ -0,0 +1,167 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { NextRequest } from 'next/server';
+
+const mockGetUser = jest.fn();
+const mockGetSitesConfig = jest.fn();
+const mockSetSitesConfig = jest.fn();
+
+jest.mock('server/lib/get-user', () => ({
+  getUser: (...args: unknown[]) => mockGetUser(...args),
+}));
+
+jest.mock('server/services/sitesConfig', () => ({
+  __esModule: true,
+  default: {
+    getInstance: jest.fn(() => ({
+      getConfig: (...args: unknown[]) => mockGetSitesConfig(...args),
+      setConfig: (...args: unknown[]) => mockSetSitesConfig(...args),
+    })),
+  },
+}));
+
+import { GET, PUT } from './route';
+
+function makeRequest(body?: unknown): NextRequest {
+  // Minimal request stub: a request-id header, the route URL, and a json() that resolves to `body`.
+  const headers = new Headers([['x-request-id', 'req-test']]);
+  const nextUrl = new URL('http://localhost/api/v2/config/sites');
+  const json = jest.fn().mockResolvedValue(body);
+  return { headers, nextUrl, json } as unknown as NextRequest;
+}
+
+describe('/api/v2/config/sites', () => {
+  const originalEnableAuth = process.env.ENABLE_AUTH;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    process.env.ENABLE_AUTH = 'true';
+    mockGetUser.mockReturnValue({
+      sub: 'sample-user',
+      realm_access: {
+        roles: ['admin'],
+      },
+    });
+    mockGetSitesConfig.mockResolvedValue({
+      enabled: false,
+      domain: 'localhost',
+      hostPrefix: 'site',
+    });
+    mockSetSitesConfig.mockImplementation(async (config) => config);
+  });
+
+  afterEach(() => {
+    if (originalEnableAuth === undefined) {
+      delete process.env.ENABLE_AUTH;
+    } else {
+      process.env.ENABLE_AUTH = originalEnableAuth;
+    }
+  });
+
+  it('requires admin access before reading config', async () => {
+    mockGetUser.mockReturnValue({
+      sub: 'sample-user',
+      realm_access: {
+        roles: ['user'],
+      },
+    });
+
+    const response = await GET(makeRequest());
+    const body = await response.json();
+
+    expect(response.status).toBe(403);
+    expect(body.error.message).toBe('Forbidden: insufficient permissions');
+    expect(mockGetSitesConfig).not.toHaveBeenCalled();
+  });
+
+  it('returns the Sites config', async () => {
+    const response = await GET(makeRequest());
+    const body = await response.json();
+
+    expect(response.status).toBe(200);
+    expect(body.data.config).toMatchObject({
+      enabled: false,
+      domain: 'localhost',
+      hostPrefix: 'site',
+    });
+  });
+
+  it('rejects invalid updates', async () => {
+    const request = makeRequest({
+      enabled: true,
+      domain: 'sites.example.com',
+      upload: {
+        maxFiles: 0,
+      },
+    });
+
+    const response = await PUT(request);
+    const body = await response.json();
+
+    expect(response.status).toBe(400);
+    expect(body.error.message).toContain('Validation failed');
+    expect(mockSetSitesConfig).not.toHaveBeenCalled();
+  });
+
+  it('rejects partial replacement updates', async () => {
+    const response = await PUT(makeRequest({ enabled: true }));
+    const body = await response.json();
+
+    expect(response.status).toBe(400);
+    expect(body.error.message).toContain('Validation failed');
+    expect(mockSetSitesConfig).not.toHaveBeenCalled();
+  });
+
+  it('updates the Sites config', async () => {
+    const nextConfig = {
+      enabled: true,
+      domain: 'sites.example.com',
+      port: 443,
+      hostPrefix: 'preview',
+      ttl: {
+        enabled: true,
+        defaultDays: 14,
+        extensionDays: 7,
+      },
+      upload: {
+        maxUploadBytes: 20971520,
+        maxExtractedBytes: 20971520,
+        maxFiles: 1000,
+        allowedExtensions: ['html', 'zip'],
+      },
+      storage: {
+        backend: 's3',
+        bucket: 'lifecycle-sites',
+        prefix: 'sites',
+        region: 'us-west-2',
+        endpoint: null,
+        forcePathStyle: false,
+      },
+      cleanup: {
+        enabled: true,
+        intervalMinutes: 30,
+      },
+    };
+
+    const response = await PUT(makeRequest(nextConfig));
+    const body = await response.json();
+
+    expect(response.status).toBe(200);
+    expect(mockSetSitesConfig).toHaveBeenCalledWith(nextConfig);
+    expect(body.data.config).toEqual(nextConfig);
+  });
+});
diff --git a/src/app/api/v2/config/sites/route.ts b/src/app/api/v2/config/sites/route.ts
new file mode 100644
index 00000000..c2d0ec0e
--- /dev/null
+++ b/src/app/api/v2/config/sites/route.ts
@@ -0,0 +1,116 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import JsonSchema from 'jsonschema';
+import { NextRequest } from 'next/server';
+import { createApiHandler } from 'server/lib/createApiHandler';
+import { errorResponse, successResponse } from 'server/lib/response';
+import { sitesConfigSchema } from 'server/lib/validation/sitesConfigSchemas';
+import SitesConfigService from 'server/services/sitesConfig';
+import type { SitesConfig } from 'server/services/types/globalConfig';
+
+/**
+ * @openapi
+ * /api/v2/config/sites:
+ *   get:
+ *     summary: Get Sites configuration
+ *     description: Returns the global Sites hosting configuration stored in global_config under the sites key.
+ *     tags:
+ *       - Config
+ *     operationId: getSitesConfig
+ *     responses:
+ *       '200':
+ *         description: Sites hosting configuration.
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/SitesConfigSuccessResponse'
+ *       '401':
+ *         description: Unauthorized
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/ApiErrorResponse'
+ *       '403':
+ *         description: Forbidden
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/ApiErrorResponse'
+ *   put:
+ *     summary: Update Sites configuration
+ *     description: Replaces the global Sites hosting configuration stored in global_config under the sites key.
+ *     tags:
+ *       - Config
+ *     operationId: updateSitesConfig
+ *     requestBody:
+ *       required: true
+ *       content:
+ *         application/json:
+ *           schema:
+ *             $ref: '#/components/schemas/SitesConfig'
+ *     responses:
+ *       '200':
+ *         description: Updated Sites hosting configuration.
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/SitesConfigSuccessResponse'
+ *       '400':
+ *         description: Validation error
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/ApiErrorResponse'
+ *       '401':
+ *         description: Unauthorized
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/ApiErrorResponse'
+ *       '403':
+ *         description: Forbidden
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/ApiErrorResponse'
+ */
+const getHandler = async (req: NextRequest) => {
+  const sitesConfig = await SitesConfigService.getInstance().getConfig(); // read via the config service singleton
+  return successResponse({ config: sitesConfig }, { status: 200 }, req);
+};
+
+const putHandler = async (req: NextRequest) => {
+  const badRequest = (message: string) => errorResponse(new Error(message), { status: 400 }, req);
+  let payload: unknown;
+  try {
+    payload = await req.json();
+  } catch {
+    return badRequest('Invalid JSON in request body');
+  }
+
+  // Validate against sitesConfigSchema; every schema error is reported in a single 400 message.
+  const { valid, errors } = new JsonSchema.Validator().validate(payload, sitesConfigSchema);
+  if (!valid) {
+    return badRequest(`Validation failed: ${errors.map((entry) => entry.stack).join('; ')}`);
+  }
+
+  const config = await SitesConfigService.getInstance().setConfig(payload as SitesConfig);
+  return successResponse({ config }, { status: 200 }, req);
+};
+
+export const GET = createApiHandler(getHandler, { roles: ['admin'] }); // NOTE(review): presumably admin-only — confirm
+export const PUT = createApiHandler(putHandler, { roles: ['admin'] });
diff --git a/src/server/jobs/__tests__/agentSessionCleanup.test.ts b/src/server/jobs/__tests__/agentSessionCleanup.test.ts
index 68256c91..93af21a7 100644
--- a/src/server/jobs/__tests__/agentSessionCleanup.test.ts
+++ b/src/server/jobs/__tests__/agentSessionCleanup.test.ts
@@ -39,6 +39,9 @@ jest.mock('server/services/agent/WorkspaceRuntimeStateService', () => {
   return {
     __esModule: true,
     WorkspaceActionBlockedError,
+    WorkspaceRuntimeStateService: {
+      recordWorkspaceFailure: jest.fn(),
+    },
   };
 });
 jest.mock('server/lib/logger', () => ({
@@ -66,7 +69,63 @@ import AgentSession from 'server/models/AgentSession';
 import AgentSessionService from 'server/services/agentSession';
 import { getLogger } from 'server/lib/logger';
 import { processAgentSessionCleanup } from '../agentSessionCleanup';
-import { WorkspaceActionBlockedError } from 'server/services/agent/WorkspaceRuntimeStateService';
+import {
+  WorkspaceActionBlockedError,
+  WorkspaceRuntimeStateService,
+} from 'server/services/agent/WorkspaceRuntimeStateService';
+
+const mockRecordWorkspaceFailure = WorkspaceRuntimeStateService.recordWorkspaceFailure as jest.Mock;
+
+// Idle-active cohort: .where(status) and .where(lastActivity) return the builder; .where(callback) resolves.
+function buildIdleActiveQuery(result: unknown[]) {
+  const builder = { where: jest.fn() };
+  // The grouped-where callback is invoked with a stub exposing whereNot(...).orWhereNot(...).
+  const groupStub = () => ({ whereNot: jest.fn().mockReturnValue({ orWhereNot: jest.fn() }) });
+  builder.where
+    .mockReturnValueOnce(builder)
+    .mockReturnValueOnce(builder)
+    .mockImplementationOnce((callback: (b: unknown) => void) => {
+      callback(groupStub());
+      return Promise.resolve(result);
+    });
+  return builder;
+}
+
+// Provisioning-timeout cohort: .where x4 (status, sessionKind, workspaceStatus, updatedAt); the 4th resolves.
+function buildFourWhereQuery(result: unknown[]) {
+  const builder = { where: jest.fn() };
+  builder.where
+    .mockReturnValueOnce(builder)
+    .mockReturnValueOnce(builder)
+    .mockReturnValueOnce(builder)
+    .mockImplementationOnce(() => Promise.resolve(result));
+  return builder;
+}
+
+// Stale-starting cohort: .where(status) returns the builder, then .where(updatedAt) resolves to `result`.
+function buildTwoWhereQuery(result: unknown[]) {
+  const builder = { where: jest.fn() };
+  builder.where.mockReturnValueOnce(builder).mockImplementationOnce(() => Promise.resolve(result));
+  return builder;
+}
+
+/**
+ * Replaces AgentSession.query with a mock that returns one query builder per call, in source-call order:
+ *   1) idle-active, 2) provisioning-timeout, 3) stale-starting, 4) hibernated-expiry. Omitted cohorts are empty.
+ */
+function mockCleanupQueries(opts: {
+  idleActive?: unknown[];
+  provisioningTimeout?: unknown[];
+  staleStarting?: unknown[];
+  hibernatedExpiry?: unknown[];
+}) {
+  const queryMock = jest.fn();
+  queryMock.mockReturnValueOnce(buildIdleActiveQuery(opts.idleActive ?? []));
+  queryMock.mockReturnValueOnce(buildFourWhereQuery(opts.provisioningTimeout ?? []));
+  queryMock.mockReturnValueOnce(buildTwoWhereQuery(opts.staleStarting ?? []));
+  queryMock.mockReturnValueOnce(buildFourWhereQuery(opts.hibernatedExpiry ?? []));
+  (AgentSession.query as jest.Mock) = queryMock;
+}
 
 describe('agentSessionCleanup', () => {
   const mockLogger = {
@@ -104,41 +163,12 @@ describe('agentSessionCleanup', () => {
       },
     ];
 
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const startingQuery = { where: jest.fn() };
-    startingQuery.where
-      .mockImplementationOnce(() => startingQuery)
-      .mockImplementationOnce(() => Promise.resolve(startingSessions));
-
-    const suspendedQuery = { where: jest.fn() };
-    suspendedQuery.where
-      .mockImplementationOnce(() => suspendedQuery)
-      .mockImplementationOnce(() => suspendedQuery)
-      .mockImplementationOnce(() => suspendedQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(startingQuery)
-      .mockReturnValueOnce(suspendedQuery);
+    mockCleanupQueries({ idleActive: activeSessions, staleStarting: startingSessions });
     (AgentSessionService.endSession as jest.Mock).mockResolvedValue(undefined);
 
     await processAgentSessionCleanup();
 
-    expect(AgentSession.query).toHaveBeenCalledTimes(3);
+    expect(AgentSession.query).toHaveBeenCalledTimes(4);
     expect(AgentSessionService.endSession).toHaveBeenCalledTimes(2);
     expect(AgentSessionService.endSession).toHaveBeenNthCalledWith(1, 'active-session');
     expect(AgentSessionService.endSession).toHaveBeenNthCalledWith(2, 'starting-session');
@@ -161,36 +191,7 @@ describe('agentSessionCleanup', () => {
       },
     ];
 
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const emptyTwoWhereQuery = { where: jest.fn() };
-    emptyTwoWhereQuery.where
-      .mockImplementationOnce(() => emptyTwoWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    const emptyFourWhereQuery = { where: jest.fn() };
-    emptyFourWhereQuery.where
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(emptyTwoWhereQuery)
-      .mockReturnValueOnce(emptyFourWhereQuery);
+    mockCleanupQueries({ idleActive: activeSessions });
     (AgentSessionService.suspendChatRuntime as jest.Mock).mockResolvedValue(undefined);
 
     await processAgentSessionCleanup();
@@ -243,36 +244,7 @@ describe('agentSessionCleanup', () => {
       },
     ];
 
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const emptyTwoWhereQuery = { where: jest.fn() };
-    emptyTwoWhereQuery.where
-      .mockImplementationOnce(() => emptyTwoWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    const emptyFourWhereQuery = { where: jest.fn() };
-    emptyFourWhereQuery.where
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(emptyTwoWhereQuery)
-      .mockReturnValueOnce(emptyFourWhereQuery);
+    mockCleanupQueries({ idleActive: activeSessions });
     (AgentSessionService.endSession as jest.Mock).mockResolvedValue(undefined);
 
     await processAgentSessionCleanup();
@@ -300,36 +272,7 @@ describe('agentSessionCleanup', () => {
       },
     ];
 
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const emptyTwoWhereQuery = { where: jest.fn() };
-    emptyTwoWhereQuery.where
-      .mockImplementationOnce(() => emptyTwoWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    const emptyFourWhereQuery = { where: jest.fn() };
-    emptyFourWhereQuery.where
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(emptyTwoWhereQuery)
-      .mockReturnValueOnce(emptyFourWhereQuery);
+    mockCleanupQueries({ idleActive: activeSessions });
     (AgentSessionService.endSession as jest.Mock).mockRejectedValue(
       new WorkspaceActionBlockedError('active_run', 'Active run')
     );
@@ -359,36 +302,7 @@ describe('agentSessionCleanup', () => {
       },
     ];
 
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const emptyTwoWhereQuery = { where: jest.fn() };
-    emptyTwoWhereQuery.where
-      .mockImplementationOnce(() => emptyTwoWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    const emptyFourWhereQuery = { where: jest.fn() };
-    emptyFourWhereQuery.where
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(emptyTwoWhereQuery)
-      .mockReturnValueOnce(emptyFourWhereQuery);
+    mockCleanupQueries({ idleActive: activeSessions });
     (AgentSessionService.endSession as jest.Mock).mockRejectedValue(
       new WorkspaceActionBlockedError('action_in_progress', 'Action in progress', {
         currentAction: 'resume',
@@ -421,98 +335,65 @@ describe('agentSessionCleanup', () => {
       },
     ];
 
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const emptyTwoWhereQuery = { where: jest.fn() };
-    emptyTwoWhereQuery.where
-      .mockImplementationOnce(() => emptyTwoWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    const emptyFourWhereQuery = { where: jest.fn() };
-    emptyFourWhereQuery.where
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(emptyTwoWhereQuery)
-      .mockReturnValueOnce(emptyFourWhereQuery);
+    mockCleanupQueries({ idleActive: activeSessions });
 
     await processAgentSessionCleanup();
 
     expect(AgentSessionService.suspendChatRuntime).not.toHaveBeenCalled();
     expect(AgentSessionService.endSession).not.toHaveBeenCalled();
+    expect(mockRecordWorkspaceFailure).not.toHaveBeenCalled();
     expect(mockLogger.info).toHaveBeenCalledWith(
       'Session: cleanup skipped sessionId=provisioning-chat-session reason=runtime_provisioning'
     );
   });
 
-  it('ends an idle chat session when runtime provisioning is stale', async () => {
-    const activeSessions = [
-      {
-        id: 1,
-        uuid: 'stale-provisioning-chat-session',
-        userId: 'sample-user',
-        sessionKind: 'chat',
-        workspaceStatus: 'provisioning',
-        status: 'active',
-        namespace: 'sample-namespace',
-        pvcName: 'sample-pvc',
-        lastActivity: '2026-03-23T11:00:00.000Z',
-        updatedAt: '2026-03-23T11:40:00.000Z',
-      },
-    ];
-
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const emptyTwoWhereQuery = { where: jest.fn() };
-    emptyTwoWhereQuery.where
-      .mockImplementationOnce(() => emptyTwoWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    const emptyFourWhereQuery = { where: jest.fn() };
-    emptyFourWhereQuery.where
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(emptyTwoWhereQuery)
-      .mockReturnValueOnce(emptyFourWhereQuery);
+  it('transitions a stale provisioning chat session to a retryable failure instead of ending it', async () => {
+    // Stale provision (updatedAt past the 15-min starting cutoff) lands in the provisioning-timeout cohort.
+    const timedOutSession = {
+      id: 1,
+      uuid: 'stale-provisioning-chat-session',
+      userId: 'sample-user',
+      sessionKind: 'chat',
+      workspaceStatus: 'provisioning',
+      status: 'active',
+      namespace: 'sample-namespace',
+      pvcName: 'sample-pvc',
+      lastActivity: '2026-03-23T11:00:00.000Z',
+      updatedAt: '2026-03-23T11:40:00.000Z',
+    };
+
+    mockCleanupQueries({ provisioningTimeout: [timedOutSession] });
     (AgentSessionService.endSession as jest.Mock).mockResolvedValue(undefined);
+    mockRecordWorkspaceFailure.mockResolvedValue(undefined);
 
     await processAgentSessionCleanup();
 
+    // Recovered to retryable FAILED, never ended/destroyed and never suspended.
+    expect(AgentSessionService.endSession).not.toHaveBeenCalled();
     expect(AgentSessionService.suspendChatRuntime).not.toHaveBeenCalled();
-    expect(AgentSessionService.endSession).toHaveBeenCalledWith('stale-provisioning-chat-session');
+    expect(mockRecordWorkspaceFailure).toHaveBeenCalledTimes(1);
+
+    const [sessionIdArg, stateArg] = mockRecordWorkspaceFailure.mock.calls[0];
+    expect(sessionIdArg).toBe(1);
+    expect(stateArg.sessionPatch).toEqual(
+      expect.objectContaining({
+        status: 'active',
+        workspaceStatus: 'failed',
+      })
+    );
+    // Lifecycle claim released so the FAILED -> retry path is unblocked.
+    expect(stateArg.runtimeLifecycle).toBeNull();
+    expect(stateArg.failure).toEqual(
+      expect.objectContaining({
+        code: 'workspace_provisioning_timeout',
+        retryable: true,
+        stage: 'connect_runtime',
+        origin: 'chat_runtime',
+      })
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      expect.stringContaining('Session: cleanup provisioning timed out sessionId=stale-provisioning-chat-session')
+    );
   });
 
   it('skips idle chat suspension when a run is still active', async () => {
@@ -532,36 +413,7 @@ describe('agentSessionCleanup', () => {
       },
     ];
 
-    const activeQuery = { where: jest.fn() };
-    activeQuery.where
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce(() => activeQuery)
-      .mockImplementationOnce((callback) => {
-        callback({
-          whereNot: jest.fn().mockReturnValue({
-            orWhereNot: jest.fn(),
-          }),
-        });
-        return Promise.resolve(activeSessions);
-      });
-
-    const emptyTwoWhereQuery = { where: jest.fn() };
-    emptyTwoWhereQuery.where
-      .mockImplementationOnce(() => emptyTwoWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    const emptyFourWhereQuery = { where: jest.fn() };
-    emptyFourWhereQuery.where
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => emptyFourWhereQuery)
-      .mockImplementationOnce(() => Promise.resolve([]));
-
-    (AgentSession.query as jest.Mock) = jest
-      .fn()
-      .mockReturnValueOnce(activeQuery)
-      .mockReturnValueOnce(emptyTwoWhereQuery)
-      .mockReturnValueOnce(emptyFourWhereQuery);
+    mockCleanupQueries({ idleActive: activeSessions });
     (AgentSessionService.suspendChatRuntime as jest.Mock).mockRejectedValue(
       new WorkspaceActionBlockedError('active_run', 'Active run')
     );
@@ -576,4 +428,32 @@ describe('agentSessionCleanup', () => {
     expect(mockLogger.error).not.toHaveBeenCalled();
     expect(mockLogger.info).toHaveBeenCalledWith('Session: cleanup skipped sessionId=chat-session reason=active_run');
   });
+
+  it('logs but does not end the session when the provisioning-timeout failure write fails', async () => {
+    const timedOutSession = {
+      id: 7,
+      uuid: 'reaper-error-chat-session',
+      userId: 'sample-user',
+      sessionKind: 'chat',
+      workspaceStatus: 'provisioning',
+      status: 'active',
+      namespace: 'sample-namespace',
+      pvcName: 'sample-pvc',
+      lastActivity: '2026-03-23T11:00:00.000Z',
+      updatedAt: '2026-03-23T11:40:00.000Z',
+    };
+
+    mockCleanupQueries({ provisioningTimeout: [timedOutSession] });
+    mockRecordWorkspaceFailure.mockRejectedValue(new Error('db write failed'));
+
+    await processAgentSessionCleanup();
+
+    expect(mockRecordWorkspaceFailure).toHaveBeenCalledTimes(1);
+    // A failed failure-write must never fall back to destroying the recoverable session.
+    expect(AgentSessionService.endSession).not.toHaveBeenCalled();
+    expect(mockLogger.error).toHaveBeenCalledWith(
+      expect.objectContaining({ sessionId: 'reaper-error-chat-session' }),
+      expect.stringContaining('Session: cleanup provisioning-timeout failed')
+    );
+  });
 });
diff --git a/src/server/jobs/agentSessionCleanup.ts b/src/server/jobs/agentSessionCleanup.ts
index 396d7801..d1afa9df 100644
--- a/src/server/jobs/agentSessionCleanup.ts
+++ b/src/server/jobs/agentSessionCleanup.ts
@@ -17,12 +17,19 @@
 import AgentSession from 'server/models/AgentSession';
 import AgentSessionService from 'server/services/agentSession';
 import { getLogger } from 'server/lib/logger';
-import { AgentSessionKind, AgentWorkspaceStatus } from 'shared/constants';
+import { AgentChatStatus, AgentSessionKind, AgentWorkspaceStatus } from 'shared/constants';
 import { resolveAgentSessionCleanupConfig } from 'server/lib/agentSession/runtimeConfig';
-import { WorkspaceActionBlockedError } from 'server/services/agent/WorkspaceRuntimeStateService';
+import {
+  WorkspaceActionBlockedError,
+  WorkspaceRuntimeStateService,
+} from 'server/services/agent/WorkspaceRuntimeStateService';
+import { buildWorkspaceRuntimeFailure } from 'server/lib/agentSession/startupFailureState';
 
 const logger = () => getLogger();
 
+const PROVISIONING_TIMEOUT_MESSAGE =
+  'Workspace provisioning timed out. The previous attempt was interrupted before the workspace became ready. Retry to start it again.';
+
 export async function processAgentSessionCleanup(): Promise<void> {
   const cleanupConfig = await resolveAgentSessionCleanupConfig();
   const activeCutoff = new Date(Date.now() - cleanupConfig.activeIdleSuspendMs);
@@ -36,6 +43,13 @@ export async function processAgentSessionCleanup(): Promise<void> {
         .whereNot('sessionKind', AgentSessionKind.CHAT)
         .orWhereNot('workspaceStatus', AgentWorkspaceStatus.HIBERNATED);
     });
+  // Chat provisioning is synchronous in the HTTP request; if that process dies the catch never runs and
+  // the session is stranded in PROVISIONING under a live claim. Reap stale ones into a retryable FAILED.
+  const timedOutProvisioningSessions = await AgentSession.query()
+    .where('status', 'active')
+    .where('sessionKind', AgentSessionKind.CHAT)
+    .where('workspaceStatus', AgentWorkspaceStatus.PROVISIONING)
+    .where('updatedAt', '<', startingCutoff);
   const staleSessions = [
     ...idleActiveSessions,
     ...(await AgentSession.query().where('status', 'starting').where('updatedAt', '<', startingCutoff)),
@@ -46,14 +60,41 @@ export async function processAgentSessionCleanup(): Promise<void> {
       .where('updatedAt', '<', suspendedExpiryCutoff)),
   ];
 
+  for (const session of timedOutProvisioningSessions) {
+    const sessionId = session.uuid || String(session.id);
+    try {
+      const failure = buildWorkspaceRuntimeFailure({
+        error: new Error(PROVISIONING_TIMEOUT_MESSAGE),
+        stage: 'connect_runtime',
+        origin: 'chat_runtime',
+        retryable: true,
+        code: 'workspace_provisioning_timeout',
+      });
+      logger().info(`Session: cleanup provisioning timed out sessionId=${sessionId} updatedAt=${session.updatedAt}`);
+      await WorkspaceRuntimeStateService.recordWorkspaceFailure(session.id, {
+        sessionPatch: {
+          status: 'active',
+          chatStatus: AgentChatStatus.READY,
+          workspaceStatus: AgentWorkspaceStatus.FAILED,
+        } as unknown as Partial<AgentSession>,
+        failure,
+        // Release the stranded lifecycle claim so the retry path is unblocked.
+        runtimeLifecycle: null,
+      });
+    } catch (err) {
+      logger().error({ error: err, sessionId }, `Session: cleanup provisioning-timeout failed sessionId=${sessionId}`);
+    }
+  }
+
   for (const session of staleSessions) {
     const sessionId = session.uuid || String(session.id);
     try {
+      // Provisioning chat runtimes are owned by the provisioning-timeout reaper above; never end them here.
       const isProvisioningChatRuntime =
         session.status === 'active' &&
         session.sessionKind === AgentSessionKind.CHAT &&
         session.workspaceStatus === AgentWorkspaceStatus.PROVISIONING;
-      if (isProvisioningChatRuntime && new Date(session.updatedAt).getTime() >= startingCutoff.getTime()) {
+      if (isProvisioningChatRuntime) {
         logger().info(`Session: cleanup skipped sessionId=${sessionId} reason=runtime_provisioning`);
         continue;
       }
diff --git a/src/server/jobs/index.ts b/src/server/jobs/index.ts
index fd847d56..8f24f333 100644
--- a/src/server/jobs/index.ts
+++ b/src/server/jobs/index.ts
@@ -30,8 +30,6 @@ import {
   resolveAgentSessionCleanupConfig,
 } from 'server/lib/agentSession/runtimeConfig';
 
-let isBootstrapped = false;
-
 export default function bootstrapJobs(services: IServices) {
   if (defaultDb.services) {
     return;
@@ -205,28 +203,40 @@ export default function bootstrapJobs(services: IServices) {
   defaultDb.services = services;
 
   if (process.env.NEXT_MANUAL_SIG_HANDLE) {
-    if (!isBootstrapped) {
-      isBootstrapped = true;
-
-      // This function is used to handle graceful shutdowns add things as needed.
-      const handleExit = async (signal: string) => {
-        getLogger().info(`Jobs: shutting down signal=${signal}`);
-        try {
-          const redisClient = RedisClient.getInstance();
-          const queueManager = QueueManager.getInstance();
-          await queueManager.emptyAndCloseAllQueues();
-          await redisClient.close();
-          process.exit(0);
-        } catch (error) {
-          getLogger().error({ error }, 'Jobs: shutdown failed');
-          process.exit(0);
-        }
-      };
-
-      process.on('SIGINT', () => handleExit('SIGINT'));
-      process.on('SIGTERM', () => handleExit('SIGTERM'));
-      getLogger().info('Jobs: signal handlers registered');
+    const globalForJobs = global as unknown as {
+      sigintHandler?: () => void;
+      sigtermHandler?: () => void;
+    };
+
+    // Remove any previously registered handlers to prevent memory leaks during hot reloads
+    if (globalForJobs.sigintHandler) {
+      process.off('SIGINT', globalForJobs.sigintHandler);
     }
+    if (globalForJobs.sigtermHandler) {
+      process.off('SIGTERM', globalForJobs.sigtermHandler);
+    }
+
+    // Graceful-shutdown handler: empties and closes all queues, then closes Redis. Add further cleanup here as needed.
+    const handleExit = async (signal: string) => {
+      getLogger().info(`Jobs: shutting down signal=${signal}`);
+      try {
+        const redisClient = RedisClient.getInstance();
+        const queueManager = QueueManager.getInstance();
+        await queueManager.emptyAndCloseAllQueues();
+        await redisClient.close();
+        process.exit(0);
+      } catch (error) {
+        getLogger().error({ error }, 'Jobs: shutdown failed');
+        process.exit(0);
+      }
+    };
+
+    globalForJobs.sigintHandler = () => handleExit('SIGINT');
+    globalForJobs.sigtermHandler = () => handleExit('SIGTERM');
+
+    process.on('SIGINT', globalForJobs.sigintHandler);
+    process.on('SIGTERM', globalForJobs.sigtermHandler);
+    getLogger().info('Jobs: signal handlers registered');
   }
   getLogger().info('Jobs: bootstrap complete');
 }
diff --git a/src/server/lib/agent/runRequestText.ts b/src/server/lib/agent/runRequestText.ts
new file mode 100644
index 00000000..69d79002
--- /dev/null
+++ b/src/server/lib/agent/runRequestText.ts
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import type { CanonicalAgentRunMessageInput } from 'server/services/agent/canonicalMessages';
+
+export function readString(value: unknown): string | null {
+  return typeof value === 'string' ? value.trim() || null : null; // non-strings and blank strings give null
+}
+
+export function getRunMessageText(message: CanonicalAgentRunMessageInput): string | null {
+  type Part = CanonicalAgentRunMessageInput['parts'][number];
+  type TextPart = Extract<Part, { type: 'text' }>;
+  const isTextPart = (part: Part): part is TextPart => part.type === 'text';
+
+  // Only text parts contribute: their text is joined one per line, then trimmed as a whole.
+  const textParts = message.parts.filter(isTextPart);
+  const combined = textParts.map(({ text }) => text).join('\n').trim();
+
+  return combined || null; // no text parts, or whitespace only, yields null
+}
diff --git a/src/server/lib/agentSession/__tests__/runtimeConfig.test.ts b/src/server/lib/agentSession/__tests__/runtimeConfig.test.ts
index 3b5e8df2..fd3ec480 100644
--- a/src/server/lib/agentSession/__tests__/runtimeConfig.test.ts
+++ b/src/server/lib/agentSession/__tests__/runtimeConfig.test.ts
@@ -45,7 +45,6 @@ import {
   DEFAULT_AGENT_SESSION_WORKSPACE_STORAGE_SIZE,
   DEFAULT_AGENT_SESSION_WORKSPACE_TOOL_DISCOVERY_TIMEOUT_MS,
   DEFAULT_AGENT_SESSION_WORKSPACE_TOOL_EXECUTION_TIMEOUT_MS,
-  mergeAgentSessionReadiness,
   mergeAgentSessionReadinessForServices,
   mergeAgentSessionResources,
   resolveAgentSessionControlPlaneConfig,
@@ -451,17 +450,6 @@ describe('runtimeConfig', () => {
     });
   });
 
-  it('merges direct readiness overrides over runtime defaults', () => {
-    expect(
-      mergeAgentSessionReadiness(resolveAgentSessionReadinessFromDefaults({ timeoutMs: 60000, pollMs: 1000 }), {
-        timeoutMs: 120000,
-      })
-    ).toEqual({
-      timeoutMs: 120000,
-      pollMs: 1000,
-    });
-  });
-
   it('merges service readiness overrides over runtime defaults', () => {
     expect(
       mergeAgentSessionReadinessForServices(
diff --git a/src/server/lib/agentSession/__tests__/startupFailureState.test.ts b/src/server/lib/agentSession/__tests__/startupFailureState.test.ts
index 3d848d2f..0b723cb7 100644
--- a/src/server/lib/agentSession/__tests__/startupFailureState.test.ts
+++ b/src/server/lib/agentSession/__tests__/startupFailureState.test.ts
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+import { AppError } from 'server/lib/appError';
 import {
   WORKSPACE_RUNTIME_FAILURE_STAGES,
   buildAgentSessionStartupFailure,
@@ -80,6 +81,7 @@ describe('startupFailureState', () => {
       recordedAt: failure.recordedAt,
       retryable: false,
       origin: 'agent_session',
+      code: 'workspace_attach_services_failed',
     });
   });
 
@@ -188,4 +190,64 @@ describe('startupFailureState', () => {
       );
     }
   });
+
+  it('derives a stable code per failure stage when the error is not an AppError', () => {
+    expect(buildWorkspaceRuntimeFailure({ error: new Error('boom'), stage: 'connect_runtime' }).code).toBe(
+      'workspace_connect_runtime_failed'
+    );
+    expect(buildWorkspaceRuntimeFailure({ error: new Error('boom'), stage: 'suspend' }).code).toBe(
+      'workspace_suspend_failed'
+    );
+    // Default stage is connect_runtime when none is supplied.
+    expect(buildWorkspaceRuntimeFailure({ error: new Error('boom') }).code).toBe('workspace_connect_runtime_failed');
+  });
+
+  it('propagates the originating AppError code and nextAction onto the durable failure', () => {
+    const appError = new AppError({
+      httpStatus: 503,
+      code: 'session_workspace_gateway_unavailable',
+      message: 'gateway unavailable',
+      retryable: true,
+      nextAction: { kind: 'reconnect', label: 'Reconnect workspace' },
+    });
+
+    const failure = buildWorkspaceRuntimeFailure({
+      error: appError,
+      stage: 'connect_runtime',
+      origin: 'manual_runtime',
+      retryable: true,
+    });
+
+    expect(failure.code).toBe('session_workspace_gateway_unavailable');
+    expect(failure.nextAction).toEqual({ kind: 'reconnect', label: 'Reconnect workspace' });
+  });
+
+  it('lets an explicit code override the AppError code and stage default', () => {
+    const failure = buildWorkspaceRuntimeFailure({
+      error: new Error('Workspace provisioning timed out'),
+      stage: 'connect_runtime',
+      origin: 'chat_runtime',
+      retryable: true,
+      code: 'workspace_provisioning_timeout',
+    });
+
+    expect(failure.code).toBe('workspace_provisioning_timeout');
+    expect(failure.retryable).toBe(true);
+  });
+
+  it('round-trips the code and nextAction through normalizeWorkspaceRuntimeFailure', () => {
+    const built = buildWorkspaceRuntimeFailure({
+      error: new AppError({
+        httpStatus: 503,
+        code: 'session_workspace_gateway_unavailable',
+        message: 'gateway unavailable',
+        nextAction: { kind: 'reconnect', label: 'Reconnect workspace' },
+      }),
+      stage: 'connect_runtime',
+    });
+
+    const normalized = normalizeWorkspaceRuntimeFailure(built);
+    expect(normalized.code).toBe('session_workspace_gateway_unavailable');
+    expect(normalized.nextAction).toEqual({ kind: 'reconnect', label: 'Reconnect workspace' });
+  });
 });
diff --git a/src/server/lib/agentSession/__tests__/systemPrompt.test.ts b/src/server/lib/agentSession/__tests__/systemPrompt.test.ts
index ccb722ce..ac37efc1 100644
--- a/src/server/lib/agentSession/__tests__/systemPrompt.test.ts
+++ b/src/server/lib/agentSession/__tests__/systemPrompt.test.ts
@@ -89,6 +89,83 @@ describe('agent session system prompt', () => {
     );
   });
 
+  it('emits the top-level namespace from build context when not set directly', () => {
+    const prompt = buildAgentSessionDynamicSystemPrompt({
+      buildUuid: 'sample-build-1',
+      build: { uuid: 'sample-build-1', namespace: 'env-sample-123456' },
+      services: [],
+    });
+
+    expect(prompt).toContain('- namespace: env-sample-123456');
+  });
+
+  it('renders lifecycle config presence and declared services', () => {
+    const prompt = buildAgentSessionDynamicSystemPrompt({
+      buildUuid: 'sample-build-1',
+      lifecycleConfig: {
+        status: 'present',
+        path: 'lifecycle.yaml',
+        declaredServices: ['next-web', 'api'],
+      },
+      services: [],
+    });
+
+    expect(prompt).toContain('- lifecycleConfig: present (lifecycle.yaml)');
+    expect(prompt).toContain('- declaredServices: next-web, api');
+  });
+
+  it('renders a missing or invalid lifecycle config without declared services', () => {
+    const missing = buildAgentSessionDynamicSystemPrompt({
+      buildUuid: 'sample-build-1',
+      lifecycleConfig: { status: 'missing', path: 'lifecycle.yaml' },
+      services: [],
+    });
+    expect(missing).toContain('- lifecycleConfig: missing (lifecycle.yaml)');
+    expect(missing).not.toContain('declaredServices');
+
+    const invalid = buildAgentSessionDynamicSystemPrompt({
+      buildUuid: 'sample-build-1',
+      lifecycleConfig: { status: 'invalid', path: 'lifecycle.yaml' },
+      services: [],
+    });
+    expect(invalid).toContain('- lifecycleConfig: invalid (lifecycle.yaml)');
+  });
+
+  it('reports an invalid lifecycle config when fetch throws', async () => {
+    const buildGraphQuery = {
+      withGraphFetched: jest.fn().mockResolvedValue({
+        uuid: 'sample-build-1',
+        status: 'build_failed',
+        namespace: 'env-sample-123456',
+        pullRequest: {
+          fullName: 'example-org/example-repo',
+          branchName: 'feature/sample',
+          pullRequestNumber: 42,
+          status: 'open',
+          labels: [],
+          deployOnUpdate: false,
+          repository: { htmlUrl: 'https://github.com/example-org/example-repo' },
+        },
+        deploys: [],
+      }),
+    };
+    (Build.query as jest.Mock) = jest.fn().mockReturnValue({
+      findOne: jest.fn().mockReturnValue(buildGraphQuery),
+    });
+    (Deploy.query as jest.Mock) = jest.fn().mockReturnValue({
+      where: jest.fn().mockReturnValue({ withGraphFetched: jest.fn().mockResolvedValue([]) }),
+    });
+    (fetchLifecycleConfig as jest.Mock).mockRejectedValue(new Error('invalid yaml'));
+
+    const context = await resolveAgentSessionPromptContext({
+      sessionDbId: 123,
+      namespace: null,
+      buildUuid: 'sample-build-1',
+    });
+
+    expect(context.lifecycleConfig).toEqual({ status: 'invalid', path: 'lifecycle.yaml' });
+  });
+
   it('combines the configured and dynamic prompts with spacing', () => {
     expect(
       combineAgentSessionAppendSystemPrompt('Use concise responses.', 'Session context:\n- namespace: env-sample')
@@ -140,6 +217,8 @@ describe('agent session system prompt', () => {
     expect(prompt).not.toContain('Lifecycle debugging profile:');
     expect(prompt).not.toContain('explicitly asks to continue into repair');
     expect(prompt).toContain('Initial Lifecycle snapshot:');
+    // Top-level namespace falls back to build.namespace for build-context chats.
+    expect(prompt).toContain('- namespace: env-sample-123456');
     expect(prompt).toContain(
       '- build=sample-build-1: buildStatusAtStart=deploy_failed, buildStatusMessageAtStart=web deploy failed, namespace=env-sample-123456, sha=abc123'
     );
@@ -147,7 +226,7 @@ describe('agent session system prompt', () => {
     expect(prompt).toContain(
       '- repo=example-org/example-repo, branch=feature/sample, number=42, url=https://github.com/example-org/example-repo/pull/42, statusAtStart=open, labelsAtStart=lifecycle-deploy, deployOnUpdateAtStart=true, deployLabels=lifecycle-deploy!, disabledLabels=lifecycle-disabled!, latestCommit=abc123, repositoryUrl=https://github.com/example-org/example-repo'
     );
-    expect(prompt).toContain('Deploy roster:');
+    expect(prompt).toContain('DEPLOYS — roster:');
     expect(prompt).toContain(
       '- next-web: deployUuid=next-web-deploy-1, activeAtStart=true, statusAtStart=deploy_failed, statusMessageAtStart=CrashLoopBackOff, repo=example-org/example-repo, branch=feature/sample, publicUrl=https://next-web-sample.lifecycle.dev.example.com, dockerImage=registry.example.test/next-web:abc123, buildPipelineId=build-pipeline-1, deployPipelineId=deploy-pipeline-1'
     );
@@ -196,7 +275,7 @@ describe('agent session system prompt', () => {
       },
     });
 
-    expect(prompt).toContain('Selected deploy:');
+    expect(prompt).toContain('DEPLOYS — selected:');
     expect(prompt).toContain(
       '- sample-service: deployUuid=deploy-1, activeAtStart=false, statusAtStart=build_failed, statusMessageAtStart=Dockerfile not found, repo=example-org/service-repo, branch=feature/service-change, serviceSha=service-sha-1, dockerfilePath=services/sample/Dockerfile'
     );
@@ -273,7 +352,7 @@ describe('agent session system prompt', () => {
     expect(prompt).toContain(
       '- sample-service: deployUuid=sample-service-sample-build-1, activeAtStart=false, statusAtStart=pending, statusMessageAtStart=<none>'
     );
-    expect(prompt).toContain('Deploy roster:');
+    expect(prompt).toContain('DEPLOYS — roster:');
     expect(prompt).not.toContain('Fresh repository reads:');
     expect(prompt).not.toContain('Mismatch handling:');
     expect(prompt).not.toContain('lifecycle.yaml');
@@ -361,6 +440,11 @@ describe('agent session system prompt', () => {
         latestCommit: 'abc123',
         repositoryUrl: 'https://github.com/example-org/example-repo',
       },
+      lifecycleConfig: {
+        status: 'present',
+        path: 'lifecycle.yaml',
+        declaredServices: ['next-web'],
+      },
       services: [
         {
           name: 'next-web',
@@ -492,6 +576,11 @@ describe('agent session system prompt', () => {
         latestCommit: 'abc123',
         repositoryUrl: 'https://github.com/example-org/example-repo',
       },
+      lifecycleConfig: {
+        status: 'present',
+        path: 'lifecycle.yaml',
+        declaredServices: ['next-web'],
+      },
       services: [
         {
           name: 'next-web',
diff --git a/src/server/lib/agentSession/__tests__/workspaceEditorProxy.test.ts b/src/server/lib/agentSession/__tests__/workspaceEditorProxy.test.ts
index 8393af38..65a59efb 100644
--- a/src/server/lib/agentSession/__tests__/workspaceEditorProxy.test.ts
+++ b/src/server/lib/agentSession/__tests__/workspaceEditorProxy.test.ts
@@ -14,7 +14,16 @@
  * limitations under the License.
  */
 
-import { buildWorkspaceEditorProxyHeaders, serializeSocketHttpResponse } from '../workspaceEditorProxy';
+import {
+  buildWorkspaceEditorProxyHeaders,
+  serializeSocketHttpResponse,
+  classifyEditorProxyFailure,
+  resolveEditorProxyFailureMapping,
+  buildWorkspaceEditorErrorPage,
+  isEditorNavigationRequest,
+  editorProxyConnections,
+  EDITOR_PROXY_MAX_PER_SESSION,
+} from '../workspaceEditorProxy';
 
 describe('workspaceEditorProxy', () => {
   it('drops hop-by-hop headers for plain HTTP proxy requests', () => {
@@ -96,4 +105,105 @@ describe('workspaceEditorProxy', () => {
       }).toString('utf8')
     ).toBe('HTTP/1.1 502 Bad Gateway\r\nContent-Length: 18\r\n\r\neditor unavailable');
   });
+
+  describe('classifyEditorProxyFailure', () => {
+    it('maps explicit coded errors', () => {
+      expect(classifyEditorProxyFailure(new Error('editor-proxy-timeout'))).toBe('timeout');
+      expect(classifyEditorProxyFailure(new Error('editor-proxy-capacity'))).toBe('capacity');
+    });
+
+    it('maps auth failures', () => {
+      expect(classifyEditorProxyFailure(new Error('Authentication token is required'))).toBe('auth');
+      expect(classifyEditorProxyFailure(new Error('Forbidden: you do not own this session'))).toBe('auth');
+    });
+
+    it('prefers a suspended workspace over a missing pod', () => {
+      expect(classifyEditorProxyFailure(new Error('Workspace is not ready'), { workspaceUnavailable: true })).toBe(
+        'workspace-suspended'
+      );
+    });
+
+    it('maps a missing pod / not-found to pod-gone', () => {
+      expect(classifyEditorProxyFailure(new Error('Session not found or not active'), { podMissing: true })).toBe(
+        'pod-gone'
+      );
+      expect(classifyEditorProxyFailure(new Error('Session not found or not active'))).toBe('pod-gone');
+    });
+
+    it('maps socket error codes and unknowns to unreachable', () => {
+      expect(classifyEditorProxyFailure(Object.assign(new Error('x'), { code: 'ECONNREFUSED' }))).toBe('unreachable');
+      expect(classifyEditorProxyFailure(new Error('something weird'))).toBe('unreachable');
+    });
+  });
+
+  describe('resolveEditorProxyFailureMapping', () => {
+    it('maps reasons to the documented status codes', () => {
+      expect(resolveEditorProxyFailureMapping('auth').status).toBe(401);
+      expect(resolveEditorProxyFailureMapping('workspace-suspended').status).toBe(409);
+      expect(resolveEditorProxyFailureMapping('pod-gone').status).toBe(410);
+      expect(resolveEditorProxyFailureMapping('unreachable').status).toBe(502);
+      expect(resolveEditorProxyFailureMapping('timeout').status).toBe(504);
+      expect(resolveEditorProxyFailureMapping('capacity').status).toBe(503);
+    });
+  });
+
+  describe('isEditorNavigationRequest', () => {
+    it('detects html navigations and rejects asset/ws requests', () => {
+      expect(isEditorNavigationRequest({ accept: 'text/html,application/xhtml+xml' })).toBe(true);
+      expect(isEditorNavigationRequest({ accept: 'application/json' })).toBe(false);
+      expect(isEditorNavigationRequest({})).toBe(false);
+    });
+  });
+
+  describe('buildWorkspaceEditorErrorPage', () => {
+    it('renders a branded page with an escaped deep-link CTA', () => {
+      const html = buildWorkspaceEditorErrorPage({
+        reason: 'workspace-suspended',
+        sessionUrl: 'https://lfc.test/new/abc?x="1"',
+      });
+      expect(html).toContain('Workspace suspended');
+      expect(html).toContain('Resume workspace');
+      // URL quotes must be escaped so they cannot break out of the href attribute.
+      expect(html).toContain('href="https://lfc.test/new/abc?x=&quot;1&quot;"');
+      expect(html).not.toContain('x="1"');
+    });
+  });
+
+  describe('editorProxyConnections registry', () => {
+    const sessionId = 'registry-test-session';
+
+    afterEach(() => {
+      // The registry is imported module state shared by every case in this file, so each case must
+      // release the tokens it registered. Assert that here (the previous loop broke out immediately
+      // and drained nothing) so a leaked token fails loudly instead of skewing the cap test.
+      const leaked = editorProxyConnections.sizeForSession(sessionId);
+      expect(leaked).toBe(0);
+    });
+
+    it('registers and releases, keeping the gauge consistent', () => {
+      const before = editorProxyConnections.size();
+      const a = {};
+      const b = {};
+      expect(editorProxyConnections.tryRegister(sessionId, a)).toBe(true);
+      expect(editorProxyConnections.tryRegister(sessionId, b)).toBe(true);
+      expect(editorProxyConnections.sizeForSession(sessionId)).toBe(2);
+      expect(editorProxyConnections.size()).toBe(before + 2);
+      editorProxyConnections.release(sessionId, a);
+      editorProxyConnections.release(sessionId, b);
+      expect(editorProxyConnections.sizeForSession(sessionId)).toBe(0);
+      expect(editorProxyConnections.size()).toBe(before);
+    });
+
+    it('rejects once the per-session cap is reached', () => {
+      const tokens: object[] = [];
+      for (let i = 0; i < EDITOR_PROXY_MAX_PER_SESSION; i += 1) {
+        const token = {};
+        tokens.push(token);
+        expect(editorProxyConnections.tryRegister(sessionId, token)).toBe(true);
+      }
+      expect(editorProxyConnections.tryRegister(sessionId, {})).toBe(false);
+      tokens.forEach((token) => editorProxyConnections.release(sessionId, token));
+      expect(editorProxyConnections.sizeForSession(sessionId)).toBe(0);
+    });
+  });
 });
diff --git a/src/server/lib/agentSession/runtimeConfig.ts b/src/server/lib/agentSession/runtimeConfig.ts
index 4cdf6331..9210cfb7 100644
--- a/src/server/lib/agentSession/runtimeConfig.ts
+++ b/src/server/lib/agentSession/runtimeConfig.ts
@@ -108,7 +108,7 @@ export const DEFAULT_AGENT_SESSION_CONTROL_PLANE_APPEND_SYSTEM_PROMPT = [
   'When a tool execution is not approved, do not retry the denied action. Use the denial reason as updated guidance and continue from there.',
   'When showing multi-line exact text such as file contents, command output, diffs, or JSON, use a fenced code block instead of inline code.',
 ].join('\n');
-export const DEFAULT_AGENT_SESSION_MAX_ITERATIONS = 8;
+export const DEFAULT_AGENT_SESSION_MAX_ITERATIONS = 20;
 export const DEFAULT_AGENT_SESSION_WORKSPACE_TOOL_DISCOVERY_TIMEOUT_MS = 3000;
 export const DEFAULT_AGENT_SESSION_WORKSPACE_TOOL_EXECUTION_TIMEOUT_MS = 15000;
 export const DEFAULT_AGENT_SESSION_KEEP_ATTACHED_SERVICES_ON_SESSION_NODE = true;
@@ -293,16 +293,6 @@ export function resolveAgentSessionReadinessFromDefaults(
   };
 }
 
-export function mergeAgentSessionReadiness(
-  baseReadiness: ResolvedAgentSessionReadinessConfig,
-  overrides?: AgentSessionReadinessConfig | null
-): ResolvedAgentSessionReadinessConfig {
-  return {
-    timeoutMs: normalizeNonNegativeInteger(overrides?.timeoutMs) ?? baseReadiness.timeoutMs,
-    pollMs: normalizeNonNegativeInteger(overrides?.pollMs) ?? baseReadiness.pollMs,
-  };
-}
-
 export function mergeAgentSessionReadinessForServices(
   baseReadiness: ResolvedAgentSessionReadinessConfig,
   overrides: Array<AgentSessionReadinessConfig | null | undefined>
diff --git a/src/server/lib/agentSession/startupFailureState.ts b/src/server/lib/agentSession/startupFailureState.ts
index 18dfd04a..e2b8a63d 100644
--- a/src/server/lib/agentSession/startupFailureState.ts
+++ b/src/server/lib/agentSession/startupFailureState.ts
@@ -15,6 +15,7 @@
  */
 
 import type { Redis } from 'ioredis';
+import { isAppError, type AppErrorAction } from 'server/lib/appError';
 
 const AGENT_SESSION_STARTUP_FAILURE_REDIS_PREFIX = 'lifecycle:agent:session:startup-failure:';
 const AGENT_SESSION_STARTUP_FAILURE_TTL_SECONDS = 60 * 60;
@@ -53,6 +54,42 @@ export interface WorkspaceRuntimeFailure {
   recordedAt: string;
   retryable: boolean;
   origin: WorkspaceRuntimeFailureOrigin;
+  /** Machine-readable failure code so durable failures honor the coded error contract. Optional for back-compat. */
+  code?: string;
+  /** Optional coded next-step affordance (mirrors AppError.nextAction). */
+  nextAction?: AppErrorAction;
+}
+
+/** Stable fallback codes per failure stage when the originating error carries no AppError code. */
+const DEFAULT_CODE_BY_STAGE: Record<WorkspaceRuntimeFailureStage, string> = {
+  create_session: 'workspace_create_session_failed',
+  prepare_infrastructure: 'workspace_prepare_infrastructure_failed',
+  connect_runtime: 'workspace_connect_runtime_failed',
+  attach_services: 'workspace_attach_services_failed',
+  suspend: 'workspace_suspend_failed',
+  resume: 'workspace_resume_failed',
+  cleanup: 'workspace_cleanup_failed',
+};
+
+/** Resolves the failure code: an explicit code wins, then an AppError's own code, then the stage default. */
+function deriveFailureCode(error: unknown, stage: WorkspaceRuntimeFailureStage, explicitCode?: string): string {
+  if (explicitCode) {
+    return explicitCode;
+  }
+
+  const stageDefault = DEFAULT_CODE_BY_STAGE[stage];
+  // A typed AppError carries its own code; any other error falls back to the stage default.
+  return isAppError(error) ? error.code : stageDefault;
+}
+
+/** Chooses the next action: an explicit one wins, otherwise the AppError's nextAction (if any). */
+function deriveNextAction(error: unknown, explicit?: AppErrorAction): AppErrorAction | undefined {
+  if (explicit) {
+    return explicit;
+  }
+
+  // Only a typed AppError can contribute a next action; a falsy nextAction maps to undefined.
+  return isAppError(error) && error.nextAction ? error.nextAction : undefined;
 }
 
 export type AgentSessionStartupFailureStage = WorkspaceRuntimeFailureStage;
@@ -89,6 +126,24 @@ function isWorkspaceRuntimeFailureOrigin(value: unknown): value is WorkspaceRunt
   );
 }
 
+const APP_ERROR_ACTION_KINDS = ['continue', 'retry', 'reconnect', 'update_key', 'navigate'] as const;
+
+/** Validates an untyped value as an AppErrorAction; returns undefined unless kind and label are well-formed. */
+function readNextAction(value: unknown): AppErrorAction | undefined {
+  if (!isRecord(value)) {
+    return undefined;
+  }
+
+  const { kind, label, href } = value;
+  const isKnownKind = typeof kind === 'string' && (APP_ERROR_ACTION_KINDS as readonly string[]).includes(kind);
+  if (!isKnownKind || typeof label !== 'string') {
+    return undefined;
+  }
+
+  // href is optional and only carried over when it is a string.
+  return { kind: kind as AppErrorAction['kind'], label, ...(typeof href === 'string' ? { href } : {}) };
+}
+
 function normalizeFailureMessage(error: unknown): string {
   const rawMessage =
     error instanceof Error
@@ -254,10 +309,14 @@ export function buildWorkspaceRuntimeFailure(params: {
   origin?: WorkspaceRuntimeFailureOrigin;
   retryable?: boolean;
   recordedAt?: string;
+  /** Explicit stable code; otherwise derived from the AppError code or the stage default. */
+  code?: string;
+  nextAction?: AppErrorAction;
 }): WorkspaceRuntimeFailure {
   const stage = params.stage || 'connect_runtime';
   const message = sanitizeFailureText(normalizeFailureMessage(params.error), DEFAULT_STARTUP_FAILURE_MESSAGE);
   const classified = classifyFailure(message, stage);
+  const nextAction = deriveNextAction(params.error, params.nextAction);
 
   return {
     stage,
@@ -266,6 +325,8 @@ export function buildWorkspaceRuntimeFailure(params: {
     recordedAt: normalizeRecordedAt(params.recordedAt),
     retryable: params.retryable === true,
     origin: params.origin || 'agent_session',
+    code: deriveFailureCode(params.error, stage, params.code),
+    ...(nextAction ? { nextAction } : {}),
   };
 }
 
@@ -287,6 +348,7 @@ export function normalizeWorkspaceRuntimeFailure(
     typeof failure.title === 'string' &&
     typeof failure.message === 'string'
   ) {
+    const nextAction = readNextAction(failure.nextAction);
     return {
       stage: failure.stage,
       title: sanitizeFailureText(failure.title, defaultTitleForStage(failure.stage)),
@@ -294,6 +356,8 @@ export function normalizeWorkspaceRuntimeFailure(
       recordedAt: normalizeRecordedAt(failure.recordedAt ?? fallback.recordedAt),
       retryable: typeof failure.retryable === 'boolean' ? failure.retryable : fallback.retryable === true,
       origin: isWorkspaceRuntimeFailureOrigin(failure.origin) ? failure.origin : fallback.origin || 'legacy',
+      ...(typeof failure.code === 'string' && failure.code ? { code: failure.code } : {}),
+      ...(nextAction ? { nextAction } : {}),
     };
   }
 
diff --git a/src/server/lib/agentSession/systemPrompt.ts b/src/server/lib/agentSession/systemPrompt.ts
index cb72a0df..65cd844e 100644
--- a/src/server/lib/agentSession/systemPrompt.ts
+++ b/src/server/lib/agentSession/systemPrompt.ts
@@ -67,12 +67,19 @@ export interface AgentSessionPromptPullRequestContext {
   repositoryUrl?: string;
 }
 
+export interface AgentSessionPromptLifecycleConfigContext {
+  status: 'present' | 'missing' | 'invalid';
+  path: string;
+  declaredServices?: string[];
+}
+
 export interface AgentSessionPromptContext {
   namespace?: string | null;
   buildUuid?: string | null;
   gatheredAt?: string;
   build?: AgentSessionPromptBuildContext;
   pullRequest?: AgentSessionPromptPullRequestContext;
+  lifecycleConfig?: AgentSessionPromptLifecycleConfigContext;
   services: AgentSessionPromptServiceContext[];
   selectedDeploy?: AgentSessionPromptServiceContext;
   diagnosticServices?: AgentSessionPromptServiceContext[];
@@ -140,14 +147,24 @@ function formatOptionalBoolean(value: boolean | undefined): string {
 export function buildAgentSessionDynamicSystemPrompt(context: AgentSessionPromptContext): string {
   const lines = ['Initial Lifecycle snapshot:'];
 
-  if (context.namespace) {
-    lines.push(`- namespace: ${context.namespace}`);
+  // Surface namespace (top-level or from build) prominently for get_k8s_resources/get_pod_logs.
+  const namespace = context.namespace || context.build?.namespace;
+  if (namespace) {
+    lines.push(`- namespace: ${namespace}`);
   }
 
   if (context.buildUuid) {
     lines.push(`- buildUuid: ${context.buildUuid}`);
   }
 
+  if (context.lifecycleConfig) {
+    const { status, path } = context.lifecycleConfig;
+    lines.push(`- lifecycleConfig: ${status} (${path})`);
+    if (context.lifecycleConfig.declaredServices?.length) {
+      lines.push(`- declaredServices: ${context.lifecycleConfig.declaredServices.join(', ')}`);
+    }
+  }
+
   if (context.gatheredAt) {
     lines.push(`- observedAt: ${context.gatheredAt}`, '- source: lifecycle_db');
   }
@@ -232,11 +249,11 @@ export function buildAgentSessionDynamicSystemPrompt(context: AgentSessionPrompt
       service.deployPipelineId ? `deployPipelineId=${service.deployPipelineId}` : undefined,
     ]);
 
-    lines.push('Selected deploy:', `- ${service.name}${details ? `: ${details}` : ''}`);
+    lines.push('DEPLOYS — selected:', `- ${service.name}${details ? `: ${details}` : ''}`);
   }
 
   if (context.diagnosticServices?.length) {
-    lines.push('Deploy roster:');
+    lines.push('DEPLOYS — roster:');
 
     const diagnosticServices = [...context.diagnosticServices].sort((left, right) =>
       left.name.localeCompare(right.name)
@@ -286,10 +303,58 @@ type BuildDiagnosticContext = {
   source: { repo?: string; branch?: string };
   build?: AgentSessionPromptBuildContext;
   pullRequest?: AgentSessionPromptPullRequestContext;
+  lifecycleConfig?: AgentSessionPromptLifecycleConfigContext;
   deploys: Deploy[];
   diagnosticServices: AgentSessionPromptServiceContext[];
 };
 
+// Representative config path in the snapshot; actual file may be a .lifecycle.yaml/.yml variant.
+const LIFECYCLE_CONFIG_PATH = 'lifecycle.yaml';
+
+// Classifies the repo's lifecycle config as present/missing/invalid for the snapshot; never throws, seeds the cache.
+async function resolveLifecycleConfigPresence(
+  repo: string | undefined,
+  branch: string | undefined,
+  cache: Map<string, Promise<LifecycleConfig | null>>
+): Promise<AgentSessionPromptLifecycleConfigContext | undefined> {
+  if (!repo || !branch) {
+    return undefined;
+  }
+
+  // Rejected fetch => 'invalid'; null => 'missing'. Entries seeded below never reject, so a hit is never 'invalid'.
+  const key = `${repo}::${branch}`;
+  let fetchPromise = cache.get(key);
+  if (!fetchPromise) {
+    fetchPromise = fetchLifecycleConfig(repo, branch).then((config) => config ?? null);
+    // Shared cache stores a non-throwing variant for the workDir path.
+    cache.set(
+      key,
+      fetchPromise.catch(() => null)
+    );
+  }
+
+  try {
+    const config = await fetchPromise;
+    if (!config) {
+      return { status: 'missing', path: LIFECYCLE_CONFIG_PATH };
+    }
+
+    const declaredServices = Array.isArray(config.services)
+      ? config.services
+          .map((service) => normalizeOptionalString(service?.name))
+          .filter((name): name is string => Boolean(name))
+      : [];
+
+    return {
+      status: 'present',
+      path: LIFECYCLE_CONFIG_PATH,
+      ...(declaredServices.length ? { declaredServices } : {}),
+    };
+  } catch {
+    return { status: 'invalid', path: LIFECYCLE_CONFIG_PATH };
+  }
+}
+
 async function fetchCachedLifecycleConfig(
   repositoryName: string,
   branchName: string,
@@ -342,7 +407,10 @@ function formatDeployDiagnosticService(
   };
 }
 
-async function resolveBuildDiagnosticContext(buildUuid?: string | null): Promise<BuildDiagnosticContext> {
+async function resolveBuildDiagnosticContext(
+  buildUuid: string | null | undefined,
+  lifecycleConfigCache: Map<string, Promise<LifecycleConfig | null>>
+): Promise<BuildDiagnosticContext> {
   const normalizedBuildUuid = normalizeOptionalString(buildUuid);
   if (!normalizedBuildUuid) {
     return { source: {}, deploys: [], diagnosticServices: [] };
@@ -360,9 +428,11 @@ async function resolveBuildDiagnosticContext(buildUuid?: string | null): Promise
     repo: normalizeOptionalString(pullRequest?.fullName),
     branch: normalizeOptionalString(pullRequest?.branchName),
   };
+  const lifecycleConfig = await resolveLifecycleConfigPresence(source.repo, source.branch, lifecycleConfigCache);
 
   return {
     source,
+    lifecycleConfig,
     build: build
       ? {
           uuid: build.uuid,
@@ -397,14 +467,14 @@ async function resolveBuildDiagnosticContext(buildUuid?: string | null): Promise
 export async function resolveAgentSessionPromptContext(
   lookup: SessionPromptLookupContext
 ): Promise<AgentSessionPromptContext> {
+  const lifecycleConfigCache = new Map<string, Promise<LifecycleConfig | null>>();
   const [session, deploys, buildSource] = await Promise.all([
     AgentSession.query().findById(lookup.sessionDbId),
     Deploy.query()
       .where({ devModeSessionId: lookup.sessionDbId })
       .withGraphFetched('[deployable, repository, service]'),
-    resolveBuildDiagnosticContext(lookup.buildUuid),
+    resolveBuildDiagnosticContext(lookup.buildUuid, lifecycleConfigCache),
   ]);
-  const lifecycleConfigCache = new Map<string, Promise<LifecycleConfig | null>>();
   const allDeploys = deploys.length > 0 ? deploys : buildSource.deploys;
   const deployById = new Map(allDeploys.filter((deploy) => deploy.id != null).map((deploy) => [deploy.id, deploy]));
 
@@ -500,6 +570,7 @@ export async function resolveAgentSessionPromptContext(
     gatheredAt: new Date().toISOString(),
     build: buildSource.build,
     pullRequest: buildSource.pullRequest,
+    ...(buildSource.lifecycleConfig ? { lifecycleConfig: buildSource.lifecycleConfig } : {}),
     services,
     ...(services[0]?.deployUuid ? { selectedDeploy: services[0] } : {}),
     diagnosticServices: buildSource.diagnosticServices,
diff --git a/src/server/lib/agentSession/workspaceEditorProxy.ts b/src/server/lib/agentSession/workspaceEditorProxy.ts
index bdb9cc29..970e0ff4 100644
--- a/src/server/lib/agentSession/workspaceEditorProxy.ts
+++ b/src/server/lib/agentSession/workspaceEditorProxy.ts
@@ -16,6 +16,25 @@
 
 import { IncomingHttpHeaders, STATUS_CODES } from 'http';
 
+// --- Hardening tunables (rh-2) ---
+/**
+ * Parses a non-negative base-10 integer env override. Unset, empty, non-numeric, or negative values fall back to
+ * the default so a typo in the environment cannot turn a timeout or cap into NaN.
+ */
+function envInt(raw: string | undefined, fallback: number): number {
+  const parsed = parseInt(raw || '', 10);
+  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
+}
+
+// Upstream connect+response budget; reaps pods that accept TCP but never respond.
+export const EDITOR_PROXY_TIMEOUT_MS = envInt(process.env.AGENT_SESSION_EDITOR_PROXY_TIMEOUT_MS, 30000);
+// WS heartbeat: ping on an interval (default 30s), terminate if no pong within the deadline.
+export const EDITOR_PROXY_PING_INTERVAL_MS = envInt(process.env.AGENT_SESSION_EDITOR_PROXY_PING_INTERVAL_MS, 30000);
+export const EDITOR_PROXY_PONG_DEADLINE_MS = envInt(process.env.AGENT_SESSION_EDITOR_PROXY_PONG_DEADLINE_MS, 10000);
+// Concurrent live proxied connections (HTTP in-flight + WS) caps. 0 disables.
+export const EDITOR_PROXY_MAX_PER_SESSION = envInt(process.env.AGENT_SESSION_EDITOR_PROXY_MAX_PER_SESSION, 64);
+export const EDITOR_PROXY_MAX_GLOBAL = envInt(process.env.AGENT_SESSION_EDITOR_PROXY_MAX_GLOBAL, 512);
+
 const HOP_BY_HOP_HEADERS = new Set([
   'connection',
   'keep-alive',
@@ -139,3 +158,210 @@ export function serializeSocketHttpResponse(opts: SerializeSocketHttpResponseOpt
   const headBuffer = Buffer.from(`${lines.join('\r\n')}\r\n\r\n`, 'utf8');
   return bodyBuffer.length > 0 ? Buffer.concat([headBuffer, bodyBuffer]) : headBuffer;
 }
+
+// --- Coded failure mapping (err-4) ---
+// Coded reasons drive the right deep-link CTA on the branded error page.
+export type EditorProxyFailureReason =
+  | 'auth'
+  | 'workspace-suspended'
+  | 'pod-gone'
+  | 'unreachable'
+  | 'timeout'
+  | 'capacity';
+
+export interface EditorProxyFailureMapping {
+  reason: EditorProxyFailureReason;
+  status: number;
+  title: string;
+  message: string;
+  // CTA links back to the Lifecycle session so the user can resume/restart.
+  cta: string;
+}
+
+const EDITOR_PROXY_FAILURE_MAPPING: Record<EditorProxyFailureReason, Omit<EditorProxyFailureMapping, 'reason'>> = {
+  auth: {
+    status: 401,
+    title: 'Editor session expired',
+    message: 'Your editor session is no longer authenticated. Reopen the editor from the session to continue.',
+    cta: 'Back to session',
+  },
+  'workspace-suspended': {
+    // 409 Conflict: the workspace exists but is paused; resume reconciles it.
+    status: 409,
+    title: 'Workspace suspended',
+    message: 'This workspace is suspended. Resume it from the session to reopen the editor.',
+    cta: 'Resume workspace',
+  },
+  'pod-gone': {
+    // 410 Gone: the backing pod no longer exists; a restart re-provisions it.
+    status: 410,
+    title: 'Workspace stopped',
+    message: 'The workspace runtime is no longer running. Restart it from the session to reopen the editor.',
+    cta: 'Restart workspace',
+  },
+  unreachable: {
+    status: 502,
+    title: 'Editor unavailable',
+    message: 'The editor runtime is starting up or temporarily unreachable. Restart the workspace if this persists.',
+    cta: 'Back to session',
+  },
+  timeout: {
+    status: 504,
+    title: 'Editor timed out',
+    message: 'The editor runtime did not respond in time. Restart the workspace if this persists.',
+    cta: 'Restart workspace',
+  },
+  capacity: {
+    status: 503,
+    title: 'Too many editor connections',
+    message: 'This session has too many open editor connections. Close some tabs and try again.',
+    cta: 'Back to session',
+  },
+};
+
+export interface EditorProxyFailureContext {
+  // True when the upstream workspace status is not READY (suspended/failed/etc).
+  workspaceUnavailable?: boolean;
+  // True when a podName/namespace could not be resolved (pod gone).
+  podMissing?: boolean;
+}
+
+// Maps a thrown error (auth string, socket error code, or explicit timeout/capacity) to a coded reason.
+export function classifyEditorProxyFailure(
+  error: unknown,
+  ctx: EditorProxyFailureContext = {}
+): EditorProxyFailureReason {
+  const message = error instanceof Error ? error.message : String(error ?? '');
+  const code = (error as { code?: string } | null)?.code;
+  // Precedence: explicit sentinel messages, then auth, then workspace/pod context, then socket codes.
+  if (message === 'editor-proxy-timeout') {
+    return 'timeout';
+  }
+  if (message === 'editor-proxy-capacity') {
+    return 'capacity';
+  }
+  if (message.includes('Authentication') || message.includes('Forbidden')) {
+    return 'auth';
+  }
+  if (ctx.workspaceUnavailable) {
+    return 'workspace-suspended';
+  }
+  if (ctx.podMissing || message.includes('Session not found')) {
+    return 'pod-gone';
+  }
+  if (code === 'ETIMEDOUT') {
+    return 'timeout'; // an OS-level connect/read timeout is a timeout, not a generic outage
+  }
+  return 'unreachable'; // ECONNREFUSED / ENOTFOUND / EHOSTUNREACH / ECONNRESET and anything unrecognized
+}
+
+export function resolveEditorProxyFailureMapping(reason: EditorProxyFailureReason): EditorProxyFailureMapping {
+  return { reason, ...EDITOR_PROXY_FAILURE_MAPPING[reason] };
+}
+
+// Detects top-level browser navigations (Accept mentions text/html); those get an HTML page, not a raw status body.
+export function isEditorNavigationRequest(headers: IncomingHttpHeaders): boolean {
+  const { accept } = headers;
+  const joined = Array.isArray(accept) ? accept.join(',') : accept || '';
+  return joined.indexOf('text/html') !== -1;
+}
+
+// Escapes the five HTML-significant characters (& < > " ') so a value can be embedded in element
+// text or a quoted attribute. Each input character is mapped once, so already-escaped text such as
+// "&amp;" is escaped again rather than passed through.
+function escapeHtml(value: string): string {
+  const entities: Record<string, string> = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
+  const significant = /[&<>"']/g;
+  return value.replace(significant, (ch) => entities[ch] ?? ch);
+}
+
+export interface BuildWorkspaceEditorErrorPageOpts {
+  reason: EditorProxyFailureReason;
+  // Absolute or relative deep-link back to the Lifecycle session.
+  sessionUrl: string;
+}
+
+// Small branded HTML error page for navigation failures (err-4).
+export function buildWorkspaceEditorErrorPage(opts: BuildWorkspaceEditorErrorPageOpts): string {
+  const { reason, sessionUrl } = opts;
+  const mapping = resolveEditorProxyFailureMapping(reason);
+  const safeUrl = escapeHtml(sessionUrl);
+  return `<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<title>${escapeHtml(mapping.title)}</title>
+<style>
+  :root { color-scheme: light dark; }
+  body { margin: 0; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+    background: #0b0d12; color: #e6e8ee; display: flex; min-height: 100vh; align-items: center; justify-content: center; }
+  .card { max-width: 30rem; padding: 2.5rem; text-align: center; }
+  .badge { display: inline-block; font-size: 0.75rem; letter-spacing: 0.08em; text-transform: uppercase;
+    color: #8a90a2; margin-bottom: 1rem; }
+  h1 { font-size: 1.4rem; margin: 0 0 0.75rem; }
+  p { color: #aab0c0; line-height: 1.5; margin: 0 0 1.75rem; }
+  a.cta { display: inline-block; background: #4f7cff; color: #fff; text-decoration: none; font-weight: 600;
+    padding: 0.7rem 1.4rem; border-radius: 0.6rem; }
+  a.cta:hover { background: #3f6cef; }
+</style>
+</head>
+<body>
+  <div class="card">
+    <div class="badge">Lifecycle Workspace &middot; ${escapeHtml(mapping.reason)}</div>
+    <h1>${escapeHtml(mapping.title)}</h1>
+    <p>${escapeHtml(mapping.message)}</p>
+    <a class="cta" href="${safeUrl}">${escapeHtml(mapping.cta)}</a>
+  </div>
+</body>
+</html>`;
+}
+
+// --- Connection registry (rh-2) ---
+// Module-level live-socket registry so abandoned sockets are observable and over-cap attempts are rejected.
+class EditorProxyConnectionRegistry {
+  private readonly bySession = new Map<string, Set<object>>();
+  private total = 0;
+
+  size(): number {
+    return this.total;
+  }
+
+  sizeForSession(sessionId: string): number {
+    return this.bySession.get(sessionId)?.size ?? 0;
+  }
+
+  // Returns false (and registers nothing) when a cap would be exceeded.
+  // A token that is already registered is reported as held without being re-counted, so a
+  // repeated call for a live connection is not rejected just because a cap has since been reached.
+  tryRegister(sessionId: string, token: object): boolean {
+    const set = this.bySession.get(sessionId) ?? new Set<object>();
+    if (set.has(token)) {
+      return true;
+    }
+    if (EDITOR_PROXY_MAX_GLOBAL > 0 && this.total >= EDITOR_PROXY_MAX_GLOBAL) {
+      return false;
+    }
+    if (EDITOR_PROXY_MAX_PER_SESSION > 0 && set.size >= EDITOR_PROXY_MAX_PER_SESSION) {
+      return false;
+    }
+    this.bySession.set(sessionId, set);
+    set.add(token);
+    this.total += 1;
+    return true;
+  }
+
+  // Drops a token and forgets the session once its last connection is gone; unknown tokens are a no-op.
+  release(sessionId: string, token: object): void {
+    const set = this.bySession.get(sessionId);
+    if (!set || !set.delete(token)) {
+      return;
+    }
+    this.total -= 1;
+    if (set.size === 0) {
+      this.bySession.delete(sessionId);
+    }
+  }
+}
+
+export const editorProxyConnections = new EditorProxyConnectionRegistry();
diff --git a/src/server/lib/appError.ts b/src/server/lib/appError.ts
new file mode 100644
index 00000000..b19ee22a
--- /dev/null
+++ b/src/server/lib/appError.ts
@@ -0,0 +1,130 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Machine-readable API error contract: httpStatus + stable code + optional recovery action. */
+
+export type AppErrorActionKind = 'continue' | 'retry' | 'reconnect' | 'update_key' | 'navigate';
+
+export interface AppErrorAction {
+  kind: AppErrorActionKind;
+  label: string;
+  href?: string;
+}
+
+export interface AppErrorParams {
+  httpStatus: number;
+  code: string;
+  message: string;
+  details?: Record<string, unknown>;
+  nextAction?: AppErrorAction;
+  /** Whether the same request is worth retrying as-is (rate limits, transient 5xx). */
+  retryable?: boolean;
+  cause?: unknown;
+}
+
+export class AppError extends Error {
+  readonly httpStatus: number;
+  readonly code: string;
+  readonly details?: Record<string, unknown>;
+  readonly nextAction?: AppErrorAction;
+  readonly retryable: boolean;
+
+  constructor(params: AppErrorParams) {
+    super(params.message);
+    this.name = 'AppError';
+    this.httpStatus = params.httpStatus;
+    this.code = params.code;
+    this.details = params.details;
+    this.nextAction = params.nextAction;
+    this.retryable = params.retryable ?? false;
+    if (params.cause !== undefined) {
+      (this as { cause?: unknown }).cause = params.cause;
+    }
+  }
+}
+
+/**
+ * Type guard for the API error contract. Accepts real AppError instances and, to ease migration,
+ * any Error that carries a numeric `httpStatus` together with a string `code`.
+ */
+export function isAppError(error: unknown): error is AppError {
+  if (!(error instanceof Error)) {
+    return false;
+  }
+  const { httpStatus, code } = error as { httpStatus?: unknown; code?: unknown };
+  return error instanceof AppError || (typeof httpStatus === 'number' && typeof code === 'string');
+}
+
+/** Common HTTP error shapes so routes/services throw a typed error instead of hand-building a response. */
+export class UnauthorizedError extends AppError {
+  constructor(message = 'Authentication is required.', details?: Record<string, unknown>) {
+    super({ httpStatus: 401, code: 'unauthorized', message, details });
+    this.name = 'UnauthorizedError';
+  }
+}
+
+export class ForbiddenError extends AppError {
+  constructor(message = 'You do not have access to this resource.', details?: Record<string, unknown>) {
+    super({ httpStatus: 403, code: 'forbidden', message, details });
+    this.name = 'ForbiddenError';
+  }
+}
+
+export class NotFoundError extends AppError {
+  constructor(
+    message = 'The requested resource was not found.',
+    code = 'not_found',
+    details?: Record<string, unknown>
+  ) {
+    super({ httpStatus: 404, code, message, details });
+    this.name = 'NotFoundError';
+  }
+}
+
+export class ConflictError extends AppError {
+  constructor(message: string, code = 'conflict', details?: Record<string, unknown>) {
+    super({ httpStatus: 409, code, message, details });
+    this.name = 'ConflictError';
+  }
+}
+
+export class BadRequestError extends AppError {
+  constructor(message: string, code = 'bad_request', details?: Record<string, unknown>) {
+    super({ httpStatus: 400, code, message, details });
+    this.name = 'BadRequestError';
+  }
+}
+
+export interface SerializedAppError {
+  message: string;
+  code?: string;
+  details?: Record<string, unknown>;
+  nextAction?: AppErrorAction;
+}
+
+/** Serializes a thrown value for the response body; code/details/nextAction appear only for AppError-shaped errors. */
+export function toErrorResponseError(error: unknown): SerializedAppError {
+  if (!isAppError(error)) {
+    return { message: error instanceof Error ? error.message : 'An unknown error occurred.' };
+  }
+  const { message, code, details, nextAction } = error;
+  return {
+    message,
+    code,
+    ...(details ? { details } : {}),
+    ...(nextAction ? { nextAction } : {}),
+  };
+}
diff --git a/src/server/lib/auth.test.ts b/src/server/lib/auth.test.ts
new file mode 100644
index 00000000..306679b1
--- /dev/null
+++ b/src/server/lib/auth.test.ts
@@ -0,0 +1,90 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const mockCreateRemoteJWKSet = jest.fn();
+const mockJwtVerify = jest.fn();
+
+jest.mock('jose', () => ({
+  createRemoteJWKSet: (...args: unknown[]) => mockCreateRemoteJWKSet(...args),
+  jwtVerify: (...args: unknown[]) => mockJwtVerify(...args),
+}));
+
+import { verifyBearerToken } from './auth';
+
+describe('verifyBearerToken', () => {
+  const originalIssuer = process.env.KEYCLOAK_ISSUER;
+  const originalAudience = process.env.KEYCLOAK_CLIENT_ID;
+  const originalJwksUrl = process.env.KEYCLOAK_JWKS_URL;
+
+  function restoreEnvValue(name: string, value: string | undefined) {
+    if (value === undefined) {
+      delete process.env[name];
+      return;
+    }
+
+    process.env[name] = value;
+  }
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    process.env.KEYCLOAK_ISSUER = 'http://localhost:8081/realms/lifecycle';
+    process.env.KEYCLOAK_CLIENT_ID = 'lifecycle-core';
+    process.env.KEYCLOAK_JWKS_URL = 'http://localhost:8081/realms/lifecycle/protocol/openid-connect/certs';
+    mockCreateRemoteJWKSet.mockReturnValue('jwks');
+  });
+
+  afterAll(() => {
+    restoreEnvValue('KEYCLOAK_ISSUER', originalIssuer);
+    restoreEnvValue('KEYCLOAK_CLIENT_ID', originalAudience);
+    restoreEnvValue('KEYCLOAK_JWKS_URL', originalJwksUrl);
+  });
+
+  it('logs JWT verification failures without serializing token claims', async () => {
+    const warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => undefined);
+    const error = Object.assign(new Error('"exp" claim timestamp check failed'), {
+      code: 'ERR_JWT_EXPIRED',
+      name: 'JWTExpired',
+      claim: 'exp',
+      reason: 'check_failed',
+      payload: {
+        email: 'sensitive@example.com',
+        preferred_username: 'sensitive-user',
+      },
+    });
+    mockJwtVerify.mockRejectedValue(error);
+
+    const result = await verifyBearerToken('expired-token');
+
+    expect(result).toEqual({
+      success: false,
+      error: {
+        message: 'Authentication failed: "exp" claim timestamp check failed',
+        status: 401,
+      },
+    });
+    expect(warnSpy).toHaveBeenCalledWith('Auth: JWT verification failed', {
+      name: 'JWTExpired',
+      message: '"exp" claim timestamp check failed',
+      code: 'ERR_JWT_EXPIRED',
+      claim: 'exp',
+      reason: 'check_failed',
+    });
+    expect(JSON.stringify(warnSpy.mock.calls)).not.toContain('sensitive@example.com');
+    expect(JSON.stringify(warnSpy.mock.calls)).not.toContain('sensitive-user');
+
+    warnSpy.mockRestore();
+  });
+});
diff --git a/src/server/lib/auth.ts b/src/server/lib/auth.ts
index 156d328a..1b0f939a 100644
--- a/src/server/lib/auth.ts
+++ b/src/server/lib/auth.ts
@@ -33,6 +33,14 @@ type RequestWithHeaders = {
   headers?: HeadersLike | null;
 };
 
+type JwtVerificationErrorSummary = {
+  name: string;
+  message: string;
+  code?: unknown;
+  claim?: unknown;
+  reason?: unknown;
+};
+
 type RemoteJwksUrl = Parameters<typeof createRemoteJWKSet>[0];
 type UrlConstructor = new (input: string, base?: string) => unknown;
 
@@ -54,6 +62,26 @@ function buildJwksUrl(input: string): RemoteJwksUrl {
   return new urlCtor(input) as RemoteJwksUrl;
 }
 
+// Reduces a JWT verification error to a small, log-safe summary (name, message, and string diagnostics).
+function summarizeJwtVerificationError(error: unknown): JwtVerificationErrorSummary {
+  const maybeError = error as Partial<JwtVerificationErrorSummary> | null | undefined;
+  const summary: JwtVerificationErrorSummary = {
+    name: error instanceof Error ? error.name : typeof maybeError?.name === 'string' ? maybeError.name : 'Error',
+    message: error instanceof Error ? error.message : String(error),
+  };
+  // Copy only string-valued diagnostics: an object in these fields could carry token claims into the log.
+  if (typeof maybeError?.code === 'string') {
+    summary.code = maybeError.code;
+  }
+  if (typeof maybeError?.claim === 'string') {
+    summary.claim = maybeError.claim;
+  }
+  if (typeof maybeError?.reason === 'string') {
+    summary.reason = maybeError.reason;
+  }
+  return summary;
+}
+
 export async function verifyBearerToken(token: string | null | undefined): Promise<AuthResult> {
   if (!token) {
     return {
@@ -62,7 +90,6 @@ export async function verifyBearerToken(token: string | null | undefined): Promi
     };
   }
 
-  // 2. Get Keycloak configuration from environment variables.
   const issuer = process.env.KEYCLOAK_ISSUER;
   const audience = process.env.KEYCLOAK_CLIENT_ID;
   const jwksUrl = process.env.KEYCLOAK_JWKS_URL;
@@ -76,22 +103,18 @@ export async function verifyBearerToken(token: string | null | undefined): Promi
   }
 
   try {
-    // 3. Fetch the JSON Web Key Set (JWKS) from your Keycloak server.
     const JWKS = createRemoteJWKSet(buildJwksUrl(jwksUrl));
 
-    // 4. Verify the token. This function checks the signature, expiration, issuer, and audience.
     const { payload } = await jwtVerify(token, JWKS, {
       issuer,
       audience,
     });
 
-    // 5. If verification is successful, return a success result.
     return { success: true, payload };
   } catch (error) {
     const errorMessage = error instanceof Error ? error.message : 'An unknown error occurred';
-    console.error('Auth: JWT verification failed', error);
+    console.warn('Auth: JWT verification failed', summarizeJwtVerificationError(error));
 
-    // 6. If any part of the verification fails, return an error.
     return {
       success: false,
       error: { message: `Authentication failed: ${errorMessage}`, status: 401 },
diff --git a/src/server/lib/codefresh/index.ts b/src/server/lib/codefresh/index.ts
index f9291dd2..c77a40e3 100644
--- a/src/server/lib/codefresh/index.ts
+++ b/src/server/lib/codefresh/index.ts
@@ -135,13 +135,22 @@ export async function kubeContextStep({ context, cluster }: { context: string; c
   };
 }
 
-export const getLogs = async (id: string) => {
+// Typed result so callers can tell "fetched (maybe empty)" from "fetch failed".
+export type GetLogsResult = { ok: true; output: string } | { ok: false; reason: string };
+
+export const getLogsResult = async (id: string): Promise<GetLogsResult> => {
   try {
     const command = `codefresh logs ${id}`;
     const output = await shellPromise(command);
-    return output;
+    return { ok: true, output: output ?? '' };
   } catch (error) {
     getLogger().error({ error }, `Codefresh: getLogs failed pipelineId=${id}`);
-    return '';
+    return { ok: false, reason: error instanceof Error ? error.message : String(error) };
   }
 };
+
+// Back-compat string wrapper: failures collapse to '' (existing deploy.ts behavior).
+export const getLogs = async (id: string): Promise<string> => {
+  const result = await getLogsResult(id);
+  return result.ok ? result.output : '';
+};
diff --git a/src/server/lib/createApiHandler.ts b/src/server/lib/createApiHandler.ts
index 3f9ab79f..4e6f8b6e 100644
--- a/src/server/lib/createApiHandler.ts
+++ b/src/server/lib/createApiHandler.ts
@@ -59,6 +59,7 @@ export function createApiHandler(handler: RouteHandler, options?: ApiHandlerOpti
         throw error;
       }
 
+      // errorResponse honors AppError.httpStatus; non-AppErrors fall back to 500.
       return errorResponse(error, { status: 500 }, req);
     }
   };
diff --git a/src/server/lib/get-user.ts b/src/server/lib/get-user.ts
index d0fa74e2..7ed024b0 100644
--- a/src/server/lib/get-user.ts
+++ b/src/server/lib/get-user.ts
@@ -16,6 +16,7 @@
 
 import type { NextRequest } from 'next/server';
 import type { JWTPayload } from 'jose';
+import { UnauthorizedError } from './appError';
 
 const decode = <T = JWTPayload>(raw: string | null): T | null => {
   if (!raw) return null;
@@ -139,3 +140,12 @@ export function getRequestUserIdentity(req: NextRequest): RequestUserIdentity |
 
   return buildUserIdentity(null, getLocalDevUserId());
 }
+
+/** Resolve the request identity or throw UnauthorizedError; the handler wrapper maps it to a coded 401. */
+export function requireRequestUserIdentity(req: NextRequest): RequestUserIdentity {
+  const identity = getRequestUserIdentity(req);
+  if (!identity) {
+    throw new UnauthorizedError();
+  }
+  return identity;
+}
diff --git a/src/server/lib/response.ts b/src/server/lib/response.ts
index b6db7248..f0bb5357 100644
--- a/src/server/lib/response.ts
+++ b/src/server/lib/response.ts
@@ -17,6 +17,7 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { PaginationMetadata } from './paginate';
 import { getLogger } from 'server/lib/logger';
+import { isAppError, toErrorResponseError, type AppErrorAction } from './appError';
 
 interface Metadata {
   pagination?: PaginationMetadata;
@@ -26,7 +27,7 @@ interface Metadata {
 
 type SuccessStatusCode = 200 | 201;
 
-type ErrorStatusCode = 400 | 401 | 403 | 404 | 409 | 500 | 502 | 503;
+type ErrorStatusCode = 400 | 401 | 403 | 404 | 409 | 410 | 422 | 429 | 500 | 502 | 503;
 
 interface SuccessResponse<T> {
   request_id: string;
@@ -45,6 +46,10 @@ export interface ErrorResponse {
   data: unknown | null;
   error: {
     message: string;
+    /** Stable, machine-readable discriminant. The UI switches on this, not on the message. */
+    code?: string;
+    details?: Record<string, unknown>;
+    nextAction?: AppErrorAction;
   };
 }
 
@@ -78,16 +83,19 @@ export function errorResponse(error: unknown, options: ErrorResponseOptions, req
     errorStack = error.stack || '';
   }
 
-  getLogger().error({ error, stack: errorStack }, `API: error message=${errorMessage}`);
+  // Honor AppError.httpStatus so a 409/422/etc. isn't shipped as the caller's default 500.
+  const status = isAppError(error) ? (error.httpStatus as ErrorStatusCode) : options.status;
 
-  const { status } = options;
+  const appErrorCode = isAppError(error) ? error.code : undefined;
+  getLogger().error(
+    { error, stack: errorStack, code: appErrorCode, status },
+    `API: error message=${errorMessage}${appErrorCode ? ` code=${appErrorCode}` : ''}`
+  );
 
   const body: ErrorResponse = {
     request_id: req.headers.get('x-request-id') || '',
     data: options.data ?? null,
-    error: {
-      message: error instanceof Error ? error.message : 'An unknown error occurred.',
-    },
+    error: toErrorResponseError(error),
   };
   return NextResponse.json(body, { status });
 }
diff --git a/src/server/lib/validation/sitesConfigSchemas.ts b/src/server/lib/validation/sitesConfigSchemas.ts
new file mode 100644
index 00000000..66af0704
--- /dev/null
+++ b/src/server/lib/validation/sitesConfigSchemas.ts
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+export const sitesConfigSchema = {
+  type: 'object',
+  additionalProperties: false,
+  required: ['enabled', 'domain', 'port', 'hostPrefix', 'ttl', 'upload', 'storage', 'cleanup'],
+  properties: {
+    enabled: { type: 'boolean' },
+    domain: { type: 'string', minLength: 1 },
+    port: { type: ['integer', 'null'], minimum: 1, maximum: 65535 },
+    hostPrefix: { type: 'string', minLength: 1 },
+    ttl: {
+      type: 'object',
+      additionalProperties: false,
+      required: ['enabled', 'defaultDays', 'extensionDays'],
+      properties: {
+        enabled: { type: 'boolean' },
+        defaultDays: { type: 'integer', minimum: 1 },
+        extensionDays: { type: 'integer', minimum: 1 },
+      },
+    },
+    upload: {
+      type: 'object',
+      additionalProperties: false,
+      required: ['maxUploadBytes', 'maxExtractedBytes', 'maxFiles', 'allowedExtensions'],
+      properties: {
+        maxUploadBytes: { type: 'integer', minimum: 1 },
+        maxExtractedBytes: { type: 'integer', minimum: 1 },
+        maxFiles: { type: 'integer', minimum: 1 },
+        allowedExtensions: {
+          type: 'array',
+          minItems: 1,
+          items: { type: 'string', minLength: 1, pattern: '^\\.?[A-Za-z0-9][A-Za-z0-9.+-]*$' },
+        },
+        allowedTypes: {
+          type: 'array',
+          minItems: 1,
+          items: { type: 'string', minLength: 1, pattern: '^\\.?[A-Za-z0-9][A-Za-z0-9.+-]*$' },
+        },
+      },
+    },
+    storage: {
+      type: 'object',
+      additionalProperties: false,
+      required: ['backend', 'bucket', 'prefix', 'region', 'endpoint', 'forcePathStyle'],
+      properties: {
+        backend: { type: 'string', enum: ['s3', 'minio'] },
+        bucket: { type: 'string', minLength: 1 },
+        prefix: { type: 'string' },
+        region: { type: 'string', minLength: 1 },
+        endpoint: { type: ['string', 'null'] },
+        forcePathStyle: { type: ['boolean', 'null'] },
+      },
+    },
+    cleanup: {
+      type: 'object',
+      additionalProperties: false,
+      required: ['enabled', 'intervalMinutes'],
+      properties: {
+        enabled: { type: 'boolean' },
+        intervalMinutes: { type: 'integer', minimum: 1 },
+      },
+    },
+  },
+};
diff --git a/src/server/services/__tests__/agentSession.test.ts b/src/server/services/__tests__/agentSession.test.ts
index 0f6bc5f4..55b56544 100644
--- a/src/server/services/__tests__/agentSession.test.ts
+++ b/src/server/services/__tests__/agentSession.test.ts
@@ -2348,6 +2348,39 @@ describe('AgentSessionService', () => {
     recordFailureSpy.mockRestore();
   });
 
+  it('nulls podName in the suspend claim before deleting the pod so a crash never leaves READY + a live pod (sr-3)', async () => {
+    const chatSession = {
+      id: 321,
+      uuid: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
+      userId: 'sample-user',
+      sessionKind: AgentSessionKind.CHAT,
+      status: 'active',
+      workspaceStatus: AgentWorkspaceStatus.READY,
+      chatStatus: AgentChatStatus.READY,
+      namespace: 'chat-aaaaaaaa',
+      podName: 'agent-aaaaaaaa',
+      pvcName: 'agent-pvc-aaaaaaaa',
+    };
+    mockSessionQuery.findOne.mockResolvedValueOnce(chatSession);
+    mockSessionQuery.forUpdate.mockResolvedValueOnce(chatSession);
+    queuePatchedSession(chatSession);
+    queuePatchedSession({ ...chatSession, workspaceStatus: AgentWorkspaceStatus.HIBERNATED, podName: null });
+
+    await AgentSessionService.suspendChatRuntime({
+      sessionId: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
+      userId: 'sample-user',
+    });
+
+    // The claim (first session patch) clears podName before the pod is deleted.
+    const claimPatch = mockSessionQuery.patchAndFetchById.mock.calls[0][1] as Record<string, unknown>;
+    expect(claimPatch).toMatchObject({ workspaceStatus: AgentWorkspaceStatus.READY, podName: null });
+    expect(mockSessionQuery.patchAndFetchById.mock.invocationCallOrder[0]).toBeLessThan(
+      (deleteSessionWorkspacePod as jest.Mock).mock.invocationCallOrder[0]
+    );
+    // The pod is still deleted using the captured original podName.
+    expect(deleteSessionWorkspacePod).toHaveBeenCalledWith('chat-aaaaaaaa', 'agent-aaaaaaaa');
+  });
+
   it.each([AgentSessionKind.ENVIRONMENT, AgentSessionKind.SANDBOX])(
     'rejects %s sessions during chat runtime resume provisioning',
     async (sessionKind) => {
@@ -2530,7 +2563,117 @@ describe('AgentSessionService', () => {
       })
     ).rejects.toThrow('resume pod failed');
 
-    expectSandboxFailure({ stage: 'resume', origin: 'resume' });
+    // Resume failures are retryable (sr-2), so assert retryable:true inline.
+    expect(sandboxWritePayloads()).toContainEqual(
+      expect.objectContaining({
+        status: 'failed',
+        error: expect.objectContaining({ stage: 'resume', origin: 'resume', retryable: true }),
+      })
+    );
+    // sr-1: a failed resume reuses the persisted PVC + namespace, so neither may be deleted.
+    expect(deleteAgentPvc).not.toHaveBeenCalled();
+    expect(mockDeleteNamespace).not.toHaveBeenCalled();
+  });
+
+  it('records resume failures as retryable so the UI can offer retry (sr-2/NDE-3)', async () => {
+    const chatSession = {
+      id: 321,
+      uuid: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
+      userId: 'sample-user',
+      ownerGithubUsername: 'sample-user',
+      sessionKind: 'chat',
+      podName: null,
+      namespace: 'chat-aaaaaaaa',
+      pvcName: 'agent-pvc-aaaaaaaa',
+      model: 'claude-sonnet-4-6',
+      buildKind: null,
+      status: 'active',
+      chatStatus: 'ready',
+      workspaceStatus: AgentWorkspaceStatus.HIBERNATED,
+      devModeSnapshots: {},
+      forwardedAgentSecretProviders: [],
+      workspaceRepos: [],
+      selectedServices: [],
+      skillPlan: { version: 1, skills: [] },
+    };
+    mockSessionQuery.findOne.mockResolvedValue(chatSession);
+    mockSessionQuery.forUpdate.mockResolvedValueOnce(chatSession);
+    queuePatchedSession(chatSession);
+    queuePatchedSession({
+      ...chatSession,
+      workspaceStatus: AgentWorkspaceStatus.FAILED,
+    });
+    (createSessionWorkspacePod as jest.Mock).mockRejectedValueOnce(new Error('resume pod failed'));
+
+    await expect(
+      AgentSessionService.resumeChatRuntime({
+        sessionId: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
+        userId: 'sample-user',
+        userIdentity: {
+          userId: 'sample-user',
+          githubUsername: 'sample-user',
+        } as any,
+        githubToken: 'sample-gh-token',
+      })
+    ).rejects.toThrow('resume pod failed');
+
+    expect(sandboxWritePayloads()).toContainEqual(
+      expect.objectContaining({
+        status: 'failed',
+        error: expect.objectContaining({
+          stage: 'resume',
+          origin: 'resume',
+          retryable: true,
+        }),
+      })
+    );
+  });
+
+  it('deletes genuinely-owned fresh resources on a non-resume provision failure', async () => {
+    const chatSession = {
+      id: 321,
+      uuid: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
+      userId: 'user-123',
+      ownerGithubUsername: 'sample-user',
+      sessionKind: 'chat',
+      podName: null,
+      namespace: null,
+      pvcName: null,
+      model: 'claude-sonnet-4-6',
+      buildKind: null,
+      status: 'active',
+      chatStatus: 'ready',
+      workspaceStatus: 'none',
+      devModeSnapshots: {},
+      forwardedAgentSecretProviders: [],
+      workspaceRepos: [],
+      selectedServices: [],
+      skillPlan: { version: 1, skills: [] },
+    };
+    mockSessionQuery.findOne.mockResolvedValue(chatSession);
+    mockSessionQuery.forUpdate.mockResolvedValueOnce(chatSession);
+    queuePatchedSession(chatSession);
+    queuePatchedSession({
+      ...chatSession,
+      workspaceStatus: AgentWorkspaceStatus.FAILED,
+    });
+    (createSessionWorkspacePod as jest.Mock).mockRejectedValueOnce(new Error('pod creation failed'));
+
+    await expect(
+      AgentSessionService.provisionChatRuntime({
+        sessionId: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
+        userId: 'user-123',
+        userIdentity: {
+          userId: 'user-123',
+          githubUsername: 'sample-user',
+        } as any,
+        githubToken: 'sample-gh-token',
+      })
+    ).rejects.toThrow('pod creation failed');
+
+    // Fresh provision owns the PVC/namespace, so a failure must clean them up.
+    expect(deleteAgentPvc).toHaveBeenCalledWith('chat-aaaaaaaa', 'agent-pvc-aaaaaaaa');
+    expect(mockDeleteNamespace).toHaveBeenCalledWith('chat-aaaaaaaa');
   });
 
   it('publishes a chat session HTTP port through ingress', async () => {
@@ -6136,6 +6279,8 @@ describe('AgentSessionService', () => {
             id: 123,
             namespace: 'test-ns',
             buildUuid: 'build-123',
+            workspaceStatus: AgentWorkspaceStatus.READY,
+            podName: 'agent-test',
           }),
         }),
       });
@@ -6148,13 +6293,15 @@ describe('AgentSessionService', () => {
         buildUuid: 'build-123',
       });
       expect(systemPrompt.buildAgentSessionDynamicSystemPrompt).toHaveBeenCalled();
+      const dynamicArgs = (systemPrompt.buildAgentSessionDynamicSystemPrompt as jest.Mock).mock.calls[0][0];
+      expect(dynamicArgs.toolLines.length).toBeGreaterThan(0);
       expect(systemPrompt.combineAgentSessionAppendSystemPrompt).toHaveBeenCalledWith(
         'Use concise responses.',
         'Session context:\n- namespace: test-ns'
       );
     });
 
-    it('appends dynamic build context for chat sessions without a namespace', async () => {
+    it('appends dynamic build context without workspace tool inventory for build-context chats', async () => {
       mockGetEffectiveAgentSessionConfig.mockResolvedValue({
         appendSystemPrompt: 'Use concise responses.',
       });
@@ -6162,7 +6309,8 @@ describe('AgentSessionService', () => {
         namespace: null,
         buildUuid: 'build-123',
         services: [],
-        build: { uuid: 'build-123', status: 'build_failed' },
+        build: { uuid: 'build-123', status: 'build_failed', namespace: 'env-build-123' },
+        lifecycleConfig: { status: 'missing', path: 'lifecycle.yaml' },
       });
       (systemPrompt.buildAgentSessionDynamicSystemPrompt as jest.Mock).mockReturnValue(
         'Session context:\n- buildUuid: build-123\nBuild context:\n- buildUuid=build-123: status=build_failed'
@@ -6176,6 +6324,8 @@ describe('AgentSessionService', () => {
             namespace: null,
             buildUuid: 'build-123',
             skillPlan: { skills: [] },
+            workspaceStatus: AgentWorkspaceStatus.NONE,
+            podName: null,
           }),
         }),
       });
@@ -6186,12 +6336,34 @@ describe('AgentSessionService', () => {
         namespace: null,
         buildUuid: 'build-123',
       });
-      expect(systemPrompt.buildAgentSessionDynamicSystemPrompt).toHaveBeenCalledWith(
-        expect.objectContaining({
-          namespace: null,
-          buildUuid: 'build-123',
-          toolLines: [],
-        })
+      const dynamicArgs = (systemPrompt.buildAgentSessionDynamicSystemPrompt as jest.Mock).mock.calls[0][0];
+      expect(dynamicArgs.toolLines).toEqual([]);
+      // Top-level namespace falls back to build.namespace.
+      expect(dynamicArgs.namespace).toBe('env-build-123');
+      expect(dynamicArgs.lifecycleConfig).toEqual({ status: 'missing', path: 'lifecycle.yaml' });
+    });
+
+    it('emits the UNAVAILABLE snapshot when prompt context resolution fails', async () => {
+      mockGetEffectiveAgentSessionConfig.mockResolvedValue({
+        appendSystemPrompt: 'Use concise responses.',
+      });
+      (systemPrompt.resolveAgentSessionPromptContext as jest.Mock).mockRejectedValue(new Error('lookup failed'));
+
+      (AgentSession.query as jest.Mock) = jest.fn().mockReturnValue({
+        findOne: jest.fn().mockReturnValue({
+          select: jest.fn().mockResolvedValue({
+            id: 123,
+            namespace: null,
+            buildUuid: 'build-123',
+            skillPlan: { skills: [] },
+          }),
+        }),
+      });
+
+      const prompt = await AgentSessionService.getSessionAppendSystemPrompt('sess-1'); // must resolve, not reject
+      expect(prompt).toContain('Use concise responses.'); // configured append prompt is still present
+      expect(prompt).toContain(
+        'Initial Lifecycle snapshot: UNAVAILABLE (context lookup failed) — gather build/deploy/k8s state via tools and note in your answer that baseline context was unavailable.'
       );
     });
 
diff --git a/src/server/services/agent/AgentSelectionService.ts b/src/server/services/agent/AgentSelectionService.ts
index bec43227..0f4f59dc 100644
--- a/src/server/services/agent/AgentSelectionService.ts
+++ b/src/server/services/agent/AgentSelectionService.ts
@@ -20,6 +20,7 @@ import AgentThread from 'server/models/AgentThread';
 import type AgentSession from 'server/models/AgentSession';
 import type AgentSource from 'server/models/AgentSource';
 import type { RequestUserIdentity } from 'server/lib/get-user';
+import { ConflictError } from 'server/lib/appError';
 import AgentCapabilityService from './CapabilityService';
 import * as AgentDefinitionRegistry from './AgentDefinitionRegistry';
 import { customAgentDefinitionService } from './CustomAgentDefinitionService';
@@ -46,14 +47,12 @@ export type AgentSelectionUnavailableReason =
   | 'source_incompatible'
   | 'disabled_by_policy';
 
-export class AgentThreadAgentSwitchError extends Error {
-  constructor(
-    public readonly reason: AgentSelectionUnavailableReason,
-    message: string,
-    public readonly details: Record<string, unknown> = {}
-  ) {
-    super(message);
+export class AgentThreadAgentSwitchError extends ConflictError { // thrown when an agent switch is blocked
+  readonly reason: AgentSelectionUnavailableReason; // typed reason; also copied into the details object
+  constructor(reason: AgentSelectionUnavailableReason, message: string, extra: Record<string, unknown> = {}) {
+    super(message, 'agent_switch_blocked', { ...extra, reason }); // reason last so `extra` cannot overwrite it
     this.name = 'AgentThreadAgentSwitchError';
+    this.reason = reason;
   }
 }
 
diff --git a/src/server/services/agent/BuildContextChatService.ts b/src/server/services/agent/BuildContextChatService.ts
index 81e5632c..2a1f893b 100644
--- a/src/server/services/agent/BuildContextChatService.ts
+++ b/src/server/services/agent/BuildContextChatService.ts
@@ -21,6 +21,7 @@ import AgentThread from 'server/models/AgentThread';
 import Build from 'server/models/Build';
 import Deploy from 'server/models/Deploy';
 import { AgentChatStatus, AgentSessionKind } from 'shared/constants';
+import { NotFoundError, BadRequestError } from 'server/lib/appError';
 import AgentChatSessionService, {
   type AgentBuildContextChatMetadata,
   type AgentBuildContextSelectedDeployMetadata,
@@ -29,17 +30,27 @@ import AgentThreadService from './ThreadService';
 
 const ACTIVE_BUILD_CONTEXT_CHAT_UNIQUE_CONSTRAINT = 'agent_sessions_active_build_context_chat_unique';
 
-export class BuildContextChatBuildNotFoundError extends Error {
-  constructor(readonly buildUuid: string) {
-    super(`Build not found: ${buildUuid}`);
+export class BuildContextChatBuildNotFoundError extends NotFoundError { // no build exists for the given UUID
+  readonly buildUuid: string; // UUID that was looked up; also included in the details object
+  constructor(buildUuid: string) {
+    super(`Build not found: ${buildUuid}`, 'build_not_found', { buildUuid }); // presumably (message, code, details)
     this.name = 'BuildContextChatBuildNotFoundError';
+    this.buildUuid = buildUuid;
   }
 }
 
-export class BuildContextChatSelectedDeployError extends Error {
-  constructor(readonly buildUuid: string, readonly selectedDeployUuid: string) {
-    super(`Selected deploy ${selectedDeployUuid} does not belong to build ${buildUuid}`);
+export class BuildContextChatSelectedDeployError extends BadRequestError { // selected deploy is not part of the build
+  readonly buildUuid: string; // build the chat was requested for
+  readonly selectedDeployUuid: string; // deploy UUID that failed the ownership check
+  constructor(buildUuid: string, selectedDeployUuid: string) {
+    super( // presumably (message, code, details) — confirm against BadRequestError in server/lib/appError
+      `Selected deploy ${selectedDeployUuid} does not belong to build ${buildUuid}`,
+      'build_selected_deploy_invalid',
+      { buildUuid, selectedDeployUuid }
+    );
     this.name = 'BuildContextChatSelectedDeployError';
+    this.buildUuid = buildUuid;
+    this.selectedDeployUuid = selectedDeployUuid;
   }
 }
 
diff --git a/src/server/services/agent/CapabilityService.ts b/src/server/services/agent/CapabilityService.ts
index ac8742df..6c7e983a 100644
--- a/src/server/services/agent/CapabilityService.ts
+++ b/src/server/services/agent/CapabilityService.ts
@@ -14,61 +14,51 @@
  * limitations under the License.
  */
 
-import { dynamicTool, jsonSchema, type ToolSet } from 'ai';
+import { type ToolSet } from 'ai';
 import AgentSession from 'server/models/AgentSession';
-import AgentSessionService from 'server/services/agentSession';
-import { SESSION_WORKSPACE_GATEWAY_PORT } from 'server/lib/agentSession/podFactory';
-import { McpConfigService, sanitizeMcpErrorMessage, sanitizeMcpResult } from 'server/services/agentRuntime/mcp/config';
+import { getOwnedSession } from 'server/services/agent/sessionOwnership';
+import { McpConfigService } from 'server/services/agentRuntime/mcp/config';
 import { McpClientManager } from 'server/services/agentRuntime/mcp/client';
-import { applyMcpDefaultToolArgs } from 'server/services/agentRuntime/mcp/runtimeConfig';
 import { usesSessionWorkspaceGatewayExecution } from 'server/services/agentRuntime/mcp/sessionPod';
 import type { RequestUserIdentity } from 'server/lib/get-user';
 import { getLogger } from 'server/lib/logger';
 import type { AgentSessionToolRule } from 'server/services/types/agentSessionConfig';
-import type {
-  CapabilityPolicyConfig,
-  CustomAgentCreationPolicyConfig,
-  AgentRuntimeConfig,
-} from 'server/services/types/agentRuntimeConfig';
+import type { CapabilityPolicyConfig, CustomAgentCreationPolicyConfig } from 'server/services/types/agentRuntimeConfig';
 import AgentPolicyService from './PolicyService';
 import type { ResolvedAgentCapabilityAccess } from './PolicyService';
-import type { AgentApprovalMode, AgentApprovalPolicy, AgentCapabilityKey, AgentToolAuditRecord } from './types';
+import type { AgentApprovalPolicy, AgentToolAuditRecord } from './types';
 import type { AgentCapabilityCatalogId } from './capabilityCatalog';
-import type { ResolvedMcpServer } from 'server/services/agentRuntime/mcp/types';
 import AgentRuntimeConfigService from 'server/services/agentRuntime/config/agentRuntimeConfig';
 import { assertSafeWorkspaceMutationCommand, isReadOnlyWorkspaceCommand } from './sandboxExecSafety';
-import { buildProposedFileChanges, buildResultFileChanges, didToolResultFail } from './fileChanges';
-import type { AgentFileChangeData } from './types';
-import { resolveAgentSessionDurabilityConfig } from 'server/lib/agentSession/runtimeConfig';
+import { didToolResultFail } from './fileChanges';
 import {
   buildAgentToolKey,
-  CHAT_PUBLISH_HTTP_TOOL_NAME,
-  LIFECYCLE_BUILTIN_SERVER_SLUG,
   SESSION_WORKSPACE_MUTATION_TOOL_NAME,
   SESSION_WORKSPACE_READONLY_TOOL_NAME,
-  SESSION_WORKSPACE_SERVER_NAME,
-  SESSION_WORKSPACE_SERVER_SLUG,
-  buildWorkspaceMutationExecDescription,
-  buildWorkspaceReadonlyExecDescription,
 } from './toolKeys';
 import { getSessionWorkspaceCatalogEntriesForRuntimeTool } from './sandboxToolCatalog';
-import { SessionWorkspaceGatewayUnavailableError } from './errors';
-import AgentSandboxService from './SandboxService';
+import { registerLifecycleDiagnosticFixTools, registerLifecycleDiagnosticReadTools } from './diagnosticTools';
+import type { AgentRuntimeToolMetadata } from './toolMetadata';
 import {
-  registerLifecycleDiagnosticFixTools,
-  registerLifecycleDiagnosticReadTools,
-  type LifecycleDiagnosticGithubSafety,
-} from './diagnosticTools';
-import { buildAgentRuntimeToolMetadata, type AgentRuntimeToolMetadata } from './toolMetadata';
-import { YamlConfigParser } from 'server/lib/yamlConfigParser';
-import type { LifecycleConfig } from 'server/models/yaml/Config';
-
-type ToolExecutionHooks = {
-  onToolStarted?: (audit: AgentToolAuditRecord) => Promise<void>;
-  onToolFinished?: (audit: AgentToolAuditRecord & { result: unknown; status: 'completed' | 'failed' }) => Promise<void>;
-  onFileChange?: (change: AgentFileChangeData) => Promise<void>;
-  getActiveRunUuid?: () => string | null | undefined;
-};
+  isCatalogCapabilityAllowed,
+  recordToolMetadata,
+  resolveToolApprovalMode,
+  selectedMcpConnectionRefs,
+  toAiDynamicTool,
+  toAiJsonSchema,
+  type ToolExecutionHooks,
+} from './capabilityToolHelpers';
+import { resolveLifecycleDiagnosticGithubSafety, resolvePrimaryRepo } from './capabilitySessionContext';
+import {
+  emitResultFileChanges,
+  isChatWorkspaceRuntimeReady,
+  registerChatPublishHttpTool,
+  registerChatWorkspaceTools,
+  resolveSessionExecutionServer,
+  resolveSessionWorkspaceGatewayServer,
+  WORKSPACE_EXEC_INPUT_SCHEMA,
+} from './chatWorkspaceToolRegistration';
+import { registerGenericMcpTool } from './mcpToolRegistration';
 
 export type { AgentRuntimeToolMetadata } from './toolMetadata';
 
@@ -86,1089 +76,7 @@ type BuildToolSetOptions = {
   selectedRuntimeMcpConnectionRefs?: string[];
 };
 
-type SessionWorkspaceGatewayTimeouts = {
-  discoveryTimeoutMs: number;
-  executionTimeoutMs: number;
-};
-
-const WORKSPACE_EXEC_RUNTIME_TOOL_NAME = 'workspace.exec';
-const WORKSPACE_WRITE_FILE_RUNTIME_TOOL_NAME = 'workspace.write_file';
-const WORKSPACE_EDIT_FILE_RUNTIME_TOOL_NAME = 'workspace.edit_file';
-const REDACTED_MCP_DEFAULT_ARG = '******';
-const WORKSPACE_EXEC_INPUT_SCHEMA = {
-  type: 'object',
-  required: ['command'],
-  additionalProperties: false,
-  properties: {
-    command: {
-      type: 'string',
-      minLength: 1,
-      description: 'Command to run with bash -lc',
-    },
-    cwd: {
-      type: 'string',
-      description: 'Working directory relative to the workspace',
-    },
-    timeoutMs: {
-      type: 'integer',
-      minimum: 1,
-      maximum: 120000,
-      description: 'Command timeout in milliseconds',
-    },
-  },
-} as const;
-const WORKSPACE_WRITE_FILE_INPUT_SCHEMA = {
-  type: 'object',
-  required: ['path', 'content'],
-  additionalProperties: false,
-  properties: {
-    path: {
-      type: 'string',
-      minLength: 1,
-      description: 'Workspace-relative file path to write',
-    },
-    content: {
-      type: 'string',
-      description: 'Complete file content to write',
-    },
-  },
-} as const;
-const WORKSPACE_EDIT_FILE_INPUT_SCHEMA = {
-  type: 'object',
-  required: ['path', 'oldText', 'newText'],
-  additionalProperties: false,
-  properties: {
-    path: {
-      type: 'string',
-      minLength: 1,
-      description: 'Workspace-relative file path to edit',
-    },
-    oldText: {
-      type: 'string',
-      description: 'Exact existing text to replace',
-    },
-    newText: {
-      type: 'string',
-      description: 'Replacement text',
-    },
-  },
-} as const;
-const LIFECYCLE_CONFIG_WRITE_PATTERNS = ['lifecycle.yaml', 'lifecycle.yml'];
-const PUBLISH_HTTP_INPUT_SCHEMA = {
-  type: 'object',
-  required: ['port'],
-  additionalProperties: false,
-  properties: {
-    port: {
-      type: 'integer',
-      minimum: 1,
-      maximum: 65535,
-      description: 'Workspace HTTP port to expose through ingress',
-    },
-  },
-} as const;
-
-function toAiJsonSchema(schema: unknown) {
-  return jsonSchema(schema as any);
-}
-
-function toAiDynamicTool(config: unknown) {
-  return dynamicTool(config as any);
-}
-
-function resolvePrimaryRepo(session: AgentSession): string | undefined {
-  const primaryRepo = (session.workspaceRepos || []).find((repo) => repo.primary)?.repo;
-  if (primaryRepo) {
-    return primaryRepo;
-  }
-
-  return session.selectedServices?.[0]?.repo || undefined;
-}
-
-function resolvePrimaryBranch(session: AgentSession): string | null {
-  const primaryWorkspaceRepo =
-    (session.workspaceRepos || []).find((repo) => repo.primary) || session.workspaceRepos?.[0];
-  if (primaryWorkspaceRepo?.branch) {
-    return primaryWorkspaceRepo.branch;
-  }
-
-  return session.selectedServices?.[0]?.branch || null;
-}
-
-function addReferencedFile(files: Set<string>, value: unknown) {
-  if (typeof value !== 'string') {
-    return;
-  }
-
-  const normalized = value.trim().replace(/^\/+/, '').replace(/^\.\//, '');
-  if (normalized) {
-    files.add(normalized);
-  }
-}
-
-function collectLifecycleConfigReferencedFiles(config: LifecycleConfig | null | undefined): string[] {
-  const files = new Set<string>();
-
-  for (const service of config?.services || []) {
-    const candidate = service as Record<string, any>;
-    addReferencedFile(files, candidate.github?.docker?.app?.dockerfilePath);
-    addReferencedFile(files, candidate.github?.docker?.init?.dockerfilePath);
-    addReferencedFile(files, candidate.helm?.docker?.app?.dockerfilePath);
-    addReferencedFile(files, candidate.helm?.docker?.init?.dockerfilePath);
-    addReferencedFile(files, candidate.helm?.envMapping?.app?.path);
-    addReferencedFile(files, candidate.helm?.envMapping?.init?.path);
-
-    for (const valueFile of candidate.helm?.chart?.valueFiles || []) {
-      addReferencedFile(files, valueFile);
-    }
-  }
-
-  return [...files];
-}
-
-function collectSelectedDeployReferencedFiles(session: AgentSession): string[] {
-  const files = new Set<string>();
-  const selectedService = session.selectedServices?.[0];
-  if (!selectedService) {
-    return [];
-  }
-
-  addReferencedFile(files, selectedService.dockerfilePath);
-  addReferencedFile(files, selectedService.initDockerfilePath);
-  for (const valueFile of selectedService.chartValueFiles || []) {
-    addReferencedFile(files, valueFile);
-  }
-
-  return [...files];
-}
-
-async function resolveLifecycleDiagnosticGithubSafety({
-  session,
-  repoFullName,
-  config,
-}: {
-  session: AgentSession;
-  repoFullName?: string;
-  config?: AgentRuntimeConfig | null;
-}): Promise<LifecycleDiagnosticGithubSafety> {
-  const allowedBranch = resolvePrimaryBranch(session);
-  const allowedWritePatterns = [
-    ...new Set([...LIFECYCLE_CONFIG_WRITE_PATTERNS, ...(config?.allowedWritePatterns || [])]),
-  ];
-  const selectedDeployReferencedFiles = collectSelectedDeployReferencedFiles(session);
-  const safety: LifecycleDiagnosticGithubSafety = {
-    allowedBranch,
-    allowedWritePatterns,
-    excludedFilePatterns: config?.excludedFilePatterns || [],
-    referencedFiles: selectedDeployReferencedFiles,
-  };
-
-  if (!repoFullName || !allowedBranch) {
-    return safety;
-  }
-
-  try {
-    const lifecycleConfig = await new YamlConfigParser().parseYamlConfigFromBranch(repoFullName, allowedBranch);
-    safety.referencedFiles = [
-      ...new Set([...selectedDeployReferencedFiles, ...collectLifecycleConfigReferencedFiles(lifecycleConfig)]),
-    ];
-  } catch (error) {
-    getLogger().warn(
-      { error, repo: repoFullName, branch: allowedBranch },
-      `AgentExec: lifecycle config references unavailable repo=${repoFullName} branch=${allowedBranch}`
-    );
-  }
-
-  return safety;
-}
-
-function resolveToolApprovalMode({
-  toolRules,
-  toolKey,
-  capabilityMode,
-}: {
-  toolRules: AgentSessionToolRule[] | undefined;
-  toolKey: string;
-  capabilityMode: AgentApprovalMode;
-}): AgentApprovalMode {
-  const rule = toolRules?.find((item) => item.toolKey === toolKey);
-  return rule?.mode || capabilityMode;
-}
-
-function recordToolMetadata(
-  toolMetadata: AgentRuntimeToolMetadata[] | undefined,
-  metadata: Omit<AgentRuntimeToolMetadata, 'effect' | 'resourceDomain' | 'workspaceNeed' | 'exposure'>
-) {
-  toolMetadata?.push(buildAgentRuntimeToolMetadata(metadata));
-}
-
-function isCatalogCapabilityAllowed(
-  resolvedCapabilityAccess: ResolvedAgentCapabilityAccess[] | undefined,
-  capabilityId: AgentCapabilityCatalogId
-): boolean {
-  if (!resolvedCapabilityAccess) {
-    return false;
-  }
-
-  return resolvedCapabilityAccess.some((entry) => entry.capabilityId === capabilityId && entry.allowed);
-}
-
-function selectedMcpConnectionRefs(connectionRefs?: string[]): Set<string> | undefined {
-  if (connectionRefs === undefined) {
-    return undefined;
-  }
-
-  return new Set(connectionRefs.map((connectionRef) => connectionRef.trim()).filter(Boolean));
-}
-
-function redactMcpDefaultArgs(
-  args: Record<string, unknown>,
-  defaultArgs: Record<string, string> | undefined
-): Record<string, unknown> {
-  if (!defaultArgs || Object.keys(defaultArgs).length === 0) {
-    return args;
-  }
-
-  const redacted = { ...args };
-  for (const key of Object.keys(defaultArgs)) {
-    if (key in redacted) {
-      redacted[key] = REDACTED_MCP_DEFAULT_ARG;
-    }
-  }
-
-  return redacted;
-}
-
-function resolveSessionWorkspaceGatewayBaseUrl(session: AgentSession): string | null {
-  if (!session.podName || !session.namespace || session.status !== 'active') {
-    return null;
-  }
-
-  return `http://${session.podName}.${session.namespace}.svc.cluster.local:${SESSION_WORKSPACE_GATEWAY_PORT}`;
-}
-
-function isChatWorkspaceRuntimeReady(session: AgentSession): boolean {
-  return (
-    session.sessionKind === 'chat' &&
-    session.status === 'active' &&
-    session.workspaceStatus === 'ready' &&
-    Boolean(session.namespace) &&
-    Boolean(session.podName)
-  );
-}
-
-async function resolveSessionWorkspaceGatewayServer(
-  session: AgentSession,
-  timeouts: SessionWorkspaceGatewayTimeouts
-): Promise<ResolvedMcpServer | null> {
-  const baseUrl =
-    (await AgentSandboxService.resolveWorkspaceGatewayBaseUrl(session.uuid)) ||
-    resolveSessionWorkspaceGatewayBaseUrl(session);
-  if (!baseUrl) {
-    return null;
-  }
-
-  const url = `${baseUrl}/mcp`;
-  const client = new McpClientManager();
-
-  try {
-    await client.connect({ type: 'http', url }, timeouts.discoveryTimeoutMs);
-    const discoveredTools = await client.listTools(timeouts.discoveryTimeoutMs);
-
-    return {
-      scope: 'session',
-      slug: 'sandbox',
-      name: 'Session Workspace',
-      transport: { type: 'http', url },
-      timeout: timeouts.executionTimeoutMs,
-      defaultArgs: {},
-      env: {},
-      discoveredTools,
-    };
-  } catch (error) {
-    getLogger().warn(
-      { error },
-      `AgentExec: workspace gateway unavailable sessionId=${session.uuid} namespace=${session.namespace} podName=${session.podName}`
-    );
-    throw new SessionWorkspaceGatewayUnavailableError({
-      sessionId: session.uuid,
-      cause: error,
-    });
-  } finally {
-    await client.close();
-  }
-}
-
-function resolveSessionExecutionServer(session: AgentSession, server: ResolvedMcpServer): ResolvedMcpServer | null {
-  if (!usesSessionWorkspaceGatewayExecution(server.transport)) {
-    return server;
-  }
-
-  const baseUrl = resolveSessionWorkspaceGatewayBaseUrl(session);
-  if (!baseUrl) {
-    return null;
-  }
-
-  return {
-    ...server,
-    transport: {
-      type: 'http',
-      url: `${baseUrl}/servers/${encodeURIComponent(server.slug)}/mcp`,
-    },
-  };
-}
-
-async function loadLatestSession(sessionUuid: string): Promise<AgentSession> {
-  const session = await AgentSession.query().findOne({ uuid: sessionUuid });
-  if (!session) {
-    throw new Error('Agent session not found');
-  }
-
-  return session;
-}
-
-async function getFileChangePreviewChars(): Promise<number> {
-  return (await resolveAgentSessionDurabilityConfig()).fileChangePreviewChars;
-}
-
-async function ensureChatWorkspaceRuntime({
-  session,
-  userIdentity,
-  requestGitHubToken,
-  allowedActiveRunUuid,
-}: {
-  session: AgentSession;
-  userIdentity: RequestUserIdentity;
-  requestGitHubToken?: string | null;
-  allowedActiveRunUuid?: string | null;
-}): Promise<AgentSession> {
-  const latestSession = await loadLatestSession(session.uuid);
-  if (latestSession.sessionKind !== 'chat') {
-    return latestSession;
-  }
-
-  const ensured = await AgentSandboxService.ensureChatSandbox({
-    sessionId: latestSession.uuid,
-    userId: userIdentity.userId,
-    userIdentity,
-    githubToken: requestGitHubToken,
-    ...(allowedActiveRunUuid ? { allowedActiveRunUuid } : {}),
-  });
-
-  return ensured.session;
-}
-
-async function executeWorkspaceRuntimeTool({
-  session,
-  runtimeToolName,
-  input,
-  timeoutMs,
-  userIdentity,
-  requestGitHubToken,
-  allowedActiveRunUuid,
-}: {
-  session: AgentSession;
-  runtimeToolName: string;
-  input: Record<string, unknown>;
-  timeoutMs: number;
-  userIdentity: RequestUserIdentity;
-  requestGitHubToken?: string | null;
-  allowedActiveRunUuid?: string | null;
-}) {
-  const runtimeSession = await ensureChatWorkspaceRuntime({
-    session,
-    userIdentity,
-    requestGitHubToken,
-    allowedActiveRunUuid,
-  });
-  const baseUrl =
-    (await AgentSandboxService.resolveWorkspaceGatewayBaseUrl(runtimeSession.uuid)) ||
-    resolveSessionWorkspaceGatewayBaseUrl(runtimeSession);
-  if (!baseUrl) {
-    throw new SessionWorkspaceGatewayUnavailableError({
-      sessionId: runtimeSession.uuid,
-      cause: new Error('Session workspace gateway URL is not available'),
-    });
-  }
-
-  const client = new McpClientManager();
-  try {
-    await client.connect({ type: 'http', url: `${baseUrl}/mcp` }, timeoutMs);
-    return await client.callTool(runtimeToolName, input, timeoutMs);
-  } catch (error) {
-    throw new SessionWorkspaceGatewayUnavailableError({
-      sessionId: runtimeSession.uuid,
-      cause: error,
-    });
-  } finally {
-    await client.close();
-  }
-}
-
-async function emitResultFileChanges({
-  hooks,
-  toolCallId,
-  sourceTool,
-  input,
-  result,
-  failed,
-}: {
-  hooks?: ToolExecutionHooks;
-  toolCallId?: string;
-  sourceTool: string;
-  input: Record<string, unknown>;
-  result: unknown;
-  failed: boolean;
-}) {
-  if (!toolCallId) {
-    return;
-  }
-
-  const changes = buildResultFileChanges({
-    toolCallId,
-    sourceTool,
-    input,
-    result,
-    failed,
-    previewChars: await getFileChangePreviewChars(),
-  });
-
-  for (const change of changes) {
-    await hooks?.onFileChange?.(change);
-  }
-}
-
-function registerChatWorkspaceExecTool({
-  tools,
-  session,
-  userIdentity,
-  approvalPolicy,
-  workspaceToolExecutionTimeoutMs,
-  requestGitHubToken,
-  hooks,
-  toolRules,
-  toolName,
-  capabilityKey,
-  description,
-  readOnly,
-  catalogCapabilityId,
-  resolvedCapabilityAccess,
-  toolMetadata,
-}: {
-  tools: ToolSet;
-  session: AgentSession;
-  userIdentity: RequestUserIdentity;
-  approvalPolicy: AgentApprovalPolicy;
-  workspaceToolExecutionTimeoutMs: number;
-  requestGitHubToken?: string | null;
-  hooks?: ToolExecutionHooks;
-  toolRules?: AgentSessionToolRule[];
-  toolName: string;
-  capabilityKey: AgentCapabilityKey;
-  description: string;
-  readOnly: boolean;
-  catalogCapabilityId: AgentCapabilityCatalogId;
-  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
-  toolMetadata?: AgentRuntimeToolMetadata[];
-}) {
-  if (!isCatalogCapabilityAllowed(resolvedCapabilityAccess, catalogCapabilityId)) {
-    return;
-  }
-
-  const toolKey = buildAgentToolKey(SESSION_WORKSPACE_SERVER_SLUG, toolName);
-  const mode = resolveToolApprovalMode({
-    toolRules,
-    toolKey,
-    capabilityMode: AgentPolicyService.modeForCapability(approvalPolicy, capabilityKey),
-  });
-
-  if (mode === 'deny') {
-    return;
-  }
-
-  tools[toolKey] = toAiDynamicTool({
-    description,
-    inputSchema: toAiJsonSchema(WORKSPACE_EXEC_INPUT_SCHEMA),
-    needsApproval: mode === 'require_approval',
-    execute: async (input, context) => {
-      const args = (input as Record<string, unknown>) || {};
-      const command = typeof args.command === 'string' ? args.command : '';
-      if (readOnly && !isReadOnlyWorkspaceCommand(command)) {
-        throw new Error(
-          'This command is not a safe read-only inspection command. Use the workspace exec mutation tool for state-changing, networked, or process-managing commands.'
-        );
-      }
-      if (!readOnly) {
-        assertSafeWorkspaceMutationCommand(command);
-      }
-
-      const toolCallId = context?.toolCallId;
-      const audit: AgentToolAuditRecord = {
-        source: 'mcp',
-        serverSlug: SESSION_WORKSPACE_SERVER_SLUG,
-        toolName,
-        toolCallId,
-        args,
-        capabilityKey,
-      };
-
-      await hooks?.onToolStarted?.(audit);
-
-      try {
-        const runtimeArgs = readOnly ? args : { ...args, captureFileChanges: true };
-        const result = await executeWorkspaceRuntimeTool({
-          session,
-          runtimeToolName: WORKSPACE_EXEC_RUNTIME_TOOL_NAME,
-          input: runtimeArgs,
-          timeoutMs: workspaceToolExecutionTimeoutMs,
-          userIdentity,
-          requestGitHubToken,
-          allowedActiveRunUuid: hooks?.getActiveRunUuid?.() ?? null,
-        });
-        const failed = result.isError || didToolResultFail(result);
-        if (!readOnly) {
-          await emitResultFileChanges({
-            hooks,
-            toolCallId,
-            sourceTool: toolName,
-            input: args,
-            result,
-            failed,
-          });
-        }
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result,
-          status: failed ? 'failed' : 'completed',
-        });
-        return result;
-      } catch (error) {
-        getLogger().warn({ error }, `AgentExec: chat workspace tool failed sessionId=${session.uuid} tool=${toolName}`);
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result: {
-            error: error instanceof Error ? error.message : String(error),
-          },
-          status: 'failed',
-        });
-        throw error;
-      }
-    },
-  });
-  recordToolMetadata(toolMetadata, {
-    toolKey,
-    catalogCapabilityId,
-    capabilityKey,
-    approvalMode: mode,
-  });
-}
-
-function registerChatWorkspaceFileTool({
-  tools,
-  session,
-  userIdentity,
-  approvalPolicy,
-  workspaceToolExecutionTimeoutMs,
-  requestGitHubToken,
-  hooks,
-  toolRules,
-  toolName,
-  inputSchema,
-  description,
-  catalogCapabilityId,
-  resolvedCapabilityAccess,
-  toolMetadata,
-}: {
-  tools: ToolSet;
-  session: AgentSession;
-  userIdentity: RequestUserIdentity;
-  approvalPolicy: AgentApprovalPolicy;
-  workspaceToolExecutionTimeoutMs: number;
-  requestGitHubToken?: string | null;
-  hooks?: ToolExecutionHooks;
-  toolRules?: AgentSessionToolRule[];
-  toolName: string;
-  inputSchema: Record<string, unknown>;
-  description: string;
-  catalogCapabilityId: AgentCapabilityCatalogId;
-  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
-  toolMetadata?: AgentRuntimeToolMetadata[];
-}) {
-  if (!isCatalogCapabilityAllowed(resolvedCapabilityAccess, catalogCapabilityId)) {
-    return;
-  }
-
-  const toolKey = buildAgentToolKey(SESSION_WORKSPACE_SERVER_SLUG, toolName);
-  const capabilityKey: AgentCapabilityKey = 'workspace_write';
-  const mode = resolveToolApprovalMode({
-    toolRules,
-    toolKey,
-    capabilityMode: AgentPolicyService.modeForCapability(approvalPolicy, capabilityKey),
-  });
-
-  if (mode === 'deny') {
-    return;
-  }
-
-  tools[toolKey] = toAiDynamicTool({
-    description,
-    inputSchema: toAiJsonSchema(inputSchema),
-    needsApproval: mode === 'require_approval',
-    onInputAvailable: async ({ input, toolCallId }) => {
-      if (!toolCallId) {
-        return;
-      }
-
-      const args = (input as Record<string, unknown>) || {};
-      const changes = buildProposedFileChanges({
-        toolCallId,
-        sourceTool: toolName,
-        input: args,
-        previewChars: await getFileChangePreviewChars(),
-      });
-
-      for (const change of changes) {
-        await hooks?.onFileChange?.(change);
-      }
-    },
-    execute: async (input, context) => {
-      const args = (input as Record<string, unknown>) || {};
-      const toolCallId = context?.toolCallId;
-      const audit: AgentToolAuditRecord = {
-        source: 'mcp',
-        serverSlug: SESSION_WORKSPACE_SERVER_SLUG,
-        toolName,
-        toolCallId,
-        args,
-        capabilityKey,
-      };
-
-      await hooks?.onToolStarted?.(audit);
-
-      try {
-        const result = await executeWorkspaceRuntimeTool({
-          session,
-          runtimeToolName: toolName,
-          input: args,
-          timeoutMs: workspaceToolExecutionTimeoutMs,
-          userIdentity,
-          requestGitHubToken,
-          allowedActiveRunUuid: hooks?.getActiveRunUuid?.() ?? null,
-        });
-        const failed = result.isError || didToolResultFail(result);
-        if (toolCallId) {
-          const changes = buildResultFileChanges({
-            toolCallId,
-            sourceTool: toolName,
-            input: args,
-            result,
-            failed,
-            previewChars: await getFileChangePreviewChars(),
-          });
-
-          for (const change of changes) {
-            await hooks?.onFileChange?.(change);
-          }
-        }
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result,
-          status: failed ? 'failed' : 'completed',
-        });
-        return result;
-      } catch (error) {
-        getLogger().warn(
-          { error },
-          `AgentExec: chat workspace file tool failed sessionId=${session.uuid} tool=${toolName}`
-        );
-        if (toolCallId) {
-          const changes = buildResultFileChanges({
-            toolCallId,
-            sourceTool: toolName,
-            input: args,
-            result: {
-              error: error instanceof Error ? error.message : String(error),
-            },
-            failed: true,
-            previewChars: await getFileChangePreviewChars(),
-          });
-
-          for (const change of changes) {
-            await hooks?.onFileChange?.(change);
-          }
-        }
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result: {
-            error: error instanceof Error ? error.message : String(error),
-          },
-          status: 'failed',
-        });
-        throw error;
-      }
-    },
-  });
-  recordToolMetadata(toolMetadata, {
-    toolKey,
-    catalogCapabilityId,
-    capabilityKey,
-    approvalMode: mode,
-  });
-}
-
-function registerChatPublishHttpTool({
-  tools,
-  session,
-  approvalPolicy,
-  userIdentity,
-  requestGitHubToken,
-  hooks,
-  toolRules,
-  resolvedCapabilityAccess,
-  toolMetadata,
-}: {
-  tools: ToolSet;
-  session: AgentSession;
-  approvalPolicy: AgentApprovalPolicy;
-  userIdentity: RequestUserIdentity;
-  requestGitHubToken?: string | null;
-  hooks?: ToolExecutionHooks;
-  toolRules?: AgentSessionToolRule[];
-  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
-  toolMetadata?: AgentRuntimeToolMetadata[];
-}) {
-  const toolKey = buildAgentToolKey(LIFECYCLE_BUILTIN_SERVER_SLUG, CHAT_PUBLISH_HTTP_TOOL_NAME);
-  if (!isCatalogCapabilityAllowed(resolvedCapabilityAccess, 'preview_publish')) {
-    return;
-  }
-
-  const capabilityKey: AgentCapabilityKey = 'deploy_k8s_mutation';
-  const mode = resolveToolApprovalMode({
-    toolRules,
-    toolKey,
-    capabilityMode: AgentPolicyService.modeForCapability(approvalPolicy, capabilityKey),
-  });
-
-  if (mode === 'deny') {
-    return;
-  }
-
-  tools[toolKey] = toAiDynamicTool({
-    description:
-      'Expose a running HTTP app from the chat workspace through lifecycle-managed ingress and return the reachable URL.',
-    inputSchema: toAiJsonSchema(PUBLISH_HTTP_INPUT_SCHEMA),
-    needsApproval: mode === 'require_approval',
-    execute: async (input, context) => {
-      const args = (input as Record<string, unknown>) || {};
-      const toolCallId = context?.toolCallId;
-      const audit: AgentToolAuditRecord = {
-        source: 'mcp',
-        serverSlug: LIFECYCLE_BUILTIN_SERVER_SLUG,
-        toolName: CHAT_PUBLISH_HTTP_TOOL_NAME,
-        toolCallId,
-        args,
-        capabilityKey,
-      };
-
-      await hooks?.onToolStarted?.(audit);
-
-      try {
-        const runtimeSession = await ensureChatWorkspaceRuntime({
-          session,
-          userIdentity,
-          requestGitHubToken,
-          allowedActiveRunUuid: hooks?.getActiveRunUuid?.() ?? null,
-        });
-        const port = Number(args.port);
-        if (!Number.isInteger(port) || port < 1 || port > 65535) {
-          throw new Error('port must be an integer between 1 and 65535');
-        }
-
-        const result = await AgentSessionService.publishChatHttpPort({
-          sessionId: runtimeSession.uuid,
-          userId: userIdentity.userId,
-          port,
-        });
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result,
-          status: 'completed',
-        });
-        return result;
-      } catch (error) {
-        getLogger().warn({ error }, `AgentExec: chat publish failed sessionId=${session.uuid}`);
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result: {
-            error: error instanceof Error ? error.message : String(error),
-          },
-          status: 'failed',
-        });
-        throw error;
-      }
-    },
-  });
-  recordToolMetadata(toolMetadata, {
-    toolKey,
-    catalogCapabilityId: 'preview_publish',
-    capabilityKey,
-    approvalMode: mode,
-  });
-}
-
-function registerChatWorkspaceTools({
-  tools,
-  session,
-  userIdentity,
-  approvalPolicy,
-  workspaceToolExecutionTimeoutMs,
-  requestGitHubToken,
-  hooks,
-  toolRules,
-  resolvedCapabilityAccess,
-  toolMetadata,
-}: {
-  tools: ToolSet;
-  session: AgentSession;
-  userIdentity: RequestUserIdentity;
-  approvalPolicy: AgentApprovalPolicy;
-  workspaceToolExecutionTimeoutMs: number;
-  requestGitHubToken?: string | null;
-  hooks?: ToolExecutionHooks;
-  toolRules?: AgentSessionToolRule[];
-  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
-  toolMetadata?: AgentRuntimeToolMetadata[];
-}) {
-  registerChatWorkspaceExecTool({
-    tools,
-    session,
-    userIdentity,
-    approvalPolicy,
-    workspaceToolExecutionTimeoutMs,
-    requestGitHubToken,
-    hooks,
-    toolRules,
-    toolName: SESSION_WORKSPACE_READONLY_TOOL_NAME,
-    capabilityKey: 'read',
-    description: buildWorkspaceReadonlyExecDescription(SESSION_WORKSPACE_SERVER_NAME),
-    readOnly: true,
-    catalogCapabilityId: 'read_context',
-    resolvedCapabilityAccess,
-    toolMetadata,
-  });
-  registerChatWorkspaceExecTool({
-    tools,
-    session,
-    userIdentity,
-    approvalPolicy,
-    workspaceToolExecutionTimeoutMs,
-    requestGitHubToken,
-    hooks,
-    toolRules,
-    toolName: SESSION_WORKSPACE_MUTATION_TOOL_NAME,
-    capabilityKey: 'shell_exec',
-    description: buildWorkspaceMutationExecDescription(SESSION_WORKSPACE_SERVER_NAME),
-    readOnly: false,
-    catalogCapabilityId: 'workspace_shell',
-    resolvedCapabilityAccess,
-    toolMetadata,
-  });
-  registerChatWorkspaceFileTool({
-    tools,
-    session,
-    userIdentity,
-    approvalPolicy,
-    workspaceToolExecutionTimeoutMs,
-    requestGitHubToken,
-    hooks,
-    toolRules,
-    toolName: WORKSPACE_WRITE_FILE_RUNTIME_TOOL_NAME,
-    inputSchema: WORKSPACE_WRITE_FILE_INPUT_SCHEMA,
-    description:
-      'Write a file in the chat workspace. Use this when the user asks to create or replace file contents. This provisions the workspace only when the tool runs.',
-    catalogCapabilityId: 'workspace_files',
-    resolvedCapabilityAccess,
-    toolMetadata,
-  });
-  registerChatWorkspaceFileTool({
-    tools,
-    session,
-    userIdentity,
-    approvalPolicy,
-    workspaceToolExecutionTimeoutMs,
-    requestGitHubToken,
-    hooks,
-    toolRules,
-    toolName: WORKSPACE_EDIT_FILE_RUNTIME_TOOL_NAME,
-    inputSchema: WORKSPACE_EDIT_FILE_INPUT_SCHEMA,
-    description:
-      'Edit a file in the chat workspace by replacing exact text. Use this for targeted file modifications. This provisions the workspace only when the tool runs.',
-    catalogCapabilityId: 'workspace_files',
-    resolvedCapabilityAccess,
-    toolMetadata,
-  });
-}
-
-function registerGenericMcpTool({
-  tools,
-  session,
-  server,
-  discoveredTool,
-  exposedToolName,
-  description,
-  capabilityKey,
-  mode,
-  catalogCapabilityId,
-  hooks,
-  toolMetadata,
-}: {
-  tools: ToolSet;
-  session: AgentSession;
-  server: ResolvedMcpServer;
-  discoveredTool: ResolvedMcpServer['discoveredTools'][number];
-  exposedToolName: string;
-  description: string;
-  capabilityKey: AgentCapabilityKey;
-  mode: AgentApprovalMode;
-  catalogCapabilityId: AgentCapabilityCatalogId;
-  hooks?: ToolExecutionHooks;
-  toolMetadata?: AgentRuntimeToolMetadata[];
-}) {
-  const toolKey = buildAgentToolKey(server.slug, exposedToolName);
-
-  tools[toolKey] = toAiDynamicTool({
-    description,
-    inputSchema: toAiJsonSchema(discoveredTool.inputSchema as Record<string, unknown>),
-    needsApproval: mode === 'require_approval',
-    onInputAvailable: async ({ input, toolCallId }) => {
-      if (!toolCallId) {
-        return;
-      }
-
-      const runtimeArgs = applyMcpDefaultToolArgs(
-        discoveredTool.inputSchema as Record<string, unknown>,
-        server.defaultArgs,
-        (input as Record<string, unknown>) || {}
-      );
-      const auditArgs = redactMcpDefaultArgs(runtimeArgs, server.defaultArgs);
-      const changes = buildProposedFileChanges({
-        toolCallId,
-        sourceTool: exposedToolName,
-        input: auditArgs,
-        previewChars: await getFileChangePreviewChars(),
-      });
-
-      for (const change of changes) {
-        await hooks?.onFileChange?.(change);
-      }
-    },
-    execute: async (input, context) => {
-      const toolCallId = context?.toolCallId;
-      const runtimeArgs = applyMcpDefaultToolArgs(
-        discoveredTool.inputSchema as Record<string, unknown>,
-        server.defaultArgs,
-        (input as Record<string, unknown>) || {}
-      );
-      const auditArgs = redactMcpDefaultArgs(runtimeArgs, server.defaultArgs);
-      const audit: AgentToolAuditRecord = {
-        source: 'mcp',
-        serverSlug: server.slug,
-        toolName: exposedToolName,
-        toolCallId,
-        args: auditArgs,
-        capabilityKey,
-      };
-
-      await hooks?.onToolStarted?.(audit);
-
-      const mcpSecretSources = [
-        {
-          compiledConfig: {
-            env: server.env,
-            defaultArgs: server.defaultArgs,
-          },
-          transport: server.transport,
-        },
-      ];
-      const client = new McpClientManager();
-      try {
-        await client.connect(server.transport, server.timeout);
-        const rawResult = await client.callTool(discoveredTool.name, runtimeArgs, server.timeout);
-        const failed = rawResult.isError || didToolResultFail(rawResult);
-        const result = failed ? sanitizeMcpResult(rawResult, mcpSecretSources) : rawResult;
-        if (toolCallId) {
-          const changes = buildResultFileChanges({
-            toolCallId,
-            sourceTool: exposedToolName,
-            input: auditArgs,
-            result,
-            failed,
-            previewChars: await getFileChangePreviewChars(),
-          });
-
-          for (const change of changes) {
-            await hooks?.onFileChange?.(change);
-          }
-        }
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result,
-          status: failed ? 'failed' : 'completed',
-        });
-        return result;
-      } catch (error) {
-        const errorMessage = sanitizeMcpErrorMessage(error, mcpSecretSources);
-        getLogger().warn(
-          { error: errorMessage },
-          `AgentExec: mcp tool failed sessionId=${session.uuid} server=${server.slug} tool=${exposedToolName}`
-        );
-        if (toolCallId) {
-          const changes = buildResultFileChanges({
-            toolCallId,
-            sourceTool: exposedToolName,
-            input: auditArgs,
-            result: {
-              error: errorMessage,
-            },
-            failed: true,
-            previewChars: await getFileChangePreviewChars(),
-          });
-
-          for (const change of changes) {
-            await hooks?.onFileChange?.(change);
-          }
-        }
-        await hooks?.onToolFinished?.({
-          ...audit,
-          result: {
-            error: errorMessage,
-          },
-          status: 'failed',
-        });
-        throw new Error(errorMessage);
-      } finally {
-        await client.close();
-      }
-    },
-  });
-  recordToolMetadata(toolMetadata, {
-    toolKey,
-    catalogCapabilityId,
-    capabilityKey,
-    approvalMode: mode,
-  });
-}
-
 export default class AgentCapabilityService {
-  static async getOwnedSession(sessionUuid: string, userId: string): Promise<AgentSession> {
-    const session = await AgentSession.query().findOne({ uuid: sessionUuid, userId });
-    if (!session) {
-      throw new Error('Agent session not found');
-    }
-
-    return session;
-  }
-
   static async resolveSessionContext(
     sessionUuid: string,
     userIdentity: RequestUserIdentity
@@ -1179,7 +87,7 @@ export default class AgentCapabilityService {
     capabilityPolicy?: CapabilityPolicyConfig;
     customAgentCreationPolicy?: CustomAgentCreationPolicyConfig;
   }> {
-    const session = await this.getOwnedSession(sessionUuid, userIdentity.userId);
+    const session = await getOwnedSession(sessionUuid, userIdentity.userId);
     const repoFullName = resolvePrimaryRepo(session);
     const [approvalPolicy, effectiveAgentConfig] = await Promise.all([
       AgentPolicyService.getEffectivePolicy(repoFullName),
diff --git a/src/server/services/agent/ChatSessionService.ts b/src/server/services/agent/ChatSessionService.ts
index c30839f4..bb68bc72 100644
--- a/src/server/services/agent/ChatSessionService.ts
+++ b/src/server/services/agent/ChatSessionService.ts
@@ -248,6 +248,7 @@ export default class AgentChatSessionService {
         userId: opts.userId,
         ownerGithubUsername: opts.userIdentity?.githubUsername || null,
         podName: null,
+        // Workspace namespace stays null until runtime provisioning; build namespace comes from build context instead.
         namespace: null,
         pvcName: null,
         model: selection.modelId,
diff --git a/src/server/services/agent/CustomAgentDefinitionService.ts b/src/server/services/agent/CustomAgentDefinitionService.ts
index 9a29a523..3c2eaae3 100644
--- a/src/server/services/agent/CustomAgentDefinitionService.ts
+++ b/src/server/services/agent/CustomAgentDefinitionService.ts
@@ -15,6 +15,7 @@
  */
 
 import type { RequestUserIdentity } from 'server/lib/get-user';
+import { AppError } from 'server/lib/appError';
 import AgentDefinition from 'server/models/AgentDefinition';
 import AgentRuntimeConfigService from 'server/services/agentRuntime/config/agentRuntimeConfig';
 import { v4 as uuid } from 'uuid';
@@ -93,13 +94,28 @@ export interface UserAgentDefinitionPublicContract {
   status: 'active' | 'archived';
 }
 
-export class CustomAgentDefinitionServiceError extends Error {
-  code: CustomAgentDefinitionErrorCode;
-
-  constructor(code: CustomAgentDefinitionErrorCode, message: string) {
-    super(message);
+// Maps each service-error discriminant to {httpStatus, stable contract code} so routes never re-map.
+const CUSTOM_AGENT_ERROR_CONTRACT: Record<CustomAgentDefinitionErrorCode, { httpStatus: number; code: string }> = {
+  not_found: { httpStatus: 404, code: 'custom_agent_not_found' },
+  invalid_input: { httpStatus: 400, code: 'custom_agent_invalid' },
+  model_unavailable: { httpStatus: 409, code: 'custom_agent_conflict' },
+  creation_unavailable: { httpStatus: 403, code: 'custom_agent_creation_unavailable' },
+  creator_capability_reserved: { httpStatus: 400, code: 'custom_agent_invalid' },
+  unknown_capability: { httpStatus: 400, code: 'custom_agent_invalid' },
+  admin_only: { httpStatus: 400, code: 'custom_agent_invalid' },
+  system_only: { httpStatus: 400, code: 'custom_agent_invalid' },
+  disabled: { httpStatus: 400, code: 'custom_agent_invalid' },
+  source_incompatible: { httpStatus: 400, code: 'custom_agent_invalid' },
+};
+
+export class CustomAgentDefinitionServiceError extends AppError {
+  readonly reason: CustomAgentDefinitionErrorCode;
+
+  constructor(reason: CustomAgentDefinitionErrorCode, message: string) {
+    const contract = CUSTOM_AGENT_ERROR_CONTRACT[reason];
+    super({ httpStatus: contract.httpStatus, code: contract.code, message, details: { reason } });
     this.name = 'CustomAgentDefinitionServiceError';
-    this.code = code;
+    this.reason = reason;
   }
 }
 
diff --git a/src/server/services/agent/InstructionTemplateService.ts b/src/server/services/agent/InstructionTemplateService.ts
index 38e7f231..ab379483 100644
--- a/src/server/services/agent/InstructionTemplateService.ts
+++ b/src/server/services/agent/InstructionTemplateService.ts
@@ -17,6 +17,7 @@
 import { createHash } from 'crypto';
 import AgentInstructionTemplate from 'server/models/AgentInstructionTemplate';
 import { getLogger } from 'server/lib/logger';
+import { AppError } from 'server/lib/appError';
 import {
   SYSTEM_INSTRUCTION_TEMPLATE_DEFINITIONS,
   type SystemInstructionTemplateDefinition,
@@ -65,19 +66,31 @@ export type UpdateInstructionTemplateOverrideInput = {
   updatedBy?: string | null;
 };
 
-export class InstructionTemplateServiceError extends Error {
+// Maps each discriminant to {httpStatus, stable contract code} so routes never re-map.
+const INSTRUCTION_TEMPLATE_ERROR_CONTRACT: Record<
+  InstructionTemplateServiceErrorCode,
+  { httpStatus: number; code: string }
+> = {
+  invalid_ref: { httpStatus: 400, code: 'instruction_template_ref_invalid' },
+  unknown_ref: { httpStatus: 404, code: 'instruction_template_not_found' },
+  invalid_content: { httpStatus: 400, code: 'instruction_template_content_invalid' },
+};
+
+export class InstructionTemplateServiceError extends AppError {
   readonly statusCode: number;
-  readonly details?: Record<string, unknown>;
+  readonly templateCode: InstructionTemplateServiceErrorCode;
 
   constructor(
-    public readonly code: InstructionTemplateServiceErrorCode,
+    templateCode: InstructionTemplateServiceErrorCode,
     message: string,
     options: { statusCode?: number; details?: Record<string, unknown> } = {}
   ) {
-    super(message);
+    const contract = INSTRUCTION_TEMPLATE_ERROR_CONTRACT[templateCode];
+    const httpStatus = options.statusCode || contract.httpStatus;
+    super({ httpStatus, code: contract.code, message, details: { templateCode, ...(options.details || {}) } });
     this.name = 'InstructionTemplateServiceError';
-    this.statusCode = options.statusCode || (code === 'unknown_ref' ? 404 : 400);
-    this.details = options.details;
+    this.templateCode = templateCode;
+    this.statusCode = httpStatus;
   }
 }
 
@@ -162,11 +175,43 @@ export default class InstructionTemplateService {
   static async seedSystemTemplates(
     definitions: readonly SystemInstructionTemplateDefinition[] = SYSTEM_INSTRUCTION_TEMPLATE_DEFINITIONS
   ): Promise<InstructionTemplateView[]> {
-    const rows = await Promise.all(
-      definitions.map((definition) => {
-        assertValidTemplateRef(definition.ref);
-        assertValidContent(definition.defaultContent);
+    const refs = definitions.map((d) => d.ref);
+    const existingRows = await AgentInstructionTemplate.query().whereIn('ref', refs).orderBy('ref', 'asc');
+    const existingMap = new Map(existingRows.map((r) => [r.ref, r]));
+
+    const toUpsert: SystemInstructionTemplateDefinition[] = [];
+    const unchangedRows: AgentInstructionTemplate[] = [];
+
+    for (const definition of definitions) {
+      assertValidTemplateRef(definition.ref);
+      assertValidContent(definition.defaultContent);
+
+      const existing = existingMap.get(definition.ref);
+      if (!existing) {
+        toUpsert.push(definition);
+      } else {
+        const defaultHash = computeInstructionTemplateContentHash(definition.defaultContent);
+        const needsUpdate =
+          existing.name !== definition.name ||
+          existing.description !== definition.description ||
+          existing.defaultContent !== definition.defaultContent ||
+          existing.defaultVersion !== definition.defaultVersion ||
+          existing.defaultHash !== defaultHash;
+
+        if (needsUpdate) {
+          toUpsert.push(definition);
+        } else {
+          unchangedRows.push(existing);
+        }
+      }
+    }
+
+    if (toUpsert.length === 0) {
+      return definitions.map((def) => toView(existingMap.get(def.ref)!));
+    }
 
+    const upsertedRows = await Promise.all(
+      toUpsert.map((definition) => {
         return AgentInstructionTemplate.upsert(
           {
             ref: definition.ref,
@@ -181,8 +226,18 @@ export default class InstructionTemplateService {
       })
     );
 
-    getLogger().info(`AgentExec: instruction templates seeded count=${rows.length}`);
-    return rows.map(toView);
+    const allRowsMap = new Map<string, AgentInstructionTemplate>();
+    for (const row of unchangedRows) {
+      allRowsMap.set(row.ref, row);
+    }
+    for (const row of upsertedRows) {
+      allRowsMap.set(row.ref, row);
+    }
+
+    getLogger().info(
+      `AgentExec: instruction templates seeded count=${definitions.length} (upserted=${toUpsert.length})`
+    );
+    return definitions.map((def) => toView(allRowsMap.get(def.ref)!));
   }
 
   static async listTemplates(): Promise<InstructionTemplateView[]> {
diff --git a/src/server/services/agent/LifecycleAiSdkHarness.ts b/src/server/services/agent/LifecycleAiSdkHarness.ts
index 66e9825a..1dd2f49d 100644
--- a/src/server/services/agent/LifecycleAiSdkHarness.ts
+++ b/src/server/services/agent/LifecycleAiSdkHarness.ts
@@ -554,6 +554,10 @@ function createEagerApprovalRequestSync({
   };
 }
 
+// Coalesce chunk flushes (by batch size or elapsed time, checked per chunk) to avoid a per-token insert+notify storm.
+const STREAM_FLUSH_BATCH_SIZE = 10;
+const STREAM_FLUSH_INTERVAL_MS = 50;
+
 async function consumeStream(
   runUuid: string,
   executionOwner: string,
@@ -562,6 +566,18 @@ async function consumeStream(
 ): Promise<void> {
   const reader = stream.getReader();
   const batch: AgentUiMessageChunk[] = [];
+  let lastFlushAt = Date.now();
+
+  const flushBatch = async () => {
+    if (batch.length === 0) {
+      return;
+    }
+    const chunks = batch.splice(0, batch.length);
+    await AgentRunService.appendStreamChunksForExecutionOwner(runUuid, executionOwner, chunks, {
+      beforeAppendChunks: ({ trx, run }) => beforeAppendChunks?.(chunks, { trx, run }),
+    });
+    lastFlushAt = Date.now();
+  };
 
   try {
     let streamDone = false;
@@ -577,20 +593,12 @@ async function consumeStream(
       }
 
       batch.push(value);
-      if (batch.length >= 10) {
-        const chunks = batch.splice(0, batch.length);
-        await AgentRunService.appendStreamChunksForExecutionOwner(runUuid, executionOwner, chunks, {
-          beforeAppendChunks: ({ trx, run }) => beforeAppendChunks?.(chunks, { trx, run }),
-        });
+      if (batch.length >= STREAM_FLUSH_BATCH_SIZE || Date.now() - lastFlushAt >= STREAM_FLUSH_INTERVAL_MS) {
+        await flushBatch();
       }
     }
 
-    if (batch.length > 0) {
-      const chunks = batch.splice(0, batch.length);
-      await AgentRunService.appendStreamChunksForExecutionOwner(runUuid, executionOwner, chunks, {
-        beforeAppendChunks: ({ trx, run }) => beforeAppendChunks?.(chunks, { trx, run }),
-      });
-    }
+    await flushBatch();
   } finally {
     reader.releaseLock();
   }
diff --git a/src/server/services/agent/MessageStore.ts b/src/server/services/agent/MessageStore.ts
index d9e0cd9a..c8544d07 100644
--- a/src/server/services/agent/MessageStore.ts
+++ b/src/server/services/agent/MessageStore.ts
@@ -79,6 +79,14 @@ function normalizeMessageId(value: unknown): string | null {
   return typeof value === 'string' && value.trim() ? value.trim() : null;
 }
 
+// Joined run.startedAt/completedAt aliases come back as Date (they skip the model's timestamp->ISO serialization).
+function normalizeTimestamp(value: unknown): string | null {
+  if (value instanceof Date) {
+    return Number.isNaN(value.getTime()) ? null : value.toISOString();
+  }
+  return typeof value === 'string' && value.trim() ? value.trim() : null;
+}
+
 function isAgentSwitchMessage(message: AgentMessage): boolean {
   return message.role === 'system' && message.metadata?.kind === AGENT_SWITCH_METADATA_KIND;
 }
@@ -207,6 +215,8 @@ function serializeCanonicalAgentMessage(
 
   const enrichedMessage = message as AgentMessage & {
     runUuid?: string | null;
+    runStartedAt?: Date | string | null;
+    runCompletedAt?: Date | string | null;
     createdAt?: string | null;
   };
 
@@ -219,11 +229,59 @@ function serializeCanonicalAgentMessage(
     runId: runUuid || normalizeMessageId(enrichedMessage.runUuid),
     role: message.role as CanonicalAgentMessage['role'],
     parts,
-    ...(isAgentSwitchMessage(message) ? { metadata: message.metadata || {} } : {}),
+    ...resolveSerializedMetadata(message, enrichedMessage),
     createdAt: enrichedMessage.createdAt || null,
   };
 }
 
+// Assistant messages carry run start/end timestamps in metadata so the read API can render a thinking duration.
+function resolveSerializedMetadata(
+  message: AgentMessage,
+  enrichedMessage: AgentMessage & {
+    runStartedAt?: Date | string | null;
+    runCompletedAt?: Date | string | null;
+    createdAt?: string | null;
+  }
+): { metadata?: Record<string, unknown> } {
+  if (isAgentSwitchMessage(message)) {
+    return { metadata: message.metadata || {} };
+  }
+
+  if (message.role !== 'assistant') {
+    return {};
+  }
+
+  const metadata = message.metadata || {};
+  // run.startedAt is the authoritative thinking start; stored metadata.createdAt is unreliable (set near completion).
+  const createdAt =
+    normalizeTimestamp(enrichedMessage.runStartedAt) ||
+    normalizeTimestamp(metadata.createdAt) ||
+    normalizeTimestamp(enrichedMessage.createdAt);
+  const completedAt =
+    normalizeTimestamp(enrichedMessage.runCompletedAt) || normalizeTimestamp(metadata.completedAt) || createdAt;
+
+  return {
+    metadata: {
+      ...metadata,
+      ...(createdAt ? { createdAt: clampStartBeforeEnd(createdAt, completedAt) } : {}),
+      ...(completedAt ? { completedAt } : {}),
+    },
+  };
+}
+
+// Clamp the served start to be no later than the end so the UI never renders a negative duration (it may be zero).
+function clampStartBeforeEnd(start: string, end: string | null): string {
+  if (!end) {
+    return start;
+  }
+  const startMs = Date.parse(start);
+  const endMs = Date.parse(end);
+  if (Number.isNaN(startMs) || Number.isNaN(endMs)) {
+    return start;
+  }
+  return startMs <= endMs ? start : end;
+}
+
 function normalizeLimit(limit?: number): number {
   if (!Number.isFinite(limit) || !limit) {
     return DEFAULT_AGENT_MESSAGE_PAGE_LIMIT;
@@ -232,6 +290,64 @@ function normalizeLimit(limit?: number): number {
   return Math.min(Math.max(1, Math.trunc(limit)), MAX_AGENT_MESSAGE_PAGE_LIMIT);
 }
 
+// Build the messageId -> row index used by every upsert path.
+function buildExistingByMessageId(rows: AgentMessage[]): Map<string, AgentMessage> {
+  const existingByMessageId = new Map<string, AgentMessage>();
+  for (const message of rows) {
+    for (const messageId of getStoredMessageIds(message)) {
+      existingByMessageId.set(messageId, message);
+    }
+  }
+
+  return existingByMessageId;
+}
+
+// Shared insert-or-patch loop; `reAddPatchedRow` keeps the index current after a patch for callers that rely on it.
+async function applyCanonicalMessageUpserts(
+  threadId: number,
+  messages: CanonicalAgentInputMessage[],
+  existingByMessageId: Map<string, AgentMessage>,
+  options: {
+    runId?: number | null;
+    trx?: Transaction;
+    metadataFor?: (message: CanonicalAgentInputMessage) => Record<string, unknown> | undefined;
+    reAddPatchedRow?: boolean;
+  } = {}
+): Promise<void> {
+  for (const message of messages) {
+    const incomingMessageId = getIncomingMessageId(message);
+    const row = incomingMessageId ? existingByMessageId.get(incomingMessageId) : undefined;
+    const stored = buildStoredCanonicalMessage(message, options.metadataFor?.(message), row);
+    const patch: PartialModelObject<AgentMessage> = {
+      role: message.role,
+      parts: stored.parts as unknown as Record<string, unknown>[],
+      uiMessage: null,
+      clientMessageId: stored.clientMessageId,
+      metadata: toJsonRecord(stored.metadata),
+      runId: resolveStoredRunId(message.role, row, options.runId),
+    };
+
+    if (!row) {
+      const inserted = await AgentMessage.query(options.trx).insert({
+        uuid: stored.uuid,
+        threadId,
+        ...patch,
+      });
+      for (const messageId of getStoredMessageIds(inserted)) {
+        existingByMessageId.set(messageId, inserted);
+      }
+      continue;
+    }
+
+    const updated = await AgentMessage.query(options.trx).patchAndFetchById(row.id, patch);
+    if (options.reAddPatchedRow) {
+      for (const messageId of getStoredMessageIds(updated)) {
+        existingByMessageId.set(messageId, updated);
+      }
+    }
+  }
+}
+
 export default class AgentMessageStore {
   static serializeCanonicalMessage(
     message: AgentMessage,
@@ -295,7 +411,7 @@ export default class AgentMessageStore {
             .whereRaw('"message"."metadata"->>? = ?', ['kind', AGENT_SWITCH_METADATA_KIND]);
         });
       })
-      .select('message.*', 'run.uuid as runUuid')
+      .select('message.*', 'run.uuid as runUuid', 'run.startedAt as runStartedAt', 'run.completedAt as runCompletedAt')
       .orderBy('message.createdAt', 'desc')
       .orderBy('message.id', 'desc')
       .limit(limit + 1);
@@ -423,47 +539,16 @@ export default class AgentMessageStore {
   ): Promise<AgentUIMessage[]> {
     const thread = await AgentThreadService.getOwnedThread(threadUuid, userId);
     const run = runUuid ? await AgentRun.query().findOne({ uuid: runUuid, threadId: thread.id }) : null;
-    const runId = run?.id ?? null;
     const existing = await AgentMessage.query().where({ threadId: thread.id });
-    const existingByMessageId = new Map<string, AgentMessage>();
-    for (const message of existing) {
-      for (const messageId of getStoredMessageIds(message)) {
-        existingByMessageId.set(messageId, message);
-      }
-    }
+    const existingByMessageId = buildExistingByMessageId(existing);
 
     const nonEmptyMessages = messages.filter(
       (message) => normalizeCanonicalAgentMessageParts(message.parts).length > 0
     );
 
-    for (const message of nonEmptyMessages) {
-      const incomingMessageId = getIncomingMessageId(message);
-      const row = incomingMessageId ? existingByMessageId.get(incomingMessageId) : undefined;
-      const stored = buildStoredCanonicalMessage(message, undefined, row);
-      const metadata = toJsonRecord(stored.metadata);
-      const patch: PartialModelObject<AgentMessage> = {
-        role: message.role,
-        parts: stored.parts as unknown as Record<string, unknown>[],
-        uiMessage: null,
-        clientMessageId: stored.clientMessageId,
-        metadata,
-        runId: resolveStoredRunId(message.role, row, runId),
-      };
-
-      if (!row) {
-        const inserted = await AgentMessage.query().insert({
-          uuid: stored.uuid,
-          threadId: thread.id,
-          ...patch,
-        });
-        for (const messageId of getStoredMessageIds(inserted)) {
-          existingByMessageId.set(messageId, inserted);
-        }
-        continue;
-      }
-
-      await AgentMessage.query().patchAndFetchById(row.id, patch);
-    }
+    await applyCanonicalMessageUpserts(thread.id, nonEmptyMessages, existingByMessageId, {
+      runId: run?.id ?? null,
+    });
 
     const reloaded = await AgentMessage.query().where({ threadId: thread.id }).orderBy('createdAt', 'asc');
     return reloaded.flatMap((row) => {
@@ -488,43 +573,13 @@ export default class AgentMessageStore {
     }
 
     const existing = await loadExistingMessagesForIncomingIds(thread.id, nonEmptyMessages, options?.trx);
-    const existingByMessageId = new Map<string, AgentMessage>();
-    for (const message of existing) {
-      for (const messageId of getStoredMessageIds(message)) {
-        existingByMessageId.set(messageId, message);
-      }
-    }
+    const existingByMessageId = buildExistingByMessageId(existing);
 
-    for (const message of nonEmptyMessages) {
-      const incomingMessageId = getIncomingMessageId(message);
-      const row = incomingMessageId ? existingByMessageId.get(incomingMessageId) : undefined;
-      const stored = buildStoredCanonicalMessage(message, undefined, row);
-      const patch: PartialModelObject<AgentMessage> = {
-        role: message.role,
-        parts: stored.parts as unknown as Record<string, unknown>[],
-        uiMessage: null,
-        clientMessageId: stored.clientMessageId,
-        metadata: toJsonRecord(stored.metadata),
-        runId: resolveStoredRunId(message.role, row, options?.runId),
-      };
-
-      if (!row) {
-        const inserted = await AgentMessage.query(options?.trx).insert({
-          uuid: stored.uuid,
-          threadId: thread.id,
-          ...patch,
-        });
-        for (const messageId of getStoredMessageIds(inserted)) {
-          existingByMessageId.set(messageId, inserted);
-        }
-        continue;
-      }
-
-      const updated = await AgentMessage.query(options?.trx).patchAndFetchById(row.id, patch);
-      for (const messageId of getStoredMessageIds(updated)) {
-        existingByMessageId.set(messageId, updated);
-      }
-    }
+    await applyCanonicalMessageUpserts(thread.id, nonEmptyMessages, existingByMessageId, {
+      runId: options?.runId,
+      trx: options?.trx,
+      reAddPatchedRow: true,
+    });
   }
 
   static async syncCanonicalMessagesFromUiMessages(
@@ -568,43 +623,15 @@ export default class AgentMessageStore {
       .filter((message) => message.parts.length > 0);
 
     const existing = await AgentMessage.query(options?.trx).where({ threadId: thread.id });
-    const existingByMessageId = new Map<string, AgentMessage>();
-    for (const message of existing) {
-      for (const messageId of getStoredMessageIds(message)) {
-        existingByMessageId.set(messageId, message);
-      }
-    }
-
-    for (const message of canonicalMessages) {
-      const incomingMessageId = getIncomingMessageId(message);
-      const row = incomingMessageId ? existingByMessageId.get(incomingMessageId) : undefined;
-      const stored = buildStoredCanonicalMessage(
-        message,
-        incomingMessageId ? metadataById.get(incomingMessageId) : undefined,
-        row
-      );
-      const patch: PartialModelObject<AgentMessage> = {
-        role: message.role,
-        parts: stored.parts as unknown as Record<string, unknown>[],
-        uiMessage: null,
-        clientMessageId: stored.clientMessageId,
-        metadata: toJsonRecord(stored.metadata),
-        runId: resolveStoredRunId(message.role, row, options?.runId),
-      };
-
-      if (!row) {
-        const inserted = await AgentMessage.query(options?.trx).insert({
-          uuid: stored.uuid,
-          threadId: thread.id,
-          ...patch,
-        });
-        for (const messageId of getStoredMessageIds(inserted)) {
-          existingByMessageId.set(messageId, inserted);
-        }
-        continue;
-      }
-
-      await AgentMessage.query(options?.trx).patchAndFetchById(row.id, patch);
-    }
+    const existingByMessageId = buildExistingByMessageId(existing);
+
+    await applyCanonicalMessageUpserts(thread.id, canonicalMessages, existingByMessageId, {
+      runId: options?.runId,
+      trx: options?.trx,
+      metadataFor: (message) => {
+        const incomingMessageId = getIncomingMessageId(message);
+        return incomingMessageId ? metadataById.get(incomingMessageId) : undefined;
+      },
+    });
   }
 }
diff --git a/src/server/services/agent/PolicyService.ts b/src/server/services/agent/PolicyService.ts
index e3b25d2f..cd95178d 100644
--- a/src/server/services/agent/PolicyService.ts
+++ b/src/server/services/agent/PolicyService.ts
@@ -25,7 +25,7 @@ import {
   type AgentCapabilitySourceKind,
 } from './capabilityCatalog';
 import type { AgentApprovalMode, AgentApprovalPolicy, AgentCapabilityKey } from './types';
-import { DEFAULT_AGENT_APPROVAL_POLICY } from './types';
+import { AGENT_CAPABILITY_KEYS, DEFAULT_AGENT_APPROVAL_POLICY } from './types';
 
 type McpAnnotations = {
   readOnlyHint?: boolean;
@@ -70,11 +70,19 @@ export default class AgentPolicyService {
   static async getEffectivePolicy(repoFullName?: string): Promise<AgentApprovalPolicy> {
     const config = await AgentRuntimeConfigService.getInstance().getEffectiveConfig(repoFullName);
     const configured = (config as { approvalPolicy?: ApprovalPolicyConfig }).approvalPolicy;
+    const configuredDefaultMode = configured?.defaultMode;
+    const defaultMode = configuredDefaultMode || DEFAULT_AGENT_APPROVAL_POLICY.defaultMode;
+    const baseRules = configuredDefaultMode
+      ? AGENT_CAPABILITY_KEYS.reduce<Record<AgentCapabilityKey, AgentApprovalMode>>((acc, capabilityKey) => {
+          acc[capabilityKey] = configuredDefaultMode;
+          return acc;
+        }, {} as Record<AgentCapabilityKey, AgentApprovalMode>)
+      : DEFAULT_AGENT_APPROVAL_POLICY.rules;
 
     return {
-      defaultMode: configured?.defaultMode || DEFAULT_AGENT_APPROVAL_POLICY.defaultMode,
+      defaultMode,
       rules: {
-        ...DEFAULT_AGENT_APPROVAL_POLICY.rules,
+        ...baseRules,
         ...(configured?.rules || {}),
       },
     };
diff --git a/src/server/services/agent/ProviderRegistry.ts b/src/server/services/agent/ProviderRegistry.ts
index 8f696187..a788cafa 100644
--- a/src/server/services/agent/ProviderRegistry.ts
+++ b/src/server/services/agent/ProviderRegistry.ts
@@ -23,6 +23,7 @@ import UserApiKeyService from 'server/services/userApiKey';
 import { transformProviderModels } from 'server/services/agentRuntime/models/modelTransformation';
 import type { RequestUserIdentity } from 'server/lib/get-user';
 import { getLogger } from 'server/lib/logger';
+import { BadRequestError } from 'server/lib/appError';
 import type { AgentModelSummary, AgentResolvedModelSelection } from './types';
 import { getProviderEnvVarCandidates, normalizeStoredAgentProviderName } from './providerConfig';
 
@@ -36,21 +37,23 @@ function normalizeModelProvider(provider: string): string | null {
   return normalizeStoredAgentProviderName(provider);
 }
 
-export class MissingAgentProviderApiKeyError extends Error {
+export class MissingAgentProviderApiKeyError extends BadRequestError {
   readonly provider: string;
 
   constructor(provider: string) {
     super(
-      `No API key is configured for provider "${provider}". Save your ${provider} API key in Agent Session settings or configure a shared Agent provider key.`
+      `No API key is configured for provider "${provider}". Save your ${provider} API key in Agent Session settings or configure a shared Agent provider key.`,
+      'provider_api_key_required',
+      { provider }
     );
     this.name = 'MissingAgentProviderApiKeyError';
     this.provider = provider;
   }
 }
 
-export class AgentModelSelectionError extends Error {
+export class AgentModelSelectionError extends BadRequestError {
   constructor(message: string) {
-    super(message);
+    super(message, 'model_selection_invalid');
     this.name = 'AgentModelSelectionError';
   }
 }
diff --git a/src/server/services/agent/RunEventService.ts b/src/server/services/agent/RunEventService.ts
index 4705368c..e65b05f1 100644
--- a/src/server/services/agent/RunEventService.ts
+++ b/src/server/services/agent/RunEventService.ts
@@ -21,13 +21,10 @@ import { sanitizeAgentRunStreamChunks, type AgentUiMessageChunk } from './stream
 import { limitDurablePayloadRecord } from './payloadLimits';
 import { resolveAgentSessionDurabilityConfig } from 'server/lib/agentSession/runtimeConfig';
 import { AgentRunOwnershipLostError } from './AgentRunOwnershipLostError';
+import { readString } from './runEventUtils';
+import { toChunkEvents, chunkFromEvent, type ChunkEvent } from './runEventChunkCodec';
 import type { Transaction } from 'objection';
 
-type ChunkEvent = {
-  eventType: string;
-  payload: Record<string, unknown>;
-};
-
 type RunEventAppendTarget = Pick<AgentRun, 'id'> & Partial<Pick<AgentRun, 'uuid'>>;
 
 type RunEventAppendOptions = {
@@ -39,7 +36,8 @@ type RunEventAppendOptions = {
 export const DEFAULT_RUN_EVENT_PAGE_LIMIT = 100;
 export const MAX_RUN_EVENT_PAGE_LIMIT = 500;
 export const RUN_EVENT_STREAM_PAGE_LIMIT = 100;
-export const RUN_EVENT_STREAM_POLL_INTERVAL_MS = 2000;
+// Polling fallback when LISTEN/notify is unavailable; tight so short reasoning bursts still stream live.
+export const RUN_EVENT_STREAM_POLL_INTERVAL_MS = 250;
 const AGENT_RUN_EVENT_VERSION = 1;
 const RUN_EVENT_NOTIFY_CHANNEL = 'agent_run_events';
 const RUN_EVENT_TERMINAL_STATUSES = new Set<AgentRun['status']>(['completed', 'failed', 'cancelled']);
@@ -94,28 +92,25 @@ type PgListenConnection = {
 
 type RunEventNotificationSubscriber = (notification: RunEventNotification) => void;
 
-const notificationSubscribers = new Map<string, Set<RunEventNotificationSubscriber>>();
-let notificationConnection: PgListenConnection | null = null;
-let notificationListenPromise: Promise<void> | null = null;
-
-function cloneValue<T>(value: T): T {
-  return JSON.parse(JSON.stringify(value)) as T;
-}
-
-function isRecord(value: unknown): value is Record<string, unknown> {
-  return !!value && typeof value === 'object' && !Array.isArray(value);
-}
-
-function asRecord(value: unknown): Record<string, unknown> {
-  return isRecord(value) ? value : {};
-}
-
-function readString(value: unknown): string | undefined {
-  return typeof value === 'string' ? value : undefined;
-}
+// Pinned to globalThis so LISTEN state survives Next.js dev module re-eval.
+type RunEventNotifyGlobal = typeof globalThis & {
+  __lifecycleRunEventNotify?: {
+    subscribers: Map<string, Set<RunEventNotificationSubscriber>>; // waiters keyed by run identifier
+    connection: PgListenConnection | null; // set once LISTEN succeeds; null before that and after a reset
+    listenPromise: Promise<void> | null; // in-flight LISTEN setup, shared by concurrent callers
+  };
+};
 
-function readBoolean(value: unknown): boolean | undefined {
-  return typeof value === 'boolean' ? value : undefined;
+/**
+ * Returns the LISTEN/notify bookkeeping stored on `globalThis`, creating an
+ * empty record (no subscribers, no connection, no pending listen) on first use.
+ */
+function runEventNotifyState(): NonNullable<RunEventNotifyGlobal['__lifecycleRunEventNotify']> {
+  const host = globalThis as RunEventNotifyGlobal;
+  const state = host.__lifecycleRunEventNotify ?? { subscribers: new Map(), connection: null, listenPromise: null };
+  host.__lifecycleRunEventNotify = state;
+
+  return state;
 }
 
 function isRunEventStreamOpen(run: Pick<AgentRun, 'status'>): boolean {
@@ -132,29 +127,8 @@ function encodeCanonicalSseEvent(event: SerializedRunEvent): Uint8Array {
   );
 }
 
-function pickDefined(source: Record<string, unknown>, keys: string[]): Record<string, unknown> {
-  const picked: Record<string, unknown> = {};
-
-  for (const key of keys) {
-    if (source[key] !== undefined) {
-      picked[key] = cloneValue(source[key]);
-    }
-  }
-
-  return picked;
-}
-
-function compactChunk(fields: Record<string, unknown>): AgentUiMessageChunk {
-  const chunk: Record<string, unknown> = {};
-
-  for (const [key, value] of Object.entries(fields)) {
-    if (value !== undefined) {
-      chunk[key] = value;
-    }
-  }
-
-  return chunk as AgentUiMessageChunk;
-}
+// SSE comment frame keeps idle connections warm so proxies/LBs don't idle-kill the stream.
+const SSE_KEEPALIVE_FRAME = textEncoder.encode(': keepalive\n\n');
 
 function parseRunEventNotification(payload: string | undefined): RunEventNotification | null {
   if (!payload) {
@@ -180,7 +154,7 @@ function parseRunEventNotification(payload: string | undefined): RunEventNotific
 }
 
 function notifySubscribers(notification: RunEventNotification): void {
-  const subscribers = notificationSubscribers.get(notification.runId);
+  const subscribers = runEventNotifyState().subscribers.get(notification.runId);
   if (!subscribers) {
     return;
   }
@@ -191,8 +165,9 @@ function notifySubscribers(notification: RunEventNotification): void {
 }
 
 function clearNotificationConnection(): void {
-  notificationConnection = null;
-  notificationListenPromise = null;
+  const state = runEventNotifyState();
+  state.connection = null;
+  state.listenPromise = null;
 }
 
 function handleNotification(notification: { channel?: string; payload?: string }): void {
@@ -211,511 +186,6 @@ function handleNotificationError(error: unknown): void {
   clearNotificationConnection();
 }
 
-function toChunkEvents(chunk: AgentUiMessageChunk): ChunkEvent[] {
-  const chunkRecord = chunk as unknown as Record<string, unknown>;
-
-  switch (chunk.type) {
-    case 'start':
-      return [
-        {
-          eventType: 'message.created',
-          payload: {
-            messageId: chunk.messageId,
-            metadata: chunk.messageMetadata || {},
-          },
-        },
-      ];
-    case 'message-metadata':
-      return [
-        {
-          eventType: 'message.metadata',
-          payload: {
-            metadata: cloneValue(chunk.messageMetadata || {}),
-          },
-        },
-      ];
-    case 'text-start':
-      return [
-        {
-          eventType: 'message.part.started',
-          payload: {
-            partType: 'text',
-            partId: chunk.id,
-            ...pickDefined(chunkRecord, ['providerMetadata']),
-          },
-        },
-      ];
-    case 'text-delta':
-      return [
-        {
-          eventType: 'message.delta',
-          payload: {
-            partType: 'text',
-            partId: chunk.id,
-            delta: chunk.delta,
-            ...pickDefined(chunkRecord, ['providerMetadata']),
-          },
-        },
-      ];
-    case 'text-end':
-      return [
-        {
-          eventType: 'message.part.completed',
-          payload: {
-            partType: 'text',
-            partId: chunk.id,
-            ...pickDefined(chunkRecord, ['providerMetadata']),
-          },
-        },
-      ];
-    case 'reasoning-start':
-      return [
-        {
-          eventType: 'message.part.started',
-          payload: {
-            partType: 'reasoning',
-            partId: chunk.id,
-            ...pickDefined(chunkRecord, ['providerMetadata']),
-          },
-        },
-      ];
-    case 'reasoning-delta':
-      return [
-        {
-          eventType: 'message.delta',
-          payload: {
-            partType: 'reasoning',
-            partId: chunk.id,
-            delta: chunk.delta,
-            ...pickDefined(chunkRecord, ['providerMetadata']),
-          },
-        },
-      ];
-    case 'reasoning-end':
-      return [
-        {
-          eventType: 'message.part.completed',
-          payload: {
-            partType: 'reasoning',
-            partId: chunk.id,
-            ...pickDefined(chunkRecord, ['providerMetadata']),
-          },
-        },
-      ];
-    case 'tool-input-start':
-      return [
-        {
-          eventType: 'tool.call.input.started',
-          payload: {
-            toolCallId: chunk.toolCallId,
-            toolName: chunk.toolName,
-            ...pickDefined(chunkRecord, ['providerExecuted', 'providerMetadata', 'dynamic', 'title']),
-          },
-        },
-      ];
-    case 'tool-input-delta':
-      return [
-        {
-          eventType: 'tool.call.input.delta',
-          payload: {
-            toolCallId: chunk.toolCallId,
-            inputTextDelta: chunk.inputTextDelta,
-          },
-        },
-      ];
-    case 'tool-input-available':
-    case 'tool-input-error':
-      return [
-        {
-          eventType: 'tool.call.started',
-          payload: {
-            toolCallId: chunk.toolCallId,
-            toolName: chunk.toolName,
-            inputStatus: chunk.type === 'tool-input-error' ? 'error' : 'available',
-            input: 'input' in chunk ? chunk.input : null,
-            errorText: 'errorText' in chunk ? chunk.errorText : null,
-            ...pickDefined(chunkRecord, ['providerExecuted', 'providerMetadata', 'dynamic', 'title']),
-          },
-        },
-      ];
-    case 'tool-output-available':
-    case 'tool-output-error':
-    case 'tool-output-denied':
-      return [
-        {
-          eventType: 'tool.call.completed',
-          payload: {
-            toolCallId: chunk.toolCallId,
-            output: 'output' in chunk ? chunk.output : null,
-            errorText: 'errorText' in chunk ? chunk.errorText : null,
-            status:
-              chunk.type === 'tool-output-available'
-                ? 'completed'
-                : chunk.type === 'tool-output-denied'
-                ? 'denied'
-                : 'failed',
-            ...pickDefined(chunkRecord, ['providerExecuted', 'providerMetadata', 'dynamic', 'preliminary']),
-          },
-        },
-      ];
-    case 'tool-approval-request':
-      return [
-        {
-          eventType: 'approval.requested',
-          payload: {
-            ...pickDefined(chunkRecord, ['actionId']),
-            approvalId: chunk.approvalId,
-            toolCallId: chunk.toolCallId,
-          },
-        },
-      ];
-    case 'data-file-change':
-      return [
-        {
-          eventType: 'tool.file_change',
-          payload: {
-            id: chunk.id,
-            data: cloneValue(chunk.data),
-            transient: chunk.transient,
-          },
-        },
-      ];
-    case 'source-url':
-      return [
-        {
-          eventType: 'message.source',
-          payload: {
-            sourceType: 'url',
-            sourceId: chunk.sourceId,
-            url: chunk.url,
-            ...pickDefined(chunkRecord, ['title', 'providerMetadata']),
-          },
-        },
-      ];
-    case 'source-document':
-      return [
-        {
-          eventType: 'message.source',
-          payload: {
-            sourceType: 'document',
-            sourceId: chunk.sourceId,
-            mediaType: chunk.mediaType,
-            title: chunk.title,
-            ...pickDefined(chunkRecord, ['filename', 'providerMetadata']),
-          },
-        },
-      ];
-    case 'file':
-      return [
-        {
-          eventType: 'message.file',
-          payload: {
-            url: chunk.url,
-            mediaType: chunk.mediaType,
-            ...pickDefined(chunkRecord, ['providerMetadata']),
-          },
-        },
-      ];
-    case 'start-step':
-      return [
-        {
-          eventType: 'run.step.started',
-          payload: {},
-        },
-      ];
-    case 'finish-step':
-      return [
-        {
-          eventType: 'run.step.completed',
-          payload: {},
-        },
-      ];
-    case 'finish':
-      return [
-        {
-          eventType: 'run.finished',
-          payload: {
-            finishReason: chunk.finishReason,
-            metadata: chunk.messageMetadata || {},
-          },
-        },
-      ];
-    case 'abort':
-      return [
-        {
-          eventType: 'run.aborted',
-          payload: {
-            reason: chunk.reason,
-          },
-        },
-      ];
-    case 'error':
-      return [
-        {
-          eventType: 'run.error',
-          payload: {
-            errorText: chunk.errorText,
-          },
-        },
-      ];
-  }
-
-  return [];
-}
-
-function chunkFromMessagePartEvent(eventType: string, payload: Record<string, unknown>): AgentUiMessageChunk | null {
-  const partType = readString(payload.partType);
-  const partId = readString(payload.partId) || readString(payload.messageId);
-  if ((partType !== 'text' && partType !== 'reasoning') || !partId) {
-    return null;
-  }
-
-  const providerMetadata = payload.providerMetadata;
-
-  if (eventType === 'message.part.started') {
-    return compactChunk({
-      type: partType === 'text' ? 'text-start' : 'reasoning-start',
-      id: partId,
-      providerMetadata,
-    });
-  }
-
-  if (eventType === 'message.delta') {
-    return compactChunk({
-      type: partType === 'text' ? 'text-delta' : 'reasoning-delta',
-      id: partId,
-      delta: readString(payload.delta) || '',
-      providerMetadata,
-    });
-  }
-
-  if (eventType === 'message.part.completed') {
-    return compactChunk({
-      type: partType === 'text' ? 'text-end' : 'reasoning-end',
-      id: partId,
-      providerMetadata,
-    });
-  }
-
-  return null;
-}
-
-function chunkFromToolStartedEvent(payload: Record<string, unknown>): AgentUiMessageChunk | null {
-  const toolCallId = readString(payload.toolCallId);
-  const toolName = readString(payload.toolName);
-  if (!toolCallId || !toolName) {
-    return null;
-  }
-
-  const inputStatus = readString(payload.inputStatus);
-  return compactChunk({
-    type: inputStatus === 'error' ? 'tool-input-error' : 'tool-input-available',
-    toolCallId,
-    toolName,
-    input: payload.input,
-    errorText: inputStatus === 'error' ? readString(payload.errorText) || 'Tool input failed.' : undefined,
-    providerExecuted: readBoolean(payload.providerExecuted),
-    providerMetadata: payload.providerMetadata,
-    dynamic: readBoolean(payload.dynamic),
-    title: readString(payload.title),
-  });
-}
-
-function chunkFromToolCompletedEvent(payload: Record<string, unknown>): AgentUiMessageChunk | null {
-  const toolCallId = readString(payload.toolCallId);
-  if (!toolCallId) {
-    return null;
-  }
-
-  const status = readString(payload.status);
-  if (status === 'denied') {
-    return compactChunk({
-      type: 'tool-output-denied',
-      toolCallId,
-    });
-  }
-
-  if (status === 'failed') {
-    return compactChunk({
-      type: 'tool-output-error',
-      toolCallId,
-      errorText: readString(payload.errorText) || 'Tool execution failed.',
-      providerExecuted: readBoolean(payload.providerExecuted),
-      providerMetadata: payload.providerMetadata,
-      dynamic: readBoolean(payload.dynamic),
-    });
-  }
-
-  return compactChunk({
-    type: 'tool-output-available',
-    toolCallId,
-    output: payload.output,
-    providerExecuted: readBoolean(payload.providerExecuted),
-    providerMetadata: payload.providerMetadata,
-    dynamic: readBoolean(payload.dynamic),
-    preliminary: readBoolean(payload.preliminary),
-  });
-}
-
-function chunkFromEvent(event: AgentRunEvent): AgentUiMessageChunk | null {
-  const payload = asRecord(event.payload);
-
-  switch (event.eventType) {
-    case 'message.created':
-      return compactChunk({
-        type: 'start',
-        messageId: readString(payload.messageId),
-        messageMetadata: payload.metadata,
-      });
-    case 'message.metadata':
-      return compactChunk({
-        type: 'message-metadata',
-        messageMetadata: payload.metadata || {},
-      });
-    case 'message.part.started':
-    case 'message.delta':
-    case 'message.part.completed':
-      return chunkFromMessagePartEvent(event.eventType, payload);
-    case 'tool.call.input.started': {
-      const toolCallId = readString(payload.toolCallId);
-      const toolName = readString(payload.toolName);
-      if (!toolCallId || !toolName) {
-        return null;
-      }
-
-      return compactChunk({
-        type: 'tool-input-start',
-        toolCallId,
-        toolName,
-        providerExecuted: readBoolean(payload.providerExecuted),
-        providerMetadata: payload.providerMetadata,
-        dynamic: readBoolean(payload.dynamic),
-        title: readString(payload.title),
-      });
-    }
-    case 'tool.call.input.delta': {
-      const toolCallId = readString(payload.toolCallId);
-      if (!toolCallId) {
-        return null;
-      }
-
-      return compactChunk({
-        type: 'tool-input-delta',
-        toolCallId,
-        inputTextDelta: readString(payload.inputTextDelta) || '',
-      });
-    }
-    case 'tool.call.started':
-      return chunkFromToolStartedEvent(payload);
-    case 'tool.call.completed':
-      return chunkFromToolCompletedEvent(payload);
-    case 'approval.requested': {
-      const approvalId = readString(payload.approvalId);
-      const toolCallId = readString(payload.toolCallId);
-      if (!approvalId || !toolCallId) {
-        return null;
-      }
-
-      return compactChunk({
-        type: 'tool-approval-request',
-        actionId: readString(payload.actionId),
-        approvalId,
-        toolCallId,
-      });
-    }
-    case 'tool.file_change':
-      if (!payload.data) {
-        return null;
-      }
-
-      return compactChunk({
-        type: 'data-file-change',
-        id: readString(payload.id),
-        data: payload.data,
-        transient: readBoolean(payload.transient),
-      });
-    case 'message.source':
-      if (payload.sourceType === 'url') {
-        const sourceId = readString(payload.sourceId);
-        const url = readString(payload.url);
-        if (!sourceId || !url) {
-          return null;
-        }
-
-        return compactChunk({
-          type: 'source-url',
-          sourceId,
-          url,
-          title: readString(payload.title),
-          providerMetadata: payload.providerMetadata,
-        });
-      }
-
-      if (payload.sourceType === 'document') {
-        const sourceId = readString(payload.sourceId);
-        const mediaType = readString(payload.mediaType);
-        const title = readString(payload.title);
-        if (!sourceId || !mediaType || !title) {
-          return null;
-        }
-
-        return compactChunk({
-          type: 'source-document',
-          sourceId,
-          mediaType,
-          title,
-          filename: readString(payload.filename),
-          providerMetadata: payload.providerMetadata,
-        });
-      }
-
-      return null;
-    case 'message.file': {
-      const url = readString(payload.url);
-      const mediaType = readString(payload.mediaType);
-      if (!url || !mediaType) {
-        return null;
-      }
-
-      return compactChunk({
-        type: 'file',
-        url,
-        mediaType,
-        providerMetadata: payload.providerMetadata,
-      });
-    }
-    case 'run.step.started':
-      return compactChunk({ type: 'start-step' });
-    case 'run.step.completed':
-      return compactChunk({ type: 'finish-step' });
-    case 'run.finished':
-      return compactChunk({
-        type: 'finish',
-        finishReason: readString(payload.finishReason),
-        messageMetadata: payload.metadata,
-      });
-    case 'run.aborted':
-      return compactChunk({
-        type: 'abort',
-        reason: readString(payload.reason),
-      });
-    case 'run.error':
-      return compactChunk({
-        type: 'error',
-        errorText: readString(payload.errorText) || 'Agent run failed.',
-      });
-    case 'run.failed': {
-      const error = asRecord(payload.error);
-      return compactChunk({
-        type: 'error',
-        errorText: readString(error.message) || readString(payload.errorText) || 'Agent run failed.',
-      });
-    }
-    default:
-      return null;
-  }
-}
-
 export function normalizeRunEventPageLimit(limit?: number | null): number {
   if (!Number.isFinite(limit)) {
     return DEFAULT_RUN_EVENT_PAGE_LIMIT;
@@ -734,15 +204,16 @@ function normalizeRunEventAfterSequence(afterSequence?: number | null): number {
 
 export default class AgentRunEventService {
   private static async ensureNotificationListener(): Promise<void> {
-    if (notificationConnection) {
+    const state = runEventNotifyState();
+    if (state.connection) {
       return;
     }
 
-    if (notificationListenPromise) {
-      return notificationListenPromise;
+    if (state.listenPromise) {
+      return state.listenPromise;
     }
 
-    notificationListenPromise = (async () => {
+    state.listenPromise = (async () => {
       const knex = AgentRunEvent.knex() as unknown as {
         client: {
           acquireConnection(): Promise<PgListenConnection>;
@@ -755,7 +226,7 @@ export default class AgentRunEventService {
         connection.on('notification', handleNotification);
         connection.on('error', handleNotificationError);
         await connection.query(`LISTEN ${RUN_EVENT_NOTIFY_CHANNEL}`);
-        notificationConnection = connection;
+        state.connection = connection;
       } catch (error) {
         await knex.client.releaseConnection(connection);
         throw error;
@@ -767,18 +238,19 @@ export default class AgentRunEventService {
         throw error;
       })
       .finally(() => {
-        notificationListenPromise = null;
+        state.listenPromise = null;
       });
 
-    return notificationListenPromise;
+    return state.listenPromise;
   }
 
   static async waitForRunEventNotification(
     runUuid: string,
     afterSequence: number,
-    timeoutMs: number
+    timeoutMs: number,
+    signal?: AbortSignal
   ): Promise<boolean> {
-    if (timeoutMs <= 0) {
+    if (timeoutMs <= 0 || signal?.aborted) {
       return false;
     }
 
@@ -791,7 +263,7 @@ export default class AgentRunEventService {
 
     return new Promise((resolve) => {
       let timeout: NodeJS.Timeout | null = null;
-      const subscribers = notificationSubscribers.get(runUuid) || new Set<RunEventNotificationSubscriber>();
+      const subscribers = runEventNotifyState().subscribers.get(runUuid) || new Set<RunEventNotificationSubscriber>();
 
       const cleanup = (notified: boolean) => {
         if (timeout) {
@@ -799,14 +271,20 @@ export default class AgentRunEventService {
           timeout = null;
         }
 
+        if (signal) {
+          signal.removeEventListener('abort', onAbort);
+        }
+
         subscribers.delete(subscriber);
         if (subscribers.size === 0) {
-          notificationSubscribers.delete(runUuid);
+          runEventNotifyState().subscribers.delete(runUuid);
         }
 
         resolve(notified);
       };
 
+      const onAbort = () => cleanup(false);
+
       const subscriber: RunEventNotificationSubscriber = (notification) => {
         if (notification.latestSequence > afterSequence) {
           cleanup(true);
@@ -814,8 +292,12 @@ export default class AgentRunEventService {
       };
 
       subscribers.add(subscriber);
-      notificationSubscribers.set(runUuid, subscribers);
+      runEventNotifyState().subscribers.set(runUuid, subscribers);
       timeout = setTimeout(() => cleanup(false), timeoutMs);
+      signal?.addEventListener('abort', onAbort, { once: true });
+      if (signal?.aborted) {
+        onAbort(); // fired before the listener attached; 'abort' is not replayed, so settle now
+      }
     });
   }
 
@@ -887,15 +369,34 @@ export default class AgentRunEventService {
     const pageLimit = normalizeRunEventPageLimit(options.pageLimit ?? RUN_EVENT_STREAM_PAGE_LIMIT);
     const pollIntervalMs = options.pollIntervalMs ?? RUN_EVENT_STREAM_POLL_INTERVAL_MS;
 
+    // `stopped` exits the loop on disconnect; the controller interrupts the notification wait.
+    let stopped = false;
+    const abortController = new AbortController();
+
     return new ReadableStream<Uint8Array>({
       start: async (controller) => {
         let cursor = normalizeRunEventAfterSequence(afterSequence);
         let sawTerminalEvent = false;
 
+        // enqueue() throws after the consumer cancels; guard every write so a disconnect tears down cleanly.
+        const safeEnqueue = (chunk: Uint8Array): boolean => {
+          if (stopped) {
+            return false;
+          }
+          try {
+            controller.enqueue(chunk);
+            return true;
+          } catch {
+            stopped = true;
+            abortController.abort();
+            return false;
+          }
+        };
+
         const drainAvailableEvents = async (): Promise<boolean> => {
           let hasMoreEvents = true;
 
-          while (hasMoreEvents) {
+          while (hasMoreEvents && !stopped) {
             const page = await this.listRunEventsPage(runUuid, {
               afterSequence: cursor,
               limit: pageLimit,
@@ -908,57 +409,111 @@ export default class AgentRunEventService {
               if (RUN_EVENT_TERMINAL_EVENT_TYPES.has(event.eventType)) {
                 sawTerminalEvent = true;
               }
-              controller.enqueue(encodeCanonicalSseEvent(this.serializeRunEvent(event)));
+              if (!safeEnqueue(encodeCanonicalSseEvent(this.serializeRunEvent(event)))) {
+                return false;
+              }
             }
 
             cursor = page.nextSequence;
             hasMoreEvents = page.hasMore;
           }
 
-          return true;
+          return !stopped;
         };
 
-        let streamOpen = true;
-        while (streamOpen) {
-          if (!(await drainAvailableEvents())) {
-            controller.close();
-            return;
-          }
-
-          const currentRun = await AgentRun.query().findOne({ uuid: runUuid });
-          if (!currentRun) {
-            controller.close();
-            return;
-          }
-
-          if (!isRunEventStreamOpen(currentRun)) {
-            if (sawTerminalEvent) {
-              streamOpen = false;
-              controller.close();
-              return;
+        try {
+          let streamOpen = true;
+          while (streamOpen && !stopped) {
+            if (!(await drainAvailableEvents())) {
+              break;
             }
 
-            if (!(await drainAvailableEvents())) {
-              controller.close();
-              return;
+            const currentRun = await AgentRun.query().findOne({ uuid: runUuid });
+            if (!currentRun) {
+              break;
             }
 
-            if (sawTerminalEvent) {
-              streamOpen = false;
-              controller.close();
-              return;
+            if (!isRunEventStreamOpen(currentRun)) {
+              // Terminal status: drain anything still pending, then close.
+              if (!sawTerminalEvent) {
+                if (!(await drainAvailableEvents())) {
+                  break;
+                }
+              }
+
+              // Self-heal: terminal status without a terminal event would poll forever; repair it.
+              if (!sawTerminalEvent) {
+                await this.ensureTerminalEventForTerminalRun(runUuid);
+                await drainAvailableEvents();
+              }
+
+              break;
             }
 
-            if (!sawTerminalEvent) {
-              await this.waitForRunEventNotification(runUuid, cursor, pollIntervalMs);
-              continue;
+            // Idle keep-alive: with no new events the connection would be idle-killed (~30-60s).
+            if (!safeEnqueue(SSE_KEEPALIVE_FRAME)) {
+              break;
             }
-          }
 
-          await this.waitForRunEventNotification(runUuid, cursor, pollIntervalMs);
+            await this.waitForRunEventNotification(runUuid, cursor, pollIntervalMs, abortController.signal);
+          }
+        } finally {
+          stopped = true;
+          if (!abortController.signal.aborted) {
+            abortController.abort();
+          }
+          try {
+            controller.close();
+          } catch {
+            // already closed/errored by the consumer
+          }
         }
       },
+      cancel: () => {
+        stopped = true;
+        abortController.abort();
+      },
+    });
+  }
+
+  /** Appends the missing terminal `run.<status>` event for a run whose stream is no longer open and notifies listeners; resolves true only when an event was appended. */
+  private static async ensureTerminalEventForTerminalRun(runUuid: string): Promise<boolean> {
+    let appendedSequence: number | null = null;
+    await AgentRun.transaction(async (trx) => {
+      const run = await AgentRun.query(trx).findOne({ uuid: runUuid }).forUpdate();
+      if (!run || isRunEventStreamOpen(run)) {
+        return;
+      }
+      // Only statuses whose `run.<status>` name is a known terminal event type can be repaired.
+      const terminalEventType = `run.${run.status}`;
+      if (!RUN_EVENT_TERMINAL_EVENT_TYPES.has(terminalEventType)) {
+        return;
+      }
+      // Idempotence: an already-recorded terminal event of this type means there is nothing to append.
+      const existing = await AgentRunEvent.query(trx).where({ runId: run.id, eventType: terminalEventType }).first();
+      if (existing) {
+        return;
+      }
+      // `repaired: true` flags the payload as coming from this repair path.
+      const runWithError = run as AgentRun & { error?: unknown; usageSummary?: unknown };
+      appendedSequence = await this.appendStatusEventForRunInTransaction(
+        run,
+        terminalEventType,
+        {
+          status: run.status,
+          error: runWithError.error || null,
+          usageSummary: runWithError.usageSummary || {},
+          repaired: true,
+        },
+        trx
+      );
     });
+    // Compare against null (not truthiness) so a sequence of 0 is still announced; notify after the transaction resolves.
+    if (appendedSequence !== null) {
+      await this.notifyRunEventsInserted(runUuid, appendedSequence);
+      return true;
+    }
+    return false;
   }
 
   private static requireExecutionOwner(
diff --git a/src/server/services/agent/RunExecutor.ts b/src/server/services/agent/RunExecutor.ts
index b433742c..6605b4ed 100644
--- a/src/server/services/agent/RunExecutor.ts
+++ b/src/server/services/agent/RunExecutor.ts
@@ -46,6 +46,7 @@ import { AgentRunOwnershipLostError } from './AgentRunOwnershipLostError';
 import { isReadOnlyDebugIntent, resolveDebugIntent, resolveDebugToolLoopControls } from './debugToolLoopControls';
 import { buildDebugRepairObservationText } from './debugRepairObservation';
 import { assistantRunHasText, sanitizeDebugRepairAssistantMessages } from './debugResponseSanitizer';
+import { resolveThinkingProviderOptions } from './thinkingProviderOptions';
 
 const DEBUG_READ_ONLY_SYNTHESIS_SYSTEM_PROMPT = [
   'You are completing a read-only Debug diagnosis after the evidence-gathering tool loop reached its tool-step budget.',
@@ -57,6 +58,16 @@ const DEBUG_READ_ONLY_SYNTHESIS_SYSTEM_PROMPT = [
 const DEBUG_READ_ONLY_SYNTHESIS_USER_PROMPT =
   'Write the final diagnostic answer now. Do not continue investigating or call tools.';
 
+const DEBUG_REPAIR_SYNTHESIS_SYSTEM_PROMPT = [
+  'You are closing out a Debug repair run after the tool loop reached its step budget without a confirmed fix.',
+  'Summarize what (if anything) was changed, the current environment state, and the single next step.',
+  'Do NOT call tools and do NOT claim an unverified fix.',
+  'Use only the evidence already present in the transcript.',
+].join(' ');
+
+const DEBUG_REPAIR_SYNTHESIS_USER_PROMPT =
+  'Write the final repair summary now: what was changed, what is still blocking, and the single next step. Do not call tools and do not claim an unverified fix.';
+
 function buildSystemPrompt(parts: Array<string | undefined>): string | undefined {
   const normalized = parts.map((part) => part?.trim()).filter(Boolean) as string[];
   if (normalized.length === 0) {
@@ -306,6 +317,8 @@ export default class AgentRunExecutor {
       selection,
       userIdentity,
     });
+    // Reasoning is for the streaming ToolLoopAgent path only; synthesis fallbacks discard it.
+    const thinkingProviderOptions = resolveThinkingProviderOptions(selection.provider, selection.modelId);
     const observabilityTracker = new AgentRunObservabilityTracker(selection);
     const touchSessionActivity = async () => {
       try {
@@ -328,7 +341,7 @@ export default class AgentRunExecutor {
       repoFullName,
       runControlPlaneConfig.appendSystemPrompt
     );
-    let run: Awaited<ReturnType<typeof AgentRunService.createRun>> | null = null;
+    let run: AgentRun | null = null;
     let heartbeatTimer: NodeJS.Timeout | null = null;
 
     const requireRun = () => {
@@ -523,9 +536,14 @@ export default class AgentRunExecutor {
         maxIterations: runControlPlaneConfig.maxIterations,
       });
       const resolvedInstructionTexts = readResolvedInstructionTexts(executionRunPlan);
-      const synthesizeReadOnlyDebugAnswer = async (messages: AgentUIMessage[]): Promise<string | null> => {
-        const debugIntent = resolveDebugIntent(executionRunPlan);
-        if (!debugIntent || !isReadOnlyDebugIntent(debugIntent)) {
+      // Tools-off synthesis of the final answer after the loop hits its step budget.
+      const synthesizeDebugFinalAnswer = async (
+        messages: AgentUIMessage[],
+        synthesisSystemPrompt: string,
+        synthesisUserPrompt: string,
+        abortSignal?: AbortSignal
+      ): Promise<string | null> => {
+        if (abortSignal?.aborted) {
           return null;
         }
 
@@ -544,10 +562,11 @@ export default class AgentRunExecutor {
               ...resolvedInstructionTexts,
               executionRunPlan?.prompt.instructionAddendum || undefined,
               sessionPrompt,
-              DEBUG_READ_ONLY_SYNTHESIS_SYSTEM_PROMPT,
+              synthesisSystemPrompt,
             ]),
-            messages: [...modelMessages, { role: 'user', content: DEBUG_READ_ONLY_SYNTHESIS_USER_PROMPT }],
+            messages: [...modelMessages, { role: 'user', content: synthesisUserPrompt }],
             toolChoice: 'none',
+            abortSignal,
           });
           observabilityTracker.addGeneration({
             usage: result.totalUsage,
@@ -568,8 +587,41 @@ export default class AgentRunExecutor {
           return null;
         }
       };
+      const synthesizeReadOnlyDebugAnswer = async (
+        messages: AgentUIMessage[],
+        abortSignal?: AbortSignal
+      ): Promise<string | null> => {
+        const debugIntent = resolveDebugIntent(executionRunPlan);
+        if (!debugIntent || !isReadOnlyDebugIntent(debugIntent)) {
+          return null;
+        }
+
+        return synthesizeDebugFinalAnswer(
+          messages,
+          DEBUG_READ_ONLY_SYNTHESIS_SYSTEM_PROMPT,
+          DEBUG_READ_ONLY_SYNTHESIS_USER_PROMPT,
+          abortSignal
+        );
+      };
+      const synthesizeRepairSummaryAnswer = async (
+        messages: AgentUIMessage[],
+        abortSignal?: AbortSignal
+      ): Promise<string | null> => {
+        const debugIntent = resolveDebugIntent(executionRunPlan);
+        if (debugIntent !== 'repair') {
+          return null;
+        }
+
+        return synthesizeDebugFinalAnswer(
+          messages,
+          DEBUG_REPAIR_SYNTHESIS_SYSTEM_PROMPT,
+          DEBUG_REPAIR_SYNTHESIS_USER_PROMPT,
+          abortSignal
+        );
+      };
       const agent = new ToolLoopAgent({
         model,
+        providerOptions: thinkingProviderOptions,
         instructions: buildSystemPrompt([
           runControlPlaneConfig.systemPrompt,
           ...resolvedInstructionTexts,
@@ -653,13 +705,15 @@ export default class AgentRunExecutor {
         onStreamFinish: async ({
           messages: updatedMessages,
           finishReason,
-          isAborted: _isAborted,
+          isAborted,
         }: {
           messages: AgentUIMessage[];
           finishReason?: string;
           isAborted: boolean;
         }) => {
           let observabilitySummary = observabilityTracker.getSummary();
+          // No graceful synthesis if the run was cancelled or ownership was lost.
+          const synthesisAllowed = !isAborted && !controller.signal.aborted;
           try {
             if (!activeExecutionOwner) {
               throw new Error('Agent run execution owner is required.');
@@ -667,8 +721,8 @@ export default class AgentRunExecutor {
 
             let effectiveMessages = updatedMessages;
             let effectiveFinishReason = finishReason;
-            if (finishReason === 'tool-calls') {
-              const synthesizedAnswer = await synthesizeReadOnlyDebugAnswer(updatedMessages);
+            if (finishReason === 'tool-calls' && synthesisAllowed) {
+              const synthesizedAnswer = await synthesizeReadOnlyDebugAnswer(updatedMessages, controller.signal);
               if (synthesizedAnswer) {
                 effectiveMessages = appendAssistantTextForRun(updatedMessages, activeRun.uuid, synthesizedAnswer);
                 effectiveFinishReason = 'stop';
@@ -703,19 +757,32 @@ export default class AgentRunExecutor {
             if (hasDebugRepairObservation && effectiveFinishReason === 'tool-calls') {
               effectiveFinishReason = 'stop';
             }
+            // Repair run hit its budget with no commit observation: synthesize a summary instead of failing blank.
+            if (!hasDebugRepairObservation && effectiveFinishReason === 'tool-calls' && synthesisAllowed) {
+              const repairSummary = await synthesizeRepairSummaryAnswer(effectiveMessages, controller.signal);
+              if (repairSummary) {
+                effectiveMessages = appendAssistantTextForRun(effectiveMessages, activeRun.uuid, repairSummary);
+                effectiveFinishReason = 'stop';
+              }
+            }
 
             observabilitySummary = observabilityTracker.getSummary();
+            const completedAt = new Date().toISOString();
             const messagesWithApprovalStages = applyApprovalResponsesToFileChangeParts(effectiveMessages);
+            // Stamp run start/end onto the assistant message so the read API exposes a thinking duration.
             const messagesWithObservability = applyFinalObservabilityToMessages(
               messagesWithApprovalStages,
               activeRun.uuid,
-              buildMessageObservabilityMetadataPatch(observabilitySummary)
+              {
+                ...buildMessageObservabilityMetadataPatch(observabilitySummary),
+                createdAt: activeRun.startedAt || completedAt,
+                completedAt,
+              }
             );
             const terminalFailure = classifyTerminalRunFailure({
               finishReason: effectiveFinishReason,
               maxIterations: loopControls.effectiveMaxIterations,
             });
-            const completedAt = new Date().toISOString();
 
             const finalizedRun = await AgentRunService.finalizeRunForExecutionOwner(
               activeRun.uuid,
diff --git a/src/server/services/agent/RunPlanResolver.ts b/src/server/services/agent/RunPlanResolver.ts
index 8b06dcdc..641304fb 100644
--- a/src/server/services/agent/RunPlanResolver.ts
+++ b/src/server/services/agent/RunPlanResolver.ts
@@ -20,6 +20,7 @@ import type AgentSource from 'server/models/AgentSource';
 import type AgentThread from 'server/models/AgentThread';
 import type { RequestUserIdentity } from 'server/lib/get-user';
 import { getLogger } from 'server/lib/logger';
+import { AppError, ConflictError } from 'server/lib/appError';
 import { AgentWorkspaceStatus } from 'shared/constants';
 import AgentCapabilityService from './CapabilityService';
 import AgentPolicyService from './PolicyService';
@@ -53,33 +54,48 @@ type FindPriorCompletedDebugIntentRun = (input: {
   intents: AgentDebugRunIntent[];
 }) => Promise<boolean>;
 
-export class AgentRunPlanCapabilityUnavailableError extends Error {
-  constructor(public readonly capabilityId: string, public readonly reason: string | undefined) {
-    super(`Agent capability "${capabilityId}" is unavailable${reason ? `: ${reason}` : ''}.`);
+export class AgentRunPlanCapabilityUnavailableError extends ConflictError {
+  readonly capabilityId: string;
+  readonly reason: string | undefined;
+  constructor(capabilityId: string, reason: string | undefined) {
+    super(
+      `Agent capability "${capabilityId}" is unavailable${reason ? `: ${reason}` : ''}.`,
+      'capability_unavailable',
+      {
+        capabilityId,
+        ...(reason ? { reason } : {}),
+      }
+    );
     this.name = 'AgentRunPlanCapabilityUnavailableError';
+    this.capabilityId = capabilityId;
+    this.reason = reason;
   }
 }
 
-export class AgentRunPlanAgentUnavailableError extends Error {
-  constructor(
-    public readonly agentId: string,
-    public readonly reason: string,
-    public readonly details?: Record<string, unknown>
-  ) {
-    super(`Agent "${agentId}" is unavailable: ${reason}.`);
+export class AgentRunPlanAgentUnavailableError extends ConflictError {
+  readonly agentId: string;
+  readonly reason: string;
+  constructor(agentId: string, reason: string, extra?: Record<string, unknown>) {
+    super(`Agent "${agentId}" is unavailable: ${reason}.`, 'agent_unavailable', { agentId, reason, ...extra });
     this.name = 'AgentRunPlanAgentUnavailableError';
+    this.agentId = agentId;
+    this.reason = reason;
   }
 }
 
-export class AgentRunPlanInstructionTemplateError extends Error {
-  constructor(
-    public readonly code: string,
-    message: string,
-    public readonly statusCode?: number,
-    public readonly details?: Record<string, unknown>
-  ) {
-    super(`Agent instruction template configuration is invalid: ${message}`);
+export class AgentRunPlanInstructionTemplateError extends AppError {
+  readonly templateCode: string;
+  readonly statusCode?: number;
+  constructor(templateCode: string, message: string, statusCode?: number, details?: Record<string, unknown>) {
+    super({
+      httpStatus: 422,
+      code: 'instruction_template_invalid',
+      message: `Agent instruction template configuration is invalid: ${message}`,
+      details: { templateCode, ...(details || {}) },
+    });
     this.name = 'AgentRunPlanInstructionTemplateError';
+    this.templateCode = templateCode;
+    this.statusCode = statusCode;
   }
 }
 
@@ -158,7 +174,12 @@ async function resolveInstructionSnapshots(
     return resolved.map(toResolvedInstructionSnapshot);
   } catch (error) {
     if (error instanceof InstructionTemplateServiceError) {
-      throw new AgentRunPlanInstructionTemplateError(error.code, error.message, error.statusCode, error.details);
+      throw new AgentRunPlanInstructionTemplateError(
+        error.templateCode,
+        error.message,
+        error.statusCode,
+        error.details
+      );
     }
 
     throw error;
@@ -420,7 +441,7 @@ async function resolveSelectedDefinition({
       definition: await customAgentDefinitionService.getUserDefinition(selectedAgentDefinitionId, userId),
     };
   } catch (error) {
-    if (!(error instanceof CustomAgentDefinitionServiceError) || error.code !== 'not_found') {
+    if (!(error instanceof CustomAgentDefinitionServiceError) || error.reason !== 'not_found') {
       throw error;
     }
 
diff --git a/src/server/services/agent/RunResumeEligibilityService.ts b/src/server/services/agent/RunResumeEligibilityService.ts
index d5d80873..df71573c 100644
--- a/src/server/services/agent/RunResumeEligibilityService.ts
+++ b/src/server/services/agent/RunResumeEligibilityService.ts
@@ -16,6 +16,10 @@
 
 import AgentPendingAction from 'server/models/AgentPendingAction';
 import type AgentRun from 'server/models/AgentRun';
+import {
+  DEFAULT_AGENT_SESSION_RUN_EXECUTION_LEASE_MS,
+  resolveAgentSessionDurabilityConfig,
+} from 'server/lib/agentSession/runtimeConfig';
 import { isAgentRunPlanSnapshotV1, type AgentRunPlanResolvedCapabilityAccess } from './runPlanTypes';
 import type { AgentCapabilityKey, AgentRunStatus } from './types';
 
@@ -56,13 +60,18 @@ export interface AgentRunPendingActionSummary {
 }
 
 export interface EvaluateRunResumeEligibilityInput {
-  run: Pick<AgentRun, 'status' | 'executionOwner' | 'leaseExpiresAt' | 'runPlanSnapshot'> & {
+  run: Pick<
+    AgentRun,
+    'status' | 'executionOwner' | 'leaseExpiresAt' | 'heartbeatAt' | 'startedAt' | 'runPlanSnapshot'
+  > & {
     id?: number;
   };
   pendingActions?: AgentRunPendingActionSummary | null;
   now?: Date;
   eventHistoryExhausted?: boolean;
   savedStateInvalid?: boolean;
+  // Heartbeat-staleness window: a starting/running run older than this is orphaned even with an unexpired lease. Defaults to the derived 3x-heartbeat-interval.
+  heartbeatStaleMs?: number;
 }
 
 const TERMINAL_STATUSES = new Set<AgentRunStatus>(['completed', 'failed', 'cancelled']);
@@ -91,6 +100,24 @@ function isLeaseExpired(leaseExpiresAt: string | null | undefined, now: Date): b
   return Boolean(leaseExpiresAt) && new Date(leaseExpiresAt as string).getTime() <= now.getTime();
 }
 
+function resolveHeartbeatStaleMs(runExecutionLeaseMs: number): number {
+  // Interval = lease/3 clamped to [10s, 60s]; the stale window is three intervals, capped at the lease itself.
+  const heartbeatIntervalMs = Math.max(10_000, Math.min(60_000, Math.floor(runExecutionLeaseMs / 3)));
+  return Math.min(heartbeatIntervalMs * 3, runExecutionLeaseMs);
+}
+
+function isHeartbeatStale(
+  run: Pick<AgentRun, 'heartbeatAt' | 'startedAt'>,
+  now: Date,
+  heartbeatStaleMs: number
+): boolean {
+  // Last sign of life: the recorded heartbeat, or the start time when no heartbeat is set.
+  const lastSeenAt = run.heartbeatAt || run.startedAt;
+  // With neither timestamp there is nothing to age, so the run is reported as not stale.
+  const staleCutoffMs = now.getTime() - heartbeatStaleMs;
+  return lastSeenAt ? new Date(lastSeenAt).getTime() <= staleCutoffMs : false;
+}
+
 function unsafeCapability(access: AgentRunPlanResolvedCapabilityAccess): {
   reason: Extract<AgentRunResumeReason, 'unknown_capability' | 'write_capability'>;
   capabilityId: string;
@@ -190,7 +217,10 @@ export default class AgentRunResumeEligibilityService {
       return decision(input, 'manual_recovery_required', 'ambiguous_ownership');
     }
 
-    if (!isLeaseExpired(input.run.leaseExpiresAt, now)) {
+    // A stale heartbeat means the run is orphaned (worker died/OOM) even with an unexpired lease, so it must NOT count as lease_active.
+    const heartbeatStaleMs =
+      input.heartbeatStaleMs ?? resolveHeartbeatStaleMs(DEFAULT_AGENT_SESSION_RUN_EXECUTION_LEASE_MS);
+    if (!isLeaseExpired(input.run.leaseExpiresAt, now) && !isHeartbeatStale(input.run, now, heartbeatStaleMs)) {
       return decision(input, 'replay_only', 'lease_active');
     }
 
@@ -221,11 +251,16 @@ export default class AgentRunResumeEligibilityService {
     run: AgentRun & { id?: number },
     options: Omit<EvaluateRunResumeEligibilityInput, 'run' | 'pendingActions'> = {}
   ): Promise<AgentRunResumeEligibility> {
+    const heartbeatStaleMs =
+      options.heartbeatStaleMs ??
+      resolveHeartbeatStaleMs((await resolveAgentSessionDurabilityConfig()).runExecutionLeaseMs);
+
     if (!Number.isInteger(run.id)) {
       return this.evaluate({
         run,
         pendingActions: null,
         ...options,
+        heartbeatStaleMs,
       });
     }
 
@@ -251,6 +286,7 @@ export default class AgentRunResumeEligibilityService {
       run,
       pendingActions,
       ...options,
+      heartbeatStaleMs,
     });
   }
 }
diff --git a/src/server/services/agent/RunService.ts b/src/server/services/agent/RunService.ts
index a30c0ea2..da98798c 100644
--- a/src/server/services/agent/RunService.ts
+++ b/src/server/services/agent/RunService.ts
@@ -16,6 +16,7 @@
 
 import { raw, type PartialModelObject, type Transaction } from 'objection';
 import 'server/lib/dependencies';
+import { getLogger } from 'server/lib/logger';
 import AgentRun from 'server/models/AgentRun';
 import AgentThread from 'server/models/AgentThread';
 import AgentSession from 'server/models/AgentSession';
@@ -25,32 +26,74 @@ import AgentRunEventService from './RunEventService';
 import { isAgentRunPlanSnapshotV1, type AgentDebugRunIntent, type AgentRunPlanSnapshotV1 } from './runPlanTypes';
 import { serializeRunPlanSummary } from './runPlanSummary';
 import { AgentRunOwnershipLostError } from './AgentRunOwnershipLostError';
+import { ConflictError, BadRequestError } from 'server/lib/appError';
+import { classifyThrownRunError } from './runErrorClassification';
 import type { AgentRunResumeEligibility } from './RunResumeEligibilityService';
-import {
-  DEFAULT_AGENT_SESSION_DISPATCH_RECOVERY_LIMIT,
-  DEFAULT_AGENT_SESSION_QUEUED_RUN_DISPATCH_STALE_MS,
-  DEFAULT_AGENT_SESSION_RUN_EXECUTION_LEASE_MS,
-  resolveAgentSessionDurabilityConfig,
-} from 'server/lib/agentSession/runtimeConfig';
+import { resolveAgentSessionDurabilityConfig } from 'server/lib/agentSession/runtimeConfig';
 
 const activeRunControllers = new Map<string, AbortController>();
 const RUN_NOT_FOUND_ERROR = 'Agent run not found';
 export const TERMINAL_RUN_STATUSES: AgentRunStatus[] = ['completed', 'failed', 'cancelled'];
-export const DEFAULT_RUN_EXECUTION_LEASE_MS = DEFAULT_AGENT_SESSION_RUN_EXECUTION_LEASE_MS;
-export const DEFAULT_RUN_DISPATCH_RECOVERY_LIMIT = DEFAULT_AGENT_SESSION_DISPATCH_RECOVERY_LIMIT;
-export const DEFAULT_QUEUED_RUN_DISPATCH_STALE_MS = DEFAULT_AGENT_SESSION_QUEUED_RUN_DISPATCH_STALE_MS;
 const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
 
-export class ActiveAgentRunError extends Error {
+// Best-effort fast cross-process abort; the ownership fence still stops a missed worker.
+const RUN_CANCEL_NOTIFY_CHANNEL = 'agent_run_cancel';
+
+type PgListenConnection = {
+  on(event: 'notification', listener: (notification: { channel?: string; payload?: string }) => void): void;
+  on(event: 'error', listener: (error: unknown) => void): void;
+  query(sql: string): Promise<unknown>;
+};
+
+let cancelNotificationConnection: PgListenConnection | null = null;
+let cancelNotificationListenPromise: Promise<void> | null = null;
+
+function clearCancelNotificationConnection(): void {
+  cancelNotificationConnection = null;
+  cancelNotificationListenPromise = null;
+}
+
+function parseCancelNotification(payload: string | undefined): string | null {
+  if (!payload) {
+    return null; // absent or empty payload carries no run id
+  }
+
+  try {
+    // Destructuring throws for a JSON `null` payload; the catch below treats that as invalid.
+    const { runId } = JSON.parse(payload) as Record<string, unknown>;
+    return typeof runId === 'string' && isUuid(runId) ? runId : null;
+  } catch (error) {
+    getLogger().warn({ error }, 'AgentExec: ignored invalid run-cancel notification');
+    return null;
+  }
+}
+
+function handleCancelNotification(notification: { channel?: string; payload?: string }): void {
+  // Notifications for any other channel are ignored.
+  if (notification.channel !== RUN_CANCEL_NOTIFY_CHANNEL) {
+    return;
+  }
+  const cancelledRunUuid = parseCancelNotification(notification.payload);
+  // Only a run with a controller registered in this process has anything to abort here.
+  const localController = cancelledRunUuid ? activeRunControllers.get(cancelledRunUuid) : undefined;
+  localController?.abort();
+}
+
+function handleCancelNotificationError(error: unknown): void {
+  getLogger().warn({ error }, 'AgentExec: run-cancel notification listener failed');
+  clearCancelNotificationConnection();
+}
+
+export class ActiveAgentRunError extends ConflictError {
   constructor() {
-    super('Wait for the current agent run to finish before starting another run.');
+    super('Wait for the current agent run to finish before starting another run.', 'run_already_running');
     this.name = 'ActiveAgentRunError';
   }
 }
 
-export class InvalidAgentRunDefaultsError extends Error {
+export class InvalidAgentRunDefaultsError extends BadRequestError {
   constructor(message: string) {
-    super(message);
+    super(message, 'run_defaults_invalid');
     this.name = 'InvalidAgentRunDefaultsError';
   }
 }
@@ -73,6 +116,19 @@ function serializeRunError(error: unknown): Record<string, unknown> {
       };
     }
 
+    // Give provider/SDK/OAuth/ownership failures a stable code + recovery action so they aren't persisted as uncoded prose.
+    const classified = classifyThrownRunError(error);
+    if (classified) {
+      return {
+        name: classified.name || 'AgentRunTerminalFailure',
+        code: classified.code,
+        message: classified.message,
+        ...(classified.details ? { details: classified.details } : {}),
+        ...(classified.retryable !== undefined ? { retryable: classified.retryable } : {}),
+        ...(classified.nextAction ? { nextAction: classified.nextAction } : {}),
+      };
+    }
+
     const serialized: Record<string, unknown> = {
       message: error.message,
       stack: error.stack || null,
@@ -140,6 +196,24 @@ function isLeaseExpired(leaseExpiresAt: string | null | undefined, now: Date): b
   return new Date(leaseExpiresAt).getTime() <= now.getTime();
 }
 
+function resolveHeartbeatStaleMs(runExecutionLeaseMs: number): number {
+  // Interval = lease/3 clamped to [10s, 60s]; the stale window is three intervals, capped at the lease itself.
+  const heartbeatIntervalMs = Math.max(10_000, Math.min(60_000, Math.floor(runExecutionLeaseMs / 3)));
+  return Math.min(heartbeatIntervalMs * 3, runExecutionLeaseMs);
+}
+
+function isHeartbeatStale(
+  run: Pick<AgentRun, 'heartbeatAt' | 'startedAt'>,
+  now: Date,
+  heartbeatStaleMs: number
+): boolean {
+  // Last sign of life: the recorded heartbeat, or the start time when no heartbeat is set.
+  const lastSeenAt = run.heartbeatAt || run.startedAt;
+  // With neither timestamp there is nothing to age, so the run is reported as not stale.
+  const staleCutoffMs = now.getTime() - heartbeatStaleMs;
+  return lastSeenAt ? new Date(lastSeenAt).getTime() <= staleCutoffMs : false;
+}
+
 function shouldReleaseExecution(status: AgentRunStatus): boolean {
   return (
     status === 'queued' ||
@@ -295,49 +369,69 @@ export default class AgentRunService {
     return run;
   }
 
-  static async createRun({
-    thread,
-    session,
-    provider,
-    model,
-    policy,
-    runPlanSnapshot,
-  }: {
-    thread: AgentThread;
-    session: AgentSession;
-    provider: string;
-    model: string;
-    policy: AgentApprovalPolicy;
-    runPlanSnapshot: AgentRunPlanSnapshotV1;
-  }): Promise<AgentRun> {
-    const run = await this.createQueuedRun({
-      thread,
-      session,
-      policy,
-      requestedHarness: session.defaultHarness,
-      requestedProvider: provider,
-      requestedModel: model,
-      resolvedHarness: session.defaultHarness || 'lifecycle_ai_sdk',
-      resolvedProvider: provider,
-      resolvedModel: model,
-      runPlanSnapshot,
-    });
-
-    return this.startRun(run.uuid, {
-      resolvedHarness: session.defaultHarness || 'lifecycle_ai_sdk',
-      provider,
-      model,
-    });
-  }
-
   static registerAbortController(runUuid: string, controller: AbortController): void {
     activeRunControllers.set(runUuid, controller);
+    // Lazily start the cross-process cancel listener once this worker owns a controller.
+    void this.ensureCancelNotificationListener().catch(() => {});
   }
 
   static clearAbortController(runUuid: string): void {
     activeRunControllers.delete(runUuid);
   }
 
+  // Single shared LISTEN connection per process; a connection drop clears the cache so the next caller re-listens.
+  private static async ensureCancelNotificationListener(): Promise<void> {
+    if (cancelNotificationConnection) {
+      return;
+    }
+
+    if (cancelNotificationListenPromise) {
+      return cancelNotificationListenPromise;
+    }
+
+    cancelNotificationListenPromise = (async () => {
+      const knex = AgentRun.knex() as unknown as {
+        client: {
+          acquireConnection(): Promise<PgListenConnection>;
+          releaseConnection(connection: PgListenConnection): Promise<void>;
+        };
+      };
+      const connection = await knex.client.acquireConnection();
+
+      try {
+        connection.on('notification', handleCancelNotification);
+        connection.on('error', handleCancelNotificationError);
+        await connection.query(`LISTEN ${RUN_CANCEL_NOTIFY_CHANNEL}`);
+        cancelNotificationConnection = connection;
+      } catch (error) {
+        await knex.client.releaseConnection(connection);
+        throw error;
+      }
+    })()
+      .catch((error) => {
+        clearCancelNotificationConnection();
+        getLogger().warn({ error }, 'AgentExec: run-cancel notification listener unavailable');
+        throw error;
+      })
+      .finally(() => {
+        cancelNotificationListenPromise = null;
+      });
+
+    return cancelNotificationListenPromise;
+  }
+
+  /** Publishes the cancelled run id on the run-cancel channel; a failed publish is logged and never rethrown. */
+  private static async notifyRunCancelled(runUuid: string): Promise<void> {
+    const payload = JSON.stringify({ runId: runUuid });
+    try {
+      // Channel and payload are passed to pg_notify as bound parameters.
+      await AgentRun.knex().raw('select pg_notify(?, ?)', [RUN_CANCEL_NOTIFY_CHANNEL, payload]);
+    } catch (error) {
+      const message = `AgentExec: run-cancel notify failed runId=${runUuid}`;
+      getLogger().warn({ error, runUuid }, message);
+    }
+  }
+
   static async getRunByUuid(runUuid: string): Promise<AgentRun | undefined> {
     if (!isUuid(runUuid)) {
       return undefined;
@@ -382,12 +476,14 @@ export default class AgentRunService {
     now?: Date;
     queuedStaleMs?: number;
   } = {}): Promise<AgentRun[]> {
-    const durability =
-      limit === undefined || queuedStaleMs === undefined ? await resolveAgentSessionDurabilityConfig() : null;
-    const effectiveLimit = limit ?? durability!.dispatchRecoveryLimit;
-    const effectiveQueuedStaleMs = queuedStaleMs ?? durability!.queuedRunDispatchStaleMs;
+    // Always resolve durability: the lease is required to derive the heartbeat-staleness cutoff.
+    const durability = await resolveAgentSessionDurabilityConfig();
+    const effectiveLimit = limit ?? durability.dispatchRecoveryLimit;
+    const effectiveQueuedStaleMs = queuedStaleMs ?? durability.queuedRunDispatchStaleMs;
+    const heartbeatStaleMs = resolveHeartbeatStaleMs(durability.runExecutionLeaseMs);
     const nowIso = now.toISOString();
     const queuedCutoff = new Date(now.getTime() - effectiveQueuedStaleMs).toISOString();
+    const heartbeatCutoff = new Date(now.getTime() - heartbeatStaleMs).toISOString();
 
     return AgentRun.query()
       .where((builder) => {
@@ -399,6 +495,13 @@ export default class AgentRunService {
           .whereNotNull('leaseExpiresAt')
           .where('leaseExpiresAt', '<=', nowIso);
       })
+      .orWhere((builder) => {
+        builder.whereIn('status', ['starting', 'running']).where((heartbeatBuilder) => {
+          heartbeatBuilder.where('heartbeatAt', '<=', heartbeatCutoff).orWhere((fallbackBuilder) => {
+            fallbackBuilder.whereNull('heartbeatAt').where('startedAt', '<=', heartbeatCutoff);
+          });
+        });
+      })
       .orderBy('updatedAt', 'asc')
       .limit(Math.max(1, Math.floor(effectiveLimit)));
   }
@@ -416,6 +519,7 @@ export default class AgentRunService {
     const nowIso = now.toISOString();
     const effectiveLeaseMs = leaseMs ?? (await resolveAgentSessionDurabilityConfig()).runExecutionLeaseMs;
     const leaseExpiresAt = new Date(now.getTime() + effectiveLeaseMs).toISOString();
+    const heartbeatStaleMs = resolveHeartbeatStaleMs(effectiveLeaseMs);
 
     return AgentRun.transaction(async (trx) => {
       const run = await AgentRun.query(trx).findOne({ uuid: runUuid }).forUpdate();
@@ -424,7 +528,8 @@ export default class AgentRunService {
       }
 
       const staleClaim =
-        (run.status === 'starting' || run.status === 'running') && isLeaseExpired(run.leaseExpiresAt, now);
+        (run.status === 'starting' || run.status === 'running') &&
+        (isLeaseExpired(run.leaseExpiresAt, now) || isHeartbeatStale(run, now, heartbeatStaleMs));
       if (run.status !== 'queued' && !staleClaim) {
         return null;
       }
@@ -523,46 +628,48 @@ export default class AgentRunService {
     const run = await this.getOwnedRun(runUuid, userId);
     activeRunControllers.get(run.uuid)?.abort();
 
-    await this.patchStatus(run.uuid, 'cancelled', {
-      cancelledAt: new Date().toISOString(),
-      completedAt: new Date().toISOString(),
-    } as Partial<AgentRun>);
+    const now = new Date().toISOString();
+    // Status patch and run.cancelled event MUST be atomic: a terminal status with no terminal event hangs every SSE stream forever.
+    let latestSequence: number | null = null;
+    await AgentRun.transaction(async (trx) => {
+      const lockedRun = await AgentRun.query(trx).findById(run.id).forUpdate();
+      if (!lockedRun) {
+        throw new Error(RUN_NOT_FOUND_ERROR);
+      }
 
-    this.clearAbortController(run.uuid);
-    return this.getOwnedRun(run.uuid, userId);
-  }
+      if (TERMINAL_RUN_STATUSES.includes(lockedRun.status)) {
+        return;
+      }
 
-  static isTerminalStatus(status: AgentRunStatus): boolean {
-    return TERMINAL_RUN_STATUSES.includes(status);
-  }
+      const cancelledRun = await AgentRun.query(trx).patchAndFetchById(lockedRun.id, {
+        status: 'cancelled',
+        cancelledAt: now,
+        completedAt: now,
+        executionOwner: null,
+        leaseExpiresAt: null,
+        heartbeatAt: null,
+      } as Partial<AgentRun>);
 
-  // Compatibility path for owner-independent control actions. Executor writes should use owner-aware helpers.
-  static async patchRun(runUuid: string, patch: Partial<AgentRun>): Promise<AgentRun> {
-    const run = await AgentRun.query().findOne({ uuid: runUuid });
-    if (!run) {
-      throw new Error(RUN_NOT_FOUND_ERROR);
+      latestSequence = await AgentRunEventService.appendStatusEventForRunInTransaction(
+        cancelledRun,
+        statusEventType('cancelled'),
+        this.buildStatusEventPayload('cancelled', cancelledRun),
+        trx
+      );
+    });
+
+    if (latestSequence) {
+      await AgentRunEventService.notifyRunEventsInserted(run.uuid, latestSequence);
+      // Fast cross-process abort for a worker executing this run on another replica.
+      await this.notifyRunCancelled(run.uuid);
     }
 
-    return AgentRun.query().patchAndFetchById(run.id, patch);
+    this.clearAbortController(run.uuid);
+    return this.getOwnedRun(run.uuid, userId);
   }
 
-  static async patchStatus(runUuid: string, status: AgentRunStatus, patch?: Partial<AgentRun>): Promise<AgentRun> {
-    const releaseExecution = shouldReleaseExecution(status);
-    const updatedRun = await this.patchRun(runUuid, {
-      status,
-      ...patch,
-      ...(releaseExecution
-        ? {
-            executionOwner: null,
-            leaseExpiresAt: null,
-            heartbeatAt: null,
-          }
-        : {}),
-    } as Partial<AgentRun>);
-
-    await this.appendRunStatusEvent(runUuid, statusEventType(status), status, updatedRun);
-
-    return updatedRun;
+  static isTerminalStatus(status: AgentRunStatus): boolean {
+    return TERMINAL_RUN_STATUSES.includes(status);
   }
 
   static async assertRunExecutionOwner(runUuid: string, executionOwner: string): Promise<AgentRun> {
@@ -649,6 +756,8 @@ export default class AgentRunService {
     },
     eventContext: OwnerStatusEventContext = {}
   ): Promise<AgentRun> {
+    // This worker is starting generation; ensure it can hear a cross-process cancel.
+    void this.ensureCancelNotificationListener().catch(() => {});
     const now = new Date().toISOString();
     return this.patchStatusForExecutionOwner(
       runUuid,
@@ -836,15 +945,6 @@ export default class AgentRunService {
     return updatedRun;
   }
 
-  private static async appendRunStatusEvent(
-    runUuid: string,
-    eventType: string,
-    status: AgentRunStatus,
-    updatedRun: AgentRun
-  ): Promise<void> {
-    await AgentRunEventService.appendStatusEvent(runUuid, eventType, this.buildStatusEventPayload(status, updatedRun));
-  }
-
   private static buildStatusEventPayload(
     status: AgentRunStatus,
     updatedRun: AgentRun,
@@ -882,51 +982,6 @@ export default class AgentRunService {
     }
   }
 
-  static async startRun(
-    runUuid: string,
-    resolved: {
-      resolvedHarness: string;
-      provider: string;
-      model: string;
-      sandboxGeneration?: number | null;
-    }
-  ): Promise<AgentRun> {
-    const now = new Date().toISOString();
-    return this.patchStatus(runUuid, 'running', {
-      startedAt: now,
-      completedAt: null,
-      cancelledAt: null,
-      error: null,
-      resolvedHarness: resolved.resolvedHarness,
-      resolvedProvider: resolved.provider,
-      resolvedModel: resolved.model,
-      provider: resolved.provider,
-      model: resolved.model,
-      sandboxGeneration: resolved.sandboxGeneration ?? null,
-    } as Partial<AgentRun>);
-  }
-
-  static async markWaitingForApproval(runUuid: string): Promise<AgentRun> {
-    return this.patchStatus(runUuid, 'waiting_for_approval');
-  }
-
-  static async markCompleted(runUuid: string, usageSummary?: AgentRunUsageSummary): Promise<AgentRun> {
-    this.clearAbortController(runUuid);
-    return this.patchStatus(runUuid, 'completed', {
-      completedAt: new Date().toISOString(),
-      usageSummary: (usageSummary || {}) as Record<string, unknown>,
-    });
-  }
-
-  static async markFailed(runUuid: string, error: unknown, usageSummary?: AgentRunUsageSummary): Promise<AgentRun> {
-    this.clearAbortController(runUuid);
-    return this.patchStatus(runUuid, 'failed', {
-      completedAt: new Date().toISOString(),
-      usageSummary: (usageSummary || {}) as Record<string, unknown>,
-      error: serializeRunError(error),
-    });
-  }
-
   static async markQueuedRunDispatchFailed(runUuid: string, error: unknown): Promise<AgentRun> {
     let latestSequence: number | null = null;
     const failedRun = await AgentRun.transaction(async (trx) => {
@@ -975,6 +1030,8 @@ export default class AgentRunService {
     }
 
     const now = options.now || new Date();
+    const { runExecutionLeaseMs } = await resolveAgentSessionDurabilityConfig();
+    const heartbeatStaleMs = resolveHeartbeatStaleMs(runExecutionLeaseMs);
     let latestSequence: number | null = null;
     const pausedRun = await AgentRun.transaction(async (trx) => {
       const run = await AgentRun.query(trx).findOne({ uuid: runUuid }).forUpdate();
@@ -992,7 +1049,11 @@ export default class AgentRunService {
         }
       }
 
-      if (!options.allowActiveLease && !isLeaseExpired(run.leaseExpiresAt, now)) {
+      if (
+        !options.allowActiveLease &&
+        !isLeaseExpired(run.leaseExpiresAt, now) &&
+        !isHeartbeatStale(run, now, heartbeatStaleMs)
+      ) {
         return null;
       }
 
diff --git a/src/server/services/agent/ThreadRuntimeControlsService.ts b/src/server/services/agent/ThreadRuntimeControlsService.ts
index 99376c51..1e5c2247 100644
--- a/src/server/services/agent/ThreadRuntimeControlsService.ts
+++ b/src/server/services/agent/ThreadRuntimeControlsService.ts
@@ -16,6 +16,7 @@
 
 import { createHash } from 'crypto';
 import type { RequestUserIdentity } from 'server/lib/get-user';
+import { AppError } from 'server/lib/appError';
 import type AgentThread from 'server/models/AgentThread';
 import { McpConfigService } from 'server/services/agentRuntime/mcp/config';
 import type { AgentMcpConnection } from 'server/services/agentRuntime/mcp/types';
@@ -93,9 +94,18 @@ export type ResolvedRunAdmissionRuntimeChoices = {
 
 type RuntimeControlsErrorCode = 'invalid_input' | 'unknown_choice' | 'policy_denied' | 'active_run' | 'not_found';
 
-export class AgentThreadRuntimeControlsError extends Error {
-  constructor(public readonly code: RuntimeControlsErrorCode, message: string) {
-    super(message);
+// HTTP status for each runtime-controls error code (so routes need not re-map codes); the Record type requires an entry for every code in the union.
+const RUNTIME_CONTROLS_HTTP_STATUS: Record<RuntimeControlsErrorCode, number> = {
+  invalid_input: 400,
+  unknown_choice: 400,
+  policy_denied: 403,
+  not_found: 404,
+  active_run: 409,
+};
+/** Runtime-controls failure: forwards `code`, `message`, and the HTTP status looked up for that code to AppError. */
+export class AgentThreadRuntimeControlsError extends AppError {
+  constructor(code: RuntimeControlsErrorCode, message: string) {
+    super({ httpStatus: RUNTIME_CONTROLS_HTTP_STATUS[code], code, message });
     this.name = 'AgentThreadRuntimeControlsError';
   }
 }
diff --git a/src/server/services/agent/ThreadService.ts b/src/server/services/agent/ThreadService.ts
index f7b1e8d2..5999c13d 100644
--- a/src/server/services/agent/ThreadService.ts
+++ b/src/server/services/agent/ThreadService.ts
@@ -15,11 +15,13 @@
  */
 
 import AgentSession from 'server/models/AgentSession';
+import { getOwnedSession } from 'server/services/agent/sessionOwnership';
 import AgentMessage from 'server/models/AgentMessage';
 import AgentPendingAction from 'server/models/AgentPendingAction';
 import AgentRun from 'server/models/AgentRun';
 import AgentThread from 'server/models/AgentThread';
 import type { Transaction } from 'objection';
+import { NotFoundError, ConflictError } from 'server/lib/appError';
 import { canSessionAcceptMessages, getSessionMessageBlockReason } from './sessionReadiness';
 import { TERMINAL_RUN_STATUSES } from './RunService';
 import WorkspaceRuntimeStateService from './WorkspaceRuntimeStateService';
@@ -90,17 +92,21 @@ export type AgentThreadCreateConflictCode =
   | 'active_run'
   | 'pending_approval';
 
-export class AgentThreadCreateNotFoundError extends Error {
-  constructor(public readonly code: AgentThreadCreateNotFoundCode, message: string) {
-    super(message);
+export class AgentThreadCreateNotFoundError extends NotFoundError {
+  readonly reason: AgentThreadCreateNotFoundCode;
+  constructor(reason: AgentThreadCreateNotFoundCode, message: string) {
+    super(message, 'thread_target_not_found', { reason });
     this.name = 'AgentThreadCreateNotFoundError';
+    this.reason = reason;
   }
 }
 
-export class AgentThreadCreateConflictError extends Error {
-  constructor(public readonly code: AgentThreadCreateConflictCode, message: string) {
-    super(message);
+export class AgentThreadCreateConflictError extends ConflictError {
+  readonly reason: AgentThreadCreateConflictCode;
+  constructor(reason: AgentThreadCreateConflictCode, message: string) {
+    super(message, reason, { reason });
     this.name = 'AgentThreadCreateConflictError';
+    this.reason = reason;
   }
 }
 
@@ -304,14 +310,7 @@ export default class AgentThreadService {
   static getRuntimeControlChoices = getRuntimeControlChoices;
   static buildRuntimeControlChoicesMetadataPatch = buildRuntimeControlChoicesMetadataPatch;
 
-  static async getOwnedSession(sessionUuid: string, userId: string): Promise<AgentSession> {
-    const session = await AgentSession.query().findOne({ uuid: sessionUuid, userId });
-    if (!session) {
-      throw new Error('Agent session not found');
-    }
-
-    return session;
-  }
+  static getOwnedSession = getOwnedSession;
 
   static async getOwnedThread(threadUuid: string, userId: string): Promise<AgentThread> {
     const thread = await AgentThread.query()
diff --git a/src/server/services/agent/WorkspaceRuntimeStateService.ts b/src/server/services/agent/WorkspaceRuntimeStateService.ts
index 6b79377d..bc7453e0 100644
--- a/src/server/services/agent/WorkspaceRuntimeStateService.ts
+++ b/src/server/services/agent/WorkspaceRuntimeStateService.ts
@@ -24,20 +24,19 @@ import type { WorkspaceRuntimePlanMetadata } from 'server/lib/agentSession/works
 import type AgentSandbox from 'server/models/AgentSandbox';
 import AgentRun from 'server/models/AgentRun';
 import AgentSession from 'server/models/AgentSession';
+import { ConflictError } from 'server/lib/appError';
 import AgentSandboxService, { type AgentSandboxRuntimeLifecycleMetadata } from './SandboxService';
 import { TERMINAL_RUN_STATUSES } from './RunService';
 
 export type WorkspaceRuntimeAction = 'provision' | 'resume' | 'suspend' | 'end' | 'cleanup' | 'retry';
 export type WorkspaceActionBlockedReason = 'active_run' | 'action_in_progress';
 
-export class WorkspaceActionBlockedError extends Error {
-  constructor(
-    public readonly reason: WorkspaceActionBlockedReason,
-    message: string,
-    public readonly details: Record<string, unknown> = {}
-  ) {
-    super(message);
+export class WorkspaceActionBlockedError extends ConflictError {
+  readonly reason: WorkspaceActionBlockedReason;
+  constructor(reason: WorkspaceActionBlockedReason, message: string, extra: Record<string, unknown> = {}) {
+    super(message, 'workspace_action_blocked', { reason, ...extra });
     this.name = 'WorkspaceActionBlockedError';
+    this.reason = reason;
   }
 }
 
diff --git a/src/server/services/agent/__tests__/CapabilityService.test.ts b/src/server/services/agent/__tests__/CapabilityService.test.ts
index baac3e9a..20b6a713 100644
--- a/src/server/services/agent/__tests__/CapabilityService.test.ts
+++ b/src/server/services/agent/__tests__/CapabilityService.test.ts
@@ -33,15 +33,19 @@ const mockEnsureChatSandbox = jest.fn();
 const mockDiagnosticToolExecute = jest.fn();
 const mockParseYamlConfigFromBranch = jest.fn();
 const mockGithubOctokitRequest = jest.fn();
+const mockBuildFindOne = jest.fn();
 const mockGithubClientInstances: Array<{
   setAllowedBranch: jest.Mock;
   setReferencedFiles: jest.Mock;
   setExcludedFilePatterns: jest.Mock;
   setAllowedWritePatterns: jest.Mock;
+  setAllowedRepos: jest.Mock;
   isFilePathAllowed: jest.Mock;
   validateBranch: jest.Mock;
   getOctokit: jest.Mock;
 }> = [];
+const mockK8sClientInstances: Array<{ setAllowedNamespace: jest.Mock }> = [];
+const mockDatabaseClientInstances: Array<{ setBuildScope: jest.Mock }> = [];
 
 let currentTransport: Record<string, unknown> | null = null;
 
@@ -53,6 +57,8 @@ function mockMakeDiagnosticToolClass(name: string, description = `${name} descri
       type: 'object',
       properties: {},
     },
+    // get_lifecycle_logs is scoped to the build's UUID at registration; expose the setter.
+    setAllowedBuildUuid: jest.fn(),
     execute: (args: Record<string, unknown>, signal?: AbortSignal) => mockDiagnosticToolExecute(name, args, signal),
   }));
 }
@@ -107,7 +113,14 @@ jest.mock('server/services/agentRuntime/mcp/client', () => ({
 }));
 
 jest.mock('server/services/agent/tools/shared/k8sClient', () => ({
-  K8sClient: jest.fn(),
+  K8sClient: jest.fn().mockImplementation(() => {
+    const client = {
+      setAllowedNamespace: jest.fn(),
+      resolveNamespace: jest.fn(),
+    };
+    mockK8sClientInstances.push(client);
+    return client;
+  }),
 }));
 jest.mock('server/services/agent/tools/shared/githubClient', () => ({
   GitHubClient: jest.fn().mockImplementation(() => {
@@ -116,6 +129,7 @@ jest.mock('server/services/agent/tools/shared/githubClient', () => ({
       setReferencedFiles: jest.fn(),
       setExcludedFilePatterns: jest.fn(),
       setAllowedWritePatterns: jest.fn(),
+      setAllowedRepos: jest.fn(),
       isFilePathAllowed: jest.fn(),
       validateBranch: jest.fn(),
       getOctokit: jest.fn().mockResolvedValue({ request: mockGithubOctokitRequest }),
@@ -131,7 +145,25 @@ jest.mock('server/lib/yamlConfigParser', () => ({
   })),
 }));
 jest.mock('server/services/agent/tools/shared/databaseClient', () => ({
-  DatabaseClient: jest.fn(),
+  DatabaseClient: jest.fn().mockImplementation(() => {
+    const client = {
+      setBuildScope: jest.fn(),
+      queryTable: jest.fn(),
+    };
+    mockDatabaseClientInstances.push(client);
+    return client;
+  }),
+}));
+
+jest.mock('server/models/Build', () => ({
+  __esModule: true,
+  default: {
+    query: jest.fn(() => ({
+      findOne: jest.fn(() => ({
+        withGraphFetched: (...args: unknown[]) => mockBuildFindOne(...args),
+      })),
+    })),
+  },
 }));
 jest.mock('server/services/agent/tools/codefresh/getCodefreshLogs', () => ({
   GetCodefreshLogsTool: mockMakeDiagnosticToolClass('get_codefresh_logs'),
@@ -298,6 +330,9 @@ describe('AgentCapabilityService.buildToolSet', () => {
   beforeEach(() => {
     jest.clearAllMocks();
     mockGithubClientInstances.length = 0;
+    mockK8sClientInstances.length = 0;
+    mockDatabaseClientInstances.length = 0;
+    mockBuildFindOne.mockResolvedValue(null);
     mockGithubOctokitRequest.mockResolvedValue({
       data: {
         sha: 'existing-file-sha',
@@ -1263,6 +1298,78 @@ describe('AgentCapabilityService.buildToolSet', () => {
     expect(tools.mcp__lifecycle__patch_k8s_resource).toEqual(expect.objectContaining({ needsApproval: true }));
   });
 
+  it('locks diagnostic tools to the build namespace, repos, and DB scope resolved from the Build', async () => {
+    mockResolveServers.mockResolvedValue([]);
+    mockBuildFindOne.mockResolvedValue({
+      id: 99,
+      uuid: 'sample-build-1',
+      namespace: 'env-sample-build-1',
+      environmentId: 5,
+      pullRequestId: 21,
+      pullRequest: {
+        id: 21,
+        fullName: 'example-org/example-repo',
+        repository: { id: 100, fullName: 'example-org/example-repo' },
+      },
+      deploys: [
+        { repository: { id: 100, fullName: 'example-org/example-repo' } },
+        { repository: { id: 200, fullName: 'example-org/secondary-repo' } },
+      ],
+    });
+    // Chat session tied to that build with no pod/namespace of its own, so every scope asserted below comes from the mocked Build.
+    await buildToolSetForTest({
+      session: {
+        uuid: 'session-build-context',
+        sessionKind: 'chat',
+        buildUuid: 'sample-build-1',
+        workspaceStatus: 'none',
+        status: 'active',
+        podName: null,
+        namespace: null,
+        workspaceRepos: [
+          {
+            repo: 'example-org/example-repo',
+            repoUrl: 'https://github.com/example-org/example-repo.git',
+            branch: 'feature/sample',
+            revision: null,
+            mountPath: '/workspace',
+            primary: true,
+          },
+        ],
+      } as any,
+      repoFullName: 'example-org/example-repo',
+      userIdentity,
+      approvalPolicy: {} as any,
+      workspaceToolDiscoveryTimeoutMs: 4500,
+      workspaceToolExecutionTimeoutMs: 22000,
+    });
+
+    // k8s clients (read + fix factories) locked to the build's namespace.
+    expect(mockK8sClientInstances.length).toBeGreaterThan(0);
+    for (const client of mockK8sClientInstances) {
+      expect(client.setAllowedNamespace).toHaveBeenCalledWith('env-sample-build-1');
+    }
+    // github clients locked to the FULL set of repos the build spans (multi-repo); the length check stops the loop passing vacuously.
+    expect(mockGithubClientInstances.length).toBeGreaterThan(0);
+    for (const client of mockGithubClientInstances) {
+      expect(client.setAllowedRepos).toHaveBeenCalledWith(
+        expect.arrayContaining(['example-org/example-repo', 'example-org/secondary-repo'])
+      );
+    }
+
+    // database client scoped to this build's records.
+    expect(mockDatabaseClientInstances.length).toBeGreaterThan(0);
+    expect(mockDatabaseClientInstances[0].setBuildScope).toHaveBeenCalledWith(
+      expect.objectContaining({
+        buildId: 99,
+        buildUuid: 'sample-build-1',
+        pullRequestId: 21,
+        environmentId: 5,
+        repositoryIds: expect.arrayContaining([100, 200]),
+      })
+    );
+  });
+
   it('redacts MCP default args from tool audit hooks while preserving runtime execution args', async () => {
     mockResolveServers.mockResolvedValue([
       {
diff --git a/src/server/services/agent/__tests__/CustomAgentDefinitionService.test.ts b/src/server/services/agent/__tests__/CustomAgentDefinitionService.test.ts
index bf7dffb4..b2a4dfee 100644
--- a/src/server/services/agent/__tests__/CustomAgentDefinitionService.test.ts
+++ b/src/server/services/agent/__tests__/CustomAgentDefinitionService.test.ts
@@ -135,16 +135,16 @@ describe('CustomAgentDefinitionService', () => {
     mockFindOne.mockResolvedValue(null);
 
     await expect(service.getUserDefinition('custom.other-user', 'sample-user')).rejects.toMatchObject({
-      code: 'not_found',
+      reason: 'not_found',
     });
     await expect(service.getUserDefinition('custom.archived', 'sample-user')).rejects.toBeInstanceOf(
       CustomAgentDefinitionServiceError
     );
     await expect(service.getUserDefinition('system.freeform', 'sample-user')).rejects.toMatchObject({
-      code: 'not_found',
+      reason: 'not_found',
     });
     await expect(service.getUserDefinition('system.debug', 'sample-user')).rejects.toMatchObject({
-      code: 'not_found',
+      reason: 'not_found',
     });
 
     expect(mockFindOne).toHaveBeenCalledWith({
@@ -291,7 +291,7 @@ describe('CustomAgentDefinitionService', () => {
         resourceBehavior: 'chat_only',
       })
     ).rejects.toMatchObject({
-      code: 'unknown_capability',
+      reason: 'unknown_capability',
       message: 'Some selected capabilities are no longer available. Review the list and save again.',
     });
 
@@ -313,7 +313,7 @@ describe('CustomAgentDefinitionService', () => {
         resourceBehavior: 'chat_only',
       })
     ).rejects.toMatchObject({
-      code: reason,
+      reason: reason,
       message: 'Some selected capabilities are no longer available. Review the list and save again.',
     });
 
@@ -347,7 +347,7 @@ describe('CustomAgentDefinitionService', () => {
           resourceBehavior: 'chat_only',
         })
       ).rejects.toMatchObject({
-        code: reason,
+        reason: reason,
         message: 'Some selected capabilities are no longer available. Review the list and save again.',
       });
 
@@ -392,7 +392,7 @@ describe('CustomAgentDefinitionService', () => {
         resourceBehavior: 'chat_only',
       })
     ).rejects.toMatchObject({
-      code: 'source_incompatible',
+      reason: 'source_incompatible',
       message: 'Some selected capabilities are no longer available. Review the list and save again.',
     });
   });
@@ -414,7 +414,7 @@ describe('CustomAgentDefinitionService', () => {
           resourceBehavior: 'chat_only',
         })
       ).rejects.toMatchObject({
-        code: 'creation_unavailable',
+        reason: 'creation_unavailable',
         message: 'Custom agent creation is not available. Ask an admin for access.',
       });
 
@@ -503,7 +503,7 @@ describe('CustomAgentDefinitionService', () => {
         resourceBehavior: 'chat_only',
       })
     ).rejects.toMatchObject({
-      code: 'creation_unavailable',
+      reason: 'creation_unavailable',
     });
 
     expect(mockPatchAndFetchById).not.toHaveBeenCalled();
@@ -551,7 +551,7 @@ describe('CustomAgentDefinitionService', () => {
         resourceBehavior: 'chat_only',
       })
     ).rejects.toMatchObject({
-      code: 'creator_capability_reserved',
+      reason: 'creator_capability_reserved',
       message: 'Some selected capabilities are no longer available. Review the list and save again.',
     });
 
@@ -568,7 +568,7 @@ describe('CustomAgentDefinitionService', () => {
         resourceBehavior: 'chat_only',
       })
     ).rejects.toMatchObject({
-      code: 'model_unavailable',
+      reason: 'model_unavailable',
       message: 'Selected model is no longer available. Choose another model and save again.',
     });
     expect(mockListAvailableModelsForUser).toHaveBeenCalledWith({ userIdentity });
diff --git a/src/server/services/agent/__tests__/FirstPartyAgentDefinitions.integration.test.ts b/src/server/services/agent/__tests__/FirstPartyAgentDefinitions.integration.test.ts
index 495c2ea1..034dd0f6 100644
--- a/src/server/services/agent/__tests__/FirstPartyAgentDefinitions.integration.test.ts
+++ b/src/server/services/agent/__tests__/FirstPartyAgentDefinitions.integration.test.ts
@@ -409,9 +409,11 @@ describe('First-party agent definition integration regressions', () => {
         'diagnostics_kubernetes',
         'diagnostics_database',
         'github_write',
-        'external_mcp_write',
       ])
     );
+    // Debug repairs go through github_write, not external MCP writes (least privilege).
+    expect(debug.requiredCapabilityRefs).not.toContain('read_context');
+    expect(debug.requiredCapabilityRefs).not.toContain('external_mcp_write');
 
     const result = await resolveRunPlan({
       source: {
@@ -434,13 +436,7 @@ describe('First-party agent definition integration regressions', () => {
         approvalMode: 'require_approval',
       })
     );
-    expect(getCapabilityAccess(result, 'external_mcp_write')).toEqual(
-      expect.objectContaining({
-        allowed: true,
-        availability: 'admin_only',
-        approvalMode: 'require_approval',
-      })
-    );
+    expect(getCapabilityAccess(result, 'external_mcp_write')?.allowed).not.toBe(true);
   });
 
   it('fails Develop without prepared workspace/source resources and keeps Free-form minimal capabilities', async () => {
diff --git a/src/server/services/agent/__tests__/InstructionTemplateService.test.ts b/src/server/services/agent/__tests__/InstructionTemplateService.test.ts
index 308d6fc2..8e0f902c 100644
--- a/src/server/services/agent/__tests__/InstructionTemplateService.test.ts
+++ b/src/server/services/agent/__tests__/InstructionTemplateService.test.ts
@@ -167,20 +167,16 @@ describe('InstructionTemplateService', () => {
 
     expect([...SYSTEM_INSTRUCTION_TEMPLATE_REFS].sort()).toEqual(builtInRefs);
     expect(SYSTEM_INSTRUCTION_TEMPLATE_DEFINITIONS).toHaveLength(3);
-    expect(debugDefinition?.defaultVersion).toBe(3);
+    expect(debugDefinition?.defaultVersion).toBe(4);
     expect(developDefinition?.defaultVersion).toBe(1);
     expect(freeformDefinition?.defaultVersion).toBe(1);
 
-    expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('Lifecycle debugging profile:'));
-    expect(debugDefinition?.defaultContent).toEqual(
-      expect.stringContaining('Compare desired config state with actual runtime state')
-    );
-    expect(debugDefinition?.defaultContent).toEqual(
-      expect.stringContaining('Investigate build failures before deploy failures')
-    );
-    expect(debugDefinition?.defaultContent).toEqual(
-      expect.stringContaining('Cite specific evidence before diagnosing a root cause')
-    );
+    expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('You are the Lifecycle Debug Agent'));
+    expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('comparing desired vs actual'));
+    expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('Investigation order:'));
+    expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('Failure playbooks'));
+    expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('previous:true'));
+    expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('Cite the specific evidence'));
     expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('Repair'));
     expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('Investigate more'));
     expect(debugDefinition?.defaultContent).toEqual(expect.stringContaining('Open workspace'));
@@ -287,7 +283,7 @@ describe('InstructionTemplateService', () => {
     expect(template.override).toEqual(
       expect.objectContaining({
         content: 'Keep this sample admin override.',
-        baseDefaultVersion: 3,
+        baseDefaultVersion: 4,
       })
     );
     expect(template.effective).toEqual(
@@ -338,7 +334,7 @@ describe('InstructionTemplateService', () => {
     expect(template.default).toEqual(
       expect.objectContaining({
         content: debugV2Default,
-        version: 3,
+        version: 4,
         hash: computeInstructionTemplateContentHash(debugV2Default as string),
       })
     );
@@ -361,7 +357,7 @@ describe('InstructionTemplateService', () => {
     expect(reset.effective).toEqual(
       expect.objectContaining({
         source: 'default',
-        version: 3,
+        version: 4,
         content: debugV2Default,
         hash: computeInstructionTemplateContentHash(debugV2Default as string),
       })
@@ -397,19 +393,24 @@ describe('InstructionTemplateService', () => {
 
     await expect(InstructionTemplateService.resolveRefs([''])).rejects.toMatchObject({
       name: InstructionTemplateServiceError.name,
-      code: 'invalid_ref',
+      code: 'instruction_template_ref_invalid',
+      templateCode: 'invalid_ref',
+      httpStatus: 400,
       statusCode: 400,
     });
 
     await expect(InstructionTemplateService.getTemplate('system:missing')).rejects.toMatchObject({
       name: InstructionTemplateServiceError.name,
-      code: 'unknown_ref',
+      code: 'instruction_template_not_found',
+      templateCode: 'unknown_ref',
+      httpStatus: 404,
       statusCode: 404,
       details: { ref: 'system:missing' },
     });
 
     await expect(InstructionTemplateService.resolveRefs(['system:debug', 'system:missing'])).rejects.toMatchObject({
-      code: 'unknown_ref',
+      code: 'instruction_template_not_found',
+      templateCode: 'unknown_ref',
       details: { ref: 'system:missing' },
     });
   });
diff --git a/src/server/services/agent/__tests__/MessageStore.test.ts b/src/server/services/agent/__tests__/MessageStore.test.ts
index bde747cc..bd945151 100644
--- a/src/server/services/agent/__tests__/MessageStore.test.ts
+++ b/src/server/services/agent/__tests__/MessageStore.test.ts
@@ -80,6 +80,141 @@ describe('AgentMessageStore', () => {
       });
     });
 
+    it('prefers run.startedAt for assistant createdAt over a buggy ~completion stored metadata.createdAt', () => {
+      expect(
+        AgentMessageStore.serializeCanonicalMessage(
+          {
+            uuid: '44444444-4444-4444-8444-444444444444',
+            clientMessageId: null,
+            role: 'assistant',
+            parts: [{ type: 'text', text: 'Done' }],
+            // Stored createdAt was set near completion (100ms before completedAt), giving ~0/negative durations.
+            metadata: {
+              runId: 'run-1',
+              createdAt: '2026-04-25T00:00:08.900Z', // buggy value; absent from the expected output
+              completedAt: '2026-04-25T00:00:09.000Z',
+            },
+            runStartedAt: '2026-04-25T00:00:01.000Z', // expected source of the served createdAt
+            runCompletedAt: '2026-04-25T00:00:09.000Z',
+            createdAt: '2026-04-25T00:00:08.950Z', // row timestamp; also near completion
+          } as any,
+          'thread-uuid'
+        )
+      ).toEqual(
+        expect.objectContaining({
+          role: 'assistant',
+          metadata: {
+            runId: 'run-1',
+            createdAt: '2026-04-25T00:00:01.000Z', // equals runStartedAt
+            completedAt: '2026-04-25T00:00:09.000Z',
+          },
+        })
+      );
+    });
+
+    it('handles run timestamps returned as Date objects from the join (not ISO strings)', () => {
+      expect(
+        AgentMessageStore.serializeCanonicalMessage(
+          {
+            uuid: '44444444-4444-4444-8444-444444444444',
+            clientMessageId: null,
+            role: 'assistant',
+            parts: [{ type: 'text', text: 'Done' }],
+            metadata: {
+              runId: 'run-1',
+              createdAt: '2026-04-25T00:00:08.900Z',
+              completedAt: '2026-04-25T00:00:09.000Z',
+            },
+            runStartedAt: new Date('2026-04-25T00:00:01.000Z'), // Date instance instead of an ISO string
+            runCompletedAt: new Date('2026-04-25T00:00:09.000Z'), // Date instance instead of an ISO string
+            createdAt: '2026-04-25T00:00:08.950Z',
+          } as any,
+          'thread-uuid'
+        )
+      ).toEqual(
+        expect.objectContaining({
+          role: 'assistant',
+          metadata: {
+            runId: 'run-1',
+            createdAt: '2026-04-25T00:00:01.000Z', // ISO string matching the runStartedAt Date
+            completedAt: '2026-04-25T00:00:09.000Z',
+          },
+        })
+      );
+    });
+
+    it('falls back to stored metadata, then the row created time, when run.startedAt is absent', () => {
+      expect(
+        AgentMessageStore.serializeCanonicalMessage(
+          {
+            uuid: '44444444-4444-4444-8444-444444444444',
+            clientMessageId: null,
+            role: 'assistant',
+            parts: [{ type: 'text', text: 'Done' }],
+            metadata: { runId: 'run-1', createdAt: '2026-04-25T00:00:02.000Z' }, // expected createdAt source
+            runCompletedAt: '2026-04-25T00:00:09.000Z', // note: no runStartedAt is provided
+            createdAt: '2026-04-25T00:00:05.000Z', // row time; absent from the expected metadata
+          } as any,
+          'thread-uuid'
+        )
+      ).toEqual(
+        expect.objectContaining({
+          role: 'assistant',
+          metadata: {
+            runId: 'run-1',
+            createdAt: '2026-04-25T00:00:02.000Z', // stored metadata.createdAt, not the 00:00:05 row time
+            completedAt: '2026-04-25T00:00:09.000Z', // equals runCompletedAt
+          },
+        })
+      );
+    });
+
+    it('clamps the served createdAt to completedAt when run.startedAt is later than completion', () => {
+      expect(
+        AgentMessageStore.serializeCanonicalMessage(
+          {
+            uuid: '44444444-4444-4444-8444-444444444444',
+            clientMessageId: null,
+            role: 'assistant',
+            parts: [{ type: 'text', text: 'Done' }],
+            metadata: { runId: 'run-1' }, // no stored timestamps
+            runStartedAt: '2026-04-25T00:00:10.000Z', // one second after runCompletedAt
+            runCompletedAt: '2026-04-25T00:00:09.000Z',
+            createdAt: '2026-04-25T00:00:08.000Z',
+          } as any,
+          'thread-uuid'
+        )
+      ).toEqual(
+        expect.objectContaining({
+          role: 'assistant',
+          metadata: {
+            runId: 'run-1',
+            createdAt: '2026-04-25T00:00:09.000Z', // clamped down to completedAt
+            completedAt: '2026-04-25T00:00:09.000Z',
+          },
+        })
+      );
+    });
+
+    it('falls back to the assistant row created time when no run timestamps exist', () => {
+      const serialized = AgentMessageStore.serializeCanonicalMessage(
+        {
+          uuid: '55555555-5555-4555-8555-555555555555',
+          clientMessageId: null,
+          role: 'assistant',
+          parts: [{ type: 'text', text: 'Done' }],
+          metadata: {}, // no stored timestamps; no runStartedAt/runCompletedAt either
+          createdAt: '2026-04-25T00:00:00.000Z',
+        } as any,
+        'thread-uuid'
+      );
+
+      expect(serialized.metadata).toEqual({
+        createdAt: '2026-04-25T00:00:00.000Z', // row createdAt
+        completedAt: '2026-04-25T00:00:00.000Z', // also the row createdAt
+      });
+    });
+
     it('returns typed agent switch system messages but rejects unrelated system messages', () => {
       const switchMessage = AgentMessageStore.serializeCanonicalMessage(
         {
diff --git a/src/server/services/agent/__tests__/PolicyService.test.ts b/src/server/services/agent/__tests__/PolicyService.test.ts
index 905704fe..2e327967 100644
--- a/src/server/services/agent/__tests__/PolicyService.test.ts
+++ b/src/server/services/agent/__tests__/PolicyService.test.ts
@@ -14,10 +14,25 @@
  * limitations under the License.
  */
 
+const mockGetEffectiveConfig = jest.fn(); // stub whose resolved config each test supplies
+
+jest.mock('server/services/agentRuntime/config/agentRuntimeConfig', () => ({
+  __esModule: true, // required by Jest for a factory that provides a `default` export
+  default: {
+    getInstance: jest.fn(() => ({
+      getEffectiveConfig: mockGetEffectiveConfig, // same stub for every getInstance() call
+    })),
+  },
+}));
+
 import AgentPolicyService from '../PolicyService';
-import { DEFAULT_AGENT_APPROVAL_POLICY } from '../types';
+import { AGENT_CAPABILITY_KEYS, DEFAULT_AGENT_APPROVAL_POLICY } from '../types';
 
 describe('AgentPolicyService', () => {
+  beforeEach(() => {
+    mockGetEffectiveConfig.mockReset(); // drop the previous test's resolved value and call history
+  });
+
   it('keeps read-only sandbox tools in the read capability', () => {
     expect(
       AgentPolicyService.capabilityForSessionWorkspaceTool('workspace.read_file', {
@@ -48,6 +63,43 @@ describe('AgentPolicyService', () => {
     );
   });
 
+  it('keeps system approval modes when no approval default is configured', async () => {
+    mockGetEffectiveConfig.mockResolvedValue({
+      approvalPolicy: {
+        rules: {
+          shell_exec: 'allow', // the only configured rule; no defaultMode is set
+        },
+      },
+    });
+
+    await expect(AgentPolicyService.getEffectivePolicy()).resolves.toEqual({
+      defaultMode: DEFAULT_AGENT_APPROVAL_POLICY.defaultMode, // falls back to the system default
+      rules: {
+        ...DEFAULT_AGENT_APPROVAL_POLICY.rules,
+        shell_exec: 'allow', // configured override on top of the system rules
+      },
+    });
+  });
+
+  it('uses an explicit approval default as the fallback for known capability families', async () => {
+    mockGetEffectiveConfig.mockResolvedValue({
+      approvalPolicy: {
+        defaultMode: 'deny', // explicit default under test
+        rules: {
+          shell_exec: 'allow',
+        },
+      },
+    });
+
+    await expect(AgentPolicyService.getEffectivePolicy()).resolves.toEqual({
+      defaultMode: 'deny',
+      rules: {
+        ...Object.fromEntries(AGENT_CAPABILITY_KEYS.map((capabilityKey) => [capabilityKey, 'deny'])),
+        shell_exec: 'allow', // explicit rule overrides the 'deny' seeded for every capability key
+      },
+    });
+  });
+
   it('allows all user-owned definitions to use all-users capabilities', () => {
     const result = AgentPolicyService.resolveCapabilityAccess({
       capabilityId: 'workspace_files',
diff --git a/src/server/services/agent/__tests__/RunEventService.test.ts b/src/server/services/agent/__tests__/RunEventService.test.ts
index 5b7361ef..b1fa7755 100644
--- a/src/server/services/agent/__tests__/RunEventService.test.ts
+++ b/src/server/services/agent/__tests__/RunEventService.test.ts
@@ -31,10 +31,10 @@ jest.mock('server/models/AgentRunEvent', () => ({
 }));
 
 jest.mock('server/lib/agentSession/runtimeConfig', () => {
-  const actual = jest.requireActual('server/lib/agentSession/runtimeConfig');
   return {
     __esModule: true,
-    ...actual,
+    DEFAULT_AGENT_SESSION_MAX_DURABLE_PAYLOAD_BYTES: 64 * 1024,
+    DEFAULT_AGENT_SESSION_PAYLOAD_PREVIEW_BYTES: 16 * 1024,
     resolveAgentSessionDurabilityConfig: jest.fn().mockResolvedValue({
       runExecutionLeaseMs: 30 * 60 * 1000,
       queuedRunDispatchStaleMs: 30 * 1000,
@@ -48,7 +48,7 @@ jest.mock('server/lib/agentSession/runtimeConfig', () => {
 
 import AgentRun from 'server/models/AgentRun';
 import AgentRunEvent from 'server/models/AgentRunEvent';
-import AgentRunEventService from '../RunEventService';
+import AgentRunEventService, { RUN_EVENT_STREAM_POLL_INTERVAL_MS } from '../RunEventService';
 import { AgentRunOwnershipLostError } from '../AgentRunOwnershipLostError';
 
 const mockRunQuery = AgentRun.query as jest.Mock;
@@ -69,6 +69,10 @@ describe('AgentRunEventService', () => {
     jest.restoreAllMocks();
   });
 
+  it('keeps the fallback stream poll cadence live enough for reasoning chunks', () => {
+    expect(RUN_EVENT_STREAM_POLL_INTERVAL_MS).toBeLessThanOrEqual(500); // guards against raising the interval
+  });
+
   it('loads run events after a sequence cursor with one extra row for hasMore', async () => {
     const limit = jest.fn().mockResolvedValue([
       {
@@ -731,7 +735,7 @@ describe('AgentRunEventService', () => {
     expect(text).toContain('id: 2\nevent: run.completed');
   });
 
-  it('waits once for the terminal event when terminal status is visible first', async () => {
+  it('drains the terminal event and closes without polling when terminal status is visible', async () => {
     const terminalEvent = {
       uuid: 'event-2',
       runUuid: 'run-1',
@@ -746,44 +750,21 @@ describe('AgentRunEventService', () => {
       createdAt: null,
       updatedAt: null,
     } as any;
-    const listRunEventsPage = jest
-      .spyOn(AgentRunEventService, 'listRunEventsPage')
-      .mockResolvedValueOnce({
-        events: [],
-        nextSequence: 1,
-        hasMore: false,
-        run: {
-          id: 'run-1',
-          status: 'running',
-        },
-        limit: 100,
-        maxLimit: 500,
-      })
-      .mockResolvedValueOnce({
-        events: [],
-        nextSequence: 1,
-        hasMore: false,
-        run: {
-          id: 'run-1',
-          status: 'completed',
-        },
-        limit: 100,
-        maxLimit: 500,
-      })
-      .mockResolvedValueOnce({
-        events: [terminalEvent],
-        nextSequence: 2,
-        hasMore: false,
-        run: {
-          id: 'run-1',
-          status: 'completed',
-        },
-        limit: 100,
-        maxLimit: 500,
-      });
+    // Terminal status + event are atomic, so the first drain emits the terminal event and closes without waiting.
+    const listRunEventsPage = jest.spyOn(AgentRunEventService, 'listRunEventsPage').mockResolvedValue({
+      events: [terminalEvent],
+      nextSequence: 2,
+      hasMore: false,
+      run: {
+        id: 'run-1',
+        status: 'completed',
+      },
+      limit: 100,
+      maxLimit: 500,
+    });
     const waitForRunEventNotification = jest
       .spyOn(AgentRunEventService, 'waitForRunEventNotification')
-      .mockResolvedValue(true);
+      .mockResolvedValue(false);
     mockRunQuery.mockReturnValue({
       findOne: jest.fn().mockResolvedValue({
         uuid: 'run-1',
@@ -795,64 +776,32 @@ describe('AgentRunEventService', () => {
       AgentRunEventService.createCanonicalRunEventStream('run-1', 1, { pollIntervalMs: 10 })
     ).text();
 
-    expect(waitForRunEventNotification).toHaveBeenCalledWith('run-1', 1, 10);
-    expect(listRunEventsPage).toHaveBeenNthCalledWith(3, 'run-1', {
-      afterSequence: 1,
-      limit: 100,
-    });
+    expect(waitForRunEventNotification).not.toHaveBeenCalled();
+    expect(listRunEventsPage).toHaveBeenCalledTimes(1);
     expect(text).toContain('id: 2\nevent: run.completed');
   });
 
-  it('keeps following terminal runs until a terminal event is available', async () => {
-    const terminalEvent = {
-      uuid: 'event-2',
-      runUuid: 'run-1',
-      threadUuid: 'thread-1',
-      sessionUuid: 'session-1',
-      runId: 17,
-      sequence: 2,
-      eventType: 'run.completed',
-      payload: {
+  it('self-heals a terminal run that is missing its terminal event, then closes', async () => {
+    // Terminal status with no terminal event (crash / legacy non-atomic write) must not poll forever: repair and close.
+    const ensureTerminal = jest
+      .spyOn(
+        AgentRunEventService as unknown as {
+          ensureTerminalEventForTerminalRun: (runUuid: string) => Promise<boolean>;
+        },
+        'ensureTerminalEventForTerminalRun'
+      )
+      .mockResolvedValue(false);
+    const listRunEventsPage = jest.spyOn(AgentRunEventService, 'listRunEventsPage').mockResolvedValue({
+      events: [],
+      nextSequence: 1,
+      hasMore: false,
+      run: {
+        id: 'run-1',
         status: 'completed',
       },
-      createdAt: null,
-      updatedAt: null,
-    } as any;
-    const listRunEventsPage = jest
-      .spyOn(AgentRunEventService, 'listRunEventsPage')
-      .mockResolvedValueOnce({
-        events: [],
-        nextSequence: 1,
-        hasMore: false,
-        run: {
-          id: 'run-1',
-          status: 'running',
-        },
-        limit: 100,
-        maxLimit: 500,
-      })
-      .mockResolvedValueOnce({
-        events: [],
-        nextSequence: 1,
-        hasMore: false,
-        run: {
-          id: 'run-1',
-          status: 'completed',
-        },
-        limit: 100,
-        maxLimit: 500,
-      })
-      .mockResolvedValueOnce({
-        events: [terminalEvent],
-        nextSequence: 2,
-        hasMore: false,
-        run: {
-          id: 'run-1',
-          status: 'completed',
-        },
-        limit: 100,
-        maxLimit: 500,
-      });
+      limit: 100,
+      maxLimit: 500,
+    });
     const waitForRunEventNotification = jest
       .spyOn(AgentRunEventService, 'waitForRunEventNotification')
       .mockResolvedValue(false);
@@ -867,8 +816,10 @@ describe('AgentRunEventService', () => {
       AgentRunEventService.createCanonicalRunEventStream('run-1', 1, { pollIntervalMs: 10 })
     ).text();
 
-    expect(waitForRunEventNotification).toHaveBeenCalledWith('run-1', 1, 10);
-    expect(listRunEventsPage).toHaveBeenCalledTimes(3);
-    expect(text).toContain('id: 2\nevent: run.completed');
+    expect(ensureTerminal).toHaveBeenCalledWith('run-1');
+    expect(waitForRunEventNotification).not.toHaveBeenCalled();
+    expect(listRunEventsPage).toHaveBeenCalled();
+    expect(typeof text).toBe('string');
+    ensureTerminal.mockRestore();
   });
 });
diff --git a/src/server/services/agent/__tests__/RunExecutor.test.ts b/src/server/services/agent/__tests__/RunExecutor.test.ts
index 38acf2ed..8d2944e1 100644
--- a/src/server/services/agent/__tests__/RunExecutor.test.ts
+++ b/src/server/services/agent/__tests__/RunExecutor.test.ts
@@ -1272,7 +1272,7 @@ describe('AgentRunExecutor', () => {
     expect(mockMarkFailedForExecutionOwner).not.toHaveBeenCalled();
   });
 
-  it('reports the effective Debug repair loop cap when repair stops on tool-calls', async () => {
+  it('uses the configured global budget and synthesizes a summary when repair stops on tool-calls', async () => {
     const debugRepairRunPlanSnapshot = {
       ...runPlanSnapshot,
       agent: {
@@ -1290,7 +1290,7 @@ describe('AgentRunExecutor', () => {
     mockGetEffectiveSessionConfig.mockResolvedValueOnce({
       systemPrompt: 'DB prompt as stored',
       appendSystemPrompt: undefined,
-      maxIterations: 350,
+      maxIterations: 14,
       workspaceToolDiscoveryTimeoutMs: 3000,
       workspaceToolExecutionTimeoutMs: 15000,
       toolRules: [],
@@ -1307,6 +1307,20 @@ describe('AgentRunExecutor', () => {
       runtimeOptions: {},
       runPlanSnapshot: debugRepairRunPlanSnapshot,
     });
+    mockConvertToModelMessages.mockResolvedValueOnce([{ role: 'user', content: 'repair this' }]);
+    mockGenerateText.mockResolvedValueOnce({
+      text: 'Updated the Dockerfile; build is still failing on missing base image. Next: confirm the base image tag.',
+      totalUsage: { inputTokens: 5, outputTokens: 7, totalTokens: 12 },
+      finishReason: 'stop',
+      rawFinishReason: 'STOP',
+      warnings: [],
+      response: {
+        id: 'repair-synthesis-response-1',
+        modelId: 'gpt-5.4',
+        timestamp: '2026-05-07T00:00:00.000Z',
+      },
+      providerMetadata: undefined,
+    });
 
     const execution = await AgentRunExecutor.execute({
       session: { uuid: 'sess-1', id: 17 } as any,
@@ -1328,20 +1342,103 @@ describe('AgentRunExecutor', () => {
       isAborted: false,
     });
 
-    expect(mockStepCountIs).toHaveBeenCalledWith(10);
+    // Effective budget now equals the configured global maxIterations (no debug-specific cap).
+    expect(mockStepCountIs).toHaveBeenCalledWith(14);
+    // Repair stopping on tool-calls without a commit observation gets a graceful summary, not a blank max_iterations failure.
+    expect(mockGenerateText).toHaveBeenCalledWith(
+      expect.objectContaining({
+        system: expect.stringContaining(
+          'You are closing out a Debug repair run after the tool loop reached its step budget without a confirmed fix.'
+        ),
+        toolChoice: 'none',
+      })
+    );
+    expect(mockLastFinalizeResult).toEqual(
+      expect.objectContaining({
+        status: 'completed',
+        patch: expect.objectContaining({
+          usageSummary: expect.objectContaining({
+            finishReason: 'stop',
+          }),
+        }),
+      })
+    );
+    expect(mockUpsertCanonicalUiMessagesForThread).toHaveBeenCalledWith(
+      expect.anything(),
+      expect.arrayContaining([
+        expect.objectContaining({
+          id: 'assistant-1',
+          parts: expect.arrayContaining([
+            expect.objectContaining({
+              type: 'text',
+              text: expect.stringContaining(
+                'Updated the Dockerfile; build is still failing on missing base image. Next: confirm the base image tag.'
+              ),
+            }),
+          ]),
+        }),
+      ]),
+      expect.anything()
+    );
+  });
+
+  it('does not synthesize a repair summary when the run was aborted', async () => {
+    const debugRepairRunPlanSnapshot = {
+      ...runPlanSnapshot,
+      agent: {
+        id: 'system.debug',
+        label: 'Debug',
+        sourceKind: 'build_context_chat',
+      },
+      debug: {
+        requestedIntent: 'repair',
+        resolvedIntent: 'repair',
+        decisionSource: 'client_request',
+        reasonCode: 'repair_requested',
+      },
+    };
+    mockResolveForRunAdmission.mockResolvedValueOnce({
+      approvalPolicy: 'on-request',
+      requestedHarness: null,
+      requestedProvider: null,
+      requestedModel: null,
+      resolvedHarness: 'lifecycle_ai_sdk',
+      resolvedProvider: 'openai',
+      resolvedModel: 'gpt-5.4',
+      sandboxRequirement: { filesystem: 'persistent' },
+      runtimeOptions: {},
+      runPlanSnapshot: debugRepairRunPlanSnapshot,
+    });
+
+    const execution = await AgentRunExecutor.execute({
+      session: { uuid: 'sess-1', id: 17 } as any,
+      thread: { id: 7, uuid: 'thread-1' } as any,
+      userIdentity: { userId: 'sample-user' } as any,
+      messages: [],
+    });
+
+    await execution.onStreamFinish({
+      messages: [
+        {
+          id: 'assistant-1',
+          role: 'assistant',
+          parts: [{ type: 'text', text: 'Still repairing' }],
+          metadata: { runId: 'run-1' },
+        } as any,
+      ],
+      finishReason: 'tool-calls',
+      isAborted: true,
+    });
+
+    expect(mockGenerateText).not.toHaveBeenCalled();
     expect(mockLastFinalizeResult).toEqual(
       expect.objectContaining({
         status: 'failed',
         error: expect.objectContaining({
           code: 'max_iterations_exceeded',
-          details: expect.objectContaining({
-            finishReason: 'tool-calls',
-            maxIterations: 10,
-          }),
         }),
       })
     );
-    expect(mockGenerateText).not.toHaveBeenCalled();
   });
 
   it('completes a Debug repair tool-calls run when the repair commit observation is the final answer', async () => {
diff --git a/src/server/services/agent/__tests__/RunPlanResolver.test.ts b/src/server/services/agent/__tests__/RunPlanResolver.test.ts
index 22fbabcf..c8b20e04 100644
--- a/src/server/services/agent/__tests__/RunPlanResolver.test.ts
+++ b/src/server/services/agent/__tests__/RunPlanResolver.test.ts
@@ -58,12 +58,12 @@ jest.mock('../AgentDefinitionRegistry', () => {
 
 jest.mock('../CustomAgentDefinitionService', () => {
   class MockCustomAgentDefinitionServiceError extends Error {
-    code: string;
+    reason: string;
 
-    constructor(code: string, message: string) {
+    constructor(reason: string, message: string) {
       super(message);
       this.name = 'CustomAgentDefinitionServiceError';
-      this.code = code;
+      this.reason = reason;
     }
   }
 
@@ -89,13 +89,13 @@ jest.mock('../InstructionTemplateService', () => {
     readonly details?: Record<string, unknown>;
 
     constructor(
-      public readonly code: string,
+      public readonly templateCode: string,
       message: string,
       options: { statusCode?: number; details?: Record<string, unknown> } = {}
     ) {
       super(message);
       this.name = 'InstructionTemplateServiceError';
-      this.statusCode = options.statusCode || (code === 'unknown_ref' ? 404 : 400);
+      this.statusCode = options.statusCode || (templateCode === 'unknown_ref' ? 404 : 400);
       this.details = options.details;
     }
   }
@@ -504,9 +504,11 @@ describe('AgentRunPlanResolver', () => {
 
     await expect(resolve()).rejects.toMatchObject({
       name: AgentRunPlanInstructionTemplateError.name,
-      code: 'unknown_ref',
+      code: 'instruction_template_invalid',
+      httpStatus: 422,
+      templateCode: 'unknown_ref',
       statusCode: 404,
-      details: { ref: 'system:missing' },
+      details: { templateCode: 'unknown_ref', ref: 'system:missing' },
     });
     expect(mockSeedSystemTemplates).toHaveBeenCalledTimes(1);
     expect(mockResolveInstructionRefs).toHaveBeenCalledWith(['system:missing']);
@@ -526,9 +528,11 @@ describe('AgentRunPlanResolver', () => {
 
     await expect(resolve()).rejects.toMatchObject({
       name: AgentRunPlanInstructionTemplateError.name,
-      code: 'invalid_ref',
+      code: 'instruction_template_invalid',
+      httpStatus: 422,
+      templateCode: 'invalid_ref',
       statusCode: 400,
-      details: { ref: 'invalid ref' },
+      details: { templateCode: 'invalid_ref', ref: 'invalid ref' },
     });
     expect(mockSeedSystemTemplates).toHaveBeenCalledTimes(1);
   });
@@ -641,6 +645,8 @@ describe('AgentRunPlanResolver', () => {
       expect.arrayContaining(['diagnostics_codefresh', 'diagnostics_kubernetes', 'github_write'])
     );
     expect(result.runPlanSnapshot.capabilities.provisionalCapabilityIds).not.toContain('workspace_shell');
+    // Debug no longer grants external_mcp_write; repairs go through github_write (least privilege).
+    expect(result.runPlanSnapshot.capabilities.provisionalCapabilityIds).not.toContain('external_mcp_write');
     expect(result.runPlanSnapshot.capabilities.resolvedCapabilityAccess).toEqual(
       expect.arrayContaining([
         expect.objectContaining({
@@ -649,14 +655,11 @@ describe('AgentRunPlanResolver', () => {
           availability: 'system_only',
           approvalMode: 'require_approval',
         }),
-        expect.objectContaining({
-          capabilityId: 'external_mcp_write',
-          allowed: true,
-          availability: 'admin_only',
-          approvalMode: 'require_approval',
-        }),
       ])
     );
+    expect(result.runPlanSnapshot.capabilities.resolvedCapabilityAccess).not.toContainEqual(
+      expect.objectContaining({ capabilityId: 'external_mcp_write', allowed: true })
+    );
     expect(result.runPlanSnapshot.runtime.runtimeOptions).toEqual({ maxIterations: 12 });
     expect(result.runPlanSnapshot.runtime.approvalPolicy).toEqual({
       defaultMode: 'require_approval',
diff --git a/src/server/services/agent/__tests__/RunResumeEligibilityService.test.ts b/src/server/services/agent/__tests__/RunResumeEligibilityService.test.ts
index 4a4de27d..acac5fcf 100644
--- a/src/server/services/agent/__tests__/RunResumeEligibilityService.test.ts
+++ b/src/server/services/agent/__tests__/RunResumeEligibilityService.test.ts
@@ -150,6 +150,51 @@ describe('AgentRunResumeEligibilityService', () => {
     );
   });
 
+  it('keeps active leases replay-only when the heartbeat is fresh', () => {
+    // Fresh heartbeat (30s ago) within the 3-minute staleness window => still lease_active.
+    expect(
+      evaluate(
+        { leaseExpiresAt: activeLease, heartbeatAt: '2026-05-08T11:59:30.000Z' },
+        { heartbeatStaleMs: 3 * 60 * 1000 } // staleness window under test: 3 minutes
+      )
+    ).toEqual(
+      expect.objectContaining({
+        decision: 'replay_only',
+        reason: 'lease_active',
+      })
+    );
+  });
+
+  it('does not treat a heartbeat-stale run as lease_active even when the lease is active', () => {
+    // Stale heartbeat (5m ago) past the 3-min window: orphaned read-only run is auto-resume-eligible, not lease_active.
+    expect(
+      evaluate(
+        { leaseExpiresAt: activeLease, heartbeatAt: '2026-05-08T11:55:00.000Z' },
+        { heartbeatStaleMs: 3 * 60 * 1000 } // staleness window under test: 3 minutes
+      )
+    ).toEqual(
+      expect.objectContaining({
+        decision: 'auto_resume_allowed',
+        reason: 'read_only_expired_lease', // same reason code despite the lease itself still being active
+      })
+    );
+  });
+
+  it('falls back to startedAt when a run never heartbeated', () => {
+    // No heartbeat; stale startedAt (5m) marks the run orphaned despite the active lease.
+    expect(
+      evaluate(
+        { leaseExpiresAt: activeLease, heartbeatAt: null, startedAt: '2026-05-08T11:55:00.000Z' },
+        { heartbeatStaleMs: 3 * 60 * 1000 } // staleness window under test: 3 minutes
+      )
+    ).toEqual(
+      expect.objectContaining({
+        decision: 'auto_resume_allowed', // same outcome as the stale-heartbeat case
+        reason: 'read_only_expired_lease',
+      })
+    );
+  });
+
   it('keeps approval-waiting runs out of stale recovery', () => {
     expect(evaluate({ status: 'waiting_for_approval' })).toEqual(
       expect.objectContaining({
diff --git a/src/server/services/agent/__tests__/RunService.test.ts b/src/server/services/agent/__tests__/RunService.test.ts
index 8004a824..df68344e 100644
--- a/src/server/services/agent/__tests__/RunService.test.ts
+++ b/src/server/services/agent/__tests__/RunService.test.ts
@@ -19,6 +19,7 @@ jest.mock('server/models/AgentRun', () => ({
   default: {
     query: jest.fn(),
     transaction: jest.fn(),
+    knex: jest.fn(),
   },
 }));
 
@@ -66,6 +67,7 @@ import { resolveAgentSessionDurabilityConfig } from 'server/lib/agentSession/run
 
 const mockRunQuery = AgentRun.query as jest.Mock;
 const mockRunTransaction = AgentRun.transaction as jest.Mock;
+const mockRunKnex = AgentRun.knex as jest.Mock;
 const mockSessionQuery = AgentSession.query as jest.Mock;
 const mockAppendStatusEvent = AgentRunEventService.appendStatusEvent as jest.Mock;
 const mockAppendStatusEventForRunInTransaction = AgentRunEventService.appendStatusEventForRunInTransaction as jest.Mock;
@@ -144,6 +146,7 @@ describe('AgentRunService', () => {
   beforeEach(() => {
     jest.clearAllMocks();
     mockRunTransaction.mockImplementation(async (callback) => callback({ trx: true }));
+    mockRunKnex.mockReturnValue({ raw: jest.fn().mockResolvedValue(undefined) });
     mockResolveDurabilityConfig.mockResolvedValue({
       runExecutionLeaseMs: 30 * 60 * 1000,
       queuedRunDispatchStaleMs: 30 * 1000,
@@ -466,7 +469,7 @@ describe('AgentRunService', () => {
   });
 
   describe('listRunsNeedingDispatch', () => {
-    it('finds stale queued runs and expired execution leases', async () => {
+    it('finds stale queued runs, expired execution leases, and heartbeat-stale runs', async () => {
       const staleQueuedBuilder: any = {
         where: jest.fn().mockReturnThis(),
       };
@@ -475,6 +478,26 @@ describe('AgentRunService', () => {
         whereNotNull: jest.fn().mockReturnThis(),
         where: jest.fn().mockReturnThis(),
       };
+      // Heartbeat-stale branch: starting/running runs past the staleness cutoff by heartbeat (or startedAt fallback).
+      const heartbeatFallbackBuilder: any = {
+        whereNull: jest.fn().mockReturnThis(),
+        where: jest.fn().mockReturnThis(),
+      };
+      const heartbeatPredicateBuilder: any = {
+        where: jest.fn().mockReturnThis(),
+        orWhere: jest.fn((callback) => {
+          callback(heartbeatFallbackBuilder);
+          return heartbeatPredicateBuilder;
+        }),
+      };
+      const heartbeatStaleBuilder: any = {
+        whereIn: jest.fn().mockReturnThis(),
+        where: jest.fn((callback) => {
+          callback(heartbeatPredicateBuilder);
+          return heartbeatStaleBuilder;
+        }),
+      };
+      const orWhereBuilders = [expiredLeaseBuilder, heartbeatStaleBuilder];
       const runs = [{ uuid: VALID_RUN_UUID }];
       const query: any = {
         where: jest.fn((callback) => {
@@ -482,7 +505,7 @@ describe('AgentRunService', () => {
           return query;
         }),
         orWhere: jest.fn((callback) => {
-          callback(expiredLeaseBuilder);
+          callback(orWhereBuilders.shift());
           return query;
         }),
         orderBy: jest.fn().mockReturnThis(),
@@ -504,13 +527,18 @@ describe('AgentRunService', () => {
       expect(expiredLeaseBuilder.whereIn).toHaveBeenCalledWith('status', ['starting', 'running']);
       expect(expiredLeaseBuilder.whereNotNull).toHaveBeenCalledWith('leaseExpiresAt');
       expect(expiredLeaseBuilder.where).toHaveBeenCalledWith('leaseExpiresAt', '<=', '2026-04-24T12:00:00.000Z');
+      // Cutoff = now - heartbeatStaleMs; 30-min lease derives a 3-min window, so 12:00:00 - 3m = 11:57:00.
+      expect(heartbeatStaleBuilder.whereIn).toHaveBeenCalledWith('status', ['starting', 'running']);
+      expect(heartbeatPredicateBuilder.where).toHaveBeenCalledWith('heartbeatAt', '<=', '2026-04-24T11:57:00.000Z');
+      expect(heartbeatFallbackBuilder.whereNull).toHaveBeenCalledWith('heartbeatAt');
+      expect(heartbeatFallbackBuilder.where).toHaveBeenCalledWith('startedAt', '<=', '2026-04-24T11:57:00.000Z');
       expect(query.orderBy).toHaveBeenCalledWith('updatedAt', 'asc');
       expect(query.limit).toHaveBeenCalledWith(25);
     });
   });
 
   describe('cancelRun', () => {
-    it('records cancellation through the shared status patch path', async () => {
+    it('records cancellation atomically with a recovery status event when the run is still active', async () => {
       const runningRun = {
         id: 1,
         uuid: VALID_RUN_UUID,
@@ -524,66 +552,108 @@ describe('AgentRunService', () => {
         .spyOn(AgentRunService, 'getOwnedRun')
         .mockResolvedValueOnce(runningRun as Awaited<ReturnType<typeof AgentRunService.getOwnedRun>>)
         .mockResolvedValueOnce(cancelledRun as Awaited<ReturnType<typeof AgentRunService.getOwnedRun>>);
-      const patchStatus = jest
-        .spyOn(AgentRunService, 'patchStatus')
-        .mockResolvedValue(cancelledRun as Awaited<ReturnType<typeof AgentRunService.patchStatus>>);
+      const findById = jest.fn().mockReturnValue({
+        forUpdate: jest.fn().mockResolvedValue(runningRun),
+      });
+      const patchAndFetchById = jest.fn().mockResolvedValue(cancelledRun);
+      mockAppendStatusEventForRunInTransaction.mockResolvedValue(7);
+      mockRunQuery.mockReturnValueOnce({ findById }).mockReturnValueOnce({ patchAndFetchById });
+      const raw = jest.fn().mockResolvedValue(undefined);
+      mockRunKnex.mockReturnValue({ raw });
 
       await expect(AgentRunService.cancelRun(VALID_RUN_UUID, 'sample-user')).resolves.toBe(cancelledRun);
 
-      expect(patchStatus).toHaveBeenCalledWith(
-        VALID_RUN_UUID,
-        'cancelled',
+      expect(findById).toHaveBeenCalledWith(1);
+      expect(patchAndFetchById).toHaveBeenCalledWith(
+        1,
         expect.objectContaining({
+          status: 'cancelled',
           cancelledAt: expect.any(String),
           completedAt: expect.any(String),
+          executionOwner: null,
+          leaseExpiresAt: null,
+          heartbeatAt: null,
         })
       );
+      expect(mockAppendStatusEventForRunInTransaction).toHaveBeenCalledWith(
+        cancelledRun,
+        'run.cancelled',
+        expect.objectContaining({
+          status: 'cancelled',
+        }),
+        { trx: true }
+      );
+      expect(mockNotifyRunEventsInserted).toHaveBeenCalledWith(VALID_RUN_UUID, 7);
+      // Fast cross-process abort is broadcast on the dedicated cancel channel.
+      expect(raw).toHaveBeenCalledWith('select pg_notify(?, ?)', [
+        'agent_run_cancel',
+        JSON.stringify({ runId: VALID_RUN_UUID }),
+      ]);
+      expect(mockAppendStatusEvent).not.toHaveBeenCalled();
       expect(getOwnedRun).toHaveBeenCalledTimes(2);
     });
-  });
 
-  describe('markFailed', () => {
-    it('uses the same serialized error on the run and run.failed event payload', async () => {
-      const findOne = jest.fn().mockResolvedValue({
-        id: 17,
+    it('does not rewrite a terminal run as cancelled when completion wins the race', async () => {
+      const completedRun = {
+        id: 1,
         uuid: VALID_RUN_UUID,
+        status: 'completed',
+      };
+      const getOwnedRun = jest
+        .spyOn(AgentRunService, 'getOwnedRun')
+        .mockResolvedValueOnce(completedRun as Awaited<ReturnType<typeof AgentRunService.getOwnedRun>>)
+        .mockResolvedValueOnce(completedRun as Awaited<ReturnType<typeof AgentRunService.getOwnedRun>>);
+      const patchAndFetchById = jest.fn();
+      const findById = jest.fn().mockReturnValue({
+        forUpdate: jest.fn().mockResolvedValue(completedRun),
       });
-      const patchAndFetchById = jest.fn().mockImplementation((_id, patch) =>
-        Promise.resolve({
-          id: 17,
-          uuid: VALID_RUN_UUID,
-          status: 'failed',
-          ...patch,
-        })
-      );
-      mockRunQuery.mockReturnValueOnce({ findOne }).mockReturnValueOnce({ patchAndFetchById });
-      const error = Object.assign(new Error('Sample run failure.'), {
-        name: 'SampleRunError',
-        code: 'sample_failure',
-        details: {
-          reason: 'sample',
-        },
-      });
+      mockRunQuery.mockReturnValueOnce({ findById });
+      const raw = jest.fn().mockResolvedValue(undefined);
+      mockRunKnex.mockReturnValue({ raw });
+
+      await expect(AgentRunService.cancelRun(VALID_RUN_UUID, 'sample-user')).resolves.toBe(completedRun);
 
-      const failedRun = await AgentRunService.markFailed(VALID_RUN_UUID, error, {
-        totalTokens: 12,
+      expect(mockRunQuery).toHaveBeenCalledTimes(1);
+      expect(findById).toHaveBeenCalledWith(1);
+      expect(patchAndFetchById).not.toHaveBeenCalled();
+      expect(mockAppendStatusEvent).not.toHaveBeenCalled();
+      expect(mockAppendStatusEventForRunInTransaction).not.toHaveBeenCalled();
+      expect(mockNotifyRunEventsInserted).not.toHaveBeenCalled();
+      // No cancellation transition occurred, so no cross-process abort is broadcast.
+      expect(raw).not.toHaveBeenCalled();
+      expect(getOwnedRun).toHaveBeenCalledTimes(2);
+    });
+  });
+
+  describe('cross-process cancel listener', () => {
+    it('listens on the cancel channel and aborts the local controller on a cancel notification', async () => {
+      const listeners: Record<string, (arg: any) => void> = {};
+      const connection = {
+        on: jest.fn((event: string, listener: (arg: any) => void) => {
+          listeners[event] = listener;
+        }),
+        query: jest.fn().mockResolvedValue(undefined),
+      };
+      const acquireConnection = jest.fn().mockResolvedValue(connection);
+      mockRunKnex.mockReturnValue({ client: { acquireConnection, releaseConnection: jest.fn() } });
+
+      const controller = new AbortController();
+      const abortSpy = jest.spyOn(controller, 'abort');
+      // registerAbortController lazily opens the (first) shared listen connection.
+      AgentRunService.registerAbortController(VALID_RUN_UUID, controller);
+      await new Promise((resolve) => setImmediate(resolve));
+
+      expect(acquireConnection).toHaveBeenCalledTimes(1);
+      expect(connection.query).toHaveBeenCalledWith('LISTEN agent_run_cancel');
+
+      // A cancel notification for the registered run aborts its controller.
+      listeners['notification']({
+        channel: 'agent_run_cancel',
+        payload: JSON.stringify({ runId: VALID_RUN_UUID }),
       });
-      const patch = patchAndFetchById.mock.calls[0][1];
-      const eventPayload = mockAppendStatusEvent.mock.calls[0][2];
-
-      expect(failedRun.error).toEqual(patch.error);
-      expect(eventPayload.error).toEqual(failedRun.error);
-      expect(mockAppendStatusEvent).toHaveBeenCalledWith(
-        VALID_RUN_UUID,
-        'run.failed',
-        expect.objectContaining({
-          status: 'failed',
-          error: failedRun.error,
-          usageSummary: {
-            totalTokens: 12,
-          },
-        })
-      );
+      expect(abortSpy).toHaveBeenCalled();
+
+      AgentRunService.clearAbortController(VALID_RUN_UUID);
     });
   });
 
@@ -694,6 +764,84 @@ describe('AgentRunService', () => {
       expect(mockAppendStatusEventForRunInTransaction).not.toHaveBeenCalled();
     });
 
+    it('pauses a heartbeat-stale run whose lease has not yet expired', async () => {
+      // Lease is still active (expires 1m after `now`) but the last heartbeat is 5m old, beyond the 3-min stale window: the orphaned run must pause without waiting for lease expiry.
+      const run = {
+        id: 17,
+        uuid: VALID_RUN_UUID,
+        status: 'running',
+        executionOwner: 'worker-1',
+        leaseExpiresAt: '2026-05-08T12:01:00.000Z',
+        heartbeatAt: '2026-05-08T11:55:00.000Z',
+        usageSummary: {},
+      };
+      const pausedRun = {
+        ...run,
+        status: 'waiting_for_input',
+        executionOwner: null,
+        leaseExpiresAt: null,
+        heartbeatAt: null,
+        error: {
+          code: 'run_auto_resume_ineligible',
+          message:
+            'Lifecycle paused this run because automatic recovery is not safe. Review the run and continue manually.',
+          details: {
+            recovery: expect.any(Object),
+          },
+        },
+      };
+      const findOne = jest.fn().mockReturnValue({
+        forUpdate: jest.fn().mockResolvedValue(run),
+      });
+      const patchAndFetchById = jest.fn().mockResolvedValue(pausedRun);
+      mockAppendStatusEventForRunInTransaction.mockResolvedValue(46);
+      mockRunQuery.mockReturnValueOnce({ findOne }).mockReturnValueOnce({ patchAndFetchById });
+      // Recovery is evaluated at 12:00:00 with an expected owner that matches the run's executionOwner.
+      await expect(
+        AgentRunService.markWaitingForInputForRecovery(VALID_RUN_UUID, eligibility, {
+          now: new Date('2026-05-08T12:00:00.000Z'),
+          expectedExecutionOwner: 'worker-1',
+          resumeAttemptId: 'resume-1',
+        })
+      ).resolves.toBe(pausedRun);
+      // The patch must clear owner, lease and heartbeat; the notification carries the mocked event id (46).
+      expect(patchAndFetchById).toHaveBeenCalledWith(
+        17,
+        expect.objectContaining({
+          status: 'waiting_for_input',
+          executionOwner: null,
+          leaseExpiresAt: null,
+          heartbeatAt: null,
+        })
+      );
+      expect(mockNotifyRunEventsInserted).toHaveBeenCalledWith(VALID_RUN_UUID, 46);
+    });
+
+    it('does not pause a run with an active lease and a fresh heartbeat', async () => {
+      const run = {
+        id: 17,
+        uuid: VALID_RUN_UUID,
+        status: 'running',
+        executionOwner: 'worker-1',
+        leaseExpiresAt: '2026-05-08T12:01:00.000Z',
+        heartbeatAt: '2026-05-08T11:59:30.000Z',
+        usageSummary: {},
+      };
+      const findOne = jest.fn().mockReturnValue({
+        forUpdate: jest.fn().mockResolvedValue(run),
+      });
+      mockRunQuery.mockReturnValueOnce({ findOne });
+      // Lease expires 1m after `now` and the heartbeat is only 30s old: expect null and no status event.
+      await expect(
+        AgentRunService.markWaitingForInputForRecovery(VALID_RUN_UUID, eligibility, {
+          now: new Date('2026-05-08T12:00:00.000Z'),
+          expectedExecutionOwner: 'worker-1',
+        })
+      ).resolves.toBeNull();
+
+      expect(mockAppendStatusEventForRunInTransaction).not.toHaveBeenCalled();
+    });
+
     it('can pause an owner-fenced resume run before the active lease expires', async () => {
       const run = {
         id: 17,
@@ -757,49 +905,6 @@ describe('AgentRunService', () => {
     });
   });
 
-  describe('patchStatus', () => {
-    it('emits canonical run status event names for approval waits and resumed runs', async () => {
-      const findOne = jest.fn().mockResolvedValue({
-        id: 17,
-        uuid: VALID_RUN_UUID,
-      });
-      const patchAndFetchById = jest.fn().mockImplementation((_id, patch) =>
-        Promise.resolve({
-          id: 17,
-          uuid: VALID_RUN_UUID,
-          usageSummary: {},
-          error: null,
-          ...patch,
-        })
-      );
-      mockRunQuery
-        .mockReturnValueOnce({ findOne })
-        .mockReturnValueOnce({ patchAndFetchById })
-        .mockReturnValueOnce({ findOne })
-        .mockReturnValueOnce({ patchAndFetchById });
-
-      await AgentRunService.patchStatus(VALID_RUN_UUID, 'waiting_for_approval');
-      await AgentRunService.patchStatus(VALID_RUN_UUID, 'queued');
-
-      expect(mockAppendStatusEvent).toHaveBeenNthCalledWith(
-        1,
-        VALID_RUN_UUID,
-        'run.waiting_for_approval',
-        expect.objectContaining({
-          status: 'waiting_for_approval',
-        })
-      );
-      expect(mockAppendStatusEvent).toHaveBeenNthCalledWith(
-        2,
-        VALID_RUN_UUID,
-        'run.queued',
-        expect.objectContaining({
-          status: 'queued',
-        })
-      );
-    });
-  });
-
   describe('owner-aware execution helpers', () => {
     it('updates a matching owner terminal status and emits one status event after the transition', async () => {
       const ownedRun = {
@@ -1073,6 +1178,7 @@ describe('AgentRunService', () => {
         sessionId: 23,
         status: 'running',
         leaseExpiresAt: new Date(Date.now() + 60_000).toISOString(),
+        heartbeatAt: new Date(Date.now() - 30_000).toISOString(),
       };
       const findOne = jest.fn().mockReturnValue({
         forUpdate: jest.fn().mockResolvedValue(run),
@@ -1083,5 +1189,50 @@ describe('AgentRunService', () => {
         AgentRunService.claimQueuedRunForExecution(VALID_RUN_UUID, 'worker-1', 30 * 60 * 1000)
       ).resolves.toBeNull();
     });
+
+    it('reclaims a heartbeat-stale running run even when its lease is still active', async () => {
+      // Lease is still active (expires in 10m) but the last heartbeat is 5m old, beyond the 3-min stale window: the orphaned run must be reclaimable without waiting for lease expiry.
+      const run = {
+        id: 17,
+        uuid: VALID_RUN_UUID,
+        sessionId: 23,
+        status: 'running',
+        leaseExpiresAt: new Date(Date.now() + 10 * 60_000).toISOString(),
+        heartbeatAt: new Date(Date.now() - 5 * 60_000).toISOString(),
+      };
+      const findOne = jest.fn().mockReturnValue({
+        forUpdate: jest.fn().mockResolvedValue(run),
+      });
+      const patchAndFetchById = jest.fn().mockResolvedValue({
+        ...run,
+        status: 'starting',
+        executionOwner: 'worker-2',
+      });
+      mockRunQuery.mockReturnValueOnce({ findOne }).mockReturnValueOnce({ patchAndFetchById });
+      mockSessionQuery.mockReturnValue({
+        findById: jest.fn().mockReturnValue({
+          forUpdate: jest.fn().mockResolvedValue({ id: 23 }),
+        }),
+      });
+      // worker-2 claims with a 30-minute lease duration (30 * 60 * 1000 ms).
+      await expect(
+        AgentRunService.claimQueuedRunForExecution(VALID_RUN_UUID, 'worker-2', 30 * 60 * 1000)
+      ).resolves.toEqual(
+        expect.objectContaining({
+          status: 'starting',
+          executionOwner: 'worker-2',
+        })
+      );
+      // The claim patch must set the new owner together with string lease and heartbeat timestamps.
+      expect(patchAndFetchById).toHaveBeenCalledWith(
+        17,
+        expect.objectContaining({
+          status: 'starting',
+          executionOwner: 'worker-2',
+          leaseExpiresAt: expect.any(String),
+          heartbeatAt: expect.any(String),
+        })
+      );
+    });
   });
 });
diff --git a/src/server/services/agent/__tests__/debugToolLoopControls.test.ts b/src/server/services/agent/__tests__/debugToolLoopControls.test.ts
index d56d84e4..ec0c0568 100644
--- a/src/server/services/agent/__tests__/debugToolLoopControls.test.ts
+++ b/src/server/services/agent/__tests__/debugToolLoopControls.test.ts
@@ -237,8 +237,62 @@ describe('resolveDebugToolLoopControls', () => {
     expect(controls.activeTools).not.toEqual(
       expect.arrayContaining(['mcp__lifecycle__update_file', 'mcp__lifecycle__patch_k8s_resource'])
     );
-    expect(controls.effectiveMaxIterations).toBe(9);
-    expect(mockStepCountIs).toHaveBeenCalledWith(9);
+    expect(controls.effectiveMaxIterations).toBe(14);
+    expect(mockStepCountIs).toHaveBeenCalledWith(14);
+  });
+
+  it('strips workspace-provisioning tools for non-Debug build-context runs without an intent', () => {
+    const customBuildContextRunPlan = {
+      ...buildRunPlan(),
+      agent: {
+        id: 'custom.repo-helper',
+        label: 'Repo Helper',
+        sourceKind: 'build_context_chat',
+      },
+    } as AgentRunPlanSnapshotV1;
+    const controls = resolveDebugToolLoopControls({
+      runPlanSnapshot: customBuildContextRunPlan,
+      tools,
+      toolMetadata: metadata,
+      maxIterations: 14,
+    });
+    // A custom agent on a 'build_context_chat' source gets an allow-list with the sandbox workspace tools removed.
+    expect(controls.activeTools).toBeDefined();
+    expect(controls.activeTools).not.toEqual(
+      expect.arrayContaining([
+        'mcp__sandbox__workspace_exec',
+        'mcp__sandbox__workspace_write_file',
+        'mcp__sandbox__workspace_exec_mutation',
+      ])
+    );
+    // Custom agents aren't constrained to read-only; only workspace-provisioning tools are removed.
+    expect(controls.activeTools).toEqual(
+      expect.arrayContaining(['mcp__lifecycle__get_file', 'mcp__lifecycle__update_file'])
+    );
+    expect(controls.prepareStep).toBeUndefined();
+    expect(controls.effectiveMaxIterations).toBe(14);
+    expect(mockStepCountIs).toHaveBeenCalledWith(14);
+  });
+
+  it('leaves non-build-context runs without an intent unconstrained even if sandbox tools exist', () => {
+    const customWorkspaceRunPlan = {
+      ...buildRunPlan(),
+      agent: {
+        id: 'custom.repo-helper',
+        label: 'Repo Helper',
+        sourceKind: 'workspace_session',
+      },
+    } as AgentRunPlanSnapshotV1;
+    const controls = resolveDebugToolLoopControls({
+      runPlanSnapshot: customWorkspaceRunPlan,
+      tools,
+      toolMetadata: metadata,
+      maxIterations: 14,
+    });
+    // With sourceKind 'workspace_session' no allow-list and no per-step hook are expected; the iteration cap is unchanged.
+    expect(controls.activeTools).toBeUndefined();
+    expect(controls.prepareStep).toBeUndefined();
+    expect(controls.effectiveMaxIterations).toBe(14);
   });
 
   it('limits diagnosis to read tools, then reserves a final no-tool answer step', async () => {
@@ -265,12 +319,12 @@ describe('resolveDebugToolLoopControls', () => {
         'mcp__sample__stale_missing_tool',
       ])
     );
-    expect(controls.effectiveMaxIterations).toBe(9);
-    expect(mockStepCountIs).toHaveBeenCalledWith(9);
+    expect(controls.effectiveMaxIterations).toBe(14);
+    expect(mockStepCountIs).toHaveBeenCalledWith(14);
     expect(await controls.prepareStep?.({ stepNumber: 0 } as any)).toEqual({
       activeTools: controls.activeTools,
     });
-    expect(await controls.prepareStep?.({ stepNumber: 8 } as any)).toEqual({
+    expect(await controls.prepareStep?.({ stepNumber: 13 } as any)).toEqual({
       activeTools: [],
       toolChoice: 'none',
     });
@@ -305,9 +359,9 @@ describe('resolveDebugToolLoopControls', () => {
         'mcp__docs__update_docs',
       ])
     );
-    expect(controls.effectiveMaxIterations).toBe(9);
-    expect(mockStepCountIs).toHaveBeenCalledWith(9);
-    expect(await controls.prepareStep?.({ stepNumber: 8 } as any)).toEqual({
+    expect(controls.effectiveMaxIterations).toBe(99);
+    expect(mockStepCountIs).toHaveBeenCalledWith(99);
+    expect(await controls.prepareStep?.({ stepNumber: 98 } as any)).toEqual({
       activeTools: [],
       toolChoice: 'none',
     });
@@ -380,7 +434,7 @@ describe('resolveDebugToolLoopControls', () => {
     );
     expect(controls.activeTools).not.toContain('mcp__sample__unguarded_repair');
     expect(controls.activeTools).not.toContain('mcp__sample__denied_repair');
-    expect(controls.effectiveMaxIterations).toBe(10);
-    expect(mockStepCountIs).toHaveBeenCalledWith(10);
+    expect(controls.effectiveMaxIterations).toBe(14);
+    expect(mockStepCountIs).toHaveBeenCalledWith(14);
   });
 });
diff --git a/src/server/services/agent/__tests__/sessionOwnership.test.ts b/src/server/services/agent/__tests__/sessionOwnership.test.ts
new file mode 100644
index 00000000..0e5f7a9d
--- /dev/null
+++ b/src/server/services/agent/__tests__/sessionOwnership.test.ts
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const mockAgentSessionQuery = jest.fn();
+
+jest.mock('server/models/AgentSession', () => ({
+  __esModule: true,
+  default: {
+    query: (...args: unknown[]) => mockAgentSessionQuery(...args),
+  },
+}));
+
+import { getOwnedSession } from '../sessionOwnership';
+
+/** Minimal stand-in for the AgentSession query builder: its `findOne` mock resolves to `result`. */
+function buildQuery(result: unknown) {
+  return {
+    findOne: jest.fn().mockResolvedValue(result),
+  };
+}
+
+describe('getOwnedSession', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  it('returns the session scoped to uuid + userId', async () => {
+    // Asserts that the lookup filters on both the session uuid and the requesting user's id.
+    const ownedSession = { id: 1, uuid: 'session-uuid', userId: 'user-1' };
+    const sessionQuery = buildQuery(ownedSession);
+    mockAgentSessionQuery.mockReturnValue(sessionQuery);
+
+    await expect(getOwnedSession('session-uuid', 'user-1')).resolves.toBe(ownedSession);
+
+    expect(sessionQuery.findOne).toHaveBeenCalledWith({ uuid: 'session-uuid', userId: 'user-1' });
+  });
+
+  it('throws when no session matches the requesting user', async () => {
+    mockAgentSessionQuery.mockReturnValue(buildQuery(undefined));
+
+    await expect(getOwnedSession('missing', 'user-1')).rejects.toThrow('Agent session not found');
+  });
+});
diff --git a/src/server/services/agent/__tests__/thinkingProviderOptions.test.ts b/src/server/services/agent/__tests__/thinkingProviderOptions.test.ts
new file mode 100644
index 00000000..96d36ddd
--- /dev/null
+++ b/src/server/services/agent/__tests__/thinkingProviderOptions.test.ts
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { resolveThinkingProviderOptions } from '../thinkingProviderOptions';
+
+describe('resolveThinkingProviderOptions', () => {
+  it('asks Gemini 3+ for thought summaries via thinkingLevel', () => {
+    // Both provider ids are expected to produce the same `google` options block.
+    const expected = {
+      google: { thinkingConfig: { includeThoughts: true, thinkingLevel: 'medium' } },
+    };
+    ['gemini', 'google'].forEach((provider) => {
+      expect(resolveThinkingProviderOptions(provider, 'gemini-3.5-flash')).toEqual(expected);
+    });
+  });
+
+  it('asks legacy Gemini 2.x for thought summaries via thinkingBudget', () => {
+    const options = resolveThinkingProviderOptions('gemini', 'gemini-2.5-flash');
+    expect(options).toEqual({
+      google: { thinkingConfig: { includeThoughts: true, thinkingBudget: -1 } },
+    });
+  });
+
+  it('enables Anthropic thinking with a bounded budget', () => {
+    const options = resolveThinkingProviderOptions('anthropic', 'claude-x');
+    expect(options).toEqual({ anthropic: { thinking: { type: 'enabled', budgetTokens: 4096 } } });
+  });
+
+  it('returns no options for providers without tool-callable reasoning', () => {
+    // Covers one named provider ('openai') and one unrecognized provider id ('unknown').
+    expect(resolveThinkingProviderOptions('openai', 'gpt-x')).toBeUndefined();
+    expect(resolveThinkingProviderOptions('unknown', 'x')).toBeUndefined();
+  });
+});
diff --git a/src/server/services/agent/capabilitySessionContext.ts b/src/server/services/agent/capabilitySessionContext.ts
new file mode 100644
index 00000000..23c3f389
--- /dev/null
+++ b/src/server/services/agent/capabilitySessionContext.ts
@@ -0,0 +1,233 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import AgentSession from 'server/models/AgentSession';
+import { getLogger } from 'server/lib/logger';
+import type { AgentRuntimeConfig } from 'server/services/types/agentRuntimeConfig';
+import type { LifecycleDiagnosticGithubSafety } from './diagnosticTools';
+import { YamlConfigParser } from 'server/lib/yamlConfigParser';
+import type { LifecycleConfig } from 'server/models/yaml/Config';
+import Build from 'server/models/Build';
+import type { DatabaseBuildScope } from 'server/services/agent/tools/shared/databaseClient';
+
+const LIFECYCLE_CONFIG_WRITE_PATTERNS = ['lifecycle.yaml', 'lifecycle.yml'];
+
+/** Repo of the workspace entry flagged primary, else the first selected service's repo, else undefined. */
+export function resolvePrimaryRepo(session: AgentSession): string | undefined {
+  const workspaceRepos = session.workspaceRepos || [];
+  const flaggedRepo = workspaceRepos.find((entry) => entry.primary)?.repo;
+  // Falsy candidates (missing or empty) fall through, ending at undefined.
+  const serviceRepo = session.selectedServices?.[0]?.repo;
+  return flaggedRepo || serviceRepo || undefined;
+}
+
+/**
+ * Picks the branch of the primary workspace repo (or of the first workspace repo when none is flagged),
+ * falling back to the first selected service's branch; null when no branch is available.
+ */
+function resolvePrimaryBranch(session: AgentSession): string | null {
+  const workspaceRepos = session.workspaceRepos || [];
+  const anchorRepo = workspaceRepos.find((entry) => entry.primary) || workspaceRepos[0];
+  return anchorRepo?.branch || session.selectedServices?.[0]?.branch || null;
+}
+
+/** Adds a normalized repo-relative path to `files`; non-string and empty values are ignored. */
+function addReferencedFile(files: Set<string>, value: unknown) {
+  if (typeof value === 'string') {
+    const withoutLeadingSlashes = value.trim().replace(/^\/+/, '');
+    const relativePath = withoutLeadingSlashes.replace(/^\.\//, '');
+    if (relativePath) {
+      files.add(relativePath);
+    }
+  }
+}
+
+/** Gathers the Dockerfile, env-mapping and Helm value-file paths referenced by each service of a lifecycle config. */
+function collectLifecycleConfigReferencedFiles(config: LifecycleConfig | null | undefined): string[] {
+  const referenced = new Set<string>();
+  (config?.services || []).forEach((service) => {
+    const { github, helm } = service as Record<string, any>;
+    // The Set keeps insertion order, so this list fixes the order of the returned paths.
+    const candidatePaths: unknown[] = [
+      github?.docker?.app?.dockerfilePath,
+      github?.docker?.init?.dockerfilePath,
+      helm?.docker?.app?.dockerfilePath,
+      helm?.docker?.init?.dockerfilePath,
+      helm?.envMapping?.app?.path,
+      helm?.envMapping?.init?.path,
+      ...(helm?.chart?.valueFiles || []),
+    ];
+    candidatePaths.forEach((path) => addReferencedFile(referenced, path));
+  });
+  return [...referenced];
+}
+
+/** Lists the Dockerfile and chart value-file paths of the first selected service; empty when none is selected. */
+function collectSelectedDeployReferencedFiles(session: AgentSession): string[] {
+  const firstService = session.selectedServices?.[0];
+  if (!firstService) {
+    return [];
+  }
+  const referenced = new Set<string>();
+  const candidatePaths: unknown[] = [
+    firstService.dockerfilePath,
+    firstService.initDockerfilePath,
+    ...(firstService.chartValueFiles || []),
+  ];
+  candidatePaths.forEach((path) => addReferencedFile(referenced, path));
+  return [...referenced];
+}
+
+type LifecycleDiagnosticBuildScope = {
+  allowedNamespace: string | null; // Build namespace; null when no Build row was loaded or it has none.
+  allowedRepos: string[]; // Lowercased "owner/name" entries from the session plus, when loaded, the Build.
+  buildUuid: string | null; // Taken from the session, not from the Build row.
+  pullRequestId: number | null; // null when no Build row was loaded or it has no pull request.
+  databaseScope: DatabaseBuildScope | null; // Build-derived ids; null when no Build row was loaded.
+};
+
+/** Records a trimmed, lowercased "owner/name" repo in `repos`; non-strings and values without a "/" are dropped. */
+function addRepoFullName(repos: Set<string>, value: unknown) {
+  // Non-string input collapses to '' which never contains a slash, so it is skipped below.
+  const fullName = typeof value === 'string' ? value.trim().toLowerCase() : '';
+  if (fullName.includes('/')) {
+    repos.add(fullName);
+  }
+}
+
+/** SECURITY: every diagnostic tool is locked to this scope (namespace, full repo set, build UUID, PR id, DB scope) from the authoritative Build row. With no buildUuid, a missing Build, or a failed lookup (logged at warn), only the session-derived repos and buildUuid are returned; namespace, PR id and DB scope stay null. */
+async function resolveLifecycleDiagnosticBuildScope(session: AgentSession): Promise<LifecycleDiagnosticBuildScope> {
+  const repos = new Set<string>();
+  // Always include repos already known on the session as a baseline.
+  for (const workspaceRepo of session.workspaceRepos || []) {
+    addRepoFullName(repos, workspaceRepo.repo);
+  }
+  for (const selectedService of session.selectedServices || []) {
+    addRepoFullName(repos, selectedService.repo);
+  }
+  // Session-only defaults; the Build-derived fields stay null unless the lookup below succeeds.
+  const scope: LifecycleDiagnosticBuildScope = {
+    allowedNamespace: null,
+    allowedRepos: [...repos],
+    buildUuid: session.buildUuid || null,
+    pullRequestId: null,
+    databaseScope: null,
+  };
+  // No build attached to the session: there is no Build row to load.
+  if (!session.buildUuid) {
+    return scope;
+  }
+  // Best-effort load: an error is logged in the catch below and the session-only scope is returned.
+  try {
+    const build = await Build.query()
+      .findOne({ uuid: session.buildUuid })
+      .withGraphFetched('[pullRequest.repository, deploys.repository]');
+    if (!build) {
+      return scope;
+    }
+    // Widen the repo set with the pull request's repository and every deploy's repository.
+    addRepoFullName(repos, build.pullRequest?.repository?.fullName);
+    addRepoFullName(repos, build.pullRequest?.fullName);
+    for (const deploy of build.deploys || []) {
+      addRepoFullName(repos, deploy.repository?.fullName);
+    }
+    // Prefer a numeric pullRequestId on the build itself; otherwise use the fetched pullRequest relation's id.
+    const pullRequestId =
+      typeof (build as { pullRequestId?: number | null }).pullRequestId === 'number'
+        ? (build as { pullRequestId?: number | null }).pullRequestId ?? null
+        : build.pullRequest?.id ?? null;
+    const repositoryIds = [
+      ...new Set(
+        [build.pullRequest?.repository?.id, ...(build.deploys || []).map((deploy) => deploy.repository?.id)].filter(
+          (id): id is number => typeof id === 'number'
+        )
+      ),
+    ];
+    // The scope is only mutated here, after every Build-derived value has been computed.
+    scope.allowedNamespace = build.namespace || null;
+    scope.allowedRepos = [...repos];
+    scope.pullRequestId = pullRequestId;
+    scope.databaseScope = {
+      buildId: build.id,
+      buildUuid: build.uuid,
+      pullRequestId,
+      environmentId: typeof build.environmentId === 'number' ? build.environmentId : null,
+      repositoryIds,
+    };
+  } catch (error) {
+    getLogger().warn(
+      { error, buildUuid: session.buildUuid },
+      `AgentExec: lifecycle diagnostic build scope unavailable buildUuid=${session.buildUuid}`
+    );
+  }
+
+  return scope;
+}
+
+/**
+ * Builds the GitHub safety settings for lifecycle diagnostics: allowed branch, write/exclude patterns,
+ * referenced files and the build scope. Files referenced by the repo's lifecycle config are merged in best-effort.
+ */
+export async function resolveLifecycleDiagnosticGithubSafety({
+  session,
+  repoFullName,
+  config,
+}: {
+  session: AgentSession;
+  repoFullName?: string;
+  config?: AgentRuntimeConfig | null;
+}): Promise<LifecycleDiagnosticGithubSafety> {
+  const allowedBranch = resolvePrimaryBranch(session);
+  const writePatterns = new Set([...LIFECYCLE_CONFIG_WRITE_PATTERNS, ...(config?.allowedWritePatterns || [])]);
+  const deployFiles = collectSelectedDeployReferencedFiles(session);
+  const buildScope = await resolveLifecycleDiagnosticBuildScope(session);
+  const safety: LifecycleDiagnosticGithubSafety = {
+    allowedBranch,
+    allowedWritePatterns: [...writePatterns],
+    excludedFilePatterns: config?.excludedFilePatterns || [],
+    referencedFiles: deployFiles,
+    allowedNamespace: buildScope.allowedNamespace,
+    allowedRepos: buildScope.allowedRepos,
+    buildUuid: buildScope.buildUuid,
+    pullRequestId: buildScope.pullRequestId,
+    databaseScope: buildScope.databaseScope,
+  };
+
+  // The lifecycle config is only fetched when both the repo and the branch are known.
+  if (repoFullName && allowedBranch) {
+    try {
+      const lifecycleConfig = await new YamlConfigParser().parseYamlConfigFromBranch(repoFullName, allowedBranch);
+      const configFiles = collectLifecycleConfigReferencedFiles(lifecycleConfig);
+      safety.referencedFiles = [...new Set([...deployFiles, ...configFiles])];
+    } catch (error) {
+      getLogger().warn(
+        { error, repo: repoFullName, branch: allowedBranch },
+        `AgentExec: lifecycle config references unavailable repo=${repoFullName} branch=${allowedBranch}`
+      );
+    }
+  }
+
+  return safety;
+}
+
+/** Reads the agent session with the given uuid; throws 'Agent session not found' when no row matches. */
+export async function loadLatestSession(sessionUuid: string): Promise<AgentSession> {
+  const latest = await AgentSession.query().findOne({ uuid: sessionUuid });
+  if (latest) {
+    return latest;
+  }
+  throw new Error('Agent session not found');
+}
diff --git a/src/server/services/agent/capabilityToolHelpers.ts b/src/server/services/agent/capabilityToolHelpers.ts
new file mode 100644
index 00000000..39221adc
--- /dev/null
+++ b/src/server/services/agent/capabilityToolHelpers.ts
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { dynamicTool, jsonSchema } from 'ai';
+import type { AgentSessionToolRule } from 'server/services/types/agentSessionConfig';
+import type { ResolvedAgentCapabilityAccess } from './PolicyService';
+import type { AgentApprovalMode, AgentToolAuditRecord, AgentFileChangeData } from './types';
+import type { AgentCapabilityCatalogId } from './capabilityCatalog';
+import { buildAgentRuntimeToolMetadata, type AgentRuntimeToolMetadata } from './toolMetadata';
+
+const REDACTED_MCP_DEFAULT_ARG = '******';
+
+export type ToolExecutionHooks = {
+  onToolStarted?: (audit: AgentToolAuditRecord) => Promise<void>; // awaited before the tool's runtime call
+  onToolFinished?: (audit: AgentToolAuditRecord & { result: unknown; status: 'completed' | 'failed' }) => Promise<void>;
+  onFileChange?: (change: AgentFileChangeData) => Promise<void>; // awaited once per reported file change
+  getActiveRunUuid?: () => string | null | undefined; // chat workspace tools forward it as allowedActiveRunUuid
+};
+
+export function toAiJsonSchema(schema: unknown) {
+  return jsonSchema(schema as any); // `as any` sidesteps the SDK's schema typing; the value is passed unchanged
+}
+
+export function toAiDynamicTool(config: unknown) {
+  return dynamicTool(config as any); // `as any` sidesteps the SDK's config typing; the value is passed unchanged
+}
+
+/** Picks the approval mode for `toolKey`: the first matching tool rule's mode if set, else `capabilityMode`. */
+export function resolveToolApprovalMode({
+  toolRules,
+  toolKey,
+  capabilityMode,
+}: {
+  toolRules: AgentSessionToolRule[] | undefined;
+  toolKey: string;
+  capabilityMode: AgentApprovalMode;
+}): AgentApprovalMode {
+  return (toolRules ?? []).find((candidate) => candidate.toolKey === toolKey)?.mode || capabilityMode;
+}
+
+export function recordToolMetadata(
+  toolMetadata: AgentRuntimeToolMetadata[] | undefined,
+  metadata: Omit<AgentRuntimeToolMetadata, 'effect' | 'resourceDomain' | 'workspaceNeed' | 'exposure'>
+) {
+  toolMetadata?.push(buildAgentRuntimeToolMetadata(metadata)); // no collector array: nothing is built or recorded
+}
+
+/** True only when the resolved access list holds an allowed entry for `capabilityId`; no list means denied. */
+export function isCatalogCapabilityAllowed(
+  resolvedCapabilityAccess: ResolvedAgentCapabilityAccess[] | undefined,
+  capabilityId: AgentCapabilityCatalogId
+): boolean {
+  const accessList = resolvedCapabilityAccess ?? [];
+  const isGranted = (item: ResolvedAgentCapabilityAccess) => item.capabilityId === capabilityId && item.allowed;
+
+  return accessList.some(isGranted);
+}
+
+/** Normalizes connection refs into a set of trimmed, non-empty values; `undefined` is passed through as-is. */
+export function selectedMcpConnectionRefs(connectionRefs?: string[]): Set<string> | undefined {
+  if (connectionRefs === undefined) {
+    return undefined;
+  }
+  return new Set(connectionRefs.map((ref) => ref.trim()).filter((ref) => ref.length > 0));
+}
+
+/** Copies `args`, masking every own argument whose key also appears in `defaultArgs`. */
+export function redactMcpDefaultArgs(
+  args: Record<string, unknown>,
+  defaultArgs: Record<string, string> | undefined
+): Record<string, unknown> {
+  if (!defaultArgs || Object.keys(defaultArgs).length === 0) {
+    return args;
+  }
+  const redacted = { ...args };
+  for (const key of Object.keys(defaultArgs)) {
+    // Own-property check: `in` also matches inherited names (e.g. `toString`) and would add bogus keys.
+    if (Object.prototype.hasOwnProperty.call(redacted, key)) {
+      redacted[key] = REDACTED_MCP_DEFAULT_ARG;
+    }
+  }
+  return redacted;
+}
diff --git a/src/server/services/agent/chatWorkspaceToolRegistration.ts b/src/server/services/agent/chatWorkspaceToolRegistration.ts
new file mode 100644
index 00000000..a53f7ec9
--- /dev/null
+++ b/src/server/services/agent/chatWorkspaceToolRegistration.ts
@@ -0,0 +1,800 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { type ToolSet } from 'ai';
+import AgentSession from 'server/models/AgentSession';
+import AgentSessionService from 'server/services/agentSession';
+import { SESSION_WORKSPACE_GATEWAY_PORT } from 'server/lib/agentSession/podFactory';
+import { McpClientManager } from 'server/services/agentRuntime/mcp/client';
+import { usesSessionWorkspaceGatewayExecution } from 'server/services/agentRuntime/mcp/sessionPod';
+import type { RequestUserIdentity } from 'server/lib/get-user';
+import { getLogger } from 'server/lib/logger';
+import type { AgentSessionToolRule } from 'server/services/types/agentSessionConfig';
+import AgentPolicyService from './PolicyService';
+import type { ResolvedAgentCapabilityAccess } from './PolicyService';
+import type { AgentApprovalPolicy, AgentCapabilityKey, AgentToolAuditRecord } from './types';
+import type { AgentCapabilityCatalogId } from './capabilityCatalog';
+import type { ResolvedMcpServer } from 'server/services/agentRuntime/mcp/types';
+import { assertSafeWorkspaceMutationCommand, isReadOnlyWorkspaceCommand } from './sandboxExecSafety';
+import { buildProposedFileChanges, buildResultFileChanges, didToolResultFail } from './fileChanges';
+import { resolveAgentSessionDurabilityConfig } from 'server/lib/agentSession/runtimeConfig';
+import {
+  buildAgentToolKey,
+  CHAT_PUBLISH_HTTP_TOOL_NAME,
+  LIFECYCLE_BUILTIN_SERVER_SLUG,
+  SESSION_WORKSPACE_MUTATION_TOOL_NAME,
+  SESSION_WORKSPACE_READONLY_TOOL_NAME,
+  SESSION_WORKSPACE_SERVER_NAME,
+  SESSION_WORKSPACE_SERVER_SLUG,
+  buildWorkspaceMutationExecDescription,
+  buildWorkspaceReadonlyExecDescription,
+} from './toolKeys';
+import { SessionWorkspaceGatewayUnavailableError } from './errors';
+import AgentSandboxService from './SandboxService';
+import type { AgentRuntimeToolMetadata } from './toolMetadata';
+import {
+  isCatalogCapabilityAllowed,
+  recordToolMetadata,
+  resolveToolApprovalMode,
+  toAiDynamicTool,
+  toAiJsonSchema,
+  type ToolExecutionHooks,
+} from './capabilityToolHelpers';
+import { loadLatestSession } from './capabilitySessionContext';
+
+type SessionWorkspaceGatewayTimeouts = {
+  discoveryTimeoutMs: number;
+  executionTimeoutMs: number;
+};
+
+const WORKSPACE_EXEC_RUNTIME_TOOL_NAME = 'workspace.exec';
+const WORKSPACE_WRITE_FILE_RUNTIME_TOOL_NAME = 'workspace.write_file';
+const WORKSPACE_EDIT_FILE_RUNTIME_TOOL_NAME = 'workspace.edit_file';
+export const WORKSPACE_EXEC_INPUT_SCHEMA = {
+  type: 'object',
+  required: ['command'],
+  additionalProperties: false,
+  properties: {
+    command: {
+      type: 'string',
+      minLength: 1,
+      description: 'Command to run with bash -lc',
+    },
+    cwd: {
+      type: 'string',
+      description: 'Working directory relative to the workspace',
+    },
+    timeoutMs: {
+      type: 'integer',
+      minimum: 1,
+      maximum: 120000,
+      description: 'Command timeout in milliseconds',
+    },
+  },
+} as const;
+const WORKSPACE_WRITE_FILE_INPUT_SCHEMA = {
+  type: 'object',
+  required: ['path', 'content'],
+  additionalProperties: false,
+  properties: {
+    path: {
+      type: 'string',
+      minLength: 1,
+      description: 'Workspace-relative file path to write',
+    },
+    content: {
+      type: 'string',
+      description: 'Complete file content to write',
+    },
+  },
+} as const;
+const WORKSPACE_EDIT_FILE_INPUT_SCHEMA = {
+  type: 'object',
+  required: ['path', 'oldText', 'newText'],
+  additionalProperties: false,
+  properties: {
+    path: {
+      type: 'string',
+      minLength: 1,
+      description: 'Workspace-relative file path to edit',
+    },
+    oldText: {
+      type: 'string',
+      description: 'Exact existing text to replace',
+    },
+    newText: {
+      type: 'string',
+      description: 'Replacement text',
+    },
+  },
+} as const;
+const PUBLISH_HTTP_INPUT_SCHEMA = {
+  type: 'object',
+  required: ['port'],
+  additionalProperties: false,
+  properties: {
+    port: {
+      type: 'integer',
+      minimum: 1,
+      maximum: 65535,
+      description: 'Workspace HTTP port to expose through ingress',
+    },
+  },
+} as const;
+
+function resolveSessionWorkspaceGatewayBaseUrl(session: AgentSession): string | null {
+  const { podName, namespace, status } = session; // all three are required to build the in-cluster URL
+  if (podName && namespace && status === 'active') {
+    return `http://${podName}.${namespace}.svc.cluster.local:${SESSION_WORKSPACE_GATEWAY_PORT}`;
+  }
+  return null;
+}
+
+/** Reports whether a chat session is active with a ready workspace and both a namespace and a pod name set. */
+export function isChatWorkspaceRuntimeReady(session: AgentSession): boolean {
+  if (session.sessionKind !== 'chat' || session.status !== 'active') {
+    return false;
+  }
+
+  const hasPodPlacement = Boolean(session.namespace) && Boolean(session.podName);
+  return session.workspaceStatus === 'ready' && hasPodPlacement;
+}
+
+/**
+ * Discovers the tools of the session's workspace gateway; null when no gateway URL resolves, throws if unreachable.
+ */
+export async function resolveSessionWorkspaceGatewayServer(
+  session: AgentSession,
+  timeouts: SessionWorkspaceGatewayTimeouts
+): Promise<ResolvedMcpServer | null> {
+  const gatewayBaseUrl =
+    (await AgentSandboxService.resolveWorkspaceGatewayBaseUrl(session.uuid)) ||
+    resolveSessionWorkspaceGatewayBaseUrl(session);
+  if (!gatewayBaseUrl) {
+    return null;
+  }
+  const { discoveryTimeoutMs, executionTimeoutMs } = timeouts;
+  const mcpUrl = `${gatewayBaseUrl}/mcp`;
+  const probe = new McpClientManager();
+  try {
+    await probe.connect({ type: 'http', url: mcpUrl }, discoveryTimeoutMs);
+    return {
+      scope: 'session',
+      slug: 'sandbox',
+      name: 'Session Workspace',
+      transport: { type: 'http', url: mcpUrl },
+      timeout: executionTimeoutMs,
+      defaultArgs: {},
+      env: {},
+      discoveredTools: await probe.listTools(discoveryTimeoutMs),
+    };
+  } catch (error) {
+    getLogger().warn(
+      { error },
+      `AgentExec: workspace gateway unavailable sessionId=${session.uuid} namespace=${session.namespace} podName=${session.podName}`
+    );
+    throw new SessionWorkspaceGatewayUnavailableError({
+      sessionId: session.uuid,
+      cause: error,
+    });
+  } finally {
+    await probe.close();
+  }
+}
+
+/**
+ * Points a server that executes through the session workspace gateway at the session's gateway URL.
+ * Servers on any other transport are returned untouched; null means the gateway URL cannot be derived.
+ */
+export function resolveSessionExecutionServer(
+  session: AgentSession,
+  server: ResolvedMcpServer
+): ResolvedMcpServer | null {
+  if (!usesSessionWorkspaceGatewayExecution(server.transport)) {
+    return server;
+  }
+  const gatewayBaseUrl = resolveSessionWorkspaceGatewayBaseUrl(session);
+  if (!gatewayBaseUrl) {
+    return null;
+  }
+  const url = `${gatewayBaseUrl}/servers/${encodeURIComponent(server.slug)}/mcp`;
+  return {
+    ...server,
+    transport: { type: 'http', url },
+  };
+}
+
+export async function getFileChangePreviewChars(): Promise<number> {
+  return (await resolveAgentSessionDurabilityConfig()).fileChangePreviewChars; // resolved on each call
+}
+
+// Reloads the session; chat sessions then go through ensureChatSandbox, any other kind is returned as loaded.
+async function ensureChatWorkspaceRuntime({
+  session,
+  userIdentity,
+  requestGitHubToken,
+  allowedActiveRunUuid,
+}: {
+  session: AgentSession;
+  userIdentity: RequestUserIdentity;
+  requestGitHubToken?: string | null;
+  allowedActiveRunUuid?: string | null;
+}): Promise<AgentSession> {
+  const current = await loadLatestSession(session.uuid);
+  if (current.sessionKind !== 'chat') {
+    return current;
+  }
+  const runScope = allowedActiveRunUuid ? { allowedActiveRunUuid } : {};
+  const { session: ensuredSession } = await AgentSandboxService.ensureChatSandbox({
+    sessionId: current.uuid,
+    userId: userIdentity.userId,
+    userIdentity,
+    githubToken: requestGitHubToken,
+    ...runScope,
+  });
+  return ensuredSession;
+}
+
+// Ensures the chat workspace runtime, then calls a single tool on the session's gateway MCP endpoint.
+// A missing gateway URL and any connect/call failure are raised as SessionWorkspaceGatewayUnavailableError;
+// the client is closed in `finally` either way.
+async function executeWorkspaceRuntimeTool({
+  session,
+  runtimeToolName,
+  input,
+  timeoutMs,
+  userIdentity,
+  requestGitHubToken,
+  allowedActiveRunUuid,
+}: {
+  session: AgentSession;
+  runtimeToolName: string;
+  input: Record<string, unknown>;
+  timeoutMs: number;
+  userIdentity: RequestUserIdentity;
+  requestGitHubToken?: string | null;
+  allowedActiveRunUuid?: string | null;
+}) {
+  const runtimeSession = await ensureChatWorkspaceRuntime({
+    session,
+    userIdentity,
+    requestGitHubToken,
+    allowedActiveRunUuid,
+  });
+
+  const sessionId = runtimeSession.uuid;
+  const baseUrl =
+    (await AgentSandboxService.resolveWorkspaceGatewayBaseUrl(sessionId)) ||
+    resolveSessionWorkspaceGatewayBaseUrl(runtimeSession);
+  if (!baseUrl) {
+    const cause = new Error('Session workspace gateway URL is not available');
+    throw new SessionWorkspaceGatewayUnavailableError({ sessionId, cause });
+  }
+
+  const gateway = new McpClientManager();
+  try {
+    await gateway.connect({ type: 'http', url: `${baseUrl}/mcp` }, timeoutMs);
+    return await gateway.callTool(runtimeToolName, input, timeoutMs);
+  } catch (error) {
+    throw new SessionWorkspaceGatewayUnavailableError({ sessionId, cause: error });
+  } finally {
+    await gateway.close();
+  }
+}
+
+/** Builds the file changes of a finished tool call and awaits `hooks.onFileChange` for each; needs a toolCallId. */
+export async function emitResultFileChanges({
+  hooks,
+  toolCallId,
+  sourceTool,
+  input,
+  result,
+  failed,
+}: {
+  hooks?: ToolExecutionHooks;
+  toolCallId?: string;
+  sourceTool: string;
+  input: Record<string, unknown>;
+  result: unknown;
+  failed: boolean;
+}) {
+  if (!toolCallId) {
+    return;
+  }
+  const previewChars = await getFileChangePreviewChars();
+  const fileChanges = buildResultFileChanges({
+    toolCallId,
+    sourceTool,
+    input,
+    result,
+    failed,
+    previewChars,
+  });
+  for (const fileChange of fileChanges) {
+    await hooks?.onFileChange?.(fileChange);
+  }
+}
+
+function registerChatWorkspaceExecTool({
+  tools,
+  session,
+  userIdentity,
+  approvalPolicy,
+  workspaceToolExecutionTimeoutMs,
+  requestGitHubToken,
+  hooks,
+  toolRules,
+  toolName,
+  capabilityKey,
+  description,
+  readOnly,
+  catalogCapabilityId,
+  resolvedCapabilityAccess,
+  toolMetadata,
+}: {
+  tools: ToolSet; // mutated: the tool is added under its computed tool key
+  session: AgentSession;
+  userIdentity: RequestUserIdentity;
+  approvalPolicy: AgentApprovalPolicy;
+  workspaceToolExecutionTimeoutMs: number;
+  requestGitHubToken?: string | null;
+  hooks?: ToolExecutionHooks;
+  toolRules?: AgentSessionToolRule[];
+  toolName: string;
+  capabilityKey: AgentCapabilityKey;
+  description: string;
+  readOnly: boolean; // true: only commands accepted by isReadOnlyWorkspaceCommand are run
+  catalogCapabilityId: AgentCapabilityCatalogId;
+  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
+  toolMetadata?: AgentRuntimeToolMetadata[];
+}) {
+  if (!isCatalogCapabilityAllowed(resolvedCapabilityAccess, catalogCapabilityId)) {
+    return;
+  }
+  // A tool rule matching this key, when it sets a mode, takes precedence over the capability-level mode.
+  const toolKey = buildAgentToolKey(SESSION_WORKSPACE_SERVER_SLUG, toolName);
+  const mode = resolveToolApprovalMode({
+    toolRules,
+    toolKey,
+    capabilityMode: AgentPolicyService.modeForCapability(approvalPolicy, capabilityKey),
+  });
+  // A denied tool is neither added to `tools` nor recorded in `toolMetadata`.
+  if (mode === 'deny') {
+    return;
+  }
+
+  tools[toolKey] = toAiDynamicTool({
+    description,
+    inputSchema: toAiJsonSchema(WORKSPACE_EXEC_INPUT_SCHEMA),
+    needsApproval: mode === 'require_approval',
+    execute: async (input, context) => {
+      const args = (input as Record<string, unknown>) || {};
+      const command = typeof args.command === 'string' ? args.command : '';
+      if (readOnly && !isReadOnlyWorkspaceCommand(command)) {
+        throw new Error(
+          'This command is not a safe read-only inspection command. Use the workspace exec mutation tool for state-changing, networked, or process-managing commands.'
+        );
+      }
+      if (!readOnly) {
+        assertSafeWorkspaceMutationCommand(command);
+      }
+      // The checks above throw before the audit record exists, so a rejected command reaches no hook.
+      const toolCallId = context?.toolCallId;
+      const audit: AgentToolAuditRecord = {
+        source: 'mcp',
+        serverSlug: SESSION_WORKSPACE_SERVER_SLUG,
+        toolName,
+        toolCallId,
+        args,
+        capabilityKey,
+      };
+
+      await hooks?.onToolStarted?.(audit);
+
+      try {
+        const runtimeArgs = readOnly ? args : { ...args, captureFileChanges: true };
+        const result = await executeWorkspaceRuntimeTool({
+          session,
+          runtimeToolName: WORKSPACE_EXEC_RUNTIME_TOOL_NAME,
+          input: runtimeArgs,
+          timeoutMs: workspaceToolExecutionTimeoutMs,
+          userIdentity,
+          requestGitHubToken,
+          allowedActiveRunUuid: hooks?.getActiveRunUuid?.() ?? null,
+        });
+        const failed = result.isError || didToolResultFail(result);
+        if (!readOnly) {
+          await emitResultFileChanges({
+            hooks,
+            toolCallId,
+            sourceTool: toolName,
+            input: args, // the caller's args, without the injected captureFileChanges flag
+            result,
+            failed,
+          });
+        }
+        await hooks?.onToolFinished?.({
+          ...audit,
+          result,
+          status: failed ? 'failed' : 'completed',
+        });
+        return result;
+      } catch (error) {
+        getLogger().warn({ error }, `AgentExec: chat workspace tool failed sessionId=${session.uuid} tool=${toolName}`);
+        await hooks?.onToolFinished?.({
+          ...audit,
+          result: {
+            error: error instanceof Error ? error.message : String(error),
+          },
+          status: 'failed',
+        });
+        throw error; // rethrown after the failure has been reported to the hook
+      }
+    },
+  });
+  recordToolMetadata(toolMetadata, {
+    toolKey,
+    catalogCapabilityId,
+    capabilityKey,
+    approvalMode: mode,
+  });
+}
+
+/**
+ * Registers one chat-workspace file tool under the session workspace server slug, using the
+ * `workspace_write` capability to resolve its approval mode.
+ *
+ * Nothing is registered when the catalog capability is not allowed or the resolved mode is `deny`.
+ * Proposed changes are reported as soon as the tool input is available; resulting changes are reported
+ * after execution on both the success and the failure path, through the same `emitResultFileChanges`
+ * helper the exec tool uses (it does nothing when the call has no toolCallId).
+ */
+function registerChatWorkspaceFileTool({
+  tools,
+  session,
+  userIdentity,
+  approvalPolicy,
+  workspaceToolExecutionTimeoutMs,
+  requestGitHubToken,
+  hooks,
+  toolRules,
+  toolName,
+  inputSchema,
+  description,
+  catalogCapabilityId,
+  resolvedCapabilityAccess,
+  toolMetadata,
+}: {
+  tools: ToolSet;
+  session: AgentSession;
+  userIdentity: RequestUserIdentity;
+  approvalPolicy: AgentApprovalPolicy;
+  workspaceToolExecutionTimeoutMs: number;
+  requestGitHubToken?: string | null;
+  hooks?: ToolExecutionHooks;
+  toolRules?: AgentSessionToolRule[];
+  toolName: string;
+  inputSchema: Record<string, unknown>;
+  description: string;
+  catalogCapabilityId: AgentCapabilityCatalogId;
+  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
+  toolMetadata?: AgentRuntimeToolMetadata[];
+}) {
+  if (!isCatalogCapabilityAllowed(resolvedCapabilityAccess, catalogCapabilityId)) {
+    return;
+  }
+
+  const toolKey = buildAgentToolKey(SESSION_WORKSPACE_SERVER_SLUG, toolName);
+  const capabilityKey: AgentCapabilityKey = 'workspace_write';
+  const mode = resolveToolApprovalMode({
+    toolRules,
+    toolKey,
+    capabilityMode: AgentPolicyService.modeForCapability(approvalPolicy, capabilityKey),
+  });
+
+  if (mode === 'deny') {
+    return;
+  }
+
+  tools[toolKey] = toAiDynamicTool({
+    description,
+    inputSchema: toAiJsonSchema(inputSchema),
+    needsApproval: mode === 'require_approval',
+    // Reports the proposed change from the input alone, separately from `execute`.
+    onInputAvailable: async ({ input, toolCallId }) => {
+      if (!toolCallId) {
+        return;
+      }
+
+      const args = (input as Record<string, unknown>) || {};
+      const changes = buildProposedFileChanges({
+        toolCallId,
+        sourceTool: toolName,
+        input: args,
+        previewChars: await getFileChangePreviewChars(),
+      });
+
+      for (const change of changes) {
+        await hooks?.onFileChange?.(change);
+      }
+    },
+    execute: async (input, context) => {
+      const args = (input as Record<string, unknown>) || {};
+      const toolCallId = context?.toolCallId;
+      const audit: AgentToolAuditRecord = {
+        source: 'mcp',
+        serverSlug: SESSION_WORKSPACE_SERVER_SLUG,
+        toolName,
+        toolCallId,
+        args,
+        capabilityKey,
+      };
+
+      await hooks?.onToolStarted?.(audit);
+
+      try {
+        const result = await executeWorkspaceRuntimeTool({
+          session,
+          runtimeToolName: toolName,
+          input: args,
+          timeoutMs: workspaceToolExecutionTimeoutMs,
+          userIdentity,
+          requestGitHubToken,
+          allowedActiveRunUuid: hooks?.getActiveRunUuid?.() ?? null,
+        });
+        const failed = result.isError || didToolResultFail(result);
+        // File changes are reported before the finish hook, in the same order as before.
+        await emitResultFileChanges({
+          hooks,
+          toolCallId,
+          sourceTool: toolName,
+          input: args,
+          result,
+          failed,
+        });
+        await hooks?.onToolFinished?.({
+          ...audit,
+          result,
+          status: failed ? 'failed' : 'completed',
+        });
+        return result;
+      } catch (error) {
+        getLogger().warn(
+          { error },
+          `AgentExec: chat workspace file tool failed sessionId=${session.uuid} tool=${toolName}`
+        );
+        // One error payload feeds both the failed file-change report and the finish hook,
+        // so the two can no longer drift apart.
+        const failureResult = {
+          error: error instanceof Error ? error.message : String(error),
+        };
+        await emitResultFileChanges({
+          hooks,
+          toolCallId,
+          sourceTool: toolName,
+          input: args,
+          result: failureResult,
+          failed: true,
+        });
+        await hooks?.onToolFinished?.({
+          ...audit,
+          result: failureResult,
+          status: 'failed',
+        });
+        throw error;
+      }
+    },
+  });
+  recordToolMetadata(toolMetadata, {
+    toolKey,
+    catalogCapabilityId,
+    capabilityKey,
+    approvalMode: mode,
+  });
+}
+
+/**
+ * Registers the chat publish-HTTP tool, which exposes a workspace port via `publishChatHttpPort`.
+ * Skipped when the `preview_publish` catalog capability is not allowed or the resolved approval mode is `deny`;
+ * the approval mode itself comes from the `deploy_k8s_mutation` capability unless a tool rule overrides it.
+ */
+export function registerChatPublishHttpTool({
+  tools,
+  session,
+  approvalPolicy,
+  userIdentity,
+  requestGitHubToken,
+  hooks,
+  toolRules,
+  resolvedCapabilityAccess,
+  toolMetadata,
+}: {
+  tools: ToolSet;
+  session: AgentSession;
+  approvalPolicy: AgentApprovalPolicy;
+  userIdentity: RequestUserIdentity;
+  requestGitHubToken?: string | null;
+  hooks?: ToolExecutionHooks;
+  toolRules?: AgentSessionToolRule[];
+  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
+  toolMetadata?: AgentRuntimeToolMetadata[];
+}) {
+  const toolKey = buildAgentToolKey(LIFECYCLE_BUILTIN_SERVER_SLUG, CHAT_PUBLISH_HTTP_TOOL_NAME);
+  if (!isCatalogCapabilityAllowed(resolvedCapabilityAccess, 'preview_publish')) {
+    return;
+  }
+  const capabilityKey: AgentCapabilityKey = 'deploy_k8s_mutation';
+  const mode = resolveToolApprovalMode({
+    toolRules,
+    toolKey,
+    capabilityMode: AgentPolicyService.modeForCapability(approvalPolicy, capabilityKey),
+  });
+
+  if (mode === 'deny') {
+    return;
+  }
+
+  tools[toolKey] = toAiDynamicTool({
+    description:
+      'Expose a running HTTP app from the chat workspace through lifecycle-managed ingress and return the reachable URL.',
+    inputSchema: toAiJsonSchema(PUBLISH_HTTP_INPUT_SCHEMA),
+    needsApproval: mode === 'require_approval',
+    execute: async (input, context) => {
+      const args = (input as Record<string, unknown>) || {};
+      const toolCallId = context?.toolCallId;
+      const audit: AgentToolAuditRecord = {
+        source: 'mcp',
+        serverSlug: LIFECYCLE_BUILTIN_SERVER_SLUG,
+        toolName: CHAT_PUBLISH_HTTP_TOOL_NAME,
+        toolCallId,
+        args,
+        capabilityKey,
+      };
+
+      await hooks?.onToolStarted?.(audit);
+
+      try {
+        // Validate the port before touching the workspace: an invalid request must fail without
+        // first asking the sandbox service to ensure (and possibly provision) a chat sandbox.
+        const port = Number(args.port);
+        if (!Number.isInteger(port) || port < 1 || port > 65535) {
+          throw new Error('port must be an integer between 1 and 65535');
+        }
+        const runtimeSession = await ensureChatWorkspaceRuntime({
+          session,
+          userIdentity,
+          requestGitHubToken,
+          allowedActiveRunUuid: hooks?.getActiveRunUuid?.() ?? null,
+        });
+        const result = await AgentSessionService.publishChatHttpPort({
+          sessionId: runtimeSession.uuid,
+          userId: userIdentity.userId,
+          port,
+        });
+        await hooks?.onToolFinished?.({
+          ...audit,
+          result,
+          status: 'completed',
+        });
+        return result;
+      } catch (error) {
+        getLogger().warn({ error }, `AgentExec: chat publish failed sessionId=${session.uuid}`);
+        const message = error instanceof Error ? error.message : String(error);
+        await hooks?.onToolFinished?.({ ...audit, result: { error: message }, status: 'failed' });
+        throw error;
+      }
+    },
+  });
+  recordToolMetadata(toolMetadata, {
+    toolKey,
+    catalogCapabilityId: 'preview_publish',
+    capabilityKey,
+    approvalMode: mode,
+  });
+}
+
+export function registerChatWorkspaceTools({
+  tools,
+  session,
+  userIdentity,
+  approvalPolicy,
+  workspaceToolExecutionTimeoutMs,
+  requestGitHubToken,
+  hooks,
+  toolRules,
+  resolvedCapabilityAccess,
+  toolMetadata,
+}: {
+  tools: ToolSet; // receives up to four entries: two exec tools and two file tools
+  session: AgentSession;
+  userIdentity: RequestUserIdentity;
+  approvalPolicy: AgentApprovalPolicy;
+  workspaceToolExecutionTimeoutMs: number;
+  requestGitHubToken?: string | null;
+  hooks?: ToolExecutionHooks;
+  toolRules?: AgentSessionToolRule[];
+  resolvedCapabilityAccess?: ResolvedAgentCapabilityAccess[];
+  toolMetadata?: AgentRuntimeToolMetadata[];
+}) {
+  registerChatWorkspaceExecTool({
+    tools,
+    session,
+    userIdentity,
+    approvalPolicy,
+    workspaceToolExecutionTimeoutMs,
+    requestGitHubToken,
+    hooks,
+    toolRules,
+    toolName: SESSION_WORKSPACE_READONLY_TOOL_NAME,
+    capabilityKey: 'read',
+    description: buildWorkspaceReadonlyExecDescription(SESSION_WORKSPACE_SERVER_NAME),
+    readOnly: true, // read-only exec: gated by the 'read_context' catalog capability
+    catalogCapabilityId: 'read_context',
+    resolvedCapabilityAccess,
+    toolMetadata,
+  });
+  registerChatWorkspaceExecTool({
+    tools,
+    session,
+    userIdentity,
+    approvalPolicy,
+    workspaceToolExecutionTimeoutMs,
+    requestGitHubToken,
+    hooks,
+    toolRules,
+    toolName: SESSION_WORKSPACE_MUTATION_TOOL_NAME,
+    capabilityKey: 'shell_exec',
+    description: buildWorkspaceMutationExecDescription(SESSION_WORKSPACE_SERVER_NAME),
+    readOnly: false, // mutation exec: gated by the 'workspace_shell' catalog capability
+    catalogCapabilityId: 'workspace_shell',
+    resolvedCapabilityAccess,
+    toolMetadata,
+  });
+  registerChatWorkspaceFileTool({
+    tools,
+    session,
+    userIdentity,
+    approvalPolicy,
+    workspaceToolExecutionTimeoutMs,
+    requestGitHubToken,
+    hooks,
+    toolRules,
+    toolName: WORKSPACE_WRITE_FILE_RUNTIME_TOOL_NAME, // the runtime tool name doubles as the registered tool name
+    inputSchema: WORKSPACE_WRITE_FILE_INPUT_SCHEMA,
+    description:
+      'Write a file in the chat workspace. Use this when the user asks to create or replace file contents. This provisions the workspace only when the tool runs.',
+    catalogCapabilityId: 'workspace_files',
+    resolvedCapabilityAccess,
+    toolMetadata,
+  });
+  registerChatWorkspaceFileTool({
+    tools,
+    session,
+    userIdentity,
+    approvalPolicy,
+    workspaceToolExecutionTimeoutMs,
+    requestGitHubToken,
+    hooks,
+    toolRules,
+    toolName: WORKSPACE_EDIT_FILE_RUNTIME_TOOL_NAME, // the runtime tool name doubles as the registered tool name
+    inputSchema: WORKSPACE_EDIT_FILE_INPUT_SCHEMA,
+    description:
+      'Edit a file in the chat workspace by replacing exact text. Use this for targeted file modifications. This provisions the workspace only when the tool runs.',
+    catalogCapabilityId: 'workspace_files',
+    resolvedCapabilityAccess,
+    toolMetadata,
+  });
+}
diff --git a/src/server/services/agent/debugToolLoopControls.ts b/src/server/services/agent/debugToolLoopControls.ts
index a502c3fe..b228c94b 100644
--- a/src/server/services/agent/debugToolLoopControls.ts
+++ b/src/server/services/agent/debugToolLoopControls.ts
@@ -19,10 +19,7 @@ import type { AgentRuntimeToolMetadata } from './CapabilityService';
 import type { AgentDebugRunIntent, AgentRunPlanSnapshotV1 } from './runPlanTypes';
 import { isApprovalGatedWriteRuntimeTool, isReadOnlyRuntimeTool } from './toolMetadata';
 
-const DEBUG_READ_ONLY_MAX_STEPS = 8;
-const DEBUG_REPAIR_MAX_STEPS = 9;
-
-export type DebugToolLoopControls = {
+type DebugToolLoopControls = {
   activeTools?: string[];
   stopWhen: StopCondition<ToolSet>;
   effectiveMaxIterations: number;
@@ -41,6 +38,23 @@ function isBuildContextWorkspaceTool(metadata: AgentRuntimeToolMetadata): boolea
   );
 }
 
+// Build-context chats have no workspace, so any workspace/sandbox/git tool would provision one on first call.
+// Collect the registered keys to strip by source kind, independent of agent id or debug intent.
+function buildContextWorkspaceToolKeys(tools: ToolSet, toolMetadata: AgentRuntimeToolMetadata[]): Set<string> {
+  const registeredKeys = Object.keys(tools);
+  const registered = new Set(registeredKeys);
+  // Sandbox-server tools are recognised by key prefix; the rest are recognised through their metadata.
+  const excluded = new Set(registeredKeys.filter((toolKey) => toolKey.startsWith('mcp__sandbox__')));
+
+  for (const metadata of toolMetadata) {
+    const shouldStrip = registered.has(metadata.toolKey) && isBuildContextWorkspaceTool(metadata);
+    if (shouldStrip) {
+      excluded.add(metadata.toolKey);
+    }
+  }
+  return excluded;
+}
+
 function isToolActiveForIntent(
   intent: AgentDebugRunIntent,
   metadata: AgentRuntimeToolMetadata,
@@ -57,19 +71,6 @@ function isToolActiveForIntent(
   return isReadOnlyRuntimeTool(metadata) || isApprovalGatedWriteRuntimeTool(metadata);
 }
 
-function resolveDebugMaxIterations(intent: AgentDebugRunIntent | undefined, maxIterations: number): number {
-  if (!intent || !isReadOnlyDebugIntent(intent)) {
-    return intent === 'repair' ? Math.min(maxIterations, DEBUG_REPAIR_MAX_STEPS + 1) : maxIterations;
-  }
-
-  return Math.min(maxIterations, DEBUG_READ_ONLY_MAX_STEPS + 1);
-}
-
-function resolveDebugToolStepLimit(intent: AgentDebugRunIntent, maxIterations: number): number {
-  const maxSteps = isReadOnlyDebugIntent(intent) ? DEBUG_READ_ONLY_MAX_STEPS : DEBUG_REPAIR_MAX_STEPS;
-  return Math.max(0, Math.min(maxIterations, maxSteps + 1) - 1);
-}
-
 export function resolveDebugIntent(runPlanSnapshot?: AgentRunPlanSnapshotV1 | null): AgentDebugRunIntent | null {
   if (!runPlanSnapshot) {
     return null;
@@ -95,12 +96,22 @@ export function resolveDebugToolLoopControls({
   toolMetadata: AgentRuntimeToolMetadata[];
   maxIterations: number;
 }): DebugToolLoopControls {
+  // maxIterations is the only budget knob; Debug adds intent-based tool scoping + a tools-off final step so the agent always writes a diagnosis.
   const intent = resolveDebugIntent(runPlanSnapshot);
-  const effectiveMaxIterations = resolveDebugMaxIterations(intent, maxIterations);
+  const effectiveMaxIterations = maxIterations;
   const stopWhen = stepCountIs(effectiveMaxIterations);
 
   if (!intent) {
-    return { stopWhen, effectiveMaxIterations };
+    // No debug intent (e.g. a custom agent), but build-context chats must still never be offered workspace-provisioning tools.
+    if (runPlanSnapshot?.agent.sourceKind !== 'build_context_chat') {
+      return { stopWhen, effectiveMaxIterations };
+    }
+    const excluded = buildContextWorkspaceToolKeys(tools, toolMetadata);
+    if (excluded.size === 0) {
+      return { stopWhen, effectiveMaxIterations };
+    }
+    const activeTools = Object.keys(tools).filter((toolKey) => !excluded.has(toolKey));
+    return { activeTools, stopWhen, effectiveMaxIterations };
   }
 
   const registeredToolKeys = new Set(Object.keys(tools));
@@ -113,7 +124,7 @@ export function resolveDebugToolLoopControls({
     ),
   ];
 
-  const toolStepLimit = resolveDebugToolStepLimit(intent, maxIterations);
+  const toolStepLimit = Math.max(0, effectiveMaxIterations - 1);
 
   return {
     activeTools,
diff --git a/src/server/services/agent/diagnosticTools.ts b/src/server/services/agent/diagnosticTools.ts
index 8ac337e4..f0f8f381 100644
--- a/src/server/services/agent/diagnosticTools.ts
+++ b/src/server/services/agent/diagnosticTools.ts
@@ -29,7 +29,7 @@ import { GetLifecycleLogsTool } from 'server/services/agent/tools/k8s/getLifecyc
 import { PatchK8sResourceTool } from 'server/services/agent/tools/k8s/patchK8sResource';
 import { GetPodLogsTool } from 'server/services/agent/tools/k8s/getPodLogs';
 import { QueryDatabaseTool } from 'server/services/agent/tools/k8s/queryDatabase';
-import { DatabaseClient } from 'server/services/agent/tools/shared/databaseClient';
+import { DatabaseClient, type DatabaseBuildScope } from 'server/services/agent/tools/shared/databaseClient';
 import { GitHubClient } from 'server/services/agent/tools/shared/githubClient';
 import { K8sClient } from 'server/services/agent/tools/shared/k8sClient';
 import type { Tool } from 'server/services/agent/tools/types';
@@ -85,6 +85,12 @@ export type LifecycleDiagnosticGithubSafety = {
   referencedFiles?: string[];
   excludedFilePatterns?: string[];
   allowedWritePatterns?: string[];
+  // SECURITY: the build's resolved scope. Every diagnostic tool is locked to these.
+  allowedNamespace?: string | null;
+  allowedRepos?: string[];
+  buildUuid?: string | null;
+  pullRequestId?: number | null;
+  databaseScope?: DatabaseBuildScope | null;
 };
 
 function resolveToolMode({
@@ -123,10 +129,18 @@ function configureGithubClient(client: GitHubClient, safety?: LifecycleDiagnosti
   client.setReferencedFiles(safety?.referencedFiles || []);
   client.setExcludedFilePatterns(safety?.excludedFilePatterns || []);
   client.setAllowedWritePatterns(safety?.allowedWritePatterns || []);
+  // SECURITY: lock GitHub reads/writes to the build's repositories.
+  client.setAllowedRepos(safety?.allowedRepos || null);
 
   return client;
 }
 
+function configureK8sClient(client: K8sClient, safety?: LifecycleDiagnosticGithubSafety): K8sClient {
+  // SECURITY: lock k8s reads/patches to the build's namespace. A missing or empty namespace is passed as null; NOTE(review): confirm how K8sClient treats null (deny vs. unrestricted).
+  client.setAllowedNamespace(safety?.allowedNamespace || null);
+  return client;
+}
+
 function readString(value: unknown): string | null {
   return typeof value === 'string' && value.trim() ? value : null;
 }
@@ -161,15 +175,20 @@ export async function shouldRequestUpdateFileApproval(
 function createLifecycleDiagnosticReadToolSpecs(
   safety?: LifecycleDiagnosticGithubSafety
 ): LifecycleDiagnosticToolSpec[] {
-  const k8sClient = new K8sClient();
+  const k8sClient = configureK8sClient(new K8sClient(), safety);
   const githubClient = configureGithubClient(new GitHubClient(), safety);
   const databaseClient = new DatabaseClient({ models });
+  // SECURITY: constrain DB reads to the build's own records.
+  databaseClient.setBuildScope(safety?.databaseScope || null);
+
+  const lifecycleLogsTool = new GetLifecycleLogsTool(k8sClient);
+  lifecycleLogsTool.setAllowedBuildUuid(safety?.buildUuid || null);
 
   const specs: Array<{ tool: Tool; catalogCapabilityId: AgentCapabilityCatalogId }> = [
     { tool: new GetCodefreshLogsTool(), catalogCapabilityId: 'diagnostics_codefresh' },
     { tool: new GetK8sResourcesTool(k8sClient), catalogCapabilityId: 'diagnostics_kubernetes' },
     { tool: new GetPodLogsTool(k8sClient), catalogCapabilityId: 'diagnostics_logs' },
-    { tool: new GetLifecycleLogsTool(k8sClient), catalogCapabilityId: 'diagnostics_logs' },
+    { tool: lifecycleLogsTool, catalogCapabilityId: 'diagnostics_logs' },
     { tool: new QueryDatabaseTool(databaseClient), catalogCapabilityId: 'diagnostics_database' },
     { tool: new GetFileTool(githubClient), catalogCapabilityId: 'github_read' },
     { tool: new ListDirectoryTool(githubClient), catalogCapabilityId: 'github_read' },
@@ -363,7 +382,7 @@ export async function buildUpdateFilePreview(
 function createLifecycleDiagnosticFixToolSpecs(
   safety?: LifecycleDiagnosticGithubSafety
 ): LifecycleDiagnosticToolSpec[] {
-  const k8sClient = new K8sClient();
+  const k8sClient = configureK8sClient(new K8sClient(), safety);
   const githubClient = configureGithubClient(new GitHubClient(), safety);
 
   return [
diff --git a/src/server/services/agent/errors.ts b/src/server/services/agent/errors.ts
index 1e63628d..842dfc44 100644
--- a/src/server/services/agent/errors.ts
+++ b/src/server/services/agent/errors.ts
@@ -26,15 +26,39 @@ function normalizeErrorMessage(error: unknown, fallback: string): string {
   return fallback;
 }
 
+interface AgentRunFailureAction { // recovery affordance attached to an AgentRunTerminalFailure via `nextAction`
+  kind: 'continue' | 'retry' | 'reconnect' | 'update_key' | 'navigate'; // which kind of recovery step is being suggested
+  label: string; // human-readable text for the action (e.g. 'Try again')
+  href?: string; // optional link target for the action (e.g. a settings page path)
+}
+
 export class AgentRunTerminalFailure extends Error {
   readonly code: string;
   readonly details?: Record<string, unknown>;
+  /** Whether retrying the same run as-is is worthwhile (rate limit / transient overload); stays undefined when the thrower does not pass it. */
+  readonly retryable?: boolean;
+  /** Suggested recovery affordance surfaced to the user; stays undefined when the thrower does not pass it. */
+  readonly nextAction?: AgentRunFailureAction;
 
-  constructor({ code, message, details }: { code: string; message: string; details?: Record<string, unknown> }) {
+  constructor({
+    code,
+    message,
+    details,
+    retryable,
+    nextAction,
+  }: {
+    code: string;
+    message: string;
+    details?: Record<string, unknown>;
+    retryable?: boolean;
+    nextAction?: AgentRunFailureAction;
+  }) {
     super(message);
     this.name = 'AgentRunTerminalFailure';
     this.code = code;
     this.details = details;
+    this.retryable = retryable;
+    this.nextAction = nextAction;
   }
 }
 
diff --git a/src/server/services/agent/mcpToolRegistration.ts b/src/server/services/agent/mcpToolRegistration.ts
new file mode 100644
index 00000000..a7ee8a09
--- /dev/null
+++ b/src/server/services/agent/mcpToolRegistration.ts
@@ -0,0 +1,181 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { type ToolSet } from 'ai';
+import AgentSession from 'server/models/AgentSession';
+import { sanitizeMcpErrorMessage, sanitizeMcpResult } from 'server/services/agentRuntime/mcp/config';
+import { McpClientManager } from 'server/services/agentRuntime/mcp/client';
+import { applyMcpDefaultToolArgs } from 'server/services/agentRuntime/mcp/runtimeConfig';
+import { getLogger } from 'server/lib/logger';
+import type { AgentApprovalMode, AgentCapabilityKey, AgentToolAuditRecord } from './types';
+import type { AgentCapabilityCatalogId } from './capabilityCatalog';
+import type { ResolvedMcpServer } from 'server/services/agentRuntime/mcp/types';
+import { buildProposedFileChanges, buildResultFileChanges, didToolResultFail } from './fileChanges';
+import { buildAgentToolKey } from './toolKeys';
+import type { AgentRuntimeToolMetadata } from './toolMetadata';
+import { recordToolMetadata, redactMcpDefaultArgs, toAiDynamicTool, toAiJsonSchema } from './capabilityToolHelpers';
+import type { ToolExecutionHooks } from './capabilityToolHelpers';
+import { getFileChangePreviewChars } from './chatWorkspaceToolRegistration';
+/** Registers one discovered MCP tool into `tools` under a server-scoped key and records its metadata; each execution opens a fresh MCP client, calls the tool, reports through `hooks`, and closes the client. */
+export function registerGenericMcpTool({
+  tools,
+  session,
+  server,
+  discoveredTool,
+  exposedToolName,
+  description,
+  capabilityKey,
+  mode,
+  catalogCapabilityId,
+  hooks,
+  toolMetadata,
+}: {
+  tools: ToolSet;
+  session: AgentSession;
+  server: ResolvedMcpServer;
+  discoveredTool: ResolvedMcpServer['discoveredTools'][number];
+  exposedToolName: string;
+  description: string;
+  capabilityKey: AgentCapabilityKey;
+  mode: AgentApprovalMode;
+  catalogCapabilityId: AgentCapabilityCatalogId;
+  hooks?: ToolExecutionHooks;
+  toolMetadata?: AgentRuntimeToolMetadata[];
+}) {
+  const toolKey = buildAgentToolKey(server.slug, exposedToolName);
+
+  tools[toolKey] = toAiDynamicTool({
+    description,
+    inputSchema: toAiJsonSchema(discoveredTool.inputSchema as Record<string, unknown>),
+    needsApproval: mode === 'require_approval',
+    onInputAvailable: async ({ input, toolCallId }) => {
+      if (!toolCallId) {
+        return;
+      }
+
+      const runtimeArgs = applyMcpDefaultToolArgs(
+        discoveredTool.inputSchema as Record<string, unknown>,
+        server.defaultArgs,
+        (input as Record<string, unknown>) || {}
+      );
+      const auditArgs = redactMcpDefaultArgs(runtimeArgs, server.defaultArgs);
+      const changes = buildProposedFileChanges({
+        toolCallId,
+        sourceTool: exposedToolName,
+        input: auditArgs,
+        previewChars: await getFileChangePreviewChars(),
+      });
+
+      for (const change of changes) {
+        await hooks?.onFileChange?.(change);
+      }
+    },
+    execute: async (input, context) => {
+      const toolCallId = context?.toolCallId;
+      const runtimeArgs = applyMcpDefaultToolArgs(
+        discoveredTool.inputSchema as Record<string, unknown>,
+        server.defaultArgs,
+        (input as Record<string, unknown>) || {}
+      );
+      const auditArgs = redactMcpDefaultArgs(runtimeArgs, server.defaultArgs);
+      const audit: AgentToolAuditRecord = {
+        source: 'mcp',
+        serverSlug: server.slug,
+        toolName: exposedToolName,
+        toolCallId,
+        args: auditArgs,
+        capabilityKey,
+      };
+
+      await hooks?.onToolStarted?.(audit);
+      // Inputs handed to the sanitizers below (presumably so env/default-arg/transport secrets get scrubbed; confirm in mcp/config).
+      const mcpSecretSources = [
+        {
+          compiledConfig: {
+            env: server.env,
+            defaultArgs: server.defaultArgs,
+          },
+          transport: server.transport,
+        },
+      ];
+      const client = new McpClientManager();
+      try {
+        await client.connect(server.transport, server.timeout);
+        const rawResult = await client.callTool(discoveredTool.name, runtimeArgs, server.timeout);
+        const failed = rawResult.isError || didToolResultFail(rawResult);
+        const result = failed ? sanitizeMcpResult(rawResult, mcpSecretSources) : rawResult; // NOTE(review): only failed results are sanitized; successful results pass through as returned
+        if (toolCallId) {
+          const changes = buildResultFileChanges({
+            toolCallId,
+            sourceTool: exposedToolName,
+            input: auditArgs,
+            result,
+            failed,
+            previewChars: await getFileChangePreviewChars(),
+          });
+
+          for (const change of changes) {
+            await hooks?.onFileChange?.(change);
+          }
+        }
+        await hooks?.onToolFinished?.({
+          ...audit,
+          result,
+          status: failed ? 'failed' : 'completed',
+        });
+        return result;
+      } catch (error) { // NOTE(review): also reached when a hook in the try block throws after the tool call succeeded, so onToolFinished can fire twice
+        const errorMessage = sanitizeMcpErrorMessage(error, mcpSecretSources);
+        getLogger().warn(
+          { error: errorMessage },
+          `AgentExec: mcp tool failed sessionId=${session.uuid} server=${server.slug} tool=${exposedToolName}`
+        );
+        if (toolCallId) {
+          const changes = buildResultFileChanges({
+            toolCallId,
+            sourceTool: exposedToolName,
+            input: auditArgs,
+            result: {
+              error: errorMessage,
+            },
+            failed: true,
+            previewChars: await getFileChangePreviewChars(),
+          });
+
+          for (const change of changes) {
+            await hooks?.onFileChange?.(change);
+          }
+        }
+        await hooks?.onToolFinished?.({
+          ...audit,
+          result: {
+            error: errorMessage,
+          },
+          status: 'failed',
+        });
+        throw new Error(errorMessage);
+      } finally {
+        await client.close(); // NOTE(review): a rejection from close() here would replace the tool result or the sanitized error
+      }
+    },
+  });
+  recordToolMetadata(toolMetadata, {
+    toolKey,
+    catalogCapabilityId,
+    capabilityKey,
+    approvalMode: mode,
+  });
+}
diff --git a/src/server/services/agent/runErrorClassification.ts b/src/server/services/agent/runErrorClassification.ts
new file mode 100644
index 00000000..8214a20f
--- /dev/null
+++ b/src/server/services/agent/runErrorClassification.ts
@@ -0,0 +1,146 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { APICallError } from 'ai';
+import { AgentRunTerminalFailure } from './errors';
+import { AgentRunOwnershipLostError } from './AgentRunOwnershipLostError';
+import { OAuthAuthorizationRequiredError } from '../agentRuntime/mcp/oauthProvider';
+
+/** Closed set of run-failure codes the UI presents; keep in sync with the UI failure presenter. NOTE(review): AgentRunTerminalFailure.code is typed `string`, so the literals passed in this file are not checked against this union. */
+export type AgentRunFailureCode =
+  // finishReason-derived (see classifyTerminalRunFailure in RunExecutor)
+  | 'max_iterations_exceeded'
+  | 'token_limit_reached'
+  | 'content_filtered'
+  | 'stream_error'
+  | 'run_incomplete'
+  // thrown provider / SDK / infra errors (produced by classifyThrownRunError in this file)
+  | 'provider_overloaded'
+  | 'provider_rate_limited'
+  | 'provider_auth_invalid'
+  | 'provider_quota_exhausted'
+  | 'model_unavailable'
+  | 'provider_request_invalid'
+  | 'mcp_oauth_required'
+  | 'run_ownership_lost'
+  | 'run_unknown_error';
+
+// Heuristic: true when the provider error text mentions exhausted credit, quota, or billing.
+// Matching is case-insensitive over the error message plus the response body (when it is a string).
+function looksLikeQuotaExhausted(error: APICallError): boolean {
+  const body = typeof error.responseBody === 'string' ? error.responseBody : '';
+  const haystack = `${error.message} ${body}`.toLowerCase();
+  // Substrings checked in order; the first hit decides.
+  const markers = ['credit balance', 'quota', 'insufficient_quota', 'billing'];
+  return markers.some((marker) => haystack.includes(marker));
+}
+
+/** The single place provider/infra failures get a stable code + recovery action; returns null when unclassifiable so callers fall back. An error that is already an AgentRunTerminalFailure is returned unchanged. */
+export function classifyThrownRunError(error: unknown): AgentRunTerminalFailure | null {
+  if (error instanceof AgentRunTerminalFailure) {
+    return error;
+  }
+
+  if (error instanceof AgentRunOwnershipLostError) {
+    return new AgentRunTerminalFailure({
+      code: 'run_ownership_lost',
+      message: 'This response was taken over by another worker or was cancelled.',
+      retryable: false,
+    });
+  }
+
+  if (error instanceof OAuthAuthorizationRequiredError) {
+    return new AgentRunTerminalFailure({
+      code: 'mcp_oauth_required',
+      message: 'A connected MCP server needs to be re-authorized before the agent can continue.',
+      retryable: false,
+      nextAction: { kind: 'reconnect', label: 'Reconnect server' },
+    });
+  }
+
+  if (APICallError.isInstance(error)) {
+    const status = error.statusCode;
+    const provider = (error as { url?: string }).url || ''; // NOTE: despite the name, this is the failing request URL ('' when absent), copied into `details`
+    // 429 is split by a message/body heuristic: exhausted quota or credit is not retryable, plain rate limiting is.
+    if (status === 429) {
+      if (looksLikeQuotaExhausted(error)) {
+        return new AgentRunTerminalFailure({
+          code: 'provider_quota_exhausted',
+          message: 'The model provider rejected the request because the account is out of quota or credit.',
+          retryable: false,
+          nextAction: { kind: 'update_key', label: 'Check provider account', href: '/settings?tab=connections' },
+          details: { status, provider },
+        });
+      }
+      return new AgentRunTerminalFailure({
+        code: 'provider_rate_limited',
+        message: 'The model provider is rate limiting requests. Wait a moment, then try again.',
+        retryable: true,
+        nextAction: { kind: 'retry', label: 'Try again' },
+        details: { status, provider },
+      });
+    }
+
+    if (status === 529 || status === 503 || status === 502) {
+      return new AgentRunTerminalFailure({
+        code: 'provider_overloaded',
+        message: 'The model provider is temporarily overloaded. Try again in a moment.',
+        retryable: true,
+        nextAction: { kind: 'retry', label: 'Try again' },
+        details: { status, provider },
+      });
+    }
+
+    if (status === 401 || status === 403) {
+      return new AgentRunTerminalFailure({
+        code: 'provider_auth_invalid',
+        message: 'The model provider rejected the API key. Update the key, then try again.',
+        retryable: false,
+        nextAction: { kind: 'update_key', label: 'Update key', href: '/settings?tab=connections' },
+        details: { status, provider },
+      });
+    }
+
+    if (status === 404) {
+      return new AgentRunTerminalFailure({
+        code: 'model_unavailable',
+        message: 'The selected model is not available from the provider. Choose a different model, then try again.',
+        retryable: false,
+        nextAction: { kind: 'navigate', label: 'Change model' },
+        details: { status, provider },
+      });
+    }
+    // Any remaining 4xx (everything except 401/403/404/429 handled above) is treated as a non-retryable invalid request.
+    if (typeof status === 'number' && status >= 400 && status < 500) {
+      return new AgentRunTerminalFailure({
+        code: 'provider_request_invalid',
+        message: 'The model provider rejected the request.',
+        retryable: false,
+        details: { status, provider },
+      });
+    }
+    // Fallthrough: every other status, or no status at all, is reported as a retryable overload with a generic message.
+    return new AgentRunTerminalFailure({
+      code: 'provider_overloaded',
+      message: 'The model provider returned an error. Try again in a moment.',
+      retryable: true,
+      nextAction: { kind: 'retry', label: 'Try again' },
+      details: { status, provider },
+    });
+  }
+
+  return null;
+}
diff --git a/src/server/services/agent/runEventChunkCodec.ts b/src/server/services/agent/runEventChunkCodec.ts
new file mode 100644
index 00000000..976054b1
--- /dev/null
+++ b/src/server/services/agent/runEventChunkCodec.ts
@@ -0,0 +1,569 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import AgentRunEvent from 'server/models/AgentRunEvent';
+import { type AgentUiMessageChunk } from './streamChunks';
+import { readString } from './runEventUtils';
+/** One run event produced from a UI message chunk by toChunkEvents: an event type name plus its payload object. */
+export type ChunkEvent = {
+  eventType: string;
+  payload: Record<string, unknown>;
+};
+/** Deep-copies a JSON-serializable value via a JSON round-trip; `undefined` is returned as-is because JSON.parse(JSON.stringify(undefined)) would throw. */
+function cloneValue<T>(value: T): T {
+  return value === undefined ? value : (JSON.parse(JSON.stringify(value)) as T);
+}
+/** Type guard: true for any non-null object that is not an array. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+/** Returns `value` itself when isRecord accepts it; any other input yields a fresh empty object. */
+function asRecord(value: unknown): Record<string, unknown> {
+  return !isRecord(value) ? {} : value;
+}
+/** Returns `value` when it is a boolean; any other input yields `undefined`. */
+function readBoolean(value: unknown): boolean | undefined {
+  return typeof value !== 'boolean' ? undefined : value;
+}
+/** Copies the listed keys whose values are not `undefined` into a new object, cloning each value with cloneValue. */
+function pickDefined(source: Record<string, unknown>, keys: string[]): Record<string, unknown> {
+  return keys.reduce<Record<string, unknown>>((result, name) => {
+    const current = source[name];
+    if (current === undefined) {
+      return result;
+    }
+    // Store a clone rather than the original reference.
+    result[name] = cloneValue(current);
+    return result;
+  }, {});
+}
+/** Builds a chunk from `fields`, omitting every entry whose value is `undefined`; the result is cast to AgentUiMessageChunk without validation. */
+function compactChunk(fields: Record<string, unknown>): AgentUiMessageChunk {
+  const compacted: Record<string, unknown> = {};
+  Object.keys(fields).forEach((name) => {
+    const current = fields[name];
+    if (current === undefined) {
+      return;
+    }
+    compacted[name] = current;
+  });
+  return compacted as AgentUiMessageChunk;
+}
+/** Translates one UI message chunk into the run event(s) representing it: every chunk type handled below yields exactly one event, any other type yields an empty array. */
+export function toChunkEvents(chunk: AgentUiMessageChunk): ChunkEvent[] {
+  const chunkRecord = chunk as unknown as Record<string, unknown>;
+  // chunkRecord gives untyped access so optional fields can be copied by name through pickDefined.
+  switch (chunk.type) {
+    case 'start':
+      return [
+        {
+          eventType: 'message.created',
+          payload: {
+            messageId: chunk.messageId,
+            metadata: chunk.messageMetadata || {},
+          },
+        },
+      ];
+    case 'message-metadata':
+      return [
+        {
+          eventType: 'message.metadata',
+          payload: {
+            metadata: cloneValue(chunk.messageMetadata || {}),
+          },
+        },
+      ];
+    case 'text-start':
+      return [
+        {
+          eventType: 'message.part.started',
+          payload: {
+            partType: 'text',
+            partId: chunk.id,
+            ...pickDefined(chunkRecord, ['providerMetadata']),
+          },
+        },
+      ];
+    case 'text-delta':
+      return [
+        {
+          eventType: 'message.delta',
+          payload: {
+            partType: 'text',
+            partId: chunk.id,
+            delta: chunk.delta,
+            ...pickDefined(chunkRecord, ['providerMetadata']),
+          },
+        },
+      ];
+    case 'text-end':
+      return [
+        {
+          eventType: 'message.part.completed',
+          payload: {
+            partType: 'text',
+            partId: chunk.id,
+            ...pickDefined(chunkRecord, ['providerMetadata']),
+          },
+        },
+      ];
+    case 'reasoning-start':
+      return [
+        {
+          eventType: 'message.part.started',
+          payload: {
+            partType: 'reasoning',
+            partId: chunk.id,
+            ...pickDefined(chunkRecord, ['providerMetadata']),
+          },
+        },
+      ];
+    case 'reasoning-delta':
+      return [
+        {
+          eventType: 'message.delta',
+          payload: {
+            partType: 'reasoning',
+            partId: chunk.id,
+            delta: chunk.delta,
+            ...pickDefined(chunkRecord, ['providerMetadata']),
+          },
+        },
+      ];
+    case 'reasoning-end':
+      return [
+        {
+          eventType: 'message.part.completed',
+          payload: {
+            partType: 'reasoning',
+            partId: chunk.id,
+            ...pickDefined(chunkRecord, ['providerMetadata']),
+          },
+        },
+      ];
+    case 'tool-input-start':
+      return [
+        {
+          eventType: 'tool.call.input.started',
+          payload: {
+            toolCallId: chunk.toolCallId,
+            toolName: chunk.toolName,
+            ...pickDefined(chunkRecord, ['providerExecuted', 'providerMetadata', 'dynamic', 'title']),
+          },
+        },
+      ];
+    case 'tool-input-delta':
+      return [
+        {
+          eventType: 'tool.call.input.delta',
+          payload: {
+            toolCallId: chunk.toolCallId,
+            inputTextDelta: chunk.inputTextDelta,
+          },
+        },
+      ];
+    case 'tool-input-available':
+    case 'tool-input-error':
+      return [
+        {
+          eventType: 'tool.call.started',
+          payload: {
+            toolCallId: chunk.toolCallId,
+            toolName: chunk.toolName,
+            inputStatus: chunk.type === 'tool-input-error' ? 'error' : 'available',
+            input: 'input' in chunk ? chunk.input : null,
+            errorText: 'errorText' in chunk ? chunk.errorText : null,
+            ...pickDefined(chunkRecord, ['providerExecuted', 'providerMetadata', 'dynamic', 'title']),
+          },
+        },
+      ];
+    case 'tool-output-available':
+    case 'tool-output-error':
+    case 'tool-output-denied':
+      return [
+        {
+          eventType: 'tool.call.completed',
+          payload: {
+            toolCallId: chunk.toolCallId,
+            output: 'output' in chunk ? chunk.output : null,
+            errorText: 'errorText' in chunk ? chunk.errorText : null,
+            status:
+              chunk.type === 'tool-output-available'
+                ? 'completed'
+                : chunk.type === 'tool-output-denied'
+                ? 'denied'
+                : 'failed',
+            ...pickDefined(chunkRecord, ['providerExecuted', 'providerMetadata', 'dynamic', 'preliminary']),
+          },
+        },
+      ];
+    case 'tool-approval-request':
+      return [
+        {
+          eventType: 'approval.requested',
+          payload: {
+            ...pickDefined(chunkRecord, ['actionId']),
+            approvalId: chunk.approvalId,
+            toolCallId: chunk.toolCallId,
+          },
+        },
+      ];
+    case 'data-file-change':
+      return [
+        {
+          eventType: 'tool.file_change',
+          payload: {
+            id: chunk.id,
+            data: cloneValue(chunk.data),
+            transient: chunk.transient,
+          },
+        },
+      ];
+    case 'source-url':
+      return [
+        {
+          eventType: 'message.source',
+          payload: {
+            sourceType: 'url',
+            sourceId: chunk.sourceId,
+            url: chunk.url,
+            ...pickDefined(chunkRecord, ['title', 'providerMetadata']),
+          },
+        },
+      ];
+    case 'source-document':
+      return [
+        {
+          eventType: 'message.source',
+          payload: {
+            sourceType: 'document',
+            sourceId: chunk.sourceId,
+            mediaType: chunk.mediaType,
+            title: chunk.title,
+            ...pickDefined(chunkRecord, ['filename', 'providerMetadata']),
+          },
+        },
+      ];
+    case 'file':
+      return [
+        {
+          eventType: 'message.file',
+          payload: {
+            url: chunk.url,
+            mediaType: chunk.mediaType,
+            ...pickDefined(chunkRecord, ['providerMetadata']),
+          },
+        },
+      ];
+    case 'start-step':
+      return [
+        {
+          eventType: 'run.step.started',
+          payload: {},
+        },
+      ];
+    case 'finish-step':
+      return [
+        {
+          eventType: 'run.step.completed',
+          payload: {},
+        },
+      ];
+    case 'finish':
+      return [
+        {
+          eventType: 'run.finished',
+          payload: {
+            finishReason: chunk.finishReason,
+            metadata: chunk.messageMetadata || {},
+          },
+        },
+      ];
+    case 'abort':
+      return [
+        {
+          eventType: 'run.aborted',
+          payload: {
+            reason: chunk.reason,
+          },
+        },
+      ];
+    case 'error':
+      return [
+        {
+          eventType: 'run.error',
+          payload: {
+            errorText: chunk.errorText,
+          },
+        },
+      ];
+  }
+  // Reached only for chunk types without a case above: they produce no events.
+  return [];
+}
+/** Rebuilds a text/reasoning start, delta, or end chunk from a message-part event payload; returns null for other part types, a missing part id, or an unrelated event type. */
+function chunkFromMessagePartEvent(eventType: string, payload: Record<string, unknown>): AgentUiMessageChunk | null {
+  const kind = readString(payload.partType);
+  // The part id falls back to messageId when partId is absent or empty.
+  const id = readString(payload.partId) || readString(payload.messageId);
+  if (!id || (kind !== 'text' && kind !== 'reasoning')) {
+    return null;
+  }
+
+  const providerMetadata = payload.providerMetadata;
+
+  // kind is narrowed to 'text' | 'reasoning', so the templates below yield the chunk type names.
+  switch (eventType) {
+    case 'message.part.started':
+      return compactChunk({
+        type: `${kind}-start`,
+        id,
+        providerMetadata,
+      });
+    case 'message.delta':
+      return compactChunk({
+        type: `${kind}-delta`,
+        id,
+        delta: readString(payload.delta) || '',
+        providerMetadata,
+      });
+    case 'message.part.completed':
+      return compactChunk({
+        type: `${kind}-end`,
+        id,
+        providerMetadata,
+      });
+    default:
+      // Not one of the three message-part event types.
+      return null;
+  }
+}
+/** Rebuilds a tool-input-available or tool-input-error chunk from a 'tool.call.started' payload; returns null when toolCallId or toolName is missing or empty. */
+function chunkFromToolStartedEvent(payload: Record<string, unknown>): AgentUiMessageChunk | null {
+  const callId = readString(payload.toolCallId);
+  const name = readString(payload.toolName);
+  if (!(callId && name)) {
+    return null;
+  }
+  const inputFailed = readString(payload.inputStatus) === 'error';
+  const errorText = inputFailed ? readString(payload.errorText) || 'Tool input failed.' : undefined;
+  return compactChunk({
+    type: inputFailed ? 'tool-input-error' : 'tool-input-available',
+    toolCallId: callId,
+    toolName: name,
+    input: payload.input,
+    errorText,
+    providerExecuted: readBoolean(payload.providerExecuted),
+    providerMetadata: payload.providerMetadata,
+    dynamic: readBoolean(payload.dynamic),
+    title: readString(payload.title),
+  });
+}
+/** Rebuilds a tool-output chunk from a 'tool.call.completed' payload: 'denied' and 'failed' map to their own chunk types, any other status to tool-output-available; null without a toolCallId. */
+function chunkFromToolCompletedEvent(payload: Record<string, unknown>): AgentUiMessageChunk | null {
+  const toolCallId = readString(payload.toolCallId);
+  if (!toolCallId) {
+    return null;
+  }
+
+  // Fields common to the error and success chunks; compactChunk drops the ones that are undefined.
+  const shared = {
+    providerExecuted: readBoolean(payload.providerExecuted),
+    providerMetadata: payload.providerMetadata,
+    dynamic: readBoolean(payload.dynamic),
+  };
+  switch (readString(payload.status)) {
+    case 'denied':
+      return compactChunk({
+        type: 'tool-output-denied',
+        toolCallId,
+      });
+    case 'failed':
+      return compactChunk({
+        type: 'tool-output-error',
+        toolCallId,
+        errorText: readString(payload.errorText) || 'Tool execution failed.',
+        ...shared,
+      });
+    default:
+      return compactChunk({
+        type: 'tool-output-available',
+        toolCallId,
+        output: payload.output,
+        ...shared,
+        preliminary: readBoolean(payload.preliminary),
+      });
+  }
+}
+/** Counterpart of toChunkEvents: rebuilds the UI message chunk for an AgentRunEvent, or returns null when the event type is not handled or a required payload field is missing. */
+export function chunkFromEvent(event: AgentRunEvent): AgentUiMessageChunk | null {
+  const payload = asRecord(event.payload);
+
+  switch (event.eventType) {
+    case 'message.created':
+      return compactChunk({
+        type: 'start',
+        messageId: readString(payload.messageId),
+        messageMetadata: payload.metadata,
+      });
+    case 'message.metadata':
+      return compactChunk({
+        type: 'message-metadata',
+        messageMetadata: payload.metadata || {},
+      });
+    case 'message.part.started':
+    case 'message.delta':
+    case 'message.part.completed':
+      return chunkFromMessagePartEvent(event.eventType, payload);
+    case 'tool.call.input.started': {
+      const toolCallId = readString(payload.toolCallId);
+      const toolName = readString(payload.toolName);
+      if (!toolCallId || !toolName) {
+        return null;
+      }
+
+      return compactChunk({
+        type: 'tool-input-start',
+        toolCallId,
+        toolName,
+        providerExecuted: readBoolean(payload.providerExecuted),
+        providerMetadata: payload.providerMetadata,
+        dynamic: readBoolean(payload.dynamic),
+        title: readString(payload.title),
+      });
+    }
+    case 'tool.call.input.delta': {
+      const toolCallId = readString(payload.toolCallId);
+      if (!toolCallId) {
+        return null;
+      }
+
+      return compactChunk({
+        type: 'tool-input-delta',
+        toolCallId,
+        inputTextDelta: readString(payload.inputTextDelta) || '',
+      });
+    }
+    case 'tool.call.started':
+      return chunkFromToolStartedEvent(payload);
+    case 'tool.call.completed':
+      return chunkFromToolCompletedEvent(payload);
+    case 'approval.requested': {
+      const approvalId = readString(payload.approvalId);
+      const toolCallId = readString(payload.toolCallId);
+      if (!approvalId || !toolCallId) {
+        return null;
+      }
+
+      return compactChunk({
+        type: 'tool-approval-request',
+        actionId: readString(payload.actionId),
+        approvalId,
+        toolCallId,
+      });
+    }
+    case 'tool.file_change':
+      if (!payload.data) { // any falsy data (not only a missing field) drops the event
+        return null;
+      }
+
+      return compactChunk({
+        type: 'data-file-change',
+        id: readString(payload.id),
+        data: payload.data,
+        transient: readBoolean(payload.transient),
+      });
+    case 'message.source':
+      if (payload.sourceType === 'url') {
+        const sourceId = readString(payload.sourceId);
+        const url = readString(payload.url);
+        if (!sourceId || !url) {
+          return null;
+        }
+
+        return compactChunk({
+          type: 'source-url',
+          sourceId,
+          url,
+          title: readString(payload.title),
+          providerMetadata: payload.providerMetadata,
+        });
+      }
+
+      if (payload.sourceType === 'document') {
+        const sourceId = readString(payload.sourceId);
+        const mediaType = readString(payload.mediaType);
+        const title = readString(payload.title);
+        if (!sourceId || !mediaType || !title) {
+          return null;
+        }
+
+        return compactChunk({
+          type: 'source-document',
+          sourceId,
+          mediaType,
+          title,
+          filename: readString(payload.filename),
+          providerMetadata: payload.providerMetadata,
+        });
+      }
+
+      return null;
+    case 'message.file': {
+      const url = readString(payload.url);
+      const mediaType = readString(payload.mediaType);
+      if (!url || !mediaType) {
+        return null;
+      }
+
+      return compactChunk({
+        type: 'file',
+        url,
+        mediaType,
+        providerMetadata: payload.providerMetadata,
+      });
+    }
+    case 'run.step.started':
+      return compactChunk({ type: 'start-step' });
+    case 'run.step.completed':
+      return compactChunk({ type: 'finish-step' });
+    case 'run.finished':
+      return compactChunk({
+        type: 'finish',
+        finishReason: readString(payload.finishReason),
+        messageMetadata: payload.metadata,
+      });
+    case 'run.aborted':
+      return compactChunk({
+        type: 'abort',
+        reason: readString(payload.reason),
+      });
+    case 'run.error':
+      return compactChunk({
+        type: 'error',
+        errorText: readString(payload.errorText) || 'Agent run failed.',
+      });
+    case 'run.failed': { // no chunk type maps to this event in toChunkEvents; it is surfaced here as an error chunk
+      const error = asRecord(payload.error);
+      return compactChunk({
+        type: 'error',
+        errorText: readString(error.message) || readString(payload.errorText) || 'Agent run failed.',
+      });
+    }
+    default:
+      return null;
+  }
+}
diff --git a/src/server/services/agent/runEventUtils.ts b/src/server/services/agent/runEventUtils.ts
new file mode 100644
index 00000000..3dd7195d
--- /dev/null
+++ b/src/server/services/agent/runEventUtils.ts
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Narrows an arbitrary value to a string.
+ *
+ * @param value - Value of unknown type to inspect.
+ * @returns `value` itself when `typeof value` is `'string'` (including `''`), otherwise `undefined`.
+ */
+export function readString(value: unknown): string | undefined {
+  if (typeof value !== 'string') {
+    return undefined;
+  }
+
+  return value;
+}
diff --git a/src/server/services/agent/sessionOwnership.ts b/src/server/services/agent/sessionOwnership.ts
new file mode 100644
index 00000000..925b333a
--- /dev/null
+++ b/src/server/services/agent/sessionOwnership.ts
@@ -0,0 +1,37 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import AgentSession from 'server/models/AgentSession';
+
+/**
+ * Loads the agent session identified by `sessionUuid`, scoped to `userId`.
+ *
+ * The lookup filters on both the uuid and the user id, so a session owned by a
+ * different user is reported exactly like a missing one.
+ *
+ * @param sessionUuid - UUID of the session to load.
+ * @param userId - Id of the requesting user; must match the session's `userId`.
+ * @returns The matching session.
+ * @throws Error with message `Agent session not found` when no row matches.
+ */
+export async function getOwnedSession(sessionUuid: string, userId: string): Promise<AgentSession> {
+  const session = await AgentSession.query().findOne({ uuid: sessionUuid, userId });
+  if (!session) {
+    throw new Error('Agent session not found');
+  }
+
+  return session;
+}
diff --git a/src/server/services/agent/systemAgentDefinitions.ts b/src/server/services/agent/systemAgentDefinitions.ts
index 3ae07944..98531c09 100644
--- a/src/server/services/agent/systemAgentDefinitions.ts
+++ b/src/server/services/agent/systemAgentDefinitions.ts
@@ -54,7 +54,6 @@ export const SYSTEM_AGENT_DEFINITIONS: Record<SystemAgentDefinitionId, AgentDefi
     description: 'Investigate build and environment context.',
     instructionRefs: ['system:debug'],
     capabilityRefs: [
-      'read_context',
       'diagnostics_logs',
       'diagnostics_codefresh',
       'diagnostics_kubernetes',
@@ -62,7 +61,6 @@ export const SYSTEM_AGENT_DEFINITIONS: Record<SystemAgentDefinitionId, AgentDefi
       'github_read',
       'github_write',
       'external_mcp_read',
-      'external_mcp_write',
     ],
     resourcePolicy: {
       sourceKinds: ['build_context_chat'],
diff --git a/src/server/services/agent/systemInstructionTemplates.ts b/src/server/services/agent/systemInstructionTemplates.ts
index 1878a39a..f235ccd8 100644
--- a/src/server/services/agent/systemInstructionTemplates.ts
+++ b/src/server/services/agent/systemInstructionTemplates.ts
@@ -27,36 +27,62 @@ export type SystemInstructionTemplateDefinition = {
 };
 
 export const DEBUG_INSTRUCTION_TEMPLATE_DEFAULT_CONTENT = [
-  'Lifecycle debugging profile:',
-  '- Compare desired config state with actual runtime state before diagnosing.',
-  '- Investigate build failures before deploy failures.',
-  '- Cite specific evidence before diagnosing a root cause.',
-  '- Say when there is not enough evidence instead of fabricating a cause.',
-  '- Lead with the most likely cause and only the evidence needed to support it.',
+  'You are the Lifecycle Debug Agent. Investigate why a Lifecycle environment (a pull-request deployment) is failing or misbehaving and return a confident, evidence-backed root cause and the single best next step. Investigate first; do not change anything unless the user asks for a repair.',
+  '',
+  'Lifecycle model:',
+  '- A Build is one PR environment deployed into a single Kubernetes namespace. It fans out into Deploys.',
+  '- Each Deploy is one Service with two phases: a build phase (buildPipelineId produces a container image) and a deploy phase (deployPipelineId runs that image in the namespace). A failure belongs to one specific phase of one specific Deploy — always name which.',
+  '- Desired state lives in the repo: lifecycle.yaml, referenced Dockerfiles, and Helm values. Actual state lives in Kubernetes. Diagnosing means comparing desired vs actual.',
+  '- Kubernetes patches are ephemeral: a patch/scale/restart is wiped by the next deploy. A durable fix is almost always a repo change, not a live k8s patch.',
+  '',
+  'Start from the snapshot:',
+  '- The "Initial Lifecycle snapshot" in your context already gives namespace, build status, each Deploy status, buildPipelineId/deployPipelineId, the pull request, and lifecycleConfig presence. Read it before calling any tool; do not spend a tool call re-fetching what it already states.',
+  '- Values labeled *AtStart are from session start and may be stale — re-verify with tools when the user says state changed or before you conclude.',
+  '- If the snapshot says UNAVAILABLE, gather build/deploy/k8s state with tools and note that baseline context was missing.',
+  '',
+  'Investigation order:',
+  '- Classify first: identify which Deploy and which phase (build, deploy, or runtime) is failing.',
+  "- Build-phase failure: read build logs first with get_codefresh_logs using that Deploy's buildPipelineId (copy it EXACTLY from the DEPLOYS section). If the Deploy has no buildPipelineId (a non-Codefresh build), build logs may not be retrievable — say so and diagnose from build status/message and Kubernetes evidence.",
+  "- Deploy/runtime failure (build ok): inspect Kubernetes — get_k8s_resources pods (read each container's waiting/terminated reason and restart count), then get_k8s_resources events (for scheduling/image/quota errors), then the failing pod's logs.",
+  '- For a crashing or restarting pod, call get_pod_logs with previous:true — the live container is usually empty; the crash output is in the previous instance.',
+  '- When config is implicated, read lifecycle.yaml / the referenced Dockerfile / Helm values with get_file and compare to what Kubernetes actually has.',
+  '- Stop gathering once the evidence pins one root cause; do not keep calling tools after the cause is clear.',
+  '',
+  'Failure playbooks (signal → most likely cause → confirm with):',
+  '- ImagePullBackOff / ErrImagePull → wrong image ref or a tag not built/pushed, or registry auth → the container image ref vs build output, and pod events.',
+  '- CrashLoopBackOff → app crashes on startup → previous-instance logs; usually a missing/invalid env var, a failed dependency connection, or a bad start command.',
+  '- OOMKilled → memory limit too low or a leak → the container memory limit and the previous logs.',
+  '- FailedScheduling (events) → insufficient resources, an unschedulable node selector/affinity, or a pending PVC.',
+  '- Readiness/liveness probe failing → pod runs but never becomes ready → the probe path/port vs where the app actually serves.',
+  "- Init-container error → a precondition (migration, dependency wait) failed → that init container's logs specifically.",
+  '- Build failure → read the build logs end to end; usually a Dockerfile error, a missing path, a dependency-install failure, or a failing pipeline step.',
+  '- Missing/incorrect env or secret → app errors referencing a config key → the deployed env/configmap vs lifecycle.yaml.',
+  '- Helm values / manifest error → deploy phase fails to render or apply → the deploy logs and Helm values.',
+  "- Dependency not ready → a Deploy depends on another that is still building or failed → the roster for the upstream Deploy's status.",
+  '',
+  'Evidence discipline:',
+  '- Cite the specific evidence (log line, event, status, or config value) that supports your conclusion.',
+  '- Empty or zero-line tool output means the evidence could NOT be retrieved — not that the layer is healthy. Say you could not fetch it (and retry or narrow the query); never declare a build or service clean on no data.',
+  '- When output is truncated, do not assume the omitted region is error-free; if your conclusion depends on it, narrow the query (label_selector, single pod, smaller tail) and re-fetch.',
+  '- Say when there is not enough evidence instead of fabricating a cause. When uncertain, say what is missing and what would clarify it in a plain sentence — do not use a confidence label.',
+  '- Lead with the most likely cause and only the evidence needed to support it. Keep findings concise and lead with the highest-impact finding.',
   '- Do not use rigid report headings such as Likely Cause, Evidence, Confidence, or Next Choices unless the user asks for a report.',
-  '- When the conclusion is uncertain, say what is missing and what could clarify it in a plain sentence — do not use a confidence label.',
-  '- End with concise next choices when the user needs to decide what happens next.',
-  '- Keep findings concise and lead with the highest-impact finding.',
-  '- Ask a clarifying question only when you cannot proceed without it: missing access to required data, ambiguous environment or user goal, or two equally plausible causes that require user judgment.',
+  '- Ask a clarifying question only when you cannot proceed without it: missing access to required data, an ambiguous environment or goal, or two equally plausible causes that require user judgment.',
   '- Summarize tool results compactly in prose rather than repeating raw output.',
-  '- Use available tools for fresh facts when the user says state changed or context is incomplete.',
-  '- Do not begin repair work unless the user explicitly asks to continue into repair or otherwise states repair intent.',
-  '- Only perform mutating fixes through approval-gated actions when those tools are available.',
+  '',
+  'Repair (only when asked):',
+  '- Do not begin repair work unless the user explicitly asks to continue into repair or otherwise states repair intent. When a user asks to fix an issue as their first message, give a brief diagnosis before offering Repair.',
+  '- Only perform mutating fixes through approval-gated actions when those tools are available. Before asking for repair approval, state the intended outcome and why the change should address the diagnosed failure.',
+  '- Keep changes localized to obvious config, manifest, repository reference, Dockerfile path, or Helm values fixes. Prefer a durable repo fix over an ephemeral Kubernetes patch. Do not run tests or arbitrary workspace commands in Debug repair.',
   '- When a repair tool returns commit_url, include the plain commit URL in the repair summary instead of Markdown link syntax.',
-  '- When the fix is an obvious localized or config change, lead with the fix and frame Repair as the clear next step.',
-  '- When evidence is incomplete or causes are unclear, frame Investigate more as the next step before offering Repair.',
-  '- When understanding the issue benefits from browsing files manually, mention Open workspace as optional depth.',
-  '- When the fix requires commands, tests, or broad code edits, state that a workspace-backed Develop session is better suited.',
-  '- Only name Continue in Develop when that action is available in the UI; otherwise say to start or open an Agent Session workspace first. For no-workspace build chats, the next visible action is Start workspace, not Continue in Develop.',
-  '- When a user asks to fix an issue as their first message, provide a brief diagnosis before offering Repair.',
-  '- Stop when the user goal is resolved or when the next step requires a user choice. Do not continue into repair loops automatically.',
-  '- Before asking for repair approval, state the intended outcome and why the change should address the diagnosed failure.',
-  '- During repair, keep changes localized to obvious config, manifest, repository reference, Dockerfile path, or Helm values fixes.',
-  '- Do not run tests or arbitrary workspace commands in Debug repair. Do not promise Develop handoff actions that are not visible; point users to the available workspace or Agent Session action when verification needs commands, tests, or broad editing.',
-  '- After an approved repair commit, observe whether the GitHub webhook starts a new build and whether the environment recovers before declaring success.',
-  '- Do not say you will keep monitoring after the response ends, and do not name an Observe action; if the rebuild is still running, say the user can wait, refresh, or ask to investigate again.',
-  '- If webhook automation does not start, say that no rebuild was observed and offer the next user-controlled action instead of using a direct rebuild tool.',
+  '- After an approved repair commit, observe whether the GitHub webhook starts a new build and whether the environment recovers before declaring success. If webhook automation does not start, say no rebuild was observed and offer the next user-controlled action instead of using a direct rebuild tool. Do not say you will keep monitoring after the response ends, and do not name an Observe action; if a rebuild is still running, say the user can wait, refresh, or ask to investigate again.',
   '- If validation reveals a new failure, say the previous issue was fixed, explain the new blocker, and offer the next action.',
+  '',
+  'Next steps and handoff:',
+  '- When the fix is an obvious localized or config change, lead with the fix and frame Repair as the clear next step. When evidence is incomplete or causes are unclear, frame Investigate more as the next step before offering Repair.',
+  '- When understanding the issue benefits from browsing files manually, mention Open workspace as optional depth. When the fix requires commands, tests, or broad code edits, state that a workspace-backed Develop session is better suited. Do not promise Develop handoff actions that are not visible.',
+  '- Only name Continue in Develop when that action is available in the UI; otherwise say to start or open an Agent Session workspace first. For no-workspace build chats, the next visible action is Start workspace, not Continue in Develop.',
+  '- End with concise next choices when the user needs to decide what happens next. Stop when the user goal is resolved or when the next step requires a user choice; do not continue into repair loops automatically.',
 ].join('\n');
 
 export const SYSTEM_INSTRUCTION_TEMPLATE_DEFINITIONS: SystemInstructionTemplateDefinition[] = [
@@ -64,7 +90,7 @@ export const SYSTEM_INSTRUCTION_TEMPLATE_DEFINITIONS: SystemInstructionTemplateD
     ref: 'system:debug',
     name: 'Debug',
     description: 'Investigate build and environment context.',
-    defaultVersion: 3,
+    defaultVersion: 4,
     defaultContent: DEBUG_INSTRUCTION_TEMPLATE_DEFAULT_CONTENT,
   },
   {
diff --git a/src/server/services/agent/thinkingProviderOptions.ts b/src/server/services/agent/thinkingProviderOptions.ts
new file mode 100644
index 00000000..e584d06b
--- /dev/null
+++ b/src/server/services/agent/thinkingProviderOptions.ts
@@ -0,0 +1,60 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Gemini 2.x is configured with a numeric thinking budget; -1 requests a dynamic budget.
+const GEMINI_DYNAMIC_THINKING_BUDGET = -1;
+// Gemini 3+ is configured with a named level instead; 'medium' reliably surfaces thinking.
+const GEMINI_3_THINKING_LEVEL = 'medium';
+// Token budget passed with Anthropic's enabled-thinking option.
+const ANTHROPIC_THINKING_BUDGET_TOKENS = 4_096;
+// Case-insensitive marker for a Gemini 2.x model id; matches anywhere in the id.
+const GEMINI_2_MODEL_ID_PATTERN = /gemini-2\./i;
+
+/** Reports whether `modelId` contains `gemini-2.` (in any letter case), i.e. names a Gemini 2.x model. */
+function isLegacyGemini2(modelId: string): boolean {
+  return GEMINI_2_MODEL_ID_PATTERN.test(modelId);
+}
+
+/**
+ * Builds the provider-keyed reasoning ("thinking") options for a model.
+ *
+ * @param provider - Provider identifier; `gemini`, `google` and `anthropic` are recognised (exact match).
+ * @param modelId - Model identifier; only used to choose between the two Gemini option shapes.
+ * @returns Options keyed by `google` or `anthropic`, or `undefined` for any other provider.
+ */
+export function resolveThinkingProviderOptions(provider: string, modelId: string) {
+  // Both Gemini spellings map onto the `google` options key.
+  if (provider === 'gemini' || provider === 'google') {
+    return {
+      google: {
+        thinkingConfig: isLegacyGemini2(modelId)
+          ? { includeThoughts: true, thinkingBudget: GEMINI_DYNAMIC_THINKING_BUDGET }
+          : { includeThoughts: true, thinkingLevel: GEMINI_3_THINKING_LEVEL },
+      },
+    };
+  }
+
+  if (provider === 'anthropic') {
+    return {
+      anthropic: {
+        thinking: { type: 'enabled', budgetTokens: ANTHROPIC_THINKING_BUDGET_TOKENS },
+      },
+    };
+  }
+
+  // Other providers get no reasoning options (they can't stream reasoning in the tool loop).
+  return undefined;
+}
diff --git a/src/server/services/agent/tools/__tests__/outputLimiter.test.ts b/src/server/services/agent/tools/__tests__/outputLimiter.test.ts
index d8735eff..1c5f592d 100644
--- a/src/server/services/agent/tools/__tests__/outputLimiter.test.ts
+++ b/src/server/services/agent/tools/__tests__/outputLimiter.test.ts
@@ -96,6 +96,45 @@ describe('OutputLimiter', () => {
       const result = OutputLimiter.truncateLogOutput(content, 5000, 50, 100);
       expect(result.length).toBeLessThanOrEqual(5000);
     });
+    // An error line located in the dropped middle must still appear in the result, next to the first and last lines.
+    it('retains an error signal that falls in the dropped middle', () => {
+      const lines = Array.from({ length: 500 }, (_, i) =>
+        i === 250 ? 'Error: something exploded at runtime' : `line${i}`
+      );
+      const content = lines.join('\n');
+      const result = OutputLimiter.truncateLogOutput(content, 100000, 50, 100);
+      expect(result).toContain('Error: something exploded at runtime');
+      expect(result).toContain('retained error region');
+      // head + tail still present
+      expect(result).toContain('line0');
+      expect(result).toContain('line499');
+    });
+    // With no error-like line anywhere, only the plain "lines omitted" marker is expected.
+    it('does not add an error region when no signal is in the middle', () => {
+      const lines = Array.from({ length: 500 }, (_, i) => `line${i}`);
+      const content = lines.join('\n');
+      const result = OutputLimiter.truncateLogOutput(content, 100000, 50, 100);
+      expect(result).not.toContain('retained error region');
+      expect(result).toContain('lines omitted of 500 total');
+    });
+    // Long lines plus a small limit (8000): the retained error line must not push the result past that limit.
+    it('keeps the error region within the byte cap by trimming head/tail', () => {
+      const lines = Array.from({ length: 400 }, (_, i) =>
+        i === 200 ? `${'E'.repeat(200)} panic: boom` : `${'x'.repeat(200)}-line${i}`
+      );
+      const content = lines.join('\n');
+      const result = OutputLimiter.truncateLogOutput(content, 8000, 50, 100);
+      expect(result.length).toBeLessThanOrEqual(8000);
+      expect(result).toContain('panic: boom');
+    });
+    // Passing false as the fifth argument switches error-region retention off, so the middle error line is dropped.
+    it('respects retainErrorRegion=false (legacy behavior)', () => {
+      const lines = Array.from({ length: 500 }, (_, i) => (i === 250 ? 'Error: hidden in the middle' : `line${i}`));
+      const content = lines.join('\n');
+      const result = OutputLimiter.truncateLogOutput(content, 100000, 50, 100, false);
+      expect(result).not.toContain('Error: hidden in the middle');
+      expect(result).not.toContain('retained error region');
+    });
   });
 
   describe('truncateJsonSafely', () => {
diff --git a/src/server/services/agent/tools/codefresh/__tests__/getCodefreshLogs.test.ts b/src/server/services/agent/tools/codefresh/__tests__/getCodefreshLogs.test.ts
new file mode 100644
index 00000000..85926b1b
--- /dev/null
+++ b/src/server/services/agent/tools/codefresh/__tests__/getCodefreshLogs.test.ts
@@ -0,0 +1,65 @@
+/**
+ * Copyright 2025 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Replace the Codefresh module's getLogsResult with a stub that forwards to mockGetLogsResult, so each test picks the result.
+const mockGetLogsResult = jest.fn();
+jest.mock('server/lib/codefresh', () => ({
+  getLogsResult: (...args: any[]) => mockGetLogsResult(...args),
+}));
+
+import { GetCodefreshLogsTool } from '../getCodefreshLogs';
+
+describe('GetCodefreshLogsTool', () => {
+  let tool: GetCodefreshLogsTool;
+  // Every test starts with cleared mock call history and a newly constructed tool.
+  beforeEach(() => {
+    jest.clearAllMocks();
+    tool = new GetCodefreshLogsTool();
+  });
+  // Success path: non-blank output comes back as JSON carrying the logs and a total line count.
+  it('returns logs on a successful fetch', async () => {
+    mockGetLogsResult.mockResolvedValue({ ok: true, output: 'line1\nline2\nline3' });
+    const result = await tool.execute({ pipeline_id: 'abc123' });
+    expect(result.success).toBe(true);
+    const data = JSON.parse(result.agentContent as string);
+    expect(data.logs).toContain('line1');
+    expect(data.totalLines).toBe(3);
+  });
+  // A failed fetch ({ ok: false }) must surface as a recoverable LOGS_UNAVAILABLE error that names the pipeline id.
+  it('reports LOGS_UNAVAILABLE (retryable) when fetch fails', async () => {
+    mockGetLogsResult.mockResolvedValue({ ok: false, reason: 'codefresh logs: not found' });
+    const result = await tool.execute({ pipeline_id: 'badid' });
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('LOGS_UNAVAILABLE');
+    expect(result.error?.recoverable).toBe(true);
+    expect(result.error?.message).toContain('badid');
+    expect(result.error?.message).toContain('do NOT assume the build is clean');
+  });
+  // Whitespace-only output from a successful fetch is reported the same way as a failed fetch.
+  it('reports LOGS_UNAVAILABLE when fetch succeeds but logs are empty/whitespace', async () => {
+    mockGetLogsResult.mockResolvedValue({ ok: true, output: '   \n\n  \t ' });
+    const result = await tool.execute({ pipeline_id: 'emptybuild' });
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('LOGS_UNAVAILABLE');
+    expect(result.error?.recoverable).toBe(true);
+  });
+  // A missing pipeline_id is rejected as INVALID_PARAMETERS without the log fetcher being called.
+  it('requires a pipeline_id', async () => {
+    const result = await tool.execute({});
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('INVALID_PARAMETERS');
+    expect(mockGetLogsResult).not.toHaveBeenCalled();
+  });
+});
diff --git a/src/server/services/agent/tools/codefresh/getCodefreshLogs.ts b/src/server/services/agent/tools/codefresh/getCodefreshLogs.ts
index eaf4be08..e812b7a1 100644
--- a/src/server/services/agent/tools/codefresh/getCodefreshLogs.ts
+++ b/src/server/services/agent/tools/codefresh/getCodefreshLogs.ts
@@ -16,7 +16,7 @@
 
 import { BaseTool } from '../baseTool';
 import { ToolResult, ToolSafetyLevel } from '../types';
-import { getLogs } from 'server/lib/codefresh';
+import { getLogsResult } from 'server/lib/codefresh';
 import { OutputLimiter } from '../outputLimiter';
 
 function deduplicateLines(lines: string[]): string[] {
@@ -85,7 +85,19 @@ export class GetCodefreshLogsTool extends BaseTool {
 
       const maxLines = lines || 500;
 
-      const logs = await getLogs(pipelineId);
+      const fetched = await getLogsResult(pipelineId);
+
+      // Never report no-data as clean: a failed fetch or blank output is retryable-unavailable.
+      const hasContent = fetched.ok && fetched.output.replace(/\s/g, '').length > 0;
+      if (!hasContent) {
+        return this.createErrorResult(
+          `No logs returned for pipeline_id ${pipelineId}. It may be wrong, expired, or the build has not started. Verify the buildPipelineId/deployPipelineId from the DEPLOYS section and retry; do NOT assume the build is clean.`,
+          'LOGS_UNAVAILABLE',
+          true
+        );
+      }
+
+      const logs = fetched.output;
 
       const sanitizedLogs = String(logs)
         .replace(/\r\n/g, '\n')
diff --git a/src/server/services/agent/tools/github/__tests__/getFile.test.ts b/src/server/services/agent/tools/github/__tests__/getFile.test.ts
index 2b57daae..b558a585 100644
--- a/src/server/services/agent/tools/github/__tests__/getFile.test.ts
+++ b/src/server/services/agent/tools/github/__tests__/getFile.test.ts
@@ -21,6 +21,7 @@ const mockGithubClient = {
   getOctokit: jest.fn().mockResolvedValue(mockOctokit),
   isFilePathAllowed: jest.fn().mockReturnValue(true),
   isFileExcluded: jest.fn().mockReturnValue(false),
+  isRepoAllowed: jest.fn().mockReturnValue(true),
   validateBranch: jest.fn().mockReturnValue({ valid: true }),
 } as any;
 
@@ -38,6 +39,7 @@ describe('GetFileTool', () => {
     jest.clearAllMocks();
     mockGithubClient.getOctokit.mockResolvedValue(mockOctokit);
     mockGithubClient.isFilePathAllowed.mockReturnValue(true);
+    mockGithubClient.isRepoAllowed.mockReturnValue(true);
     tool = new GetFileTool(mockGithubClient);
   });
 
@@ -68,6 +70,16 @@ describe('GetFileTool', () => {
     expect(result.error?.code).toBe('FILE_ACCESS_DENIED');
   });
 
+  it('rejects repositories outside the build scope', async () => {
+    mockGithubClient.isRepoAllowed.mockReturnValue(false);
+    // With the repo reported as disallowed, the tool must fail and name the repo without issuing any GitHub request.
+    const result = await tool.execute({ ...baseArgs, repository_owner: 'other-org', repository_name: 'other-repo' });
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('FILE_ACCESS_DENIED');
+    expect(result.agentContent).toContain('other-org/other-repo');
+    expect(mockOctokit.request).not.toHaveBeenCalled();
+  });
+
   it('returns error for non-file path', async () => {
     mockOctokit.request.mockResolvedValue({
       data: { type: 'dir' },
diff --git a/src/server/services/agent/tools/github/__tests__/listDirectory.test.ts b/src/server/services/agent/tools/github/__tests__/listDirectory.test.ts
index e0df09d8..daa3b755 100644
--- a/src/server/services/agent/tools/github/__tests__/listDirectory.test.ts
+++ b/src/server/services/agent/tools/github/__tests__/listDirectory.test.ts
@@ -21,6 +21,7 @@ const mockGithubClient = {
   getOctokit: jest.fn().mockResolvedValue(mockOctokit),
   isFilePathAllowed: jest.fn().mockReturnValue(true),
   isFileExcluded: jest.fn().mockReturnValue(false),
+  isRepoAllowed: jest.fn().mockReturnValue(true),
 } as any;
 
 describe('ListDirectoryTool', () => {
@@ -37,6 +38,7 @@ describe('ListDirectoryTool', () => {
     jest.clearAllMocks();
     mockGithubClient.getOctokit.mockResolvedValue(mockOctokit);
     mockGithubClient.isFileExcluded.mockReturnValue(false);
+    mockGithubClient.isRepoAllowed.mockReturnValue(true);
     tool = new ListDirectoryTool(mockGithubClient);
   });
 
@@ -72,6 +74,16 @@ describe('ListDirectoryTool', () => {
     expect(data.items[0].name).toBe('index.ts');
   });
 
+  it('rejects repositories outside the build scope', async () => {
+    mockGithubClient.isRepoAllowed.mockReturnValue(false);
+    // With the repo reported as disallowed, the tool must fail and name the repo without issuing any GitHub request.
+    const result = await tool.execute({ ...baseArgs, repository_owner: 'other-org', repository_name: 'other-repo' });
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('FILE_ACCESS_DENIED');
+    expect(result.agentContent).toContain('other-org/other-repo');
+    expect(mockOctokit.request).not.toHaveBeenCalled();
+  });
+
   it('returns error for non-directory path', async () => {
     mockOctokit.request.mockResolvedValue({
       data: { type: 'file', content: 'abc' },
diff --git a/src/server/services/agent/tools/github/getFile.ts b/src/server/services/agent/tools/github/getFile.ts
index a4b72af7..ed588f06 100644
--- a/src/server/services/agent/tools/github/getFile.ts
+++ b/src/server/services/agent/tools/github/getFile.ts
@@ -24,12 +24,18 @@ export class GetFileTool extends BaseTool {
 
   constructor(private githubClient: GitHubClient) {
     super(
-      'Read any file from the repository. Returns raw file content and total line count. Use this to read configuration files (lifecycle.yaml, lifecycle.yml), Dockerfiles, Helm values, source code, or any other file.',
+      "Read any file from one of THIS environment's repositories. Returns raw file content and total line count. Use this to read configuration files (lifecycle.yaml, lifecycle.yml), Dockerfiles, Helm values, source code, or any other file. Default repository_owner/repository_name to this build's primary repository; repositories outside this environment are rejected.",
       {
         type: 'object',
         properties: {
-          repository_owner: { type: 'string', description: 'Repository owner' },
-          repository_name: { type: 'string', description: 'Repository name' },
+          repository_owner: {
+            type: 'string',
+            description: "Repository owner. Defaults to this build's primary repo owner.",
+          },
+          repository_name: {
+            type: 'string',
+            description: "Repository name. Defaults to this build's primary repo name.",
+          },
           branch: { type: 'string', description: 'Branch name' },
           file_path: {
             type: 'string',
@@ -55,6 +61,15 @@ export class GetFileTool extends BaseTool {
       const branch = args.branch as string;
       const filePath = args.file_path as string;
 
+      // SECURITY: lock to the build's repositories; reject out-of-scope repos.
+      if (!this.githubClient.isRepoAllowed(owner, repo)) {
+        return this.createErrorResult(
+          `Repository "${owner}/${repo}" is outside this environment's repositories and cannot be accessed.`,
+          'FILE_ACCESS_DENIED',
+          false
+        );
+      }
+
       if (!this.githubClient.isFilePathAllowed(filePath, 'read')) {
         return this.createErrorResult(
           `File "${filePath}" is restricted by access control policy and cannot be read.`,
diff --git a/src/server/services/agent/tools/github/listDirectory.ts b/src/server/services/agent/tools/github/listDirectory.ts
index 7389af61..b9d51d2f 100644
--- a/src/server/services/agent/tools/github/listDirectory.ts
+++ b/src/server/services/agent/tools/github/listDirectory.ts
@@ -23,12 +23,18 @@ export class ListDirectoryTool extends BaseTool {
 
   constructor(private githubClient: GitHubClient) {
     super(
-      'List files and directories in ANY repository path. PROACTIVE USE: When you see "no such file or directory" errors in build/deploy logs, IMMEDIATELY call this to discover the correct filename. Example: If logs show "sysops/dockerfiles/app.dockerfile: no such file", call list_directory("sysops/dockerfiles") to find the actual file.',
+      'List files and directories in a path of one of THIS environment\'s repositories. PROACTIVE USE: When you see "no such file or directory" errors in build/deploy logs, IMMEDIATELY call this to discover the correct filename. Example: If logs show "sysops/dockerfiles/app.dockerfile: no such file", call list_directory("sysops/dockerfiles") to find the actual file. Default repository_owner/repository_name to this build\'s primary repository; repositories outside this environment are rejected.',
       {
         type: 'object',
         properties: {
-          repository_owner: { type: 'string', description: 'Repository owner' },
-          repository_name: { type: 'string', description: 'Repository name' },
+          repository_owner: {
+            type: 'string',
+            description: "Repository owner. Defaults to this build's primary repo owner.",
+          },
+          repository_name: {
+            type: 'string',
+            description: "Repository name. Defaults to this build's primary repo name.",
+          },
           branch: { type: 'string', description: 'Branch name' },
           directory_path: {
             type: 'string',
@@ -54,6 +60,15 @@ export class ListDirectoryTool extends BaseTool {
       const branch = args.branch as string;
       const directoryPath = args.directory_path as string;
 
+      // SECURITY: lock to the build's repositories; reject out-of-scope repos.
+      if (!this.githubClient.isRepoAllowed(owner, repo)) {
+        return this.createErrorResult(
+          `Repository "${owner}/${repo}" is outside this environment's repositories and cannot be accessed.`,
+          'FILE_ACCESS_DENIED',
+          false
+        );
+      }
+
       const octokit = await this.githubClient.getOctokit('agent-runtime-list-directory');
 
       const response = await octokit.request(`GET /repos/${owner}/${repo}/contents/${directoryPath}`, {
diff --git a/src/server/services/agent/tools/k8s/__tests__/getK8sResources.test.ts b/src/server/services/agent/tools/k8s/__tests__/getK8sResources.test.ts
index 341db1e0..88b6b3d3 100644
--- a/src/server/services/agent/tools/k8s/__tests__/getK8sResources.test.ts
+++ b/src/server/services/agent/tools/k8s/__tests__/getK8sResources.test.ts
@@ -16,6 +16,9 @@
 
 import { GetK8sResourcesTool } from '../getK8sResources';
 
+// Namespace scope state shared by the mock's resolveNamespace; mirrors K8sClient.
+let mockAllowedNamespace: string | null = null;
+
 const mockK8sClient = {
   coreApi: {
     listNamespacedPod: jest.fn(),
@@ -42,6 +45,23 @@ const mockK8sClient = {
   networkingApi: {
     listNamespacedIngress: jest.fn(),
   },
+  setAllowedNamespace: (ns: string | null | undefined) => {
+    mockAllowedNamespace = ns?.trim() || null;
+  },
+  resolveNamespace: (requested?: string | null) => {
+    const requestedTrimmed = requested?.trim() || null;
+    if (!mockAllowedNamespace) {
+      if (!requestedTrimmed) throw new Error('namespace is required');
+      return requestedTrimmed;
+    }
+    if (!requestedTrimmed) return mockAllowedNamespace;
+    if (requestedTrimmed !== mockAllowedNamespace) {
+      throw new Error(
+        `namespace "${requestedTrimmed}" is outside this environment's namespace "${mockAllowedNamespace}" and cannot be accessed.`
+      );
+    }
+    return mockAllowedNamespace;
+  },
 } as any;
 
 describe('GetK8sResourcesTool', () => {
@@ -49,6 +69,7 @@ describe('GetK8sResourcesTool', () => {
 
   beforeEach(() => {
     jest.clearAllMocks();
+    mockAllowedNamespace = null;
     tool = new GetK8sResourcesTool(mockK8sClient);
   });
 
@@ -191,4 +212,127 @@ describe('GetK8sResourcesTool', () => {
     expect(result.success).toBe(false);
     expect(result.agentContent).toContain('Forbidden');
   });
+
+  it('rejects a namespace outside the build scope', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+
+    const result = await tool.execute({ namespace: 'env-other', resource_type: 'pods' });
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('NAMESPACE_NOT_ALLOWED');
+    expect(result.agentContent).toContain('env-other');
+    expect(result.agentContent).toContain('env-mine');
+    expect(mockK8sClient.coreApi.listNamespacedPod).not.toHaveBeenCalled();
+  });
+
+  it('defaults to the build namespace when none is supplied', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+    mockK8sClient.coreApi.listNamespacedPod.mockResolvedValue({ body: { items: [] } });
+
+    const result = await tool.execute({ resource_type: 'pods' });
+    expect(result.success).toBe(true);
+    expect(mockK8sClient.coreApi.listNamespacedPod).toHaveBeenCalledWith(
+      'env-mine',
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined
+    );
+  });
+
+  it('allows the matching build namespace', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+    mockK8sClient.coreApi.listNamespacedPod.mockResolvedValue({ body: { items: [] } });
+
+    const result = await tool.execute({ namespace: 'env-mine', resource_type: 'pods' });
+    expect(result.success).toBe(true);
+    expect(mockK8sClient.coreApi.listNamespacedPod).toHaveBeenCalled();
+  });
+
+  it('never returns secret values, only metadata/keys', async () => {
+    mockK8sClient.coreApi.listNamespacedSecret.mockResolvedValue({
+      body: {
+        items: [
+          {
+            metadata: { name: 'db-creds' },
+            type: 'Opaque',
+            data: { DB_PASSWORD: 'c3VwZXItc2VjcmV0', DB_USER: 'YWRtaW4=' },
+          },
+        ],
+      },
+    });
+
+    const result = await tool.execute({ namespace: 'test-ns', resource_type: 'secrets' });
+    expect(result.success).toBe(true);
+    const data = JSON.parse(result.agentContent as string);
+    expect(data.secrets[0].name).toBe('db-creds');
+    expect(data.secrets[0].keys).toEqual(['DB_PASSWORD', 'DB_USER']);
+    expect(result.agentContent).not.toContain('c3VwZXItc2VjcmV0');
+    expect(result.agentContent).not.toContain('super-secret');
+  });
+
+  it('surfaces waiting reason for non-running containers (ImagePullBackOff)', async () => {
+    mockK8sClient.coreApi.listNamespacedPod.mockResolvedValue({
+      body: {
+        items: [
+          {
+            metadata: { name: 'pod-bad', creationTimestamp: '2025-01-01T00:00:00Z' },
+            status: {
+              phase: 'Pending',
+              containerStatuses: [
+                {
+                  name: 'app',
+                  ready: false,
+                  restartCount: 0,
+                  state: {
+                    waiting: { reason: 'ImagePullBackOff', message: 'Back-off pulling image "nope:latest"' },
+                  },
+                },
+              ],
+            },
+          },
+        ],
+      },
+    });
+
+    const result = await tool.execute({ namespace: 'test-ns', resource_type: 'pods' });
+    expect(result.success).toBe(true);
+    const data = JSON.parse(result.agentContent as string);
+    expect(data.pods[0].containers[0].waiting).toEqual({
+      reason: 'ImagePullBackOff',
+      message: 'Back-off pulling image "nope:latest"',
+    });
+    expect((result.displayContent as { content: string }).content).toContain('ImagePullBackOff');
+  });
+
+  it('surfaces terminated reason and lastState (OOMKilled / CrashLoop)', async () => {
+    mockK8sClient.coreApi.listNamespacedPod.mockResolvedValue({
+      body: {
+        items: [
+          {
+            metadata: { name: 'pod-crash', creationTimestamp: '2025-01-01T00:00:00Z' },
+            status: {
+              phase: 'Running',
+              containerStatuses: [
+                {
+                  name: 'app',
+                  ready: false,
+                  restartCount: 7,
+                  state: { waiting: { reason: 'CrashLoopBackOff', message: 'back-off restarting' } },
+                  lastState: { terminated: { reason: 'OOMKilled', exitCode: 137 } },
+                },
+              ],
+            },
+          },
+        ],
+      },
+    });
+
+    const result = await tool.execute({ namespace: 'test-ns', resource_type: 'pods' });
+    expect(result.success).toBe(true);
+    const data = JSON.parse(result.agentContent as string);
+    const container = data.pods[0].containers[0];
+    expect(container.waiting.reason).toBe('CrashLoopBackOff');
+    expect(container.lastState.terminated).toEqual({ reason: 'OOMKilled', exitCode: 137 });
+  });
 });
diff --git a/src/server/services/agent/tools/k8s/__tests__/getPodLogs.test.ts b/src/server/services/agent/tools/k8s/__tests__/getPodLogs.test.ts
index b737ad4f..771b75c0 100644
--- a/src/server/services/agent/tools/k8s/__tests__/getPodLogs.test.ts
+++ b/src/server/services/agent/tools/k8s/__tests__/getPodLogs.test.ts
@@ -16,10 +16,29 @@
 
 import { GetPodLogsTool } from '../getPodLogs';
 
+let mockAllowedNamespace: string | null = null;
+
 const mockK8sClient = {
   coreApi: {
     readNamespacedPodLog: jest.fn(),
   },
+  setAllowedNamespace: (ns: string | null | undefined) => {
+    mockAllowedNamespace = ns?.trim() || null;
+  },
+  resolveNamespace: (requested?: string | null) => {
+    const requestedTrimmed = requested?.trim() || null;
+    if (!mockAllowedNamespace) {
+      if (!requestedTrimmed) throw new Error('namespace is required');
+      return requestedTrimmed;
+    }
+    if (!requestedTrimmed) return mockAllowedNamespace;
+    if (requestedTrimmed !== mockAllowedNamespace) {
+      throw new Error(
+        `namespace "${requestedTrimmed}" is outside this environment's namespace "${mockAllowedNamespace}" and cannot be accessed.`
+      );
+    }
+    return mockAllowedNamespace;
+  },
 } as any;
 
 describe('GetPodLogsTool', () => {
@@ -27,6 +46,7 @@ describe('GetPodLogsTool', () => {
 
   beforeEach(() => {
     jest.clearAllMocks();
+    mockAllowedNamespace = null;
     tool = new GetPodLogsTool(mockK8sClient);
   });
 
@@ -47,7 +67,7 @@ describe('GetPodLogsTool', () => {
       undefined,
       undefined,
       undefined,
-      undefined,
+      false,
       undefined,
       100
     );
@@ -65,12 +85,33 @@ describe('GetPodLogsTool', () => {
       undefined,
       undefined,
       undefined,
-      undefined,
+      false,
       undefined,
       50
     );
   });
 
+  it('reads the previous (crashed) container instance when previous=true', async () => {
+    mockK8sClient.coreApi.readNamespacedPodLog.mockResolvedValue({ body: 'crash output' });
+
+    const result = await tool.execute({ pod_name: 'my-pod', namespace: 'test-ns', previous: true });
+    expect(result.success).toBe(true);
+    const data = JSON.parse(result.agentContent as string);
+    expect(data.previous).toBe(true);
+    expect(mockK8sClient.coreApi.readNamespacedPodLog).toHaveBeenCalledWith(
+      'my-pod',
+      'test-ns',
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      true,
+      undefined,
+      100
+    );
+  });
+
   it('handles API error', async () => {
     mockK8sClient.coreApi.readNamespacedPodLog.mockRejectedValue(new Error('Pod not found'));
 
@@ -84,4 +125,33 @@ describe('GetPodLogsTool', () => {
     expect(result.success).toBe(false);
     expect(result.error?.code).toBe('CANCELLED');
   });
+
+  it('rejects a namespace outside the build scope', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+
+    const result = await tool.execute({ pod_name: 'my-pod', namespace: 'env-other' });
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('NAMESPACE_NOT_ALLOWED');
+    expect(mockK8sClient.coreApi.readNamespacedPodLog).not.toHaveBeenCalled();
+  });
+
+  it('defaults to the build namespace when none is supplied', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+    mockK8sClient.coreApi.readNamespacedPodLog.mockResolvedValue({ body: 'logs' });
+
+    const result = await tool.execute({ pod_name: 'my-pod' });
+    expect(result.success).toBe(true);
+    expect(mockK8sClient.coreApi.readNamespacedPodLog).toHaveBeenCalledWith(
+      'my-pod',
+      'env-mine',
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      false,
+      undefined,
+      100
+    );
+  });
 });
diff --git a/src/server/services/agent/tools/k8s/__tests__/patchK8sResource.test.ts b/src/server/services/agent/tools/k8s/__tests__/patchK8sResource.test.ts
index 5b2e1555..b5902a72 100644
--- a/src/server/services/agent/tools/k8s/__tests__/patchK8sResource.test.ts
+++ b/src/server/services/agent/tools/k8s/__tests__/patchK8sResource.test.ts
@@ -16,6 +16,8 @@
 
 import { PatchK8sResourceTool } from '../patchK8sResource';
 
+let mockAllowedNamespace: string | null = null;
+
 const mockK8sClient = {
   coreApi: {
     deleteNamespacedPod: jest.fn(),
@@ -27,6 +29,23 @@ const mockK8sClient = {
   batchApi: {
     deleteNamespacedJob: jest.fn(),
   },
+  setAllowedNamespace: (ns: string | null | undefined) => {
+    mockAllowedNamespace = ns?.trim() || null;
+  },
+  resolveNamespace: (requested?: string | null) => {
+    const requestedTrimmed = requested?.trim() || null;
+    if (!mockAllowedNamespace) {
+      if (!requestedTrimmed) throw new Error('namespace is required');
+      return requestedTrimmed;
+    }
+    if (!requestedTrimmed) return mockAllowedNamespace;
+    if (requestedTrimmed !== mockAllowedNamespace) {
+      throw new Error(
+        `namespace "${requestedTrimmed}" is outside this environment's namespace "${mockAllowedNamespace}" and cannot be accessed.`
+      );
+    }
+    return mockAllowedNamespace;
+  },
 } as any;
 
 const deploymentResponse = {
@@ -42,6 +61,7 @@ describe('PatchK8sResourceTool', () => {
 
   beforeEach(() => {
     jest.clearAllMocks();
+    mockAllowedNamespace = null;
     tool = new PatchK8sResourceTool(mockK8sClient);
   });
 
@@ -103,9 +123,7 @@ describe('PatchK8sResourceTool', () => {
     expect(patchArg.spec.template.metadata.annotations['kubectl.kubernetes.io/restartedAt']).toBeDefined();
   });
 
-  it('delete pod', async () => {
-    mockK8sClient.coreApi.deleteNamespacedPod.mockResolvedValue({});
-
+  it('rejects delete: it is removed from the supported operations', async () => {
     const result = await tool.execute({
       namespace: 'test-ns',
       resource_type: 'pod',
@@ -113,30 +131,11 @@ describe('PatchK8sResourceTool', () => {
       operation: 'delete',
     });
 
-    expect(result.success).toBe(true);
-    expect(mockK8sClient.coreApi.deleteNamespacedPod).toHaveBeenCalledWith('my-pod', 'test-ns');
-  });
-
-  it('delete job', async () => {
-    mockK8sClient.batchApi.deleteNamespacedJob.mockResolvedValue({});
-
-    const result = await tool.execute({
-      namespace: 'test-ns',
-      resource_type: 'job',
-      name: 'my-job',
-      operation: 'delete',
-    });
-
-    expect(result.success).toBe(true);
-    expect(mockK8sClient.batchApi.deleteNamespacedJob).toHaveBeenCalledWith(
-      'my-job',
-      'test-ns',
-      undefined,
-      undefined,
-      undefined,
-      undefined,
-      'Background'
-    );
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('INVALID_OPERATION');
+    expect(result.agentContent).toContain('Unknown operation');
+    expect(mockK8sClient.coreApi.deleteNamespacedPod).not.toHaveBeenCalled();
+    expect(mockK8sClient.batchApi.deleteNamespacedJob).not.toHaveBeenCalled();
   });
 
   it('returns error for missing patch object on patch operation', async () => {
@@ -183,4 +182,46 @@ describe('PatchK8sResourceTool', () => {
     expect(result.success).toBe(false);
     expect(result.error?.code).toBe('CANCELLED');
   });
+
+  it('rejects a namespace outside the build scope during execution', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+
+    const result = await tool.execute({
+      namespace: 'env-other',
+      resource_type: 'deployment',
+      name: 'my-deploy',
+      operation: 'restart',
+    });
+
+    expect(result.success).toBe(false);
+    expect(result.error?.code).toBe('NAMESPACE_NOT_ALLOWED');
+    expect(mockK8sClient.appsApi.patchNamespacedDeployment).not.toHaveBeenCalled();
+  });
+
+  it('rejects a foreign namespace BEFORE presenting an approval', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+
+    await expect(
+      tool.shouldConfirmExecution({
+        namespace: 'env-other',
+        resource_type: 'deployment',
+        name: 'my-deploy',
+        operation: 'restart',
+      })
+    ).rejects.toThrow('env-other');
+  });
+
+  it('presents an approval for the matching build namespace', async () => {
+    mockK8sClient.setAllowedNamespace('env-mine');
+
+    const confirmation = await tool.shouldConfirmExecution({
+      namespace: 'env-mine',
+      resource_type: 'deployment',
+      name: 'my-deploy',
+      operation: 'restart',
+    });
+
+    expect(confirmation).not.toBe(false);
+    expect((confirmation as any).description).toContain('env-mine');
+  });
 });
diff --git a/src/server/services/agent/tools/k8s/getK8sResources.ts b/src/server/services/agent/tools/k8s/getK8sResources.ts
index 4ef722d3..a630c908 100644
--- a/src/server/services/agent/tools/k8s/getK8sResources.ts
+++ b/src/server/services/agent/tools/k8s/getK8sResources.ts
@@ -24,15 +24,19 @@ export class GetK8sResourcesTool extends BaseTool {
 
   constructor(private k8sClient: K8sClient) {
     super(
-      'Get any Kubernetes resource type in a namespace. Supports: pods, deployments, services, ingresses, secrets, configmaps, jobs, statefulsets, daemonsets, replicasets, events. Use this to discover what resources exist in the namespace.',
+      "Get any Kubernetes resource type in THIS environment's namespace. Supports: pods, deployments, services, ingresses, secrets (metadata/keys only — values are never returned), configmaps, jobs, statefulsets, daemonsets, replicasets, events. The namespace defaults to this environment's namespace; you do not need to pass it, and any other namespace is rejected.",
       {
         type: 'object',
         properties: {
-          namespace: { type: 'string', description: 'The Kubernetes namespace' },
+          namespace: {
+            type: 'string',
+            description:
+              "Optional. Defaults to this environment's namespace. If provided, it MUST equal the environment's namespace; any other value is rejected.",
+          },
           resource_type: {
             type: 'string',
             description:
-              'Resource type to list (e.g., "pods", "deployments", "services", "ingresses", "secrets", "configmaps", "jobs", "statefulsets", "events"). Accepts singular or plural forms.',
+              'Resource type to list (e.g., "pods", "deployments", "services", "ingresses", "secrets", "configmaps", "jobs", "statefulsets", "events"). Accepts singular or plural forms. "secrets" returns only name/type/keys — never values.',
             enum: [
               'pods',
               'deployments',
@@ -61,7 +65,7 @@ export class GetK8sResourcesTool extends BaseTool {
               'Optional: Filter by field selector (e.g., "involvedObject.name=mypod"). Primarily used for events.',
           },
         },
-        required: ['namespace', 'resource_type'],
+        required: ['resource_type'],
       },
       ToolSafetyLevel.SAFE,
       'k8s'
@@ -73,8 +77,15 @@ export class GetK8sResourcesTool extends BaseTool {
       return this.createErrorResult('Operation cancelled', 'CANCELLED', false);
     }
 
+    let namespace: string;
+    try {
+      // SECURITY: lock to the build's namespace; reject any foreign namespace.
+      namespace = this.k8sClient.resolveNamespace(args.namespace as string | undefined);
+    } catch (error: any) {
+      return this.createErrorResult(error.message || 'Namespace not allowed', 'NAMESPACE_NOT_ALLOWED', false);
+    }
+
     try {
-      const namespace = args.namespace as string;
       const resourceType = args.resource_type as string;
       const name = args.name as string | undefined;
       const labelSelector = args.label_selector as string | undefined;
@@ -146,6 +157,47 @@ export class GetK8sResourcesTool extends BaseTool {
     }
   }
 
+  /**
+   * Condense one container status into name/ready/state/restarts, attaching waiting and terminated
+   * details plus the last terminated state when present, so one call reveals why a pod is unhealthy.
+   *
+   * @param c A container status entry, as found in a pod's status.containerStatuses.
+   * @returns A plain summary object; optional sections are left out rather than set to empty values.
+   */
+  static summarizeContainerStatus(c: any): any {
+    const currentState = c.state || {};
+    const { waiting, terminated } = currentState;
+    const previousRun = c.lastState?.terminated;
+
+    // Key order is deliberate: it is the order in which the fields serialize.
+    const summary: any = {
+      name: c.name,
+      ready: c.ready,
+      state: Object.keys(currentState)[0],
+      restarts: c.restartCount,
+    };
+
+    // A waiting entry is only reported when it carries a reason or a message.
+    if (waiting && (waiting.reason || waiting.message)) {
+      const { reason, message } = waiting;
+      summary.waiting = { reason, message };
+    }
+
+    // Any terminated state is reported, even one without a reason.
+    if (terminated) {
+      const { reason, message, exitCode } = terminated;
+      summary.terminated = { reason, message, exitCode };
+    }
+
+    // exitCode is compared against undefined so that an exit code of 0 still counts.
+    if (previousRun && (previousRun.reason || previousRun.exitCode !== undefined)) {
+      const { reason, exitCode } = previousRun;
+      summary.lastState = { terminated: { reason, exitCode } };
+    }
+
+    return summary;
+  }
+
   private async getPods(namespace: string, labelSelector?: string): Promise<any> {
     const response = await this.k8sClient.coreApi.listNamespacedPod(
       namespace,
@@ -166,12 +218,7 @@ export class GetK8sResourcesTool extends BaseTool {
         ? pod.status.containerStatuses.reduce((sum, c) => sum + c.restartCount, 0)
         : 0,
       age: pod.metadata?.creationTimestamp,
-      containers: pod.status?.containerStatuses?.map((c) => ({
-        name: c.name,
-        ready: c.ready,
-        state: Object.keys(c.state || {})[0],
-        restarts: c.restartCount,
-      })),
+      containers: pod.status?.containerStatuses?.map((c) => GetK8sResourcesTool.summarizeContainerStatus(c)),
     }));
 
     const isUnhealthy = (pod: any) =>
@@ -560,10 +607,37 @@ export class GetK8sResourcesTool extends BaseTool {
     };
   }
 
+  /**
+   * Builds the bracketed per-container reason suffix for a pod's display line; '' when no container has one.
+   */
+  private static podContainerReasons(pod: any): string {
+    // Renders " (exit N)" when an exit code is present, otherwise nothing.
+    const exitSuffix = (t: any) => (t.exitCode !== undefined ? ` (exit ${t.exitCode})` : '');
+    const reasons: string[] = [];
+    for (const c of pod.containers || []) {
+      const lastTerminated = c.lastState?.terminated;
+      // First match wins: current waiting reason, then current termination, then the previous termination.
+      if (c.waiting?.reason) {
+        reasons.push(`${c.name}: ${c.waiting.reason}`);
+      } else if (c.terminated?.reason) {
+        reasons.push(`${c.name}: ${c.terminated.reason}${exitSuffix(c.terminated)}`);
+      } else if (lastTerminated?.reason) {
+        reasons.push(`${c.name}: last ${lastTerminated.reason}${exitSuffix(lastTerminated)}`);
+      }
+    }
+    if (reasons.length === 0) return '';
+    return ` [${reasons.join('; ')}]`;
+  }
+
   private formatDisplay(resourceType: string, result: any): string {
     if (resourceType === 'pod' && result.total && result.unhealthyPods) {
       const unhealthyList = result.unhealthyPods
-        .map((p: any) => `  - ${p.name}: ${p.phase} (${p.ready} ready, ${p.restarts} restarts)`)
+        .map(
+          (p: any) =>
+            `  - ${p.name}: ${p.phase} (${p.ready} ready, ${
+              p.restarts
+            } restarts)${GetK8sResourcesTool.podContainerReasons(p)}`
+        )
         .join('\n');
       return `${result.total} pods (${result.unhealthyCount} unhealthy, ${result.healthyCount} healthy)${
         unhealthyList ? `\nUnhealthy:\n${unhealthyList}` : ''
@@ -572,7 +646,12 @@ export class GetK8sResourcesTool extends BaseTool {
 
     if (resourceType === 'pod' && result.pods) {
       return `Found ${result.pods.length} pods:\n${result.pods
-        .map((p: any) => `  - ${p.name}: ${p.phase} (${p.ready} ready, ${p.restarts} restarts)`)
+        .map(
+          (p: any) =>
+            `  - ${p.name}: ${p.phase} (${p.ready} ready, ${
+              p.restarts
+            } restarts)${GetK8sResourcesTool.podContainerReasons(p)}`
+        )
         .join('\n')}`;
     }
 
diff --git a/src/server/services/agent/tools/k8s/getLifecycleLogs.ts b/src/server/services/agent/tools/k8s/getLifecycleLogs.ts
index 21467780..0325a1bc 100644
--- a/src/server/services/agent/tools/k8s/getLifecycleLogs.ts
+++ b/src/server/services/agent/tools/k8s/getLifecycleLogs.ts
@@ -53,13 +53,20 @@ function deduplicateLines(lines: string[]): string[] {
 export class GetLifecycleLogsTool extends BaseTool {
   static readonly Name = 'get_lifecycle_logs';
 
+  // SECURITY: filter logs to this build's UUID so a model can't read another build's logs.
+  private allowedBuildUuid: string | null = null;
+
   constructor(private k8sClient: K8sClient) {
     super(
-      'Fetch logs from the Lifecycle control plane services (lifecycle-worker or lifecycle-web pods in lifecycle-app namespace), filtered by build UUID and correlation ID. First finds logs matching the build UUID, extracts correlationId from matched structured log lines, then expands the search to include all logs sharing that correlationId. This gives a complete picture of the request lifecycle across services. For user service logs, use get_pod_logs instead.',
+      "Fetch logs from the Lifecycle control plane services (lifecycle-worker or lifecycle-web pods in lifecycle-app namespace), filtered by THIS build's UUID and correlation ID. First finds logs matching the build UUID, extracts correlationId from matched structured log lines, then expands the search to include all logs sharing that correlationId. This gives a complete picture of the request lifecycle across services. For user service logs, use get_pod_logs instead. build_uuid defaults to this build; any other build UUID is rejected.",
       {
         type: 'object',
         properties: {
-          build_uuid: { type: 'string', description: 'The build UUID to filter logs for' },
+          build_uuid: {
+            type: 'string',
+            description:
+              "Optional. Defaults to this build's UUID. If provided, it MUST equal this build's UUID; any other value is rejected.",
+          },
           service_type: {
             type: 'string',
             description:
@@ -74,20 +81,55 @@ export class GetLifecycleLogsTool extends BaseTool {
               'Optional: directly filter by correlation ID instead of discovering it from build UUID. Useful for follow-up queries.',
           },
         },
-        required: ['build_uuid'],
+        required: [],
       },
       ToolSafetyLevel.SAFE,
       'k8s'
     );
   }
 
+  /** Sets the only build UUID this tool may query; a blank, null or undefined value clears the restriction. */
+  setAllowedBuildUuid(buildUuid: string | null | undefined): void {
+    this.allowedBuildUuid = buildUuid?.trim() || null;
+  }
+
+  /**
+   * Resolves the build UUID to query. Without a configured scope the trimmed request is returned and
+   * must be non-empty; with one, an empty request falls back to it and a differing request throws.
+   */
+  private resolveBuildUuid(requested: string | null | undefined): string {
+    const scopedUuid = this.allowedBuildUuid;
+    const candidate = requested?.trim() || null;
+
+    if (scopedUuid) {
+      if (candidate && candidate !== scopedUuid) {
+        throw new Error(
+          `build_uuid "${candidate}" is outside this environment's build "${scopedUuid}" and cannot be accessed.`
+        );
+      }
+      return scopedUuid;
+    }
+
+    if (candidate) {
+      return candidate;
+    }
+    throw new Error('build_uuid is required');
+  }
+
   async execute(args: Record<string, unknown>, signal?: AbortSignal): Promise<ToolResult> {
     if (this.checkAborted(signal)) {
       return this.createErrorResult('Operation cancelled', 'CANCELLED', false);
     }
 
+    let buildUuid: string;
+    try {
+      // SECURITY: lock to this build's UUID; reject any foreign build UUID.
+      buildUuid = this.resolveBuildUuid(args.build_uuid as string | undefined);
+    } catch (error: any) {
+      return this.createErrorResult(error.message || 'build_uuid not allowed', 'BUILD_NOT_ALLOWED', false);
+    }
+
     try {
-      const buildUuid = args.build_uuid as string;
       const serviceType = (args.service_type as string) || 'worker';
       const tailLines = (args.tail_lines as number) || 200;
       const sinceMinutes = (args.since_minutes as number) || 30;
diff --git a/src/server/services/agent/tools/k8s/getPodLogs.ts b/src/server/services/agent/tools/k8s/getPodLogs.ts
index e2977fd3..a08629d5 100644
--- a/src/server/services/agent/tools/k8s/getPodLogs.ts
+++ b/src/server/services/agent/tools/k8s/getPodLogs.ts
@@ -55,13 +55,22 @@ export class GetPodLogsTool extends BaseTool {
 
   constructor(private k8sClient: K8sClient) {
     super(
-      'Fetch recent logs from a specific pod. Use this to diagnose application errors.',
+      "Fetch logs from a pod in THIS environment's namespace. For a crashing or restarting pod (CrashLoopBackOff, non-zero restart count), set previous=true to read the crashed instance — the current container is often empty or pre-crash. Pass container for multi-container pods. The namespace defaults to this environment's namespace; any other namespace is rejected.",
       {
         type: 'object',
         properties: {
           pod_name: { type: 'string', description: 'The pod name' },
-          namespace: { type: 'string', description: 'The Kubernetes namespace' },
+          namespace: {
+            type: 'string',
+            description:
+              "Optional. Defaults to this environment's namespace. If provided, it MUST equal the environment's namespace; any other value is rejected.",
+          },
           container: { type: 'string', description: 'Optional specific container name' },
+          previous: {
+            type: 'boolean',
+            description:
+              'Read logs from the PREVIOUS (crashed/restarted) container instance. Essential for CrashLoopBackOff: the current instance is usually empty; the previous one holds the crash output.',
+          },
           tail_lines: { type: 'number', description: 'Number of lines from the end of logs (default: 100)' },
           head_lines: {
             type: 'number',
@@ -69,7 +78,7 @@ export class GetPodLogsTool extends BaseTool {
               'Number of lines from the start of logs (default: 50). Combined with tail_lines for head+tail truncation.',
           },
         },
-        required: ['pod_name', 'namespace'],
+        required: ['pod_name'],
       },
       ToolSafetyLevel.SAFE,
       'k8s'
@@ -81,9 +90,17 @@ export class GetPodLogsTool extends BaseTool {
       return this.createErrorResult('Operation cancelled', 'CANCELLED', false);
     }
 
+    let namespace: string;
+    try {
+      // SECURITY: lock to the build's namespace; reject any foreign namespace.
+      namespace = this.k8sClient.resolveNamespace(args.namespace as string | undefined);
+    } catch (error: any) {
+      return this.createErrorResult(error.message || 'Namespace not allowed', 'NAMESPACE_NOT_ALLOWED', false);
+    }
+
+    const previous = args.previous === true;
     try {
       const podName = args.pod_name as string;
-      const namespace = args.namespace as string;
       const container = args.container as string | undefined;
       const tailLines = (args.tail_lines as number) || 100;
       const headLines = (args.head_lines as number) || 50;
@@ -96,7 +113,7 @@ export class GetPodLogsTool extends BaseTool {
         undefined,
         undefined,
         undefined,
-        undefined,
+        previous,
         undefined,
         tailLines
       );
@@ -119,16 +136,27 @@ export class GetPodLogsTool extends BaseTool {
 
       const processedLogs = OutputLimiter.truncateLogOutput(finalLines.join('\n'), 30000, headLines, tailLines);
 
-      const displayContent = `Pod logs: ${finalLines.length} lines from ${podName} (${dedupedLines.length} total, head=${headLines} tail=${tailLines})`;
+      const displayContent = `Pod logs: ${finalLines.length} lines from ${podName}${
+        previous ? ' (previous instance)' : ''
+      } (${dedupedLines.length} total, head=${headLines} tail=${tailLines})`;
 
       const result = {
         success: true,
+        previous,
         logs: processedLogs,
       };
 
       return this.createSuccessResult(JSON.stringify(result), displayContent);
     } catch (error: any) {
-      return this.createErrorResult(error.message || 'Failed to fetch pod logs', 'EXECUTION_ERROR');
+      const message: string = error?.message || 'Failed to fetch pod logs';
+      // Match only the kubelet's "previous terminated container ... not found" error. A bare "not found"
+      // also matches a missing pod or container, which must surface as-is rather than as a restart hint.
+      if (!previous || !/previous terminated container/i.test(message)) {
+        return this.createErrorResult(message, 'EXECUTION_ERROR');
+      }
+      const hint =
+        'No previous (crashed) container instance found — the pod has not restarted yet, or kept no prior instance. Read current logs (omit previous), or check container status and events for the waiting/terminated reason.';
+      return this.createErrorResult(hint, 'NO_PREVIOUS_CONTAINER', false);
     }
   }
 }
diff --git a/src/server/services/agent/tools/k8s/patchK8sResource.ts b/src/server/services/agent/tools/k8s/patchK8sResource.ts
index dddc0ebf..ce6f3a0e 100644
--- a/src/server/services/agent/tools/k8s/patchK8sResource.ts
+++ b/src/server/services/agent/tools/k8s/patchK8sResource.ts
@@ -23,11 +23,15 @@ export class PatchK8sResourceTool extends BaseTool {
 
   constructor(private k8sClient: K8sClient) {
     super(
-      'Modify Kubernetes resources. Supports operations: patch (update config), scale (change replicas), restart (rolling restart), delete (delete pod/job). Use this to fix misconfigurations or manage resource lifecycle.',
+      "Modify Kubernetes resources in THIS environment's namespace. Supports operations: patch (update config), scale (change replicas), restart (rolling restart). IMPORTANT: these changes are EPHEMERAL — Lifecycle reverts patch/scale/restart on the next deploy/reconcile. Use them only to validate a hypothesis or temporarily unblock. The DURABLE fix is an approval-gated update_file to lifecycle.yaml or its referenced files. The namespace defaults to this environment's namespace; any other namespace is rejected.",
       {
         type: 'object',
         properties: {
-          namespace: { type: 'string', description: 'The Kubernetes namespace' },
+          namespace: {
+            type: 'string',
+            description:
+              "Optional. Defaults to this environment's namespace. If provided, it MUST equal the environment's namespace; any other value is rejected.",
+          },
           resource_type: {
             type: 'string',
             description: 'Resource type (e.g., "deployment", "pod", "job")',
@@ -35,8 +39,9 @@ export class PatchK8sResourceTool extends BaseTool {
           name: { type: 'string', description: 'The resource name' },
           operation: {
             type: 'string',
-            description: 'Operation to perform: "patch", "scale", "restart", or "delete"',
-            enum: ['patch', 'scale', 'restart', 'delete'],
+            description:
+              'Operation to perform: "patch", "scale", or "restart". All are ephemeral and reverted on the next deploy/reconcile.',
+            enum: ['patch', 'scale', 'restart'],
           },
           patch: {
             type: 'object',
@@ -58,12 +63,13 @@ export class PatchK8sResourceTool extends BaseTool {
   async shouldConfirmExecution(args: Record<string, unknown>): Promise<ConfirmationDetails | false> {
     const resourceType = args.resource_type as string;
     const name = args.name as string;
-    const namespace = args.namespace as string;
     const operation = args.operation as string;
+    // SECURITY: enforce namespace scope before approval; a foreign namespace must never become approvable.
+    const namespace = this.k8sClient.resolveNamespace(args.namespace as string | undefined);
     return {
       title: `${operation} Kubernetes resource`,
       description: `${operation} ${resourceType}/${name} in namespace ${namespace}`,
-      impact: 'This will modify a live Kubernetes resource.',
+      impact: 'This will modify a live Kubernetes resource. The change is ephemeral and reverted on the next deploy.',
       confirmButtonText: `${operation.charAt(0).toUpperCase() + operation.slice(1)}`,
     };
   }
@@ -73,8 +79,15 @@ export class PatchK8sResourceTool extends BaseTool {
       return this.createErrorResult('Operation cancelled', 'CANCELLED', false);
     }
 
+    let namespace: string;
+    try {
+      // SECURITY: lock to the build's namespace; reject any foreign namespace.
+      namespace = this.k8sClient.resolveNamespace(args.namespace as string | undefined);
+    } catch (error: any) {
+      return this.createErrorResult(error.message || 'Namespace not allowed', 'NAMESPACE_NOT_ALLOWED', false);
+    }
+
     try {
-      const namespace = args.namespace as string;
       const resourceType = args.resource_type as string;
       const name = args.name as string;
       const operation = args.operation as string;
@@ -105,13 +118,9 @@ export class PatchK8sResourceTool extends BaseTool {
           result = await this.handleRestartOperation(normalizedType, name, namespace);
           break;
 
-        case 'delete':
-          result = await this.handleDeleteOperation(normalizedType, name, namespace);
-          break;
-
         default:
           return this.createErrorResult(
-            `Unknown operation: ${operation}. Supported operations: patch, scale, restart, delete`,
+            `Unknown operation: ${operation}. Supported operations: patch, scale, restart`,
             'INVALID_OPERATION'
           );
       }
@@ -149,15 +158,6 @@ export class PatchK8sResourceTool extends BaseTool {
     throw new Error(`Restart operation not supported for resource type: ${resourceType}`);
   }
 
-  private async handleDeleteOperation(resourceType: string, name: string, namespace: string): Promise<any> {
-    if (resourceType === 'pod') {
-      return this.deletePod(name, namespace);
-    } else if (resourceType === 'job') {
-      return this.deleteJob(name, namespace);
-    }
-    throw new Error(`Delete operation not supported for resource type: ${resourceType}`);
-  }
-
   private async patchDeployment(name: string, namespace: string, patch: any): Promise<any> {
     const response = await this.k8sClient.appsApi.patchNamespacedDeployment(
       name,
@@ -262,30 +262,4 @@ export class PatchK8sResourceTool extends BaseTool {
       restartedAt: now,
     };
   }
-
-  private async deletePod(podName: string, namespace: string): Promise<any> {
-    await this.k8sClient.coreApi.deleteNamespacedPod(podName, namespace);
-
-    return {
-      success: true,
-      message: `Successfully deleted pod ${podName}. Kubernetes will create a new pod automatically.`,
-    };
-  }
-
-  private async deleteJob(jobName: string, namespace: string): Promise<any> {
-    await this.k8sClient.batchApi.deleteNamespacedJob(
-      jobName,
-      namespace,
-      undefined,
-      undefined,
-      undefined,
-      undefined,
-      'Background'
-    );
-
-    return {
-      success: true,
-      message: `Successfully deleted job ${jobName}.`,
-    };
-  }
 }
diff --git a/src/server/services/agent/tools/k8s/queryDatabase.ts b/src/server/services/agent/tools/k8s/queryDatabase.ts
index d9f0a7c1..2822e7dc 100644
--- a/src/server/services/agent/tools/k8s/queryDatabase.ts
+++ b/src/server/services/agent/tools/k8s/queryDatabase.ts
@@ -23,7 +23,7 @@ export class QueryDatabaseTool extends BaseTool {
 
   constructor(private databaseClient: DatabaseClient) {
     super(
-      'Read-only database query to fetch fresh Lifecycle data. Use this to get current build/deploy status, check deployables, or verify configuration. CRITICAL: READ-ONLY - no write operations allowed. TABLE-SPECIFIC RELATIONS: builds (pullRequest, environment, deploys, deployables), deploys (build, deployable, repository, service), deployables (repository, deploys), pull_requests (repository, build), repositories (pullRequests, deployables), environments (builds). Use dot notation for nested relations like "deploys.repository".',
+      "Read-only database query to fetch fresh Lifecycle data for THIS build only. Every query is automatically scoped to this build's own records (builds.uuid = this build, deploys/deployables of this build, this build's pull request / environment / repositories); you cannot read other tenants' rows. Use this to get current build/deploy status, check deployables, or verify configuration. CRITICAL: READ-ONLY - no write operations allowed. TABLE-SPECIFIC RELATIONS: builds (pullRequest, environment, deploys, deployables), deploys (build, deployable, repository, service), deployables (repository, deploys), pull_requests (repository, build), repositories (pullRequests, deployables), environments (builds). Use dot notation for nested relations like \"deploys.repository\".",
       {
         type: 'object',
         properties: {
@@ -51,7 +51,7 @@ export class QueryDatabaseTool extends BaseTool {
           select: {
             type: 'array',
             items: { type: 'string' },
-            description: 'Columns to return (default: all). Invalid column names are silently ignored.',
+            description: 'Columns to return (default: all). Use exact column names from the table schema.',
           },
           orderBy: {
             type: 'string',
@@ -84,7 +84,7 @@ export class QueryDatabaseTool extends BaseTool {
       const orderBy = args.orderBy as string | undefined;
       const offset = args.offset as number | undefined;
 
-      const { records, totalCount } = await this.databaseClient.queryTable({
+      const { records, totalCount, warnings } = await this.databaseClient.queryTable({
         table,
         filters,
         relations,
@@ -100,6 +100,7 @@ export class QueryDatabaseTool extends BaseTool {
         count: records.length,
         totalCount,
         records,
+        ...(warnings && warnings.length > 0 ? { warnings } : {}),
       };
 
       const displayContent = `Found ${records.length} ${table} (${totalCount} total)`;
diff --git a/src/server/services/agent/tools/outputLimiter.ts b/src/server/services/agent/tools/outputLimiter.ts
index 7956ba57..b0344300 100644
--- a/src/server/services/agent/tools/outputLimiter.ts
+++ b/src/server/services/agent/tools/outputLimiter.ts
@@ -17,6 +17,11 @@
 const DEFAULT_MAX_CHARS = 30000;
 const MARKER_RESERVE = 200;
 
+// Case-insensitive, whole-word signals that usually carry the actual failure in long logs.
+const ERROR_SIGNAL_RE =
+  /\b(error|fatal|exception|panic|traceback|failed|cannot|denied|segfault|oom(killed)?|exit code|stack trace|unhandled)\b/i;
+const ERROR_WINDOW = 20; // lines retained around the last error signal: floor(ERROR_WINDOW / 2) on each side plus the signal line itself
+
 function makeMarker(kept: number, total: number): string {
   return `\n[Truncated: showing ${kept} of ${total} chars — use tighter filters to get specific data]`;
 }
@@ -84,7 +89,9 @@ export class OutputLimiter {
     content: string,
     maxChars: number = DEFAULT_MAX_CHARS,
     headLines: number = 50,
-    tailLines: number = 100
+    tailLines: number = 100,
+    // Retain a window around the last error signal in the dropped middle.
+    retainErrorRegion: boolean = true
   ): string {
     const lines = content.split('\n');
     if (lines.length <= headLines + tailLines) {
@@ -92,16 +99,73 @@ export class OutputLimiter {
       return OutputLimiter.truncate(content, maxChars);
     }
 
-    const head = lines.slice(0, headLines);
-    const tail = lines.slice(-tailLines);
-    const omitted = lines.length - headLines - tailLines;
-    const marker = `\n... [Truncated: ${omitted} lines omitted of ${lines.length} total] ...\n`;
-    const result = head.join('\n') + marker + tail.join('\n');
+    const headEnd = headLines;
+    const tailStart = lines.length - tailLines;
+
+    // Find the last error-signal line that falls in the omitted middle.
+    let errorIdx = -1;
+    if (retainErrorRegion) {
+      for (let i = tailStart - 1; i >= headEnd; i--) {
+        if (ERROR_SIGNAL_RE.test(lines[i])) {
+          errorIdx = i;
+          break;
+        }
+      }
+    }
 
-    if (result.length > maxChars) {
-      return OutputLimiter.truncate(result, maxChars);
+    if (errorIdx === -1) {
+      const head = lines.slice(0, headLines);
+      const tail = lines.slice(tailStart); // not slice(-tailLines): that returns ALL lines when tailLines is 0
+      const omitted = lines.length - headLines - tailLines;
+      const marker = `\n... [Truncated: ${omitted} lines omitted of ${lines.length} total] ...\n`;
+      const result = head.join('\n') + marker + tail.join('\n');
+      return result.length > maxChars ? OutputLimiter.truncate(result, maxChars) : result;
     }
-    return result;
+
+    return OutputLimiter.buildWithErrorRegion(lines, headLines, tailLines, errorIdx, maxChars);
+  }
+
+  // Render head + a window around lines[errorIdx] (clamped so it never overlaps head or tail) + tail, shrinking head/tail to fit maxChars.
+  private static buildWithErrorRegion(
+    lines: string[],
+    headLines: number,
+    tailLines: number,
+    errorIdx: number,
+    maxChars: number
+  ): string {
+    const half = Math.floor(ERROR_WINDOW / 2);
+    const total = lines.length;
+
+    const render = (h: number, t: number): string => {
+      const errStart = Math.max(h, errorIdx - half);
+      const errEnd = Math.min(total - t, errorIdx + half + 1);
+      const head = lines.slice(0, h);
+      const errRegion = lines.slice(errStart, errEnd);
+      const tail = lines.slice(total - t);
+
+      const headOmitted = errStart - h;
+      const tailOmitted = total - t - errEnd;
+      const errMarker = `\n... [retained error region — ${errRegion.length} lines around last error signal] ...\n`;
+      const parts = [head.join('\n')];
+      if (headOmitted > 0) parts.push(`\n... [Truncated: ${headOmitted} lines omitted] ...`);
+      parts.push(errMarker + errRegion.join('\n'));
+      if (tailOmitted > 0) parts.push(`\n... [Truncated: ${tailOmitted} lines omitted] ...\n`);
+      else parts.push('\n');
+      parts.push(tail.join('\n'));
+      return parts.join('');
+    };
+
+    let h = headLines;
+    let t = tailLines;
+    let result = render(h, t);
+    // Cut head and tail by a quarter per pass (floor of 5 lines each) until under the cap. The error window is never shrunk here, though the final truncate() fallback below may still cut into it.
+    while (result.length > maxChars && (h > 5 || t > 5)) {
+      if (h > 5) h = Math.max(5, h - Math.ceil(h * 0.25));
+      if (t > 5) t = Math.max(5, t - Math.ceil(t * 0.25));
+      result = render(h, t);
+    }
+
+    return result.length > maxChars ? OutputLimiter.truncate(result, maxChars) : result;
   }
 
   static truncateJsonSafely(jsonString: string, maxChars: number = DEFAULT_MAX_CHARS): string {
diff --git a/src/server/services/agent/tools/shared/__tests__/databaseClient.test.ts b/src/server/services/agent/tools/shared/__tests__/databaseClient.test.ts
new file mode 100644
index 00000000..7b5fa0dc
--- /dev/null
+++ b/src/server/services/agent/tools/shared/__tests__/databaseClient.test.ts
@@ -0,0 +1,163 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { DatabaseClient } from '../databaseClient';
+
+type RecordedCall = { method: string; args: any[] };
+
+/** Query-builder spy recording calls; exposes ONLY read methods, so any write call throws (proves read-only). */
+function createQueryBuilder(rows: any[], calls: RecordedCall[]) {
+  const builder: any = {};
+  const chain = (method: string) =>
+    jest.fn((...args: any[]) => {
+      calls.push({ method, args });
+      return builder;
+    });
+
+  builder.where = chain('where');
+  builder.whereIn = chain('whereIn');
+  builder.select = chain('select');
+  builder.orderBy = chain('orderBy');
+  builder.withGraphFetched = chain('withGraphFetched');
+  builder.limit = chain('limit');
+  builder.offset = chain('offset');
+  builder.resultSize = jest.fn(async () => rows.length);
+  builder.then = (resolve: (value: any[]) => unknown) => Promise.resolve(rows).then(resolve);
+  return builder;
+}
+
+function buildDb(rows: any[], calls: RecordedCall[]) {
+  const query = jest.fn(() => createQueryBuilder(rows, calls));
+  return {
+    models: {
+      Build: { query },
+      Deploy: { query },
+      Deployable: { query },
+      PullRequest: { query },
+      Repository: { query },
+      Environment: { query },
+    },
+  };
+}
+
+const SCOPE = {
+  buildId: 42,
+  buildUuid: 'my-build-uuid',
+  pullRequestId: 7,
+  environmentId: 3,
+  repositoryIds: [11, 12],
+};
+
+describe('DatabaseClient build scoping', () => {
+  let calls: RecordedCall[];
+
+  function makeClient(rows: any[] = [], scope: any = SCOPE) {
+    calls = [];
+    const client = new DatabaseClient(buildDb(rows, calls));
+    client.setBuildScope(scope);
+    return client;
+  }
+
+  it('rejects queries entirely when no build scope is configured', async () => {
+    const client = new DatabaseClient(buildDb([], (calls = [])));
+    await expect(client.queryTable({ table: 'builds' })).rejects.toThrow(/not scoped to a build/);
+  });
+
+  it('scopes builds queries to this build uuid (ANDed under model filters)', async () => {
+    const client = makeClient([{ uuid: 'my-build-uuid', status: 'deployed' }]);
+    await client.queryTable({ table: 'builds', filters: { status: 'deployed' } });
+
+    // mandatory scope clause is applied
+    expect(calls).toContainEqual({ method: 'where', args: ['uuid', 'my-build-uuid'] });
+    // model filter is ANDed on top, never replaces the scope
+    expect(calls).toContainEqual({ method: 'where', args: [{ status: 'deployed' }] });
+  });
+
+  it('scopes deploys to this build id', async () => {
+    const client = makeClient([]);
+    await client.queryTable({ table: 'deploys' });
+    expect(calls).toContainEqual({ method: 'where', args: ['buildId', 42] });
+  });
+
+  it('scopes repositories to the build repository ids via whereIn', async () => {
+    const client = makeClient([]);
+    await client.queryTable({ table: 'repositories' });
+    expect(calls).toContainEqual({ method: 'whereIn', args: ['id', [11, 12]] });
+  });
+
+  it('scopes pull_requests to this build pull request id', async () => {
+    const client = makeClient([]);
+    await client.queryTable({ table: 'pull_requests' });
+    expect(calls).toContainEqual({ method: 'where', args: ['id', 7] });
+  });
+
+  it('a model filter cannot widen the scope to another build', async () => {
+    const client = makeClient([]);
+    await client.queryTable({ table: 'builds', filters: { uuid: 'someone-elses-build' } });
+    // Build scope is still ANDed, so a foreign uuid can only ever return zero rows.
+    expect(calls).toContainEqual({ method: 'where', args: ['uuid', 'my-build-uuid'] });
+    expect(calls).toContainEqual({ method: 'where', args: [{ uuid: 'someone-elses-build' }] });
+  });
+
+  it('rejects pull_requests when the build has no pull request', async () => {
+    calls = [];
+    const client = new DatabaseClient(buildDb([], calls));
+    client.setBuildScope({ ...SCOPE, pullRequestId: null });
+    await expect(client.queryTable({ table: 'pull_requests' })).rejects.toThrow(/no associated pull request/);
+  });
+
+  it('rejects wildcard-only LIKE patterns that would dump all rows', async () => {
+    const client = makeClient([]);
+    await expect(client.queryTable({ table: 'deploys', filters: { uuid: '%' } })).rejects.toThrow(
+      /wildcard-only pattern/
+    );
+    await expect(client.queryTable({ table: 'deploys', filters: { uuid: '%%' } })).rejects.toThrow(
+      /wildcard-only pattern/
+    );
+    await expect(client.queryTable({ table: 'deploys', filters: { uuid: '__' } })).rejects.toThrow(
+      /wildcard-only pattern/
+    );
+  });
+
+  it('still allows a specific LIKE pattern within the build scope', async () => {
+    const client = makeClient([]);
+    await client.queryTable({ table: 'deploys', filters: { uuid: 'sample-service-%' } });
+    expect(calls).toContainEqual({ method: 'where', args: ['buildId', 42] });
+    expect(calls).toContainEqual({ method: 'where', args: ['uuid', 'like', 'sample-service-%'] });
+  });
+
+  it('surfaces unknown selected columns as a warning instead of silently dropping them', async () => {
+    const client = makeClient([{ uuid: 'my-build-uuid' }]);
+    const result = await client.queryTable({ table: 'builds', select: ['uuid', 'not_a_real_column'] });
+    expect(result.warnings).toBeDefined();
+    expect(result.warnings?.[0]).toContain('not_a_real_column');
+    // valid columns are still selected
+    expect(calls.some((c) => c.method === 'select')).toBe(true);
+  });
+
+  it('rejects tables not allowed', async () => {
+    const client = makeClient([]);
+    await expect(client.queryTable({ table: 'users' as any })).rejects.toThrow(/not allowed/);
+  });
+
+  it('exposes no write methods on the query builder (read-only)', () => {
+    const builder = createQueryBuilder([], []);
+    expect((builder as any).insert).toBeUndefined();
+    expect((builder as any).update).toBeUndefined();
+    expect((builder as any).delete).toBeUndefined();
+    expect((builder as any).patch).toBeUndefined();
+  });
+});
diff --git a/src/server/services/agent/tools/shared/databaseClient.test.ts b/src/server/services/agent/tools/shared/databaseClient.test.ts
index cfa1c2e6..049d74ac 100644
--- a/src/server/services/agent/tools/shared/databaseClient.test.ts
+++ b/src/server/services/agent/tools/shared/databaseClient.test.ts
@@ -55,6 +55,13 @@ describe('DatabaseClient diagnostic schema', () => {
     const countQuery = createQuery(records);
     const dataQuery = createQuery(records);
     const { client } = createClientWithQueries(dataQuery, countQuery);
+    client.setBuildScope({
+      buildId: 1,
+      buildUuid: 'sample-build',
+      pullRequestId: 11,
+      environmentId: 1,
+      repositoryIds: [1],
+    });
 
     await client.queryTable({
       table: 'builds',
@@ -73,6 +80,13 @@ describe('DatabaseClient diagnostic schema', () => {
     const countQuery = createQuery([]);
     const dataQuery = createQuery([]);
     const { client } = createClientWithQueries(dataQuery, countQuery);
+    client.setBuildScope({
+      buildId: 1,
+      buildUuid: 'sample-build',
+      pullRequestId: 11,
+      environmentId: 1,
+      repositoryIds: [1],
+    });
 
     await expect(
       client.queryTable({
@@ -87,6 +101,13 @@ describe('DatabaseClient diagnostic schema', () => {
     const countQuery = createQuery([]);
     const dataQuery = createQuery([]);
     const { client } = createClientWithQueries(dataQuery, countQuery);
+    client.setBuildScope({
+      buildId: 1,
+      buildUuid: 'sample-build',
+      pullRequestId: 11,
+      environmentId: 1,
+      repositoryIds: [1],
+    });
 
     await expect(
       client.queryTable({
diff --git a/src/server/services/agent/tools/shared/databaseClient.ts b/src/server/services/agent/tools/shared/databaseClient.ts
index df2dd692..a460d2f5 100644
--- a/src/server/services/agent/tools/shared/databaseClient.ts
+++ b/src/server/services/agent/tools/shared/databaseClient.ts
@@ -27,6 +27,7 @@ interface QueryOptions {
 interface QueryResult {
   records: any[];
   totalCount: number;
+  warnings?: string[];
 }
 
 interface TableSchema {
@@ -36,9 +37,60 @@ interface TableSchema {
   relations: Record<string, string>;
 }
 
+/** SECURITY: the single build a session is scoped to; every query is constrained to its rows so a model can't read other tenants'. */
+export interface DatabaseBuildScope {
+  buildId: number;
+  buildUuid: string;
+  pullRequestId?: number | null;
+  environmentId?: number | null;
+  repositoryIds?: number[];
+}
+
 export class DatabaseClient {
+  // SECURITY: a mandatory per-table WHERE derived from this scope is ANDed onto every query and can't be widened by model filters; while it is null, every query is rejected.
+  private buildScope: DatabaseBuildScope | null = null;
+
   constructor(private db: any) {}
 
+  setBuildScope(scope: DatabaseBuildScope | null | undefined): void {
+    this.buildScope = scope ?? null;
+  }
+
+  /** Mandatory scope clause for a table; throws if the build doesn't constrain it (table not queryable) or scope is unresolved. */
+  private buildScopeClause(table: string): { column: string; value?: any; in?: any[] } {
+    const scope = this.buildScope;
+    if (!scope) {
+      // Without a resolved build scope these tools must not expose any rows.
+      throw new Error('Database access is not scoped to a build; query rejected.');
+    }
+
+    switch (table) {
+      case 'builds':
+        return { column: 'uuid', value: scope.buildUuid };
+      case 'deploys':
+        return { column: 'buildId', value: scope.buildId };
+      case 'deployables':
+        return { column: 'buildId', value: scope.buildId };
+      case 'pull_requests':
+        if (scope.pullRequestId == null) {
+          throw new Error('This build has no associated pull request; pull_requests is not queryable.');
+        }
+        return { column: 'id', value: scope.pullRequestId };
+      case 'repositories':
+        if (!scope.repositoryIds || scope.repositoryIds.length === 0) {
+          throw new Error('This build has no associated repositories; repositories is not queryable.');
+        }
+        return { column: 'id', in: scope.repositoryIds };
+      case 'environments':
+        if (scope.environmentId == null) {
+          throw new Error('This build has no associated environment; environments is not queryable.');
+        }
+        return { column: 'id', value: scope.environmentId };
+      default:
+        throw new Error(`Table '${table}' cannot be scoped to a build; query rejected.`);
+    }
+  }
+
   async queryTable(options: QueryOptions): Promise<QueryResult>;
   async queryTable(
     table: string,
@@ -81,6 +133,16 @@ export class DatabaseClient {
     let query = Model.query();
     let countQuery = Model.query();
 
+    // SECURITY: build scope applied first and ANDed with all model filters, which can only narrow within it.
+    const scopeClause = this.buildScopeClause(opts.table);
+    if (scopeClause.in !== undefined) {
+      query = query.whereIn(scopeClause.column, scopeClause.in);
+      countQuery = countQuery.whereIn(scopeClause.column, scopeClause.in);
+    } else {
+      query = query.where(scopeClause.column, scopeClause.value);
+      countQuery = countQuery.where(scopeClause.column, scopeClause.value);
+    }
+
     if (opts.filters) {
       const validFilters: Record<string, any> = {};
       const likeFilters: Array<{ column: string; pattern: string }> = [];
@@ -89,6 +151,13 @@ export class DatabaseClient {
         const column = this.normalizeColumnName(schema, key);
         if (schema.columns.includes(column)) {
           if (typeof value === 'string' && (value.includes('%') || value.includes('_'))) {
+            // SECURITY: reject wildcard-only / empty LIKE patterns that would match every row.
+            const literal = value.replace(/[%_]/g, '').trim();
+            if (literal.length === 0) {
+              throw new Error(
+                `Filter for '${key}' is a wildcard-only pattern ("${value}") which would match all rows. Provide a specific value.`
+              );
+            }
             likeFilters.push({ column, pattern: value });
           } else {
             validFilters[column] = value;
@@ -126,7 +195,18 @@ export class DatabaseClient {
 
     const totalCount = await countQuery.resultSize();
 
+    const warnings: string[] = [];
     if (opts.select && opts.select.length > 0) {
+      const unknownSelectColumns = opts.select.filter(
+        (col: string) => !schema.columns.includes(this.normalizeColumnName(schema, col))
+      );
+      if (unknownSelectColumns.length > 0) {
+        warnings.push(
+          `Ignored unknown select columns: ${unknownSelectColumns.join(', ')}. Valid columns for ${
+            opts.table
+          }: ${schema.columns.join(', ')}`
+        );
+      }
       const validColumns = opts.select
         .map((col: string) => this.normalizeColumnName(schema, col))
         .filter(
@@ -166,7 +246,7 @@ export class DatabaseClient {
         ? records.map((record: any) => this.compactRelations(record, topLevelRelations))
         : records;
 
-    return { records: compactedRecords, totalCount };
+    return { records: compactedRecords, totalCount, ...(warnings.length > 0 ? { warnings } : {}) };
   }
 
   private compactRelations(record: any, relationNames: string[]): any {
diff --git a/src/server/services/agent/tools/shared/githubClient.ts b/src/server/services/agent/tools/shared/githubClient.ts
index a2f333fa..eb272934 100644
--- a/src/server/services/agent/tools/shared/githubClient.ts
+++ b/src/server/services/agent/tools/shared/githubClient.ts
@@ -22,15 +22,57 @@ export class GitHubClient {
   private referencedFiles: Set<string> = new Set();
   private excludedFilePatterns: string[] = [];
   private allowedWritePatterns: string[] = [];
+  // SECURITY: lowercased owner/repo set this build spans; reads outside it are rejected to prevent cross-tenant access. While null (never set, or set from an empty list) every repo is allowed.
+  private allowedRepos: Set<string> | null = null;
 
   private normalizeFilePath(filePath: string): string {
     return filePath.trim().replace(/^\/+/, '').replace(/^\.\//, '');
   }
 
+  private normalizeRepoKey(owner: string, repo: string): string {
+    return `${owner}/${repo}`.trim().toLowerCase();
+  }
+
   setAllowedBranch(branch: string) {
     this.allowedBranch = branch;
   }
 
+  setAllowedRepos(repos: string[] | null | undefined): void {
+    if (!repos || repos.length === 0) {
+      this.allowedRepos = null;
+      return;
+    }
+
+    this.allowedRepos = new Set(
+      repos.map((entry) => entry?.trim().toLowerCase()).filter((entry): entry is string => Boolean(entry))
+    );
+  }
+
+  isRepoAllowed(owner: string, repo: string): boolean {
+    if (!this.allowedRepos) {
+      return true;
+    }
+
+    return this.allowedRepos.has(this.normalizeRepoKey(owner, repo));
+  }
+
+  getAllowedRepos(): string[] {
+    return this.allowedRepos ? [...this.allowedRepos] : [];
+  }
+
+  /**
+   * Throws a FILE_ACCESS_DENIED-style Error (message lists the allowed repos) when owner/repo is outside the build scope; never throws while no allowlist is set.
+   */
+  assertRepoAllowed(owner: string, repo: string): void {
+    if (!this.isRepoAllowed(owner, repo)) {
+      throw new Error(
+        `Repository "${owner}/${repo}" is outside this environment's repositories (${this.getAllowedRepos().join(
+          ', '
+        )}) and cannot be accessed.`
+      );
+    }
+  }
+
   setReferencedFiles(files: string[]) {
     this.referencedFiles = new Set(files.map((file) => this.normalizeFilePath(file).toLowerCase()));
   }
diff --git a/src/server/services/agent/tools/shared/k8sClient.ts b/src/server/services/agent/tools/shared/k8sClient.ts
index 2fb8c6c5..eddc279a 100644
--- a/src/server/services/agent/tools/shared/k8sClient.ts
+++ b/src/server/services/agent/tools/shared/k8sClient.ts
@@ -23,6 +23,9 @@ export class K8sClient {
   public readonly batchApi: k8s.BatchV1Api;
   public readonly networkingApi: k8s.NetworkingV1Api;
 
+  // SECURITY: the build's own namespace; tools reject any non-matching model-supplied namespace to prevent cross-tenant access. While null (no scope set), the requested namespace is passed through unchecked.
+  private allowedNamespace: string | null = null;
+
   constructor() {
     this.kc = new k8s.KubeConfig();
     this.kc.loadFromDefault();
@@ -31,4 +34,37 @@ export class K8sClient {
     this.batchApi = this.kc.makeApiClient(k8s.BatchV1Api);
     this.networkingApi = this.kc.makeApiClient(k8s.NetworkingV1Api);
   }
+
+  setAllowedNamespace(namespace: string | null | undefined): void {
+    this.allowedNamespace = namespace?.trim() || null;
+  }
+
+  getAllowedNamespace(): string | null {
+    return this.allowedNamespace;
+  }
+
+  /** Resolve the namespace under build scope: defaults to the scope when omitted or blank, throws on any mismatch (exact comparison after trimming). With no scope set, the trimmed request is returned as-is, or 'namespace is required' is thrown when it is blank. */
+  resolveNamespace(requested: string | null | undefined): string {
+    const allowed = this.allowedNamespace;
+    const requestedTrimmed = requested?.trim() || null;
+
+    if (!allowed) {
+      if (!requestedTrimmed) {
+        throw new Error('namespace is required');
+      }
+      return requestedTrimmed;
+    }
+
+    if (!requestedTrimmed) {
+      return allowed;
+    }
+
+    if (requestedTrimmed !== allowed) {
+      throw new Error(
+        `namespace "${requestedTrimmed}" is outside this environment's namespace "${allowed}" and cannot be accessed.`
+      );
+    }
+
+    return allowed;
+  }
 }
diff --git a/src/server/services/agentRuntime/mcp/__tests__/config.test.ts b/src/server/services/agentRuntime/mcp/__tests__/config.test.ts
index 926f5543..a5ab04f8 100644
--- a/src/server/services/agentRuntime/mcp/__tests__/config.test.ts
+++ b/src/server/services/agentRuntime/mcp/__tests__/config.test.ts
@@ -19,6 +19,7 @@ const mockListTools = jest.fn();
 const mockClose = jest.fn();
 const mockListMaskedStatesByScopes = jest.fn();
 const mockListDecryptedConnectionsByScopes = jest.fn();
+const mockUpsertConnection = jest.fn(); // stands in for the userMcpConnection service's upsertConnection
 
 jest.mock('../client', () => ({
   McpClientManager: jest.fn().mockImplementation(() => ({
@@ -37,6 +38,7 @@ jest.mock('server/services/userMcpConnection', () => ({
   default: {
     listMaskedStatesByScopes: (...args: unknown[]) => mockListMaskedStatesByScopes(...args),
     listDecryptedConnectionsByScopes: (...args: unknown[]) => mockListDecryptedConnectionsByScopes(...args),
+    upsertConnection: (...args: unknown[]) => mockUpsertConnection(...args),
   },
 }));
 
@@ -354,6 +356,102 @@ describe('McpConfigService', () => {
 
       expect(result).toEqual([]);
     });
+
+    it('persists refreshed OAuth tokens through the resolved auth provider', async () => {
+      const globalConfig = {
+        slug: 'sample-oauth',
+        name: 'Sample OAuth',
+        scope: 'global',
+        transport: { type: 'http', url: 'https://mcp.example.com/v1/mcp', headers: {} },
+        sharedConfig: {},
+        authConfig: {
+          mode: 'oauth',
+          provider: 'generic-oauth2.1',
+          scope: 'sample.read',
+        },
+        timeout: 30000,
+        sharedDiscoveredTools: [],
+      };
+      const discoveredTools = [{ name: 'userTool', inputSchema: {} }];
+      // Two config queries are stubbed in order: the first yields the OAuth server config, the second none.
+      MockModel.query
+        .mockReturnValueOnce({
+          where: jest.fn().mockReturnValue({
+            whereNull: jest.fn().mockResolvedValue([globalConfig]),
+          }),
+        })
+        .mockReturnValueOnce({
+          where: jest.fn().mockReturnValue({
+            whereNull: jest.fn().mockResolvedValue([]),
+          }),
+        });
+      // Stored user connection, keyed "<scope>:<slug>", holding OAuth tokens plus discovery/validation metadata.
+      mockListDecryptedConnectionsByScopes.mockResolvedValue(
+        new Map([
+          [
+            'global:sample-oauth',
+            {
+              state: {
+                type: 'oauth',
+                tokens: {
+                  access_token: 'expired-access-token',
+                  refresh_token: 'sample-refresh-token',
+                  token_type: 'Bearer',
+                },
+                clientInformation: {
+                  client_id: 'sample-client-id',
+                  client_secret: 'sample-client-secret',
+                },
+              },
+              discoveredTools,
+              validationError: null,
+              validatedAt: '2026-04-06T16:00:00.000Z',
+              updatedAt: '2026-04-06T16:00:00.000Z',
+            },
+          ],
+        ])
+      );
+
+      const result = await service.resolveServersForRepo('example-org/example-repo', undefined, {
+        userId: 'sample-user',
+        githubUsername: 'sample-user',
+      } as any);
+      const authProvider = result[0]?.transport.type === 'http' ? result[0].transport.authProvider : undefined;
+
+      expect(authProvider).toBeDefined();
+      await expect(authProvider?.tokens()).resolves.toMatchObject({
+        access_token: 'expired-access-token',
+        refresh_token: 'sample-refresh-token',
+      });
+      // Save a refreshed token set through the resolved provider.
+      await authProvider?.saveTokens({
+        access_token: 'fresh-access-token',
+        refresh_token: 'sample-refresh-token',
+        token_type: 'Bearer',
+        expires_in: 3600,
+      });
+      // The upsert must carry the new tokens and pass the stored discovery/validation fields through unchanged.
+      expect(mockUpsertConnection).toHaveBeenCalledWith(
+        expect.objectContaining({
+          userId: 'sample-user',
+          ownerGithubUsername: 'sample-user',
+          scope: 'global',
+          slug: 'sample-oauth',
+          discoveredTools,
+          validationError: null,
+          validatedAt: '2026-04-06T16:00:00.000Z',
+          state: expect.objectContaining({
+            type: 'oauth',
+            tokens: expect.objectContaining({
+              access_token: 'fresh-access-token',
+              refresh_token: 'sample-refresh-token',
+              token_type: 'Bearer',
+              expires_in: 3600,
+            }),
+          }),
+        })
+      );
+    });
   });
 
   describe('resolveSessionPodServersForRepo', () => {
diff --git a/src/server/services/agentRuntime/mcp/__tests__/oauthProvider.test.ts b/src/server/services/agentRuntime/mcp/__tests__/oauthProvider.test.ts
index d0802747..960778a9 100644
--- a/src/server/services/agentRuntime/mcp/__tests__/oauthProvider.test.ts
+++ b/src/server/services/agentRuntime/mcp/__tests__/oauthProvider.test.ts
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-import { PersistentOAuthClientProvider } from '../oauthProvider';
+import { OAuthAuthorizationRequiredError, PersistentOAuthClientProvider } from '../oauthProvider';
 
 describe('PersistentOAuthClientProvider', () => {
   it('includes a valid client URI in dynamic registration metadata', () => {
@@ -43,4 +43,27 @@ describe('PersistentOAuthClientProvider', () => {
       scope: 'sample.read',
     });
   });
+
+  it('tells non-interactive callers to reconnect when OAuth authorization is required', async () => {
+    // A fresh URL per call keeps the two assertions independent of each other.
+    const authorizeUrl = () => new URL('https://auth.example.com/authorize');
+    const reconnectHint =
+      'MCP OAuth connection expired or needs authorization. Reconnect this MCP connection to continue.';
+    const provider = new PersistentOAuthClientProvider({
+      userId: 'sample-user',
+      ownerGithubUsername: 'sample-user',
+      scope: 'global',
+      slug: 'sample-oauth',
+      definitionFingerprint: 'sample-definition-fingerprint',
+      authConfig: {
+        mode: 'oauth',
+        provider: 'generic-oauth2.1',
+      },
+      redirectUrl: 'https://app.example.com/api/v2/ai/agent/mcp-connections/sample-oauth/oauth/callback',
+      interactive: false,
+    });
+
+    await expect(provider.redirectToAuthorization(authorizeUrl())).rejects.toThrow(OAuthAuthorizationRequiredError);
+    await expect(provider.redirectToAuthorization(authorizeUrl())).rejects.toThrow(reconnectHint);
+  });
 });
diff --git a/src/server/services/agentRuntime/mcp/config.ts b/src/server/services/agentRuntime/mcp/config.ts
index 6d81257f..6e7a3bf7 100644
--- a/src/server/services/agentRuntime/mcp/config.ts
+++ b/src/server/services/agentRuntime/mcp/config.ts
@@ -36,41 +36,32 @@ import { usesSessionWorkspaceGatewayExecution } from './sessionPod';
 import type {
   AgentMcpConnection,
   CreateMcpServerConfigInput,
-  McpCompiledConnectionConfig,
   McpAuthConfig,
   McpDiscoveredTool,
   McpResolvedTransportConfig,
   McpServerConfigRecord,
-  McpSharedConnectionConfig,
   McpTransportConfig,
   ResolvedMcpServer,
   UpdateMcpServerConfigInput,
 } from './types';
+import {
+  redactMcpConfigSecrets,
+  redactSharedConfigSecrets,
+  restoreRedactedSharedConfig,
+  restoreRedactedTransport,
+  sanitizeMcpErrorMessage,
+  sanitizeMcpResult,
+  sharedConfigContainsRedactedSecret,
+  sharedConfigContainsSecretValue,
+  transportTargetChanged,
+} from './mcpConfigSecrets';
+
+export type { McpErrorRedactionSource } from './mcpConfigSecrets'; // moved; re-exported to preserve the public surface
+export { redactMcpConfigSecrets, redactSharedConfigSecrets, sanitizeMcpErrorMessage, sanitizeMcpResult };
 
 const SLUG_REGEX = /^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$/;
 const MAX_SLUG_LENGTH = 100;
 const VALIDATION_TIMEOUT_MS = 5000;
-const REDACTED_SHARED_SECRET = '******';
-const MIN_SECRET_REDACTION_LENGTH = 4;
-const NON_SECRET_REDACTION_VALUES = new Set([
-  'bearer',
-  'basic',
-  'false',
-  'http',
-  'https',
-  'none',
-  'null',
-  'oauth',
-  'true',
-]);
-const SHARED_SECRET_SECTIONS: (keyof McpSharedConnectionConfig)[] = ['headers', 'query', 'env', 'defaultArgs'];
-
-export type McpErrorRedactionSource = {
-  values?: Record<string, unknown> | null;
-  compiledConfig?: Partial<McpCompiledConnectionConfig> | null;
-  transport?: McpResolvedTransportConfig | McpTransportConfig | null;
-  extraSecrets?: unknown[];
-};
 
 function buildConnectionKey(scope: string, slug: string): string {
   return `${scope}:${slug}`;
@@ -106,488 +97,6 @@ function buildOAuthCallbackUrl(slug: string, scope: string): string {
   return url.toString();
 }
 
-function addSecretValue(secrets: Set<string>, value: unknown): void {
-  if (typeof value !== 'string') {
-    return;
-  }
-
-  const secret = value.trim();
-  if (
-    secret.length < MIN_SECRET_REDACTION_LENGTH ||
-    secret === REDACTED_SHARED_SECRET ||
-    NON_SECRET_REDACTION_VALUES.has(secret.toLowerCase())
-  ) {
-    return;
-  }
-
-  secrets.add(secret);
-  secrets.add(encodeURIComponent(secret));
-  const encoded = new URLSearchParams({ value: secret }).toString().slice('value='.length);
-  secrets.add(encoded);
-}
-
-function collectUnknownSecretValues(value: unknown, secrets: Set<string>): void {
-  if (typeof value === 'string') {
-    addSecretValue(secrets, value);
-    return;
-  }
-
-  if (!value || typeof value !== 'object') {
-    return;
-  }
-
-  if (Array.isArray(value)) {
-    for (const item of value) {
-      collectUnknownSecretValues(item, secrets);
-    }
-    return;
-  }
-
-  for (const item of Object.values(value)) {
-    collectUnknownSecretValues(item, secrets);
-  }
-}
-
-function collectRecordSecretValues(values: Record<string, unknown> | undefined | null, secrets: Set<string>): void {
-  if (!values) {
-    return;
-  }
-
-  for (const value of Object.values(values)) {
-    addSecretValue(secrets, value);
-  }
-}
-
-function collectCompiledConfigSecretValues(
-  config: Partial<McpCompiledConnectionConfig> | undefined | null,
-  secrets: Set<string>
-): void {
-  if (!config) {
-    return;
-  }
-
-  collectRecordSecretValues(config.headers, secrets);
-  collectRecordSecretValues(config.query, secrets);
-  collectRecordSecretValues(config.env, secrets);
-  collectRecordSecretValues(config.defaultArgs, secrets);
-}
-
-function collectTransportSecretValues(
-  transport: McpResolvedTransportConfig | McpTransportConfig | undefined | null,
-  secrets: Set<string>
-): void {
-  if (!transport) {
-    return;
-  }
-
-  if (transport.type === 'http' || transport.type === 'sse') {
-    collectRecordSecretValues(transport.headers, secrets);
-    collectRawQuerySecretValues(transport.url, secrets);
-    try {
-      const parsed = new URL(transport.url);
-      parsed.searchParams.forEach((value) => {
-        addSecretValue(secrets, value);
-      });
-    } catch {
-      // Ignore non-URL transport strings; normalized transports should already be valid URLs.
-    }
-    return;
-  }
-
-  if (transport.type === 'stdio') {
-    collectRecordSecretValues(transport.env, secrets);
-  }
-}
-
-function collectRawQuerySecretValues(url: string, secrets: Set<string>): void {
-  const queryStart = url.indexOf('?');
-  if (queryStart === -1) {
-    return;
-  }
-
-  const hashStart = url.indexOf('#', queryStart);
-  const query = url.slice(queryStart + 1, hashStart === -1 ? undefined : hashStart);
-  for (const part of query.split('&')) {
-    if (!part) {
-      continue;
-    }
-
-    const valueStart = part.indexOf('=');
-    const rawValue = valueStart === -1 ? '' : part.slice(valueStart + 1);
-    if (!rawValue) {
-      continue;
-    }
-
-    addSecretValue(secrets, rawValue);
-    try {
-      addSecretValue(secrets, decodeURIComponent(rawValue));
-    } catch {
-      // Ignore malformed percent-encoding; the raw value is still redacted.
-    }
-  }
-}
-
-function collectMcpSecretValues(sources: McpErrorRedactionSource[]): Set<string> {
-  const secrets = new Set<string>();
-
-  for (const source of sources) {
-    collectUnknownSecretValues(source.values, secrets);
-    collectCompiledConfigSecretValues(source.compiledConfig, secrets);
-    collectTransportSecretValues(source.transport, secrets);
-    for (const secret of source.extraSecrets || []) {
-      collectUnknownSecretValues(secret, secrets);
-    }
-  }
-
-  return secrets;
-}
-
-function redactSecretValues(value: string, secrets: Set<string>): string {
-  return Array.from(secrets)
-    .sort((a, b) => b.length - a.length)
-    .reduce((current, secret) => current.split(secret).join(REDACTED_SHARED_SECRET), value);
-}
-
-function sanitizeUnknownValue(value: unknown, secrets: Set<string>): unknown {
-  if (typeof value === 'string') {
-    return redactSecretValues(value, secrets);
-  }
-
-  if (!value || typeof value !== 'object') {
-    return value;
-  }
-
-  if (Array.isArray(value)) {
-    return value.map((item) => sanitizeUnknownValue(item, secrets));
-  }
-
-  return Object.fromEntries(Object.entries(value).map(([key, item]) => [key, sanitizeUnknownValue(item, secrets)]));
-}
-
-export function sanitizeMcpErrorMessage(error: unknown, sources: McpErrorRedactionSource[] = []): string {
-  const message = error instanceof Error ? error.message : String(error);
-  return redactSecretValues(message, collectMcpSecretValues(sources));
-}
-
-export function sanitizeMcpResult<T>(result: T, sources: McpErrorRedactionSource[] = []): T {
-  return sanitizeUnknownValue(result, collectMcpSecretValues(sources)) as T;
-}
-
-export function redactSharedConfigSecrets<T extends { sharedConfig?: McpSharedConnectionConfig | null }>(config: T): T {
-  const sharedConfig = config.sharedConfig ? { ...config.sharedConfig } : undefined;
-  let changed = false;
-
-  if (sharedConfig) {
-    for (const section of SHARED_SECRET_SECTIONS) {
-      const values = sharedConfig[section];
-      if (!values || typeof values !== 'object') {
-        continue;
-      }
-
-      sharedConfig[section] = Object.fromEntries(
-        Object.keys(values).map((key) => [key, REDACTED_SHARED_SECRET])
-      ) as Record<string, string>;
-      changed = true;
-    }
-  }
-
-  if (!changed) {
-    return config;
-  }
-
-  return {
-    ...config,
-    ...(sharedConfig ? { sharedConfig } : {}),
-  };
-}
-
-function redactSecretRecord(values: Record<string, string> | undefined): Record<string, string> | undefined {
-  return values ? Object.fromEntries(Object.keys(values).map((key) => [key, REDACTED_SHARED_SECRET])) : values;
-}
-
-function redactTransportUrlQuery(url: string): string {
-  const queryStart = url.indexOf('?');
-  if (queryStart === -1) {
-    return url;
-  }
-
-  const hashStart = url.indexOf('#', queryStart);
-  const base = url.slice(0, queryStart);
-  const query = url.slice(queryStart + 1, hashStart === -1 ? undefined : hashStart);
-  const hash = hashStart === -1 ? '' : url.slice(hashStart);
-  if (!query) {
-    return url;
-  }
-
-  const redactedQuery = query
-    .split('&')
-    .map((part) => {
-      if (!part) {
-        return part;
-      }
-
-      const valueStart = part.indexOf('=');
-      const key = valueStart === -1 ? part : part.slice(0, valueStart);
-      return `${key}=${REDACTED_SHARED_SECRET}`;
-    })
-    .join('&');
-
-  return `${base}?${redactedQuery}${hash}`;
-}
-
-export function redactMcpConfigSecrets<
-  T extends { sharedConfig?: McpSharedConnectionConfig | null; transport?: McpTransportConfig | null }
->(config: T): T {
-  const redacted = redactSharedConfigSecrets(config);
-  if (!redacted.transport) {
-    return redacted;
-  }
-
-  if (redacted.transport.type === 'http' || redacted.transport.type === 'sse') {
-    const url = redactTransportUrlQuery(redacted.transport.url);
-    if (!redacted.transport.headers && url === redacted.transport.url) {
-      return redacted;
-    }
-
-    return {
-      ...redacted,
-      transport: {
-        ...redacted.transport,
-        url,
-        ...(redacted.transport.headers ? { headers: redactSecretRecord(redacted.transport.headers) } : {}),
-      },
-    };
-  }
-
-  if (!redacted.transport.env) {
-    return redacted;
-  }
-
-  return {
-    ...redacted,
-    transport: {
-      ...redacted.transport,
-      env: redactSecretRecord(redacted.transport.env),
-    },
-  };
-}
-
-function restoreRedactedSharedConfig(
-  nextSharedConfig: McpServerConfigRecord['sharedConfig'],
-  existingSharedConfig: McpServerConfigRecord['sharedConfig']
-): McpServerConfigRecord['sharedConfig'] {
-  if (!nextSharedConfig || !existingSharedConfig) {
-    return nextSharedConfig;
-  }
-
-  let changed = false;
-  const sharedConfig = { ...nextSharedConfig };
-
-  for (const section of SHARED_SECRET_SECTIONS) {
-    const nextValues = sharedConfig[section];
-    const existingValues = existingSharedConfig[section];
-    if (!nextValues || !existingValues) {
-      continue;
-    }
-
-    const restoredValues = { ...nextValues };
-    for (const [key, value] of Object.entries(restoredValues)) {
-      if (value === REDACTED_SHARED_SECRET && existingValues[key]) {
-        restoredValues[key] = existingValues[key];
-        changed = true;
-      }
-    }
-
-    sharedConfig[section] = restoredValues;
-  }
-
-  return changed ? sharedConfig : nextSharedConfig;
-}
-
-function restoreRedactedSecretRecord(
-  nextValues: Record<string, string> | undefined,
-  existingValues: Record<string, string> | undefined
-): Record<string, string> | undefined {
-  if (!nextValues || !existingValues) {
-    return nextValues;
-  }
-
-  let changed = false;
-  const restoredValues = { ...nextValues };
-  for (const [key, value] of Object.entries(restoredValues)) {
-    if (value === REDACTED_SHARED_SECRET && existingValues[key]) {
-      restoredValues[key] = existingValues[key];
-      changed = true;
-    }
-  }
-
-  return changed ? restoredValues : nextValues;
-}
-
-function parseUrlQueryParts(url: string): { base: string; query: string; hash: string } | null {
-  const queryStart = url.indexOf('?');
-  if (queryStart === -1) {
-    return null;
-  }
-
-  const hashStart = url.indexOf('#', queryStart);
-  return {
-    base: url.slice(0, queryStart),
-    query: url.slice(queryStart + 1, hashStart === -1 ? undefined : hashStart),
-    hash: hashStart === -1 ? '' : url.slice(hashStart),
-  };
-}
-
-function restoreRedactedTransportUrlQuery(nextUrl: string, existingUrl: string): string {
-  const nextParts = parseUrlQueryParts(nextUrl);
-  const existingParts = parseUrlQueryParts(existingUrl);
-  if (!nextParts || !existingParts || !nextParts.query || !existingParts.query) {
-    return nextUrl;
-  }
-
-  const existingValuesByKey = new Map<string, string[]>();
-  for (const part of existingParts.query.split('&')) {
-    const valueStart = part.indexOf('=');
-    if (valueStart === -1) {
-      continue;
-    }
-
-    const key = part.slice(0, valueStart);
-    const value = part.slice(valueStart + 1);
-    existingValuesByKey.set(key, [...(existingValuesByKey.get(key) || []), value]);
-  }
-
-  let changed = false;
-  const restoredQuery = nextParts.query
-    .split('&')
-    .map((part) => {
-      const valueStart = part.indexOf('=');
-      if (valueStart === -1) {
-        return part;
-      }
-
-      const key = part.slice(0, valueStart);
-      const value = part.slice(valueStart + 1);
-      if (value !== REDACTED_SHARED_SECRET) {
-        return part;
-      }
-
-      const existingValues = existingValuesByKey.get(key);
-      const existingValue = existingValues?.shift();
-      if (!existingValue) {
-        return part;
-      }
-
-      changed = true;
-      return `${key}=${existingValue}`;
-    })
-    .join('&');
-
-  return changed ? `${nextParts.base}?${restoredQuery}${nextParts.hash}` : nextUrl;
-}
-
-function transportTargetChanged(nextTransport: McpTransportConfig, existingTransport: McpTransportConfig): boolean {
-  if (nextTransport.type !== existingTransport.type) {
-    return true;
-  }
-
-  if (nextTransport.type === 'http' || nextTransport.type === 'sse') {
-    if (existingTransport.type !== 'http' && existingTransport.type !== 'sse') {
-      return true;
-    }
-
-    try {
-      const nextUrl = new URL(nextTransport.url);
-      const existingUrl = new URL(existingTransport.url);
-      return (
-        nextUrl.protocol !== existingUrl.protocol ||
-        nextUrl.host !== existingUrl.host ||
-        nextUrl.pathname !== existingUrl.pathname
-      );
-    } catch {
-      return nextTransport.url.split('?')[0] !== existingTransport.url.split('?')[0];
-    }
-  }
-
-  if (existingTransport.type !== 'stdio') {
-    return true;
-  }
-
-  return (
-    nextTransport.command !== existingTransport.command ||
-    JSON.stringify(nextTransport.args || []) !== JSON.stringify(existingTransport.args || [])
-  );
-}
-
-function recordContainsRedactedSecret(values: Record<string, string> | undefined): boolean {
-  return !!values && Object.values(values).some((value) => value === REDACTED_SHARED_SECRET);
-}
-
-function transportContainsRedactedSecret(transport: McpTransportConfig): boolean {
-  if (transport.type === 'http' || transport.type === 'sse') {
-    let redactedQueryValue = false;
-    new URLSearchParams(parseUrlQueryParts(transport.url)?.query || '').forEach((value) => {
-      if (value === REDACTED_SHARED_SECRET) {
-        redactedQueryValue = true;
-      }
-    });
-    return recordContainsRedactedSecret(transport.headers) || redactedQueryValue;
-  }
-
-  return recordContainsRedactedSecret(transport.env);
-}
-
-function sharedConfigContainsRedactedSecret(sharedConfig: McpSharedConnectionConfig): boolean {
-  return SHARED_SECRET_SECTIONS.some((section) => recordContainsRedactedSecret(sharedConfig[section]));
-}
-
-function sharedConfigContainsSecretValue(sharedConfig: McpSharedConnectionConfig): boolean {
-  return SHARED_SECRET_SECTIONS.some((section) => {
-    const values = sharedConfig[section];
-    return !!values && Object.values(values).length > 0;
-  });
-}
-
-function restoreRedactedTransport(
-  nextTransport: McpTransportConfig,
-  existingTransport: McpTransportConfig
-): McpTransportConfig {
-  if (nextTransport.type !== existingTransport.type) {
-    if (transportContainsRedactedSecret(nextTransport)) {
-      throw new Error('Re-enter MCP transport secrets when changing the MCP transport target');
-    }
-
-    return nextTransport;
-  }
-
-  if (nextTransport.type === 'http' || nextTransport.type === 'sse') {
-    if (existingTransport.type !== 'http' && existingTransport.type !== 'sse') {
-      return nextTransport;
-    }
-
-    if (transportTargetChanged(nextTransport, existingTransport) && transportContainsRedactedSecret(nextTransport)) {
-      throw new Error('Re-enter MCP transport secrets when changing the MCP transport target');
-    }
-
-    const headers = restoreRedactedSecretRecord(nextTransport.headers, existingTransport.headers);
-    const url = restoreRedactedTransportUrlQuery(nextTransport.url, existingTransport.url);
-    return headers === nextTransport.headers && url === nextTransport.url
-      ? nextTransport
-      : { ...nextTransport, url, headers };
-  }
-
-  if (existingTransport.type !== 'stdio') {
-    return nextTransport;
-  }
-
-  if (transportTargetChanged(nextTransport, existingTransport) && transportContainsRedactedSecret(nextTransport)) {
-    throw new Error('Re-enter MCP transport secrets when changing the MCP transport target');
-  }
-
-  const env = restoreRedactedSecretRecord(nextTransport.env, existingTransport.env);
-  return env === nextTransport.env ? nextTransport : { ...nextTransport, env };
-}
-
 export class McpConfigService {
   private async listEffectiveConfigs(repoFullName?: string): Promise<McpServerConfig[]> {
     const [globalConfigs, repoConfigs] = await Promise.all([
diff --git a/src/server/services/agentRuntime/mcp/mcpConfigSecrets.ts b/src/server/services/agentRuntime/mcp/mcpConfigSecrets.ts
new file mode 100644
index 00000000..4b4f150d
--- /dev/null
+++ b/src/server/services/agentRuntime/mcp/mcpConfigSecrets.ts
@@ -0,0 +1,532 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// MCP config secret hygiene: collect, redact for display/logs, restore on update, and detect presence.
+
+import type {
+  McpCompiledConnectionConfig,
+  McpResolvedTransportConfig,
+  McpServerConfigRecord,
+  McpSharedConnectionConfig,
+  McpTransportConfig,
+} from './types';
+
+const REDACTED_SHARED_SECRET = '******'; // placeholder substituted for secret values
+const MIN_SECRET_REDACTION_LENGTH = 4; // trimmed strings shorter than this are never registered as secrets
+const NON_SECRET_REDACTION_VALUES = new Set([
+  'bearer', // lowercase keywords that addSecretValue never registers as secrets
+  'basic',
+  'false',
+  'http',
+  'https',
+  'none',
+  'null',
+  'oauth',
+  'true',
+]);
+const SHARED_SECRET_SECTIONS: (keyof McpSharedConnectionConfig)[] = ['headers', 'query', 'env', 'defaultArgs'];
+
+export type McpErrorRedactionSource = {
+  values?: Record<string, unknown> | null; // scanned recursively for string values
+  compiledConfig?: Partial<McpCompiledConnectionConfig> | null; // headers, query, env and defaultArgs values
+  transport?: McpResolvedTransportConfig | McpTransportConfig | null; // http/sse headers + URL query, or stdio env
+  extraSecrets?: unknown[]; // each entry scanned recursively for string values
+};
+
+/**
+ * Registers the trimmed `value` as a secret to redact, plus its percent-encoded and form-encoded spellings.
+ * Skips non-strings, values shorter than MIN_SECRET_REDACTION_LENGTH, the placeholder, and common keywords.
+ */
+function addSecretValue(secrets: Set<string>, value: unknown): void {
+  if (typeof value !== 'string') {
+    return;
+  }
+  const secret = value.trim();
+  const isPlaceholder = secret === REDACTED_SHARED_SECRET;
+  const isKeyword = NON_SECRET_REDACTION_VALUES.has(secret.toLowerCase());
+  if (secret.length < MIN_SECRET_REDACTION_LENGTH || isPlaceholder || isKeyword) {
+    return;
+  }
+
+  secrets.add(secret);
+  secrets.add(encodeURIComponent(secret));
+  secrets.add(new URLSearchParams({ value: secret }).toString().slice('value='.length));
+}
+
+/**
+ * Depth-first walk over an arbitrary value that registers every string it finds as a secret.
+ * Arrays and objects are descended into; every other non-string value is ignored.
+ *
+ * @param value - Arbitrary value to scan.
+ * @param secrets - Accumulator that receives each discovered secret spelling.
+ */
+function collectUnknownSecretValues(value: unknown, secrets: Set<string>): void {
+  if (typeof value === 'string') {
+    addSecretValue(secrets, value);
+    return;
+  }
+
+  if (typeof value !== 'object' || value === null) {
+    return;
+  }
+
+  // Arrays contribute their elements, objects their own enumerable property values.
+  const children: unknown[] = Array.isArray(value) ? value : Object.values(value);
+  children.forEach((child) => collectUnknownSecretValues(child, secrets));
+}
+
+/**
+ * Registers each top-level value of `values` as a secret; nested structures are not descended into.
+ * A missing record is a no-op.
+ */
+function collectRecordSecretValues(values: Record<string, unknown> | undefined | null, secrets: Set<string>): void {
+  Object.values(values ?? {}).forEach((entry) => {
+    addSecretValue(secrets, entry);
+  });
+}
+
+/** Registers the header, query, env and default-argument values of a compiled connection config as secrets. */
+function collectCompiledConfigSecretValues(
+  config: Partial<McpCompiledConnectionConfig> | undefined | null,
+  secrets: Set<string>
+): void {
+  if (!config) {
+    return;
+  }
+
+  for (const section of [config.headers, config.query, config.env, config.defaultArgs]) {
+    collectRecordSecretValues(section, secrets);
+  }
+}
+
+/** Registers a transport's secrets: http/sse header and URL query values, or stdio env values. */
+function collectTransportSecretValues(
+  transport: McpResolvedTransportConfig | McpTransportConfig | undefined | null,
+  secrets: Set<string>
+): void {
+  if (!transport) {
+    return;
+  }
+
+  if (transport.type === 'stdio') {
+    collectRecordSecretValues(transport.env, secrets);
+    return;
+  }
+
+  if (transport.type !== 'http' && transport.type !== 'sse') {
+    return;
+  }
+
+  collectRecordSecretValues(transport.headers, secrets);
+  collectRawQuerySecretValues(transport.url, secrets);
+  try {
+    new URL(transport.url).searchParams.forEach((decoded) => addSecretValue(secrets, decoded));
+  } catch {
+    // Ignore non-URL transport strings; normalized transports should already be valid URLs.
+  }
+}
+
+/**
+ * Registers query-string values exactly as written in `url`, and again percent-decoded when decodable.
+ * Operates on the raw string without URL parsing; everything from the first "#" after "?" is ignored.
+ * Parameters without a value are skipped.
+ */
+function collectRawQuerySecretValues(url: string, secrets: Set<string>): void {
+  const queryStart = url.indexOf('?');
+  if (queryStart === -1) {
+    return;
+  }
+
+  const hashStart = url.indexOf('#', queryStart);
+  const query = url.slice(queryStart + 1, hashStart === -1 ? undefined : hashStart);
+  const rawValues = query
+    .split('&')
+    .map((pair) => (pair.includes('=') ? pair.slice(pair.indexOf('=') + 1) : ''))
+    .filter((raw) => raw !== '');
+
+  for (const raw of rawValues) {
+    addSecretValue(secrets, raw);
+    try {
+      addSecretValue(secrets, decodeURIComponent(raw));
+    } catch {
+      // Ignore malformed percent-encoding; the raw value is still redacted.
+    }
+  }
+}
+
+// Builds the set of secret spellings contributed by every redaction source:
+// loose values, compiled config sections, the transport, and any extra secrets.
+function collectMcpSecretValues(sources: McpErrorRedactionSource[]): Set<string> {
+  const collected = new Set<string>();
+
+  for (const { values, compiledConfig, transport, extraSecrets } of sources) {
+    collectUnknownSecretValues(values, collected);
+    collectCompiledConfigSecretValues(compiledConfig, collected);
+    collectTransportSecretValues(transport, collected);
+    (extraSecrets || []).forEach((extra) => collectUnknownSecretValues(extra, collected));
+  }
+
+  return collected;
+}
+
+function redactSecretValues(value: string, secrets: Set<string>): string {
+  // Longest secrets first, so a secret that contains a shorter one is masked as a whole.
+  const longestFirst = Array.from(secrets).sort((left, right) => right.length - left.length);
+  return longestFirst.reduce((text, secret) => text.split(secret).join(REDACTED_SHARED_SECRET), value);
+}
+
+/** Returns `value` with every string value masked; arrays and objects are rebuilt, keys are left untouched. */
+function sanitizeUnknownValue(value: unknown, secrets: Set<string>): unknown {
+  if (typeof value === 'string') {
+    return redactSecretValues(value, secrets);
+  }
+  if (typeof value !== 'object' || value === null) {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    return value.map((element) => sanitizeUnknownValue(element, secrets));
+  }
+
+  const sanitizedEntries = Object.entries(value).map(([key, child]) => [key, sanitizeUnknownValue(child, secrets)]);
+  return Object.fromEntries(sanitizedEntries);
+}
+
+/** Returns the error's message (Error.message, else String(error)) with secrets from `sources` masked. */
+export function sanitizeMcpErrorMessage(error: unknown, sources: McpErrorRedactionSource[] = []): string {
+  return redactSecretValues(error instanceof Error ? error.message : String(error), collectMcpSecretValues(sources));
+}
+
+export function sanitizeMcpResult<T>(result: T, sources: McpErrorRedactionSource[] = []): T {
+  return sanitizeUnknownValue(result, collectMcpSecretValues(sources)) as T; // cast assumes a JSON-like result
+}
+
+/**
+ * Returns `config` with every value in the secret-bearing sharedConfig sections replaced by the
+ * redaction placeholder; keys are kept.
+ * The input is never mutated.
+ *
+ * @param config - Object that may carry a `sharedConfig`.
+ * @returns A redacted copy, or `config` itself when no section needed masking.
+ */
+export function redactSharedConfigSecrets<T extends { sharedConfig?: McpSharedConnectionConfig | null }>(config: T): T {
+  if (!config.sharedConfig) {
+    return config;
+  }
+
+  const sharedConfig = { ...config.sharedConfig };
+  let changed = false;
+
+  for (const section of SHARED_SECRET_SECTIONS) {
+    const values = sharedConfig[section];
+    if (values && typeof values === 'object') {
+      const maskedEntries = Object.keys(values).map((key) => [key, REDACTED_SHARED_SECRET]);
+      sharedConfig[section] = Object.fromEntries(maskedEntries) as Record<string, string>;
+      changed = true;
+    }
+  }
+
+  return changed ? { ...config, sharedConfig } : config;
+}
+
+function redactSecretRecord(values: Record<string, string> | undefined): Record<string, string> | undefined {
+  return values && Object.fromEntries(Object.keys(values).map((name) => [name, REDACTED_SHARED_SECRET]));
+}
+
+/**
+ * Masks every query-string value in `url`, keeping parameter names, their order, and any fragment.
+ * Operates on the raw string without URL parsing. A parameter written without "=" is emitted as
+ * `name=<placeholder>`; empty segments between "&" separators are kept as they are.
+ *
+ * @param url - URL whose query values should be hidden.
+ * @returns The masked URL, or `url` unchanged when it has no query string.
+ */
+function redactTransportUrlQuery(url: string): string {
+  const queryStart = url.indexOf('?');
+  if (queryStart === -1) {
+    return url;
+  }
+
+  const hashStart = url.indexOf('#', queryStart);
+  const queryEnd = hashStart === -1 ? url.length : hashStart;
+  const query = url.slice(queryStart + 1, queryEnd);
+  if (!query) {
+    return url;
+  }
+
+  const maskedParts: string[] = [];
+  for (const part of query.split('&')) {
+    const name = part.split('=', 1)[0];
+    maskedParts.push(part ? `${name}=${REDACTED_SHARED_SECRET}` : part);
+  }
+
+  return `${url.slice(0, queryStart)}?${maskedParts.join('&')}${url.slice(queryEnd)}`;
+}
+
+export function redactMcpConfigSecrets<
+  T extends { sharedConfig?: McpSharedConnectionConfig | null; transport?: McpTransportConfig | null }
+>(config: T): T {
+  const redacted = redactSharedConfigSecrets(config); // sharedConfig sections are masked first
+  if (!redacted.transport) {
+    return redacted;
+  }
+  // http/sse transports: mask URL query values and, when present, header values.
+  if (redacted.transport.type === 'http' || redacted.transport.type === 'sse') {
+    const url = redactTransportUrlQuery(redacted.transport.url);
+    if (!redacted.transport.headers && url === redacted.transport.url) {
+      return redacted;
+    }
+
+    return {
+      ...redacted,
+      transport: {
+        ...redacted.transport,
+        url,
+        ...(redacted.transport.headers ? { headers: redactSecretRecord(redacted.transport.headers) } : {}),
+      },
+    };
+  }
+  // Any other transport type: only env values are masked, and only when env is present.
+  if (!redacted.transport.env) {
+    return redacted;
+  }
+
+  return {
+    ...redacted,
+    transport: {
+      ...redacted.transport,
+      env: redactSecretRecord(redacted.transport.env),
+    },
+  };
+}
+
+/**
+ * Swaps redaction placeholders in `nextSharedConfig` back to the truthy value stored under the same
+ * section and key of `existingSharedConfig`; returns `nextSharedConfig` itself when nothing was restored.
+ */
+export function restoreRedactedSharedConfig(
+  nextSharedConfig: McpServerConfigRecord['sharedConfig'],
+  existingSharedConfig: McpServerConfigRecord['sharedConfig']
+): McpServerConfigRecord['sharedConfig'] {
+  if (!nextSharedConfig || !existingSharedConfig) {
+    return nextSharedConfig;
+  }
+
+  const merged = { ...nextSharedConfig };
+  let restoredAny = false;
+  for (const section of SHARED_SECRET_SECTIONS) {
+    const incoming = merged[section];
+    const stored = existingSharedConfig[section];
+    if (incoming && stored) {
+      const patched = { ...incoming };
+      for (const key of Object.keys(patched)) {
+        if (patched[key] === REDACTED_SHARED_SECRET && stored[key]) {
+          patched[key] = stored[key];
+          restoredAny = true;
+        }
+      }
+      merged[section] = patched;
+    }
+  }
+
+  return restoredAny ? merged : nextSharedConfig;
+}
+
+// Restores placeholder values from `existingValues` (truthy matches only); same reference back if none restored.
+function restoreRedactedSecretRecord(
+  nextValues: Record<string, string> | undefined,
+  existingValues: Record<string, string> | undefined
+): Record<string, string> | undefined {
+  if (!nextValues || !existingValues) {
+    return nextValues;
+  }
+
+  const merged = { ...nextValues };
+  let restoredCount = 0;
+  for (const key of Object.keys(nextValues)) {
+    if (nextValues[key] === REDACTED_SHARED_SECRET && existingValues[key]) {
+      merged[key] = existingValues[key];
+      restoredCount += 1;
+    }
+  }
+  return restoredCount > 0 ? merged : nextValues;
+}
+
+// Splits a URL into the text before `?`, the raw query (without `?`) and the `#fragment`; null when there is no `?`.
+function parseUrlQueryParts(url: string): { base: string; query: string; hash: string } | null {
+  const questionMark = url.indexOf('?');
+  if (questionMark < 0) {
+    return null;
+  }
+
+  const fragmentAt = url.indexOf('#', questionMark);
+  const queryEnd = fragmentAt < 0 ? url.length : fragmentAt;
+  const base = url.slice(0, questionMark);
+  const query = url.slice(questionMark + 1, queryEnd);
+  return { base, query, hash: url.slice(queryEnd) };
+}
+
+/**
+ * Puts real query values back into `nextUrl` wherever it still carries the redaction placeholder.
+ * Placeholders are matched to `existingUrl` values by key; repeated keys are consumed in order of appearance.
+ * `nextUrl` is returned unchanged when either URL has no query or nothing could be restored.
+ */
+function restoreRedactedTransportUrlQuery(nextUrl: string, existingUrl: string): string {
+  const incoming = parseUrlQueryParts(nextUrl);
+  const stored = parseUrlQueryParts(existingUrl);
+  if (!incoming?.query || !stored?.query) {
+    return nextUrl;
+  }
+
+  // key -> queue of stored values, in the order they appear in the existing URL
+  const storedQueues = new Map<string, string[]>();
+  for (const pair of stored.query.split('&')) {
+    const eq = pair.indexOf('=');
+    if (eq >= 0) {
+      const name = pair.slice(0, eq);
+      const queue = storedQueues.get(name) || [];
+      queue.push(pair.slice(eq + 1));
+      storedQueues.set(name, queue);
+    }
+  }
+
+  let restoredAny = false;
+  const rebuilt: string[] = [];
+  for (const pair of incoming.query.split('&')) {
+    const eq = pair.indexOf('=');
+    const name = eq < 0 ? pair : pair.slice(0, eq);
+    const isPlaceholder = eq >= 0 && pair.slice(eq + 1) === REDACTED_SHARED_SECRET;
+
+    // A queue entry is consumed for every placeholder, even when the stored value is empty,
+    // so later duplicates of the same key stay aligned with their stored counterparts.
+    const original = isPlaceholder ? storedQueues.get(name)?.shift() : undefined;
+    if (original) {
+      restoredAny = true;
+      rebuilt.push(`${name}=${original}`);
+    } else {
+      rebuilt.push(pair);
+    }
+  }
+
+  if (!restoredAny) {
+    return nextUrl;
+  }
+  return `${incoming.base}?${rebuilt.join('&')}${incoming.hash}`;
+}
+
+/**
+ * Reports whether two transports point at different targets; a differing `type` always counts as changed.
+ * http/sse: protocol, host or pathname differ (query and fragment are ignored when both URLs parse).
+ * Otherwise: `command` or the JSON-serialized `args` differ.
+ */
+export function transportTargetChanged(
+  nextTransport: McpTransportConfig,
+  existingTransport: McpTransportConfig
+): boolean {
+  if (nextTransport.type !== existingTransport.type) {
+    return true;
+  }
+
+  if (nextTransport.type !== 'http' && nextTransport.type !== 'sse') {
+    if (existingTransport.type !== 'stdio') {
+      return true;
+    }
+    const sameCommand = nextTransport.command === existingTransport.command;
+    const sameArgs = JSON.stringify(nextTransport.args || []) === JSON.stringify(existingTransport.args || []);
+    return !(sameCommand && sameArgs);
+  }
+  if (existingTransport.type !== 'http' && existingTransport.type !== 'sse') {
+    return true;
+  }
+
+  try {
+    const next = new URL(nextTransport.url);
+    const existing = new URL(existingTransport.url);
+    const sameOrigin = next.protocol === existing.protocol && next.host === existing.host;
+    return !(sameOrigin && next.pathname === existing.pathname);
+  } catch {
+    // Unparseable URL: compare the raw text before the first `?` instead.
+    return nextTransport.url.split('?')[0] !== existingTransport.url.split('?')[0];
+  }
+}
+
+function recordContainsRedactedSecret(values: Record<string, string> | undefined): boolean {
+  return Object.values(values ?? {}).includes(REDACTED_SHARED_SECRET); // true when any value is still the placeholder
+}
+
+// True when the placeholder remains in headers or a decoded URL query value (http/sse), or in `env` (other types).
+function transportContainsRedactedSecret(transport: McpTransportConfig): boolean {
+  if (transport.type !== 'http' && transport.type !== 'sse') {
+    return recordContainsRedactedSecret(transport.env);
+  }
+  if (recordContainsRedactedSecret(transport.headers)) {
+    return true;
+  }
+
+  const decodedValues: string[] = [];
+  new URLSearchParams(parseUrlQueryParts(transport.url)?.query || '').forEach((v) => decodedValues.push(v));
+  return decodedValues.includes(REDACTED_SHARED_SECRET);
+}
+
+export function sharedConfigContainsRedactedSecret(sharedConfig: McpSharedConnectionConfig): boolean {
+  return SHARED_SECRET_SECTIONS.some((name) => recordContainsRedactedSecret(sharedConfig[name])); // any section suffices
+}
+
+export function sharedConfigContainsSecretValue(sharedConfig: McpSharedConnectionConfig): boolean {
+  for (const section of SHARED_SECRET_SECTIONS) {
+    if (Object.keys(sharedConfig[section] || {}).length > 0) return true; // section exists and is non-empty
+  }
+  return false;
+}
+
+/**
+ * Rebuilds `nextTransport` with stored secrets from `existingTransport` wherever the redaction placeholder remains.
+ * @throws Error when the transport target changed while `nextTransport` still contains placeholders.
+ */
+export function restoreRedactedTransport(
+  nextTransport: McpTransportConfig,
+  existingTransport: McpTransportConfig
+): McpTransportConfig {
+  const assertSecretsReentered = (targetChanged: boolean): void => {
+    if (targetChanged && transportContainsRedactedSecret(nextTransport)) {
+      throw new Error('Re-enter MCP transport secrets when changing the MCP transport target');
+    }
+  };
+
+  if (nextTransport.type !== existingTransport.type) {
+    assertSecretsReentered(true);
+    return nextTransport;
+  }
+
+  if (nextTransport.type === 'http' || nextTransport.type === 'sse') {
+    if (existingTransport.type !== 'http' && existingTransport.type !== 'sse') {
+      return nextTransport;
+    }
+
+    assertSecretsReentered(transportTargetChanged(nextTransport, existingTransport));
+    const url = restoreRedactedTransportUrlQuery(nextTransport.url, existingTransport.url);
+    const headers = restoreRedactedSecretRecord(nextTransport.headers, existingTransport.headers);
+    const untouched = url === nextTransport.url && headers === nextTransport.headers;
+    return untouched ? nextTransport : { ...nextTransport, url, headers };
+  }
+
+  if (existingTransport.type !== 'stdio') {
+    return nextTransport;
+  }
+
+  assertSecretsReentered(transportTargetChanged(nextTransport, existingTransport));
+  const env = restoreRedactedSecretRecord(nextTransport.env, existingTransport.env);
+  return env === nextTransport.env ? nextTransport : { ...nextTransport, env };
+}
diff --git a/src/server/services/agentRuntime/mcp/oauthProvider.ts b/src/server/services/agentRuntime/mcp/oauthProvider.ts
index d5103b81..783c53c0 100644
--- a/src/server/services/agentRuntime/mcp/oauthProvider.ts
+++ b/src/server/services/agentRuntime/mcp/oauthProvider.ts
@@ -22,7 +22,9 @@ import type { McpDiscoveredTool, McpOauthAuthConfig, McpStoredUserConnectionStat
 type PersistedOAuthState = Extract<McpStoredUserConnectionState, { type: 'oauth' }>;
 
 export class OAuthAuthorizationRequiredError extends Error {
-  constructor(message = 'OAuth authorization is required for this MCP connection') {
+  constructor(
+    message = 'MCP OAuth connection expired or needs authorization. Reconnect this MCP connection to continue.'
+  ) {
     super(message);
     this.name = 'OAuthAuthorizationRequiredError';
   }
diff --git a/src/server/services/agentSession.ts b/src/server/services/agentSession.ts
index 5cf30eef..1f44eeda 100644
--- a/src/server/services/agentSession.ts
+++ b/src/server/services/agentSession.ts
@@ -167,7 +167,7 @@ export function buildAgentSessionPodName(sessionUuid: string, buildUuid?: string
   return normalizeKubernetesLabelValue(`agent-${identifier}`.toLowerCase()).replace(/[_.]/g, '-');
 }
 
-export function buildChatSessionNamespace(sessionUuid: string): string {
+function buildChatSessionNamespace(sessionUuid: string): string {
   return normalizeKubernetesLabelValue(`chat-${sessionUuid.slice(0, 8)}`.toLowerCase()).replace(/[_.]/g, '-');
 }
 
@@ -838,7 +838,7 @@ export interface CreateChatSessionOptions {
   model?: string;
 }
 
-export interface CreateChatRuntimeOptions {
+interface CreateChatRuntimeOptions {
   sessionId: string;
   userId: string;
   userIdentity?: RequestUserIdentity;
@@ -1196,6 +1196,8 @@ export default class AgentSessionService {
     const workspaceAction = opts.workspaceAction || (failureOrigin === 'resume' ? 'resume' : 'provision');
     const claimSandboxStatus = failureOrigin === 'resume' ? 'resuming' : 'provisioning';
     const failureRetryable = opts.failureRetryable ?? workspaceAction === 'retry';
+    // A resume reuses the persisted PVC; its data must never be deleted on a failed resume.
+    const resumeReusesExistingPvc = failureOrigin === 'resume' || Boolean(session.pvcName);
     let namespace = chatNamespace;
     let podName = fallbackPodName;
     let pvcName = fallbackPvcName;
@@ -1403,11 +1405,14 @@ export default class AgentSessionService {
         redis.del(`${SESSION_REDIS_PREFIX}${session.uuid}`).catch(() => {}),
       ];
       if (resourcesStarted) {
-        cleanupTasks.push(
-          deleteAgentRuntimeResources(namespace, podName, apiKeySecretName).catch(() => {}),
-          ownsPvc ? deleteAgentPvc(namespace, pvcName).catch(() => {}) : Promise.resolve(),
-          deleteNamespace(namespace).catch(() => {})
-        );
+        cleanupTasks.push(deleteAgentRuntimeResources(namespace, podName, apiKeySecretName).catch(() => {}));
+        // Never destroy a resume's persisted PVC/namespace on a transient failure (sr-1).
+        if (!resumeReusesExistingPvc) {
+          cleanupTasks.push(
+            ownsPvc ? deleteAgentPvc(namespace, pvcName).catch(() => {}) : Promise.resolve(),
+            deleteNamespace(namespace).catch(() => {})
+          );
+        }
       }
       await Promise.all(cleanupTasks);
 
@@ -1515,18 +1520,20 @@ export default class AgentSessionService {
     const redis = RedisClient.getInstance().getRedis();
     const apiKeySecretName = `agent-secret-${session.uuid.slice(0, 8)}`;
     const suspendClaimedAt = new Date().toISOString();
-    const { session: claimedSession } = await WorkspaceRuntimeStateService.claimWorkspaceAction(session.id, {
+    // sr-3: capture the live pod before the claim nulls podName, so a crash mid-suspend never leaves a dead URL.
+    const namespace = session.namespace;
+    const podName = session.podName;
+    await WorkspaceRuntimeStateService.claimWorkspaceAction(session.id, {
       action: 'suspend',
       claimedAt: suspendClaimedAt,
       sessionPatch: {
         status: 'active',
         chatStatus: AgentChatStatus.READY,
         workspaceStatus: AgentWorkspaceStatus.READY,
+        podName: null,
       } as unknown as Partial<AgentSession>,
       sandboxStatus: 'suspending',
     });
-    const namespace = claimedSession.namespace || session.namespace;
-    const podName = claimedSession.podName || session.podName;
     try {
       await deleteAgentRuntimeResources(namespace, podName, apiKeySecretName);
     } catch (error) {
@@ -1606,6 +1613,8 @@ export default class AgentSessionService {
       ...opts,
       failureOrigin: 'resume',
       failureStage: 'resume',
+      // Resume failures are retryable: the PVC persists (sr-1), so FAILED→retry recovers it.
+      failureRetryable: true,
     });
   }
 
@@ -2502,7 +2511,7 @@ export default class AgentSessionService {
     const [session, effectiveConfig, approvalPolicy] = await Promise.all([
       AgentSession.query()
         .findOne({ uuid: sessionId })
-        .select('id', 'namespace', 'buildUuid', 'skillPlan', 'sessionKind'),
+        .select('id', 'namespace', 'buildUuid', 'skillPlan', 'sessionKind', 'workspaceStatus', 'podName'),
       AgentSessionConfigService.getInstance().getEffectiveConfig(repoFullName),
       AgentPolicyService.getEffectivePolicy(repoFullName),
     ]);
@@ -2523,7 +2532,11 @@ export default class AgentSessionService {
         namespace: session.namespace || null,
         buildUuid: session.buildUuid,
       });
-      const toolLines = session.namespace
+      const hasReadyWorkspace =
+        session.workspaceStatus === AgentWorkspaceStatus.READY &&
+        Boolean(session.namespace) &&
+        Boolean(session.podName);
+      const toolLines = hasReadyWorkspace
         ? buildSessionWorkspacePromptLines({
             approvalPolicy,
             toolRules: effectiveConfig.toolRules,
@@ -2535,12 +2548,18 @@ export default class AgentSessionService {
         resolvedConfiguredPrompt,
         buildAgentSessionDynamicSystemPrompt({
           ...context,
+          // Fall back to build.namespace so build-context chats still emit the namespace line.
+          namespace: context.namespace || context.build?.namespace || null,
           toolLines,
         })
       );
     } catch (error) {
+      // Disclose missing grounding so the model gathers state via tools instead of assuming a clean baseline.
       logger().warn({ error, sessionId }, `Session: prompt context resolution failed sessionId=${sessionId}`);
-      return resolvedConfiguredPrompt;
+      return combineAgentSessionAppendSystemPrompt(
+        resolvedConfiguredPrompt,
+        'Initial Lifecycle snapshot: UNAVAILABLE (context lookup failed) — gather build/deploy/k8s state via tools and note in your answer that baseline context was unavailable.'
+      );
     }
   }
 
diff --git a/src/server/services/sitesConfig.test.ts b/src/server/services/sitesConfig.test.ts
new file mode 100644
index 00000000..ebc16cdd
--- /dev/null
+++ b/src/server/services/sitesConfig.test.ts
@@ -0,0 +1,169 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import type { SitesConfig } from './types/globalConfig';
+
+const mockGetConfig = jest.fn();
+const mockSetConfig = jest.fn();
+
+jest.mock('./globalConfig', () => ({
+  __esModule: true,
+  default: {
+    getInstance: jest.fn(() => ({
+      getConfig: (...args: unknown[]) => mockGetConfig(...args),
+      setConfig: (...args: unknown[]) => mockSetConfig(...args),
+    })),
+  },
+}));
+
+import SitesConfigService, { DEFAULT_SITES_CONFIG } from './sitesConfig';
+
+// Feeds `stored` to the mocked global-config read so the module-private normalizer runs via the public getConfig().
+async function normalizeViaGetConfig(stored: Partial<SitesConfig> | undefined): Promise<SitesConfig> {
+  mockGetConfig.mockResolvedValueOnce(stored); // queued for the next mocked getConfig call only
+  return SitesConfigService.getInstance().getConfig();
+}
+
+describe('SitesConfigService normalization', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  it('returns the full default config when nothing is stored', async () => {
+    const result = await normalizeViaGetConfig(undefined);
+    expect(result).toEqual(DEFAULT_SITES_CONFIG);
+  });
+
+  describe('port', () => {
+    it('keeps a valid in-range port', async () => {
+      expect((await normalizeViaGetConfig({ port: 8080 })).port).toBe(8080);
+    });
+
+    it('coerces a numeric string port', async () => {
+      expect((await normalizeViaGetConfig({ port: '8080' as unknown as number })).port).toBe(8080);
+    });
+
+    it('rejects an out-of-range port as null', async () => {
+      expect((await normalizeViaGetConfig({ port: 70000 })).port).toBeNull();
+    });
+
+    it('rejects a non-positive port as null', async () => {
+      expect((await normalizeViaGetConfig({ port: 0 })).port).toBeNull();
+    });
+  });
+
+  describe('hostPrefix', () => {
+    it('slugifies illegal characters and collapses dashes', async () => {
+      expect((await normalizeViaGetConfig({ hostPrefix: 'My Site!!' })).hostPrefix).toBe('my-site');
+    });
+
+    it('strips leading and trailing dashes', async () => {
+      expect((await normalizeViaGetConfig({ hostPrefix: '--preview--' })).hostPrefix).toBe('preview');
+    });
+
+    it('falls back to the default when the value normalizes to empty', async () => {
+      expect((await normalizeViaGetConfig({ hostPrefix: '---' })).hostPrefix).toBe(DEFAULT_SITES_CONFIG.hostPrefix);
+    });
+  });
+
+  describe('upload.allowedExtensions', () => {
+    it('lowercases, strips leading dots, and de-duplicates', async () => {
+      const result = await normalizeViaGetConfig({
+        upload: { ...DEFAULT_SITES_CONFIG.upload!, allowedExtensions: ['.HTML', 'html', 'CSS ', '  ', '.png'] },
+      });
+      expect(result.upload?.allowedExtensions).toEqual(['html', 'css', 'png']);
+    });
+
+    it('falls back to defaults when the provided extension list is empty', async () => {
+      const result = await normalizeViaGetConfig({
+        upload: { ...DEFAULT_SITES_CONFIG.upload!, allowedExtensions: [] },
+      });
+      expect(result.upload?.allowedExtensions).toEqual(DEFAULT_SITES_CONFIG.upload?.allowedExtensions);
+    });
+  });
+
+  describe('upload positive integers', () => {
+    it('falls back to defaults for non-positive / non-integer values', async () => {
+      const result = await normalizeViaGetConfig({
+        upload: { ...DEFAULT_SITES_CONFIG.upload!, maxFiles: 0, maxUploadBytes: -5 },
+      });
+      expect(result.upload?.maxFiles).toBe(DEFAULT_SITES_CONFIG.upload?.maxFiles);
+      expect(result.upload?.maxUploadBytes).toBe(DEFAULT_SITES_CONFIG.upload?.maxUploadBytes);
+    });
+  });
+
+  describe('storage', () => {
+    it('strips leading/trailing slashes and collapses duplicate slashes in the prefix', async () => {
+      const result = await normalizeViaGetConfig({
+        storage: { ...DEFAULT_SITES_CONFIG.storage!, prefix: '/sites//foo/' },
+      });
+      expect(result.storage?.prefix).toBe('sites/foo');
+    });
+
+    it('coerces an unknown backend to minio', async () => {
+      const result = await normalizeViaGetConfig({
+        storage: { ...DEFAULT_SITES_CONFIG.storage!, backend: 'weird' as unknown as 'minio' },
+      });
+      expect(result.storage?.backend).toBe('minio');
+    });
+
+    it('defaults forcePathStyle to true for minio when explicitly null', async () => {
+      const result = await normalizeViaGetConfig({
+        storage: { ...DEFAULT_SITES_CONFIG.storage!, backend: 'minio', forcePathStyle: null },
+      });
+      expect(result.storage?.forcePathStyle).toBe(true);
+    });
+
+    it('defaults forcePathStyle to false for s3 when explicitly null', async () => {
+      const result = await normalizeViaGetConfig({
+        storage: { ...DEFAULT_SITES_CONFIG.storage!, backend: 's3', forcePathStyle: null },
+      });
+      expect(result.storage?.forcePathStyle).toBe(false);
+    });
+
+    it('preserves an explicit forcePathStyle override', async () => {
+      const result = await normalizeViaGetConfig({
+        storage: { ...DEFAULT_SITES_CONFIG.storage!, backend: 'minio', forcePathStyle: false },
+      });
+      expect(result.storage?.forcePathStyle).toBe(false);
+    });
+
+    it('falls back to the default endpoint handling (null) when blank', async () => {
+      const result = await normalizeViaGetConfig({
+        storage: { ...DEFAULT_SITES_CONFIG.storage!, endpoint: '   ' },
+      });
+      expect(result.storage?.endpoint).toBeNull();
+    });
+  });
+
+  describe('setConfig', () => {
+    it('persists the normalized config under the sites key', async () => {
+      mockSetConfig.mockResolvedValueOnce(undefined);
+      const result = await SitesConfigService.getInstance().setConfig({
+        ...DEFAULT_SITES_CONFIG,
+        hostPrefix: 'My Site!!',
+        port: 70000,
+      });
+
+      expect(result.hostPrefix).toBe('my-site');
+      expect(result.port).toBeNull();
+      expect(mockSetConfig).toHaveBeenCalledWith(
+        'sites',
+        expect.objectContaining({ hostPrefix: 'my-site', port: null })
+      );
+    });
+  });
+});
diff --git a/src/server/services/sitesConfig.ts b/src/server/services/sitesConfig.ts
new file mode 100644
index 00000000..d736d431
--- /dev/null
+++ b/src/server/services/sitesConfig.ts
@@ -0,0 +1,206 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import type { SitesConfig } from './types/globalConfig';
+import GlobalConfigService from './globalConfig';
+
+const SITES_CONFIG_KEY = 'sites';
+
+export const DEFAULT_SITES_CONFIG: SitesConfig = { // defaults that fill any field missing from the stored config
+  enabled: false,
+  domain: 'localhost',
+  port: null,
+  hostPrefix: 'site',
+  ttl: {
+    enabled: true,
+    defaultDays: 7,
+    extensionDays: 7,
+  },
+  upload: {
+    maxUploadBytes: 10 * 1024 * 1024, // 10 MiB
+    maxExtractedBytes: 10 * 1024 * 1024, // 10 MiB
+    maxFiles: 500,
+    allowedExtensions: [
+      'html',
+      'zip',
+      'json',
+      'md',
+      'markdown',
+      'txt',
+      'css',
+      'js',
+      'mjs',
+      'map',
+      'csv',
+      'xml',
+      'svg',
+      'png',
+      'jpg',
+      'jpeg',
+      'gif',
+      'webp',
+      'avif',
+      'ico',
+      'webmanifest',
+      'wasm',
+      'woff',
+      'woff2',
+      'ttf',
+      'otf',
+      'pdf',
+    ],
+  },
+  storage: {
+    backend: 'minio',
+    bucket: 'lifecycle-sites',
+    prefix: 'sites',
+    region: 'us-west-2',
+    endpoint: null,
+    forcePathStyle: true,
+  },
+  cleanup: {
+    enabled: true,
+    intervalMinutes: 15,
+  },
+};
+
+// Trims `value`; blank, null and undefined all collapse to null.
+function normalizeOptionalString(value: string | null | undefined): string | null {
+  return value?.trim() || null;
+}
+
+function normalizePositiveInteger(value: number | string | undefined, fallback: number): number {
+  const numeric = typeof value === 'string' ? Number(value) : value; // numeric strings are accepted
+  return typeof numeric === 'number' && Number.isInteger(numeric) && numeric > 0 ? numeric : fallback;
+}
+
+// Returns a port in 1..65535 (numeric strings accepted); any other input, including blank, becomes null.
+function normalizePort(value: number | string | null | undefined): number | null {
+  const numeric = typeof value === 'string' && value !== '' ? Number(value) : value;
+  if (typeof numeric !== 'number' || !Number.isInteger(numeric)) {
+    return null;
+  }
+  return numeric >= 1 && numeric <= 65535 ? numeric : null;
+}
+
+// Slugifies a host prefix to lowercase `a-z0-9-`, trimming edge dashes and collapsing repeated dashes;
+// an empty input or an empty result falls back to the default prefix.
+function normalizeHostPrefix(value: string | null | undefined): string {
+  const fallback = DEFAULT_SITES_CONFIG.hostPrefix || 'site';
+  const lowered = (value || fallback).trim().toLowerCase();
+  const dashed = lowered.replace(/[^a-z0-9-]/g, '-');
+  const trimmedEdges = dashed.replace(/^-+|-+$/g, '');
+  const slug = trimmedEdges.replace(/-{2,}/g, '-');
+  return slug || fallback;
+}
+
+// Lowercases, trims, strips one leading dot and de-duplicates; defaults apply when no usable entry remains.
+function normalizeExtensions(values: string[] | undefined): string[] {
+  const clean = (list: string[]) => list.map((item) => item.trim().toLowerCase().replace(/^\./, '')).filter(Boolean);
+  const provided = clean(values || []);
+  return Array.from(new Set(provided.length ? provided : clean(DEFAULT_SITES_CONFIG.upload?.allowedExtensions || [])));
+}
+
+/**
+ * Layers a stored config over DEFAULT_SITES_CONFIG.
+ * Top-level keys are overlaid first; the four nested sections are then merged key by key so a partially
+ * specified section keeps the defaults for the keys it omits.
+ */
+function mergeSitesConfig(config: SitesConfig | undefined): SitesConfig {
+  // Section-level merges: stored keys win, defaults fill the gaps.
+  const ttl = { ...DEFAULT_SITES_CONFIG.ttl, ...config?.ttl };
+  const upload = { ...DEFAULT_SITES_CONFIG.upload, ...config?.upload };
+  const storage = { ...DEFAULT_SITES_CONFIG.storage, ...config?.storage };
+  const cleanup = { ...DEFAULT_SITES_CONFIG.cleanup, ...config?.cleanup };
+
+  // The merged sections replace the shallow section copies produced by the two spreads below.
+  return {
+    ...DEFAULT_SITES_CONFIG,
+    ...config,
+    ttl,
+    upload,
+    storage,
+    cleanup,
+  };
+}
+
+/**
+ * Builds a complete, sanitized SitesConfig from a stored value that may be partial, invalid or legacy.
+ * Each field is canonicalized by its normalizer and falls back to DEFAULT_SITES_CONFIG when unusable.
+ */
+function normalizeSitesConfig(config: SitesConfig | undefined): SitesConfig {
+  const merged = mergeSitesConfig(config);
+  const defaults = DEFAULT_SITES_CONFIG;
+  const tenMiB = 10 * 1024 * 1024;
+  const storageBackend = merged.storage?.backend === 's3' ? 's3' : 'minio';
+  // Read extensions from the raw input: `merged.upload` normally carries the default `allowedExtensions`,
+  // which would shadow a legacy config that only sets the deprecated `allowedTypes`.
+  const rawExtensions = config?.upload?.allowedExtensions || config?.upload?.allowedTypes;
+
+  return {
+    enabled: merged.enabled ?? false,
+    domain: normalizeOptionalString(merged.domain) || defaults.domain,
+    port: normalizePort(merged.port),
+    hostPrefix: normalizeHostPrefix(merged.hostPrefix),
+    ttl: {
+      enabled: merged.ttl?.enabled ?? true,
+      defaultDays: normalizePositiveInteger(merged.ttl?.defaultDays, defaults.ttl?.defaultDays || 7),
+      extensionDays: normalizePositiveInteger(merged.ttl?.extensionDays, defaults.ttl?.extensionDays || 7),
+    },
+    upload: {
+      maxUploadBytes: normalizePositiveInteger(merged.upload?.maxUploadBytes, defaults.upload?.maxUploadBytes || tenMiB),
+      maxExtractedBytes: normalizePositiveInteger(merged.upload?.maxExtractedBytes, defaults.upload?.maxExtractedBytes || tenMiB),
+      maxFiles: normalizePositiveInteger(merged.upload?.maxFiles, defaults.upload?.maxFiles || 500),
+      allowedExtensions: normalizeExtensions(rawExtensions),
+    },
+    storage: {
+      backend: storageBackend,
+      bucket: normalizeOptionalString(merged.storage?.bucket) || defaults.storage?.bucket,
+      prefix: (normalizeOptionalString(merged.storage?.prefix) || defaults.storage?.prefix || 'sites')
+        .replace(/^\/+|\/+$/g, '')
+        .replace(/\/{2,}/g, '/'),
+      region: normalizeOptionalString(merged.storage?.region) || defaults.storage?.region,
+      endpoint: normalizeOptionalString(merged.storage?.endpoint),
+      forcePathStyle: merged.storage?.forcePathStyle ?? storageBackend === 'minio',
+    },
+    cleanup: {
+      enabled: merged.cleanup?.enabled ?? true,
+      intervalMinutes: normalizePositiveInteger(merged.cleanup?.intervalMinutes, defaults.cleanup?.intervalMinutes || 15),
+    },
+  };
+}
+
+/** Lazily created singleton that reads and writes the Sites config stored under the `sites` global-config key. */
+export default class SitesConfigService {
+  private static instance: SitesConfigService;
+
+  static getInstance(): SitesConfigService {
+    return this.instance ?? (this.instance = new SitesConfigService());
+  }
+
+  /** Loads the stored value and returns it normalized. */
+  async getConfig(): Promise<SitesConfig> {
+    const stored = await GlobalConfigService.getInstance().getConfig(SITES_CONFIG_KEY);
+    return normalizeSitesConfig(stored as SitesConfig | undefined);
+  }
+
+  /** Normalizes `config`, persists the normalized form, and returns it. */
+  async setConfig(config: SitesConfig): Promise<SitesConfig> {
+    const sanitized = normalizeSitesConfig(config);
+    await GlobalConfigService.getInstance().setConfig(SITES_CONFIG_KEY, sanitized);
+    return sanitized;
+  }
+}
diff --git a/src/shared/openApiSpec.ts b/src/shared/openApiSpec.ts
index 41e7bcfa..f9c6faa4 100644
--- a/src/shared/openApiSpec.ts
+++ b/src/shared/openApiSpec.ts
@@ -167,6 +167,22 @@ export const openApiSpecificationForV2Api: OAS3Options = {
           type: 'object',
           properties: {
             message: { type: 'string' },
+            code: {
+              type: 'string',
+              description: 'Stable, machine-readable error discriminant. UIs switch on this, not on the message.',
+            },
+            details: { type: 'object', additionalProperties: true, nullable: true },
+            nextAction: {
+              type: 'object',
+              nullable: true,
+              description: 'Suggested recovery affordance for the user.',
+              properties: {
+                kind: { type: 'string', enum: ['continue', 'retry', 'reconnect', 'update_key', 'navigate'] },
+                label: { type: 'string' },
+                href: { type: 'string' },
+              },
+              required: ['kind', 'label'],
+            },
           },
           required: ['message'],
         },
@@ -226,6 +242,104 @@ export const openApiSpecificationForV2Api: OAS3Options = {
           required: ['file'],
         },
 
+        SitesStorageBackend: {
+          type: 'string',
+          enum: ['s3', 'minio'],
+        },
+
+        SitesTtlConfig: {
+          type: 'object',
+          properties: {
+            enabled: { type: 'boolean' },
+            defaultDays: { type: 'integer', minimum: 1 },
+            extensionDays: { type: 'integer', minimum: 1 },
+          },
+          required: ['enabled', 'defaultDays', 'extensionDays'],
+          additionalProperties: false,
+        },
+
+        SitesUploadConfig: {
+          type: 'object',
+          properties: {
+            maxUploadBytes: { type: 'integer', minimum: 1 },
+            maxExtractedBytes: { type: 'integer', minimum: 1 },
+            maxFiles: { type: 'integer', minimum: 1 },
+            allowedExtensions: {
+              type: 'array',
+              minItems: 1,
+              items: { type: 'string' },
+            },
+            allowedTypes: {
+              type: 'array',
+              minItems: 1,
+              items: { type: 'string' },
+              deprecated: true,
+            },
+          },
+          required: ['maxUploadBytes', 'maxExtractedBytes', 'maxFiles', 'allowedExtensions'],
+          additionalProperties: false,
+        },
+
+        SitesStorageConfig: {
+          type: 'object',
+          properties: {
+            backend: { $ref: '#/components/schemas/SitesStorageBackend' },
+            bucket: { type: 'string' },
+            prefix: { type: 'string' },
+            region: { type: 'string' },
+            endpoint: { type: 'string', nullable: true },
+            forcePathStyle: { type: 'boolean', nullable: true },
+          },
+          required: ['backend', 'bucket', 'prefix', 'region', 'endpoint', 'forcePathStyle'],
+          additionalProperties: false,
+        },
+
+        SitesCleanupConfig: {
+          type: 'object',
+          properties: {
+            enabled: { type: 'boolean' },
+            intervalMinutes: { type: 'integer', minimum: 1 },
+          },
+          required: ['enabled', 'intervalMinutes'],
+          additionalProperties: false,
+        },
+
+        SitesConfig: {
+          type: 'object',
+          description: 'Global Sites hosting configuration stored in global_config under the sites key.',
+          properties: {
+            enabled: { type: 'boolean' },
+            domain: { type: 'string', example: 'sites.example.com' },
+            port: { type: 'integer', nullable: true, minimum: 1, maximum: 65535 },
+            hostPrefix: { type: 'string', minLength: 1, example: 'site' },
+            ttl: { $ref: '#/components/schemas/SitesTtlConfig' },
+            upload: { $ref: '#/components/schemas/SitesUploadConfig' },
+            storage: { $ref: '#/components/schemas/SitesStorageConfig' },
+            cleanup: { $ref: '#/components/schemas/SitesCleanupConfig' },
+          },
+          required: ['enabled', 'domain', 'port', 'hostPrefix', 'ttl', 'upload', 'storage', 'cleanup'],
+          additionalProperties: false,
+        },
+
+        SitesConfigSuccessResponse: {
+          allOf: [
+            { $ref: '#/components/schemas/SuccessApiResponse' },
+            {
+              type: 'object',
+              properties: {
+                data: {
+                  type: 'object',
+                  properties: {
+                    config: { $ref: '#/components/schemas/SitesConfig' },
+                  },
+                  required: ['config'],
+                },
+              },
+              required: ['data'],
+            },
+          ],
+        },
+
         SiteSuccessResponse: {
           allOf: [
             { $ref: '#/components/schemas/SuccessApiResponse' },
@@ -1575,6 +1689,22 @@ export const openApiSpecificationForV2Api: OAS3Options = {
             recordedAt: { type: 'string', format: 'date-time' },
             retryable: { type: 'boolean' },
             origin: { $ref: '#/components/schemas/WorkspaceRuntimeFailureOrigin' },
+            code: {
+              type: 'string',
+              description:
+                'Stable, machine-readable failure discriminant so durable failures honor the coded error contract.',
+            },
+            nextAction: {
+              type: 'object',
+              nullable: true,
+              description: 'Suggested recovery affordance for the user.',
+              properties: {
+                kind: { type: 'string', enum: ['continue', 'retry', 'reconnect', 'update_key', 'navigate'] },
+                label: { type: 'string' },
+                href: { type: 'string' },
+              },
+              required: ['kind', 'label'],
+            },
           },
           required: ['stage', 'title', 'message', 'recordedAt', 'retryable', 'origin'],
           additionalProperties: false,
diff --git a/sysops/dockerfiles/tilt.app.Dockerfile b/sysops/dockerfiles/tilt.app.Dockerfile
index 556f4ee7..2696a053 100644
--- a/sysops/dockerfiles/tilt.app.Dockerfile
+++ b/sysops/dockerfiles/tilt.app.Dockerfile
@@ -1,3 +1,4 @@
+# syntax=docker/dockerfile:1
 # Copyright 2025 GoodRx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -65,6 +66,12 @@ ENV APP_REDIS_HOST=${APP_REDIS_HOST}
 ENV APP_REDIS_PORT=${APP_REDIS_PORT}
 ENV APP_REDIS_PASSWORD=${APP_REDIS_PASSWORD}
 
+# LIFECYCLE_BUILD=prod: bake a bundle for `pnpm start`; dev runs `pnpm dev` via the entrypoint instead.
+ARG LIFECYCLE_BUILD=dev
+# build:local (LOCAL ONLY) skips ESLint and reuses the BuildKit-cached .next/cache; prod CI still runs `pnpm build`.
+RUN --mount=type=cache,target=/app/.next/cache,id=lifecycle-next-cache \
+    if [ "$LIFECYCLE_BUILD" = "prod" ]; then pnpm build:local; fi
+
 # Expose the required port
 ENV PORT 3000
 EXPOSE 3000
diff --git a/sysops/tilt/lifecycle-keycloak-values.yaml b/sysops/tilt/lifecycle-keycloak-values.yaml
index 955e7053..8af608f9 100644
--- a/sysops/tilt/lifecycle-keycloak-values.yaml
+++ b/sysops/tilt/lifecycle-keycloak-values.yaml
@@ -36,6 +36,8 @@ githubIdp:
   githubJsonFormat: true
 
 internalIdp:
+  internalUrl: http://lifecycle-keycloak-service.lifecycle-app.svc.cluster.local:8080
+  mapAdminRole: true
   users:
     bootstrapUser:
       username: lifecycle
diff --git a/sysops/tilt/scripts/app_setup_entrypoint.sh b/sysops/tilt/scripts/app_setup_entrypoint.sh
index c52b26d9..cc0d9324 100644
--- a/sysops/tilt/scripts/app_setup_entrypoint.sh
+++ b/sysops/tilt/scripts/app_setup_entrypoint.sh
@@ -44,4 +44,10 @@ if [ "${LIFECYCLE_MODE:-all}" != "job" ]; then
   fi
 fi
 
+# LIFECYCLE_SERVE=prod: serve the built bundle for incremental SSE; dev's on-demand compile batches reasoning replays.
+if [ "${LIFECYCLE_SERVE:-dev}" = "prod" ]; then
+  echo "Lifecycle: LIFECYCLE_SERVE=prod -> starting production server (pnpm start)..."
+  exec pnpm start
+fi
+
 exec pnpm dev
diff --git a/sysops/tilt/scripts/sync_keycloak_github_idp.sh b/sysops/tilt/scripts/sync_keycloak_github_idp.sh
index 60391377..67b318fb 100644
--- a/sysops/tilt/scripts/sync_keycloak_github_idp.sh
+++ b/sysops/tilt/scripts/sync_keycloak_github_idp.sh
@@ -26,6 +26,15 @@ if [ -z "$github_client_id" ] || [ "$github_client_id" = "local-github-client-id
   exit 0
 fi
 
+echo "Keycloak: Waiting for lifecycle-keycloak statefulset to be ready..."
+if ! kubectl -n "$namespace" rollout status statefulset/lifecycle-keycloak --timeout=300s; then
+  echo "Keycloak: Timeout waiting for lifecycle-keycloak statefulset to be ready"
+  exit 1
+fi
+
+# Give Tilt a brief moment to establish port-forwarding
+sleep 5
+
 tmp_current="$(mktemp)"
 tmp_updated="$(mktemp)"
 trap 'rm -f "$tmp_current" "$tmp_updated"' EXIT
@@ -39,7 +48,7 @@ get_admin_token() {
     --data-urlencode 'client_id=admin-cli' | jq -r '.access_token'
 }
 
-for attempt in 1 2 3 4 5 6 7 8 9 10; do
+for attempt in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
   token="$(get_admin_token || true)"
   if [ -n "$token" ] && [ "$token" != "null" ]; then
     status="$(curl -sS --max-time 10 -o "$tmp_current" -w '%{http_code}' \
diff --git a/ws-server.ts b/ws-server.ts
index 896ec692..61a4b14c 100644
--- a/ws-server.ts
+++ b/ws-server.ts
@@ -27,7 +27,7 @@ moduleAlias.addAliases({
   scripts: join(__dirname, 'scripts'),
 });
 
-import { createServer, IncomingMessage, ServerResponse, request as httpRequest } from 'http';
+import { createServer, IncomingMessage, ServerResponse, request as httpRequest, STATUS_CODES } from 'http';
 import type { Socket } from 'net';
 import { parse, URL } from 'url';
 import next from 'next';
@@ -39,6 +39,15 @@ import SitesService from './src/server/services/sites';
 import {
   buildWorkspaceEditorProxyHeaders,
   serializeSocketHttpResponse,
+  EDITOR_PROXY_TIMEOUT_MS,
+  EDITOR_PROXY_PING_INTERVAL_MS,
+  EDITOR_PROXY_PONG_DEADLINE_MS,
+  editorProxyConnections,
+  classifyEditorProxyFailure,
+  resolveEditorProxyFailureMapping,
+  buildWorkspaceEditorErrorPage,
+  isEditorNavigationRequest,
+  type EditorProxyFailureContext,
 } from './src/server/lib/agentSession/workspaceEditorProxy';
 
 const dev = process.env.NODE_ENV !== 'production';
@@ -121,6 +130,33 @@ function getSessionWorkspaceEditorCookiePath(sessionId: string): string {
   return `${SESSION_WORKSPACE_EDITOR_PATH_PREFIX}${encodeURIComponent(sessionId)}`;
 }
 
+function decodeJwtPayload(token: string): Record<string, unknown> | null {
+  // Decodes the payload claims only; the signature is NOT verified, so never use the result for authorization.
+  const payloadSegment = token.split('.')[1];
+  if (!payloadSegment) {
+    return null;
+  }
+
+  try {
+    // Node's 'base64' decoder already accepts the URL-safe alphabet and unpadded input (RFC 4648 section 5).
+    const parsed: unknown = JSON.parse(Buffer.from(payloadSegment, 'base64').toString('utf8'));
+    // JSON.parse can yield primitives, arrays or null; only a plain object is a valid claims set.
+    const isClaimsObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
+    return isClaimsObject ? (parsed as Record<string, unknown>) : null;
+  } catch {
+    return null;
+  }
+}
+
+function getJwtCookieMaxAgeSeconds(token: string): number | null {
+  // Cookie lifetime follows the token's `exp` claim (seconds since the Unix epoch), clamped to 0 once expired.
+  const expiresAt = decodeJwtPayload(token)?.exp;
+  if (typeof expiresAt === 'number' && Number.isFinite(expiresAt)) {
+    return Math.max(0, Math.floor(expiresAt - Date.now() / 1000));
+  }
+  return null;
+}
+
 function isSendableCloseCode(code?: number): code is number {
   if (typeof code !== 'number') {
     return false;
@@ -136,9 +172,11 @@ function isSendableCloseCode(code?: number): code is number {
 function buildSessionWorkspaceEditorCookie(request: IncomingMessage, sessionId: string, token: string): string {
   const isSecure =
     request.headers['x-forwarded-proto'] === 'https' || (request.socket as { encrypted?: boolean }).encrypted === true;
+  const maxAgeSeconds = getJwtCookieMaxAgeSeconds(token);
   const cookieParts = [
     `${SESSION_WORKSPACE_EDITOR_COOKIE_NAME}=${encodeURIComponent(token)}`,
     `Path=${getSessionWorkspaceEditorCookiePath(sessionId)}`,
+    ...(maxAgeSeconds === null ? [] : [`Max-Age=${maxAgeSeconds}`]),
     'HttpOnly',
     'SameSite=Lax',
   ];
@@ -205,24 +243,6 @@ function buildProxyHeaders(request: IncomingMessage, target: URL, forwardedPrefi
   });
 }
 
-function resolveSessionWorkspaceEditorErrorStatus(error: unknown): number {
-  const message = error instanceof Error ? error.message : String(error);
-
-  if (message === 'Authentication token is required') {
-    return 401;
-  }
-
-  if (message === 'Forbidden: you do not own this session') {
-    return 403;
-  }
-
-  if (message === 'Session not found or not active') {
-    return 404;
-  }
-
-  return 502;
-}
-
 async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, socket: Socket, head: Buffer) {
   const parsedUrl = parse(request.url || '', true);
   const match = parseSessionWorkspaceEditorPath(parsedUrl.pathname);
@@ -240,10 +260,38 @@ async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, soc
 
   let upstreamSocket: Socket | null = null;
   let proxyReq: ReturnType<typeof httpRequest> | null = null;
+  // rh-2: track this socket pair as a live connection.
+  const registryToken = {};
+  let registered = false;
+  // Once the pipe is live, release-on-close owns the registry slot; finally must not release it.
+  let pipeEstablished = false;
+  let clientClosedEarly = false;
+
+  // rh-2: bind client close/error before the connect await so a disconnect aborts the pending proxyReq.
+  const onEarlyClientClose = () => {
+    clientClosedEarly = true;
+    if (proxyReq) {
+      proxyReq.destroy();
+    }
+    if (upstreamSocket && !upstreamSocket.destroyed) {
+      upstreamSocket.destroy();
+    }
+  };
+  socket.on('close', onEarlyClientClose);
+  socket.on('error', onEarlyClientClose);
 
   try {
     const queryToken = typeof parsedUrl.query.token === 'string' ? parsedUrl.query.token : null;
     const session = await resolveOwnedAgentSession(request, match.sessionId, queryToken);
+
+    if (clientClosedEarly) {
+      return;
+    }
+    if (!editorProxyConnections.tryRegister(match.sessionId, registryToken)) {
+      throw new EditorProxyError('editor-proxy-capacity');
+    }
+    registered = true;
+
     const forwardedPrefix = getSessionWorkspaceEditorCookiePath(match.sessionId);
     const targetUrl = buildSessionWorkspaceEditorServiceUrl(
       session,
@@ -268,9 +316,24 @@ async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, soc
         headers: proxyHeaders,
       });
 
+      // rh-2: bound the connect/upgrade phase so a half-open upstream can't hold it open indefinitely.
+      proxyReq.setTimeout(EDITOR_PROXY_TIMEOUT_MS, () => {
+        proxyReq?.destroy(new EditorProxyError('editor-proxy-timeout'));
+      });
+
       proxyReq.on('upgrade', (upstreamRes, proxiedSocket, upstreamHead) => {
         upstreamSocket = proxiedSocket as Socket;
 
+        // Hand off from the early connect-phase guards to steady-state pipe teardown.
+        socket.removeListener('close', onEarlyClientClose);
+        socket.removeListener('error', onEarlyClientClose);
+
+        if (clientClosedEarly || socket.destroyed) {
+          upstreamSocket.destroy();
+          resolve();
+          return;
+        }
+
         socket.write(
           serializeSocketHttpResponse({
             statusCode: upstreamRes.statusCode || 101,
@@ -287,6 +350,23 @@ async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, soc
           upstreamSocket.write(head);
         }
 
+        // rh-2: a byte-pipe can't parse WS frames, so enforce liveness via a bidirectional idle timeout (any traffic resets it).
+        const idleMs = EDITOR_PROXY_PING_INTERVAL_MS + EDITOR_PROXY_PONG_DEADLINE_MS;
+        const reapIdle = (source: 'client' | 'upstream') => {
+          logger.warn(
+            { ...editorLogCtx, sessionId: match.sessionId, source, idleMs },
+            `SessionEditor: idle timeout source=${source} sessionId=${match.sessionId}`
+          );
+          if (!socket.destroyed) {
+            socket.destroy();
+          }
+          if (upstreamSocket && !upstreamSocket.destroyed) {
+            upstreamSocket.destroy();
+          }
+        };
+        socket.setTimeout(idleMs, () => reapIdle('client'));
+        upstreamSocket.setTimeout(idleMs, () => reapIdle('upstream'));
+
         socket.on('error', (error) => {
           logger.warn(
             { ...editorLogCtx, error },
@@ -308,6 +388,10 @@ async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, soc
         });
 
         socket.on('close', () => {
+          if (registered) {
+            registered = false;
+            editorProxyConnections.release(match.sessionId, registryToken);
+          }
           if (upstreamSocket && !upstreamSocket.destroyed) {
             upstreamSocket.end();
           }
@@ -319,6 +403,7 @@ async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, soc
           }
         });
 
+        pipeEstablished = true;
         socket.pipe(upstreamSocket);
         upstreamSocket.pipe(socket);
         socket.resume();
@@ -353,9 +438,12 @@ async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, soc
       proxyReq.end();
     });
   } catch (error: any) {
+    const ctx = extractEditorFailureContext(error);
+    const reason = classifyEditorProxyFailure(error, ctx);
+    const mapping = resolveEditorProxyFailureMapping(reason);
     logger.error(
-      { ...editorLogCtx, error, sessionId: match.sessionId },
-      `SessionEditor: websocket setup failed sessionId=${match.sessionId}`
+      { ...editorLogCtx, error, sessionId: match.sessionId, reason, status: mapping.status },
+      `SessionEditor: websocket setup failed sessionId=${match.sessionId} reason=${reason}`
     );
 
     if (proxyReq) {
@@ -366,15 +454,35 @@ async function handleSessionWorkspaceEditorUpgrade(request: IncomingMessage, soc
       upstreamSocket.destroy();
     }
 
+    // WS handshakes aren't browser navigations; reply with a coded status line on the raw socket, not HTML.
     if (!socket.destroyed) {
       socket.end(
         serializeSocketHttpResponse({
-          statusCode: resolveSessionWorkspaceEditorErrorStatus(error),
-          statusMessage: 'Bad Gateway',
-          body: error instanceof Error ? error.message : String(error),
+          statusCode: mapping.status,
+          statusMessage: STATUS_CODES[mapping.status] || 'Bad Gateway',
+          headers: { 'X-Editor-Proxy-Reason': reason },
+          body: mapping.message,
         })
       );
     }
+  } finally {
+    socket.removeListener('close', onEarlyClientClose);
+    socket.removeListener('error', onEarlyClientClose);
+    // Release only when the pipe never went live; a live pipe's slot is released by its socket 'close' handler.
+    if (registered && !pipeEstablished) {
+      registered = false;
+      editorProxyConnections.release(match.sessionId, registryToken);
+    }
+  }
+}
+
+// err-4: coded error carrying failure context so callers can map suspended vs pod-gone vs auth.
+class EditorProxyError extends Error {
+  constructor(message: string, public failureContext: EditorProxyFailureContext = {}) {
+    super(message);
+    // Restore the prototype chain so `instanceof EditorProxyError` still works when compiled to an ES5 target.
+    Object.setPrototypeOf(this, new.target.prototype);
+    this.name = 'EditorProxyError';
   }
 }
 
@@ -386,13 +494,21 @@ async function resolveOwnedAgentSession(
   const AgentSessionService = (await import('./src/server/services/agentSession')).default;
   const session = await AgentSessionService.getSession(sessionId);
   if (!session || session.status !== 'active') {
-    throw new Error('Session not found or not active');
+    throw new EditorProxyError('Session not found or not active', { podMissing: true });
+  }
+
+  // sr-3 guard: a crashed suspend can leave status=active over a dead pod; treat not-ready as unavailable to emit a coded page, not a 502.
+  if (session.workspaceStatus !== 'ready') {
+    throw new EditorProxyError('Workspace is not ready', { workspaceUnavailable: true });
+  }
+  if (!session.podName || !session.namespace) {
+    throw new EditorProxyError('Workspace runtime is gone', { podMissing: true });
   }
 
   if (process.env.ENABLE_AUTH === 'true') {
     const headerToken = request.headers.authorization?.split(' ')[1];
     const cookieToken = parseCookieHeader(request.headers.cookie)[SESSION_WORKSPACE_EDITOR_COOKIE_NAME];
-    const rawToken = headerToken || cookieToken || queryToken;
+    const rawToken = headerToken || queryToken || cookieToken;
 
     if (!rawToken) {
       throw new Error('Authentication token is required');
@@ -422,6 +538,22 @@ function closeSocket(ws: WebSocket, code: number, reason: string) {
   ws.close(1000, safeReason);
 }
 
+// Same-origin deep-link back to the Lifecycle session for the branded error page CTA.
+function buildEditorSessionDeepLink(request: IncomingMessage, sessionId: string): string {
+  // Forwarded headers can be comma-separated proxy chains: keep the first hop, and only let http/https into the href.
+  const firstHop = (v: unknown): string => (typeof v === 'string' ? (v.split(',')[0] ?? '').trim() : '');
+  const forwardedProto = firstHop(request.headers['x-forwarded-proto']).toLowerCase();
+  const socketProto = (request.socket as { encrypted?: boolean }).encrypted ? 'https' : 'http';
+  const proto = forwardedProto === 'https' || forwardedProto === 'http' ? forwardedProto : socketProto;
+  const host = firstHop(request.headers['x-forwarded-host']) || firstHop(request.headers.host);
+  const path = `/new/${encodeURIComponent(sessionId)}`;
+  return host ? `${proto}://${host}${path}` : path;
+}
+
+function extractEditorFailureContext(error: unknown): EditorProxyFailureContext {
+  return error instanceof EditorProxyError ? error.failureContext : {};
+}
+
 async function handleSessionWorkspaceEditorHttp(
   req: IncomingMessage,
   res: ServerResponse,
@@ -433,9 +565,19 @@ async function handleSessionWorkspaceEditorHttp(
     return false;
   }
 
+  // rh-2: track this in-flight HTTP proxy as a live connection and cap it.
+  const registryToken = {};
+  let registered = false;
+
   try {
     const queryToken = typeof query.token === 'string' ? query.token : null;
     const session = await resolveOwnedAgentSession(req, match.sessionId, queryToken);
+
+    if (!editorProxyConnections.tryRegister(match.sessionId, registryToken)) {
+      throw new EditorProxyError('editor-proxy-capacity');
+    }
+    registered = true;
+
     const forwardedPrefix = getSessionWorkspaceEditorCookiePath(match.sessionId);
     const targetUrl = buildSessionWorkspaceEditorServiceUrl(session, match.forwardPath, query);
     const proxyHeaders = buildProxyHeaders(req, targetUrl, forwardedPrefix);
@@ -475,6 +617,12 @@ async function handleSessionWorkspaceEditorHttp(
         }
       );
 
+      // rh-2: bound the upstream so a half-open code-server can't hang the request; surface as a coded timeout.
+      proxyReq.setTimeout(EDITOR_PROXY_TIMEOUT_MS, () => {
+        proxyReq.destroy(new EditorProxyError('editor-proxy-timeout'));
+      });
+      // rh-2: an early client disconnect must abort the upstream request.
+      req.on('close', () => proxyReq.destroy());
       proxyReq.on('error', reject);
 
       if (req.method && !['GET', 'HEAD'].includes(req.method.toUpperCase())) {
@@ -486,18 +634,33 @@ async function handleSessionWorkspaceEditorHttp(
 
     return true;
   } catch (error: any) {
+    const ctx = extractEditorFailureContext(error);
+    const reason = classifyEditorProxyFailure(error, ctx);
+    const mapping = resolveEditorProxyFailureMapping(reason);
     logger.error(
-      { error, path: pathname, sessionId: match.sessionId },
-      `SessionEditor: proxy failed sessionId=${match.sessionId} path=${pathname}`
+      { error, path: pathname, sessionId: match.sessionId, reason, status: mapping.status },
+      `SessionEditor: proxy failed sessionId=${match.sessionId} path=${pathname} reason=${reason}`
     );
-    res.statusCode =
-      error?.message?.includes('Forbidden') || error?.message?.includes('Authentication')
-        ? 401
-        : error?.message?.includes('Session not found')
-        ? 404
-        : 502;
-    res.end(error?.message || 'Editor proxy failed');
+
+    if (!res.headersSent) {
+      res.statusCode = mapping.status;
+      // err-4: navigation failures get a branded HTML page with a deep-link; assets keep a coded status + plain body.
+      if (isEditorNavigationRequest(req.headers)) {
+        const sessionUrl = buildEditorSessionDeepLink(req, match.sessionId);
+        res.setHeader('Content-Type', 'text/html; charset=utf-8');
+        res.end(buildWorkspaceEditorErrorPage({ reason, sessionUrl }));
+      } else {
+        res.setHeader('X-Editor-Proxy-Reason', reason);
+        res.end(mapping.message);
+      }
+    } else if (!(res as ServerResponse & { writableEnded?: boolean }).writableEnded) {
+      res.end();
+    }
     return true;
+  } finally {
+    if (registered) {
+      editorProxyConnections.release(match.sessionId, registryToken);
+    }
   }
 }
 
@@ -691,6 +854,15 @@ app.prepare().then(() => {
 
   httpServer.listen(port);
 
+  // rh-2: log the live editor-proxy connection gauge so leaked sockets surface; unref so it never holds the process.
+  const editorProxyGauge = setInterval(() => {
+    const live = editorProxyConnections.size();
+    if (live > 0) {
+      logger.info({ liveEditorProxyConnections: live }, 'SessionEditor: live connection gauge');
+    }
+  }, 60_000);
+  editorProxyGauge.unref();
+
   httpServer.on('error', (error) => {
     logger.error({ err: error }, 'HTTP Server Error');
     process.exit(1);

From fa5787de7ae02dee01232be27bb6cc21e1f2c34b Mon Sep 17 00:00:00 2001
From: vmelikyan <vahan.melikyan@gmail.com>
Date: Mon, 1 Jun 2026 20:59:07 -0700
Subject: [PATCH 2/2] GAR support

---
 next.config.js                                |   1 +
 package.json                                  |   1 +
 pnpm-lock.yaml                                | 141 +++++++++
 src/server/lib/config/ConfigBuilder.ts        |   2 +
 .../nativeBuild/__tests__/buildkit.test.ts    | 244 +++++++++++++++
 .../__tests__/registryAuth.test.ts            | 235 +++++++++++++++
 src/server/lib/nativeBuild/engines.ts         | 269 +++++++++++------
 src/server/lib/nativeBuild/registryAuth.ts    | 280 ++++++++++++++++++
 src/server/services/types/globalConfig.ts     |   6 +
 9 files changed, 1092 insertions(+), 87 deletions(-)
 create mode 100644 src/server/lib/nativeBuild/__tests__/registryAuth.test.ts
 create mode 100644 src/server/lib/nativeBuild/registryAuth.ts

diff --git a/next.config.js b/next.config.js
index e1aa223f..439c8fbe 100644
--- a/next.config.js
+++ b/next.config.js
@@ -25,6 +25,7 @@ module.exports = {
       'dd-trace',
       'knex',
       '@aws-sdk/client-s3',
+      'google-auth-library',
     ],
   },
   env: {
diff --git a/package.json b/package.json
index 8bceef92..d52ff11d 100644
--- a/package.json
+++ b/package.json
@@ -51,6 +51,7 @@
     "fastly": "^7.0.1",
     "flatted": "^3.0.4",
     "framer-motion": "^12.23.24",
+    "google-auth-library": "^10.6.2",
     "haikunator": "^2.1.2",
     "hot-shots": "^10.0.0",
     "ioredis": "^4.27.3",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 7e09834d..ca62721b 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -79,6 +79,9 @@ importers:
       framer-motion:
         specifier: ^12.23.24
         version: 12.23.24(react-dom@18.2.0(react@18.2.0))(react@18.2.0)
+      google-auth-library:
+        specifier: ^10.6.2
+        version: 10.6.2
       haikunator:
         specifier: ^2.1.2
         version: 2.1.2
@@ -3823,6 +3826,11 @@ packages:
     engines: { node: '>=0.4.0' }
     hasBin: true
 
+  agent-base@7.1.4:
+    resolution:
+      { integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ== }
+    engines: { node: '>= 14' }
+
   aggregate-error@3.1.0:
     resolution:
       { integrity: sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA== }
@@ -4128,6 +4136,10 @@ packages:
       { integrity: sha512-GPEid2Y9QU1Exl1rpO9B2IPJGHPSupF5GnVIP0blYvNOMer2bTvSWs1jGOUg04hTmu67nmLsQ9TBo1puaotBHg== }
     engines: { node: '>=0.6' }
 
+  bignumber.js@9.3.1:
+    resolution:
+      { integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ== }
+
   binary-extensions@2.3.0:
     resolution:
       { integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw== }
@@ -4660,6 +4672,11 @@ packages:
       { integrity: sha512-jRFi8UDGo6j+odZiEpjazZaWqEal3w/basFjQHQEwVtZJGDpxbH1MeYluwCS8Xq5wmLJooDlMgvVarmWfGM44g== }
     engines: { node: '>=0.10' }
 
+  data-uri-to-buffer@4.0.1:
+    resolution:
+      { integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A== }
+    engines: { node: '>= 12' }
+
   data-view-buffer@1.0.2:
     resolution:
       { integrity: sha512-EmKO5V3OLXh1rtK2wgXRansaK1/mtVdTUEiEI0W8RkvgT05kfxaH29PliLnpLP73yYO6142Q72QNa8Wx/A5CqQ== }
@@ -5394,6 +5411,11 @@ packages:
     resolution:
       { integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA== }
 
+  fetch-blob@3.2.0:
+    resolution:
+      { integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ== }
+    engines: { node: ^12.20 || >= 14.13 }
+
   file-entry-cache@6.0.1:
     resolution:
       { integrity: sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg== }
@@ -5509,6 +5531,11 @@ packages:
       { integrity: sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww== }
     engines: { node: '>=0.4.x' }
 
+  formdata-polyfill@4.0.10:
+    resolution:
+      { integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g== }
+    engines: { node: '>=12.20.0' }
+
   formidable@1.2.6:
     resolution:
       { integrity: sha512-KcpbcpuLNOwrEjnbpMC0gS+X8ciDoZE1kkqzat4a8vrprf+s9pKNQ/QIwWfbfs4ltgmFl3MD177SNTkve3BwGQ== }
@@ -5575,6 +5602,16 @@ packages:
     resolution:
       { integrity: sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== }
 
+  gaxios@7.1.4:
+    resolution:
+      { integrity: sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA== }
+    engines: { node: '>=18' }
+
+  gcp-metadata@8.1.2:
+    resolution:
+      { integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg== }
+    engines: { node: '>=18' }
+
   generator-function@2.0.1:
     resolution:
       { integrity: sha512-SFdFmIJi+ybC0vjlHN0ZGVGHc3lgE0DxPAT0djjVg+kjOnSqclqmj0KQ7ykTOLP6YxoqOvuAODGdcHJn+43q3g== }
@@ -5727,6 +5764,16 @@ packages:
       { integrity: sha512-jWsQfayf13NvqKUIL3Ta+CIqMnvlaIDFveWE/dpOZ9+3AMEJozsxDvKA02zync9UuvOM8rOXzsD5GqKP4OnWPQ== }
     engines: { node: ^12.20.0 || ^14.13.1 || >=16.0.0 }
 
+  google-auth-library@10.6.2:
+    resolution:
+      { integrity: sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw== }
+    engines: { node: '>=18' }
+
+  google-logging-utils@1.1.3:
+    resolution:
+      { integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA== }
+    engines: { node: '>=14' }
+
   gopd@1.0.1:
     resolution:
       { integrity: sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA== }
@@ -5882,6 +5929,11 @@ packages:
       { integrity: sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ== }
     engines: { node: '>=0.8', npm: '>=1.3.7' }
 
+  https-proxy-agent@7.0.6:
+    resolution:
+      { integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw== }
+    engines: { node: '>= 14' }
+
   human-signals@2.1.0:
     resolution:
       { integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== }
@@ -6635,6 +6687,10 @@ packages:
     engines: { node: '>=6' }
     hasBin: true
 
+  json-bigint@1.0.0:
+    resolution:
+      { integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ== }
+
   json-parse-even-better-errors@2.3.1:
     resolution:
       { integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w== }
@@ -6717,10 +6773,18 @@ packages:
     resolution:
       { integrity: sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA== }
 
+  jwa@2.0.1:
+    resolution:
+      { integrity: sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg== }
+
   jws@3.2.2:
     resolution:
       { integrity: sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA== }
 
+  jws@4.0.1:
+    resolution:
+      { integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA== }
+
   kind-of@6.0.3:
     resolution:
       { integrity: sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== }
@@ -7255,6 +7319,11 @@ packages:
       { integrity: sha512-VBlAiynj3VMLrotgwOS3OyECFxas5y7ltLcK4t41lMUZeaK15Ym4QRkqN0EQKAFL42q9i21EPKjzLUPfltR72A== }
     engines: { node: ^12.20.0 || ^14.13.1 || >=16.0.0 }
 
+  node-fetch@3.3.2:
+    resolution:
+      { integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA== }
+    engines: { node: ^12.20 || >= 14.13 }
+
   node-gyp-build-optional-packages@5.2.2:
     resolution:
       { integrity: sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw== }
@@ -13979,6 +14048,8 @@ snapshots:
 
   acorn@8.9.0: {}
 
+  agent-base@7.1.4: {}
+
   aggregate-error@3.1.0:
     dependencies:
       clean-stack: 2.2.0
@@ -14291,6 +14362,8 @@ snapshots:
 
   big-integer@1.6.51: {}
 
+  bignumber.js@9.3.1: {}
+
   binary-extensions@2.3.0: {}
 
   bindings@1.5.0:
@@ -14684,6 +14757,8 @@ snapshots:
     dependencies:
       assert-plus: 1.0.0
 
+  data-uri-to-buffer@4.0.1: {}
+
   data-view-buffer@1.0.2:
     dependencies:
       call-bound: 1.0.4
@@ -15539,6 +15614,11 @@ snapshots:
     dependencies:
       bser: 2.1.1
 
+  fetch-blob@3.2.0:
+    dependencies:
+      node-domexception: 1.0.0
+      web-streams-polyfill: 3.3.3
+
   file-entry-cache@6.0.1:
     dependencies:
       flat-cache: 3.0.4
@@ -15640,6 +15720,10 @@ snapshots:
 
   format@0.2.2: {}
 
+  formdata-polyfill@4.0.10:
+    dependencies:
+      fetch-blob: 3.2.0
+
   formidable@1.2.6: {}
 
   forwarded@0.2.0: {}
@@ -15684,6 +15768,22 @@ snapshots:
 
   functions-have-names@1.2.3: {}
 
+  gaxios@7.1.4:
+    dependencies:
+      extend: 3.0.2
+      https-proxy-agent: 7.0.6
+      node-fetch: 3.3.2
+    transitivePeerDependencies:
+      - supports-color
+
+  gcp-metadata@8.1.2:
+    dependencies:
+      gaxios: 7.1.4
+      google-logging-utils: 1.1.3
+      json-bigint: 1.0.0
+    transitivePeerDependencies:
+      - supports-color
+
   generator-function@2.0.1: {}
 
   gensync@1.0.0-beta.2: {}
@@ -15852,6 +15952,19 @@ snapshots:
       merge2: 1.4.1
       slash: 4.0.0
 
+  google-auth-library@10.6.2:
+    dependencies:
+      base64-js: 1.5.1
+      ecdsa-sig-formatter: 1.0.11
+      gaxios: 7.1.4
+      gcp-metadata: 8.1.2
+      google-logging-utils: 1.1.3
+      jws: 4.0.1
+    transitivePeerDependencies:
+      - supports-color
+
+  google-logging-utils@1.1.3: {}
+
   gopd@1.0.1:
     dependencies:
       get-intrinsic: 1.2.1
@@ -15968,6 +16081,13 @@ snapshots:
       jsprim: 1.4.2
       sshpk: 1.18.0
 
+  https-proxy-agent@7.0.6:
+    dependencies:
+      agent-base: 7.1.4
+      debug: 4.4.3
+    transitivePeerDependencies:
+      - supports-color
+
   human-signals@2.1.0: {}
 
   human-signals@3.0.1: {}
@@ -16740,6 +16860,10 @@ snapshots:
 
   jsesc@3.0.2: {}
 
+  json-bigint@1.0.0:
+    dependencies:
+      bignumber.js: 9.3.1
+
   json-parse-even-better-errors@2.3.1: {}
 
   json-schema-traverse@0.4.1: {}
@@ -16816,11 +16940,22 @@ snapshots:
       ecdsa-sig-formatter: 1.0.11
       safe-buffer: 5.2.1
 
+  jwa@2.0.1:
+    dependencies:
+      buffer-equal-constant-time: 1.0.1
+      ecdsa-sig-formatter: 1.0.11
+      safe-buffer: 5.2.1
+
   jws@3.2.2:
     dependencies:
       jwa: 1.4.1
       safe-buffer: 5.2.1
 
+  jws@4.0.1:
+    dependencies:
+      jwa: 2.0.1
+      safe-buffer: 5.2.1
+
   kind-of@6.0.3: {}
 
   kleur@3.0.3: {}
@@ -17187,6 +17322,12 @@ snapshots:
       node-domexception: 1.0.0
       web-streams-polyfill: 3.3.3
 
+  node-fetch@3.3.2:
+    dependencies:
+      data-uri-to-buffer: 4.0.1
+      fetch-blob: 3.2.0
+      formdata-polyfill: 4.0.10
+
   node-gyp-build-optional-packages@5.2.2:
     dependencies:
       detect-libc: 2.0.1
diff --git a/src/server/lib/config/ConfigBuilder.ts b/src/server/lib/config/ConfigBuilder.ts
index bade3c4f..9d84c661 100644
--- a/src/server/lib/config/ConfigBuilder.ts
+++ b/src/server/lib/config/ConfigBuilder.ts
@@ -17,6 +17,7 @@
 import { merge, cloneDeep } from 'lodash';
 import { mergeKeyValueArrays } from 'shared/utils';
 import {
+  NativeBuildRegistryAuth,
   NativeHelmConfig as GlobalNativeHelmConfig,
   NativeHelmPostRendererConfig as GlobalNativeHelmPostRendererConfig,
 } from 'server/services/types/globalConfig';
@@ -43,6 +44,7 @@ export interface BuildConfig {
   engine?: BuilderEngine;
   serviceAccount?: string;
   jobTimeout?: number;
+  registryAuth?: NativeBuildRegistryAuth[];
   resources?: {
     requests?: Record<string, string>;
     limits?: Record<string, string>;
diff --git a/src/server/lib/nativeBuild/__tests__/buildkit.test.ts b/src/server/lib/nativeBuild/__tests__/buildkit.test.ts
index ff8dc943..857993d9 100644
--- a/src/server/lib/nativeBuild/__tests__/buildkit.test.ts
+++ b/src/server/lib/nativeBuild/__tests__/buildkit.test.ts
@@ -18,6 +18,7 @@ import { buildkitBuild, NativeBuildOptions, generateSecretArgsScript } from '../
 import { shellPromise } from '../../shell';
 import { waitForJobAndGetLogs, getGitHubToken } from '../utils';
 import GlobalConfigService from '../../../services/globalConfig';
+import { createNativeBuildRegistryAuthSecret, deleteNativeBuildRegistryAuthSecret } from '../registryAuth';
 
 // Mock dependencies
 jest.mock('../../shell');
@@ -35,6 +36,14 @@ jest.mock('../utils', () => {
   };
 });
 jest.mock('../../../services/globalConfig');
+// Partial mock of the registry-auth module: everything is spread from the real
+// implementation, and only the two Secret lifecycle functions become jest stubs
+// that each suite configures in its beforeEach.
+jest.mock('../registryAuth', () => ({
+  ...jest.requireActual('../registryAuth'),
+  createNativeBuildRegistryAuthSecret: jest.fn(),
+  deleteNativeBuildRegistryAuthSecret: jest.fn(),
+}));
 jest.mock('../../../models', () => ({
   Build: {
     query: jest.fn().mockReturnValue({
@@ -109,6 +118,8 @@ describe('buildkitBuild', () => {
     });
 
     (getGitHubToken as jest.Mock).mockResolvedValue('github-token-123');
+    (createNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
+    (deleteNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
 
     (shellPromise as jest.Mock).mockResolvedValue('');
 
@@ -260,6 +271,27 @@ describe('buildkitBuild', () => {
     expect(fullCommand).toContain('export AWS_RETRY_MODE=adaptive');
   });
 
+  it('preserves the existing ECR output login flow', async () => {
+    await buildkitBuild(mockDeploy, mockOptions);
+
+    const kubectlCalls = (shellPromise as jest.Mock).mock.calls;
+    const applyCall = kubectlCalls.find((call) => call[0].includes('kubectl apply'));
+    const fullCommand = applyCall[0];
+
+    expect(fullCommand).toContain('REGISTRY_DOMAIN=\\"123456789.dkr.ecr.us-east-1.amazonaws.com\\"');
+    expect(fullCommand).toContain('Detected AWS ECR registry');
+    expect(fullCommand).toContain('AWS_REGION=$(echo \\"${REGISTRY_DOMAIN}\\" | sed');
+    expect(fullCommand).toContain('aws sts get-caller-identity');
+    expect(fullCommand).toContain('aws ecr get-login-password --region ${AWS_REGION}');
+    expect(fullCommand).toContain(
+      'echo \\"$ECR_PASSWORD\\" | docker login --username AWS --password-stdin ${REGISTRY_DOMAIN}'
+    );
+    expect(fullCommand).toContain('export DOCKER_CONFIG=~/.docker');
+    expect(fullCommand).toContain(
+      'type=image,name=123456789.dkr.ecr.us-east-1.amazonaws.com/test-repo:v1.0.0,push=true'
+    );
+  });
+
   it('renders registry domain safely for non-ECR buildkit targets', async () => {
     const optionsWithCustomRegistry = {
       ...mockOptions,
@@ -342,6 +374,207 @@ describe('buildkitBuild', () => {
   });
 });
 
+describe('native build GAR registry auth', () => {
+  const garRegistry = 'us-central1-docker.pkg.dev';
+  const mockDeploy = {
+    deployable: { name: 'test-service' },
+    $fetchGraph: jest.fn(),
+    build: { isStatic: false },
+  } as any;
+
+  const baseOptions: NativeBuildOptions = {
+    ecrRepo: 'test-repo',
+    ecrDomain: 'registry.internal.svc.cluster.local',
+    envVars: { NODE_ENV: 'production' },
+    dockerfilePath: 'Dockerfile',
+    tag: 'v1.0.0',
+    revision: 'abc123def456789',
+    repo: 'owner/repo',
+    branch: 'main',
+    namespace: 'env-test-123',
+    buildId: '456',
+    buildUuid: 'abc123',
+    deployUuid: 'test-service-abc123',
+    jobTimeout: 1800,
+  };
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    (GlobalConfigService.getInstance as jest.Mock).mockReturnValue({
+      getAllConfigs: jest.fn().mockResolvedValue({
+        buildDefaults: {
+          cacheRegistry: `${garRegistry}/project/cache`,
+          registryAuth: [{ type: 'gar', registry: garRegistry }],
+        },
+      }),
+    });
+    (getGitHubToken as jest.Mock).mockResolvedValue('github-token-123');
+    (createNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
+    (deleteNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
+    (shellPromise as jest.Mock).mockResolvedValue('');
+    (waitForJobAndGetLogs as jest.Mock).mockResolvedValue({
+      logs: 'Build completed successfully',
+      success: true,
+    });
+  });
+
+  it('seeds BuildKit with GAR credentials while keeping Distribution output insecure', async () => {
+    await buildkitBuild(mockDeploy, baseOptions);
+
+    const createSecretArgs = (createNativeBuildRegistryAuthSecret as jest.Mock).mock.calls[0][0];
+    const applyCall = (shellPromise as jest.Mock).mock.calls.find((call) => call[0].includes('kubectl apply'));
+    const fullCommand = applyCall[0];
+
+    expect(createSecretArgs).toEqual(
+      expect.objectContaining({
+        namespace: 'env-test-123',
+        registryAuth: [{ type: 'gar', registry: garRegistry }],
+        buildUuid: 'abc123',
+        deployUuid: 'test-service-abc123',
+      })
+    );
+    expect(createSecretArgs.secretName).toMatch(/-registry-auth$/);
+    expect(fullCommand).toContain('name: "registry-auth-copy"');
+    expect(fullCommand).toContain(`secretName: "${createSecretArgs.secretName}"`);
+    expect(fullCommand).toContain('mountPath: "/root/.docker"');
+    expect(fullCommand).toContain(
+      'type=image,name=registry.internal.svc.cluster.local/test-repo:v1.0.0,push=true,registry.insecure=true'
+    );
+    expect(fullCommand).toContain(`type=registry,ref=${garRegistry}/project/cache/test-repo/test-service/abc123:cache`);
+    expect(fullCommand).not.toContain(
+      `type=registry,ref=${garRegistry}/project/cache/test-repo/test-service/abc123:cache,insecure=true`
+    );
+    expect(fullCommand).not.toContain('gar-access-token');
+    expect(deleteNativeBuildRegistryAuthSecret).toHaveBeenCalledWith('env-test-123', createSecretArgs.secretName);
+  });
+
+  it('keeps GAR output and cache transport secure for BuildKit', async () => {
+    await buildkitBuild(mockDeploy, {
+      ...baseOptions,
+      ecrDomain: garRegistry,
+      ecrRepo: 'project/output',
+    });
+
+    const applyCall = (shellPromise as jest.Mock).mock.calls.find((call) => call[0].includes('kubectl apply'));
+    const fullCommand = applyCall[0];
+
+    expect(fullCommand).toContain(
+      `type=image,name=${garRegistry}/project/output:v1.0.0,push=true,oci-mediatypes=false`
+    );
+    expect(fullCommand).not.toContain(
+      `type=image,name=${garRegistry}/project/output:v1.0.0,push=true,registry.insecure=true`
+    );
+    expect(fullCommand).not.toContain(
+      `type=registry,ref=${garRegistry}/project/cache/test-repo/test-service/abc123:cache,insecure=true`
+    );
+  });
+
+  it('keeps BuildKit ECR destination login when GAR credentials are configured', async () => {
+    await buildkitBuild(mockDeploy, {
+      ...baseOptions,
+      ecrDomain: '123456789.dkr.ecr.us-east-1.amazonaws.com',
+    });
+
+    const applyCall = (shellPromise as jest.Mock).mock.calls.find((call) => call[0].includes('kubectl apply'));
+    const fullCommand = applyCall[0];
+
+    expect(fullCommand).toContain('name: "registry-auth-copy"');
+    expect(fullCommand).toContain('mountPath: "/root/.docker"');
+    expect(fullCommand).toContain('REGISTRY_DOMAIN=\\"123456789.dkr.ecr.us-east-1.amazonaws.com\\"');
+    expect(fullCommand).toContain('Detected AWS ECR registry');
+    expect(fullCommand).toContain(
+      'echo \\"$ECR_PASSWORD\\" | docker login --username AWS --password-stdin ${REGISTRY_DOMAIN}'
+    );
+    expect(fullCommand).toContain(
+      'type=image,name=123456789.dkr.ecr.us-east-1.amazonaws.com/test-repo:v1.0.0,push=true,registry.insecure=true'
+    );
+  });
+
+  it('cleans the temporary Secret when Job creation fails', async () => {
+    (shellPromise as jest.Mock).mockRejectedValue(new Error('kubectl apply failed'));
+
+    await expect(buildkitBuild(mockDeploy, baseOptions)).rejects.toThrow('kubectl apply failed');
+
+    const createSecretArgs = (createNativeBuildRegistryAuthSecret as jest.Mock).mock.calls[0][0];
+    expect(deleteNativeBuildRegistryAuthSecret).toHaveBeenCalledWith('env-test-123', createSecretArgs.secretName);
+  });
+
+  it('does not create a Job when temporary Secret creation fails', async () => {
+    (createNativeBuildRegistryAuthSecret as jest.Mock).mockRejectedValue(new Error('secret creation failed'));
+
+    await expect(buildkitBuild(mockDeploy, baseOptions)).rejects.toThrow('secret creation failed');
+
+    expect(shellPromise).not.toHaveBeenCalled();
+    expect(deleteNativeBuildRegistryAuthSecret).not.toHaveBeenCalled();
+  });
+
+  it('rejects invalid GAR configuration before creating a Secret or Job', async () => {
+    (GlobalConfigService.getInstance as jest.Mock).mockReturnValue({
+      getAllConfigs: jest.fn().mockResolvedValue({
+        buildDefaults: {
+          registryAuth: [{ type: 'gar', registry: 'https://us-central1-docker.pkg.dev/project/repo' }],
+        },
+      }),
+    });
+
+    await expect(buildkitBuild(mockDeploy, baseOptions)).rejects.toThrow('Build: invalid GAR registry');
+
+    expect(createNativeBuildRegistryAuthSecret).not.toHaveBeenCalled();
+    expect(shellPromise).not.toHaveBeenCalled();
+  });
+
+  it('seeds Kaniko with GAR credentials without overwriting them for Distribution output', async () => {
+    const { kanikoBuild } = require('../engines');
+    await kanikoBuild(mockDeploy, baseOptions);
+
+    const applyCall = (shellPromise as jest.Mock).mock.calls.find((call) => call[0].includes('kubectl apply'));
+    const fullCommand = applyCall[0];
+
+    expect(fullCommand).toContain('name: "registry-auth-copy"');
+    expect(fullCommand).not.toContain('name: "registry-login"');
+    expect(fullCommand).toContain('mountPath: "/kaniko/.docker"');
+    expect(fullCommand).toContain('--insecure-registry=registry.internal.svc.cluster.local');
+    expect(fullCommand).not.toContain(`--insecure-registry=${garRegistry}`);
+  });
+
+  it('keeps GAR output and cache transport secure for Kaniko', async () => {
+    const { kanikoBuild } = require('../engines');
+    await kanikoBuild(mockDeploy, {
+      ...baseOptions,
+      ecrDomain: garRegistry,
+      ecrRepo: 'project/output',
+    });
+
+    const applyCall = (shellPromise as jest.Mock).mock.calls.find((call) => call[0].includes('kubectl apply'));
+    const fullCommand = applyCall[0];
+
+    expect(fullCommand).not.toContain('--insecure-registry');
+    expect(fullCommand).not.toContain('name: "registry-login"');
+    expect(fullCommand).toContain(`--destination=${garRegistry}/project/output:v1.0.0`);
+  });
+
+  it('merges GAR credentials with ECR output credentials for Kaniko only when GAR is configured', async () => {
+    const { kanikoBuild } = require('../engines');
+    await kanikoBuild(mockDeploy, {
+      ...baseOptions,
+      ecrDomain: '123456789.dkr.ecr.us-east-1.amazonaws.com',
+    });
+
+    const applyCall = (shellPromise as jest.Mock).mock.calls.find((call) => call[0].includes('kubectl apply'));
+    const fullCommand = applyCall[0];
+
+    expect(fullCommand).toContain('name: "registry-auth-copy"');
+    expect(fullCommand).toContain('name: "registry-login"');
+    expect(fullCommand).toContain('> /docker-config/ecr-config.json');
+    expect(fullCommand).toContain('name: "registry-auth-merge"');
+    expect(fullCommand).toContain('apk add --no-cache jq');
+    expect(fullCommand).toContain(
+      "jq -s '.[0] * .[1]' /docker-config/config.json /docker-config/ecr-config.json > /docker-config/config.json.tmp"
+    );
+    expect(fullCommand).not.toContain('--insecure-registry');
+  });
+});
+
 describe('build resource precedence', () => {
   const mockDeploy = {
     deployable: { name: 'test-service' },
@@ -367,6 +600,8 @@ describe('build resource precedence', () => {
   beforeEach(() => {
     jest.clearAllMocks();
     (getGitHubToken as jest.Mock).mockResolvedValue('github-token-123');
+    (createNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
+    (deleteNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
     (shellPromise as jest.Mock).mockResolvedValue('');
     (waitForJobAndGetLogs as jest.Mock).mockResolvedValue({
       logs: 'Build completed successfully',
@@ -542,6 +777,8 @@ describe('build pod annotations', () => {
   beforeEach(() => {
     jest.clearAllMocks();
     (getGitHubToken as jest.Mock).mockResolvedValue('github-token-123');
+    (createNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
+    (deleteNativeBuildRegistryAuthSecret as jest.Mock).mockResolvedValue(undefined);
     (shellPromise as jest.Mock).mockResolvedValue('');
     (waitForJobAndGetLogs as jest.Mock).mockResolvedValue({
       logs: 'Build completed successfully',
@@ -732,6 +969,13 @@ describe('kaniko registry login bootstrap', () => {
     expect(fullCommand).toContain('export AWS_MAX_ATTEMPTS=5');
     expect(fullCommand).toContain('export AWS_RETRY_MODE=adaptive');
     expect(fullCommand).toContain('aws ecr get-login-password --region us-east-1');
+    expect(fullCommand).toContain(
+      'echo \'{\\"auths\\":{\\"123456789.dkr.ecr.us-east-1.amazonaws.com\\":{\\"auth\\":\\"\'$(echo -n \\"AWS:$PASSWORD\\" | base64)\'\\"}}}\' > /workspace/.docker/config.json'
+    );
+    expect(fullCommand).toContain('mountPath: "/kaniko/.docker"');
+    expect(fullCommand).toContain('subPath: ".docker"');
+    expect(fullCommand).toContain('name: "DOCKER_CONFIG"');
+    expect(fullCommand).toContain('value: "/kaniko/.docker"');
   });
 
   it('keeps non-ECR login bootstrap generic', async () => {
diff --git a/src/server/lib/nativeBuild/__tests__/registryAuth.test.ts b/src/server/lib/nativeBuild/__tests__/registryAuth.test.ts
new file mode 100644
index 00000000..d1bf5132
--- /dev/null
+++ b/src/server/lib/nativeBuild/__tests__/registryAuth.test.ts
@@ -0,0 +1,235 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const mockCreateSecret = jest.fn();
+const mockDeleteSecret = jest.fn();
+const mockGetAccessToken = jest.fn();
+const mockWarn = jest.fn();
+
+jest.mock('@kubernetes/client-node', () => {
+  const actual = jest.requireActual('@kubernetes/client-node');
+  return {
+    ...actual,
+    KubeConfig: jest.fn().mockImplementation(() => ({
+      loadFromDefault: jest.fn(),
+      makeApiClient: jest.fn().mockReturnValue({
+        createNamespacedSecret: mockCreateSecret,
+        deleteNamespacedSecret: mockDeleteSecret,
+      }),
+    })),
+  };
+});
+
+jest.mock('google-auth-library', () => ({
+  GoogleAuth: jest.fn().mockImplementation(() => ({
+    getAccessToken: mockGetAccessToken,
+  })),
+}));
+
+jest.mock('server/lib/logger', () => ({
+  getLogger: () => ({
+    info: jest.fn(),
+    warn: mockWarn,
+  }),
+}));
+
+import {
+  buildGarDockerConfig,
+  buildNativeBuildRegistryAuthSecretName,
+  createNativeBuildRegistryAuthSecret,
+  deleteNativeBuildRegistryAuthSecret,
+  getKanikoInsecureRegistries,
+  normalizeNativeBuildRegistryAuth,
+} from '../registryAuth';
+
+describe('native build registry auth', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+    mockGetAccessToken.mockResolvedValue('gar-access-token');
+    mockCreateSecret.mockResolvedValue({ body: {} });
+    mockDeleteSecret.mockResolvedValue({});
+  });
+
+  describe('normalizeNativeBuildRegistryAuth', () => {
+    it('keeps missing and empty registry authentication as a no-op', () => {
+      expect(normalizeNativeBuildRegistryAuth(undefined)).toEqual([]);
+      expect(normalizeNativeBuildRegistryAuth([])).toEqual([]);
+    });
+
+    it('normalizes GAR registry hostnames', () => {
+      expect(
+        normalizeNativeBuildRegistryAuth([
+          {
+            type: 'gar',
+            registry: '  US-CENTRAL1-DOCKER.PKG.DEV  ',
+          },
+        ])
+      ).toEqual([
+        {
+          type: 'gar',
+          registry: 'us-central1-docker.pkg.dev',
+        },
+      ]);
+    });
+
+    it.each([
+      ['non-array registryAuth', { type: 'gar', registry: 'us-central1-docker.pkg.dev' }],
+      ['non-object entry', ['gar']],
+      ['unsupported provider', [{ type: 'ecr', registry: '123456789.dkr.ecr.us-east-1.amazonaws.com' }]],
+      ['registry path', [{ type: 'gar', registry: 'us-central1-docker.pkg.dev/project/repo' }]],
+      ['registry scheme', [{ type: 'gar', registry: 'https://us-central1-docker.pkg.dev' }]],
+      [
+        'duplicate normalized registry',
+        [
+          { type: 'gar', registry: 'us-central1-docker.pkg.dev' },
+          { type: 'gar', registry: ' US-CENTRAL1-DOCKER.PKG.DEV ' },
+        ],
+      ],
+    ])('rejects %s', (_name, value) => {
+      expect(() => normalizeNativeBuildRegistryAuth(value)).toThrow('Build:');
+    });
+  });
+
+  describe('buildGarDockerConfig', () => {
+    it('creates Docker credentials for each GAR host using one OAuth token', () => {
+      const dockerConfig = JSON.parse(
+        buildGarDockerConfig(
+          [
+            { type: 'gar', registry: 'us-central1-docker.pkg.dev' },
+            { type: 'gar', registry: 'us-east1-docker.pkg.dev' },
+          ],
+          'gar-access-token'
+        )
+      );
+      const expectedAuth = Buffer.from('oauth2accesstoken:gar-access-token').toString('base64');
+
+      expect(dockerConfig).toEqual({
+        auths: {
+          'us-central1-docker.pkg.dev': { auth: expectedAuth },
+          'us-east1-docker.pkg.dev': { auth: expectedAuth },
+        },
+      });
+    });
+  });
+
+  describe('buildNativeBuildRegistryAuthSecretName', () => {
+    it('derives a valid secret name while preserving the unique job suffix', () => {
+      const secretName = buildNativeBuildRegistryAuthSecretName({
+        deployUuid: 'subs-process-cancellations-solitary-glitter-950234',
+        jobId: 'a1b2c',
+        shortSha: '0d84142',
+      });
+
+      expect(secretName).toMatch(/-build-a1b2c-0d84142-registry-auth$/);
+      expect(secretName.length).toBeLessThanOrEqual(63);
+    });
+  });
+
+  describe('getKanikoInsecureRegistries', () => {
+    it('keeps Distribution insecure while leaving GAR and ECR on HTTPS', () => {
+      expect(
+        getKanikoInsecureRegistries(
+          [
+            'registry.internal.svc.cluster.local/repo:tag',
+            'us-central1-docker.pkg.dev/project/repo/cache',
+            '123456789.dkr.ecr.us-east-1.amazonaws.com/repo:tag',
+          ],
+          [{ type: 'gar', registry: 'us-central1-docker.pkg.dev' }]
+        )
+      ).toEqual(['registry.internal.svc.cluster.local']);
+    });
+  });
+
+  describe('createNativeBuildRegistryAuthSecret', () => {
+    it('stores GAR credentials in a temporary dockerconfigjson Secret', async () => {
+      await createNativeBuildRegistryAuthSecret({
+        namespace: 'env-test-123',
+        secretName: 'test-build-registry-auth',
+        registryAuth: [{ type: 'gar', registry: 'us-central1-docker.pkg.dev' }],
+        buildUuid: 'build-123',
+        deployUuid: 'deploy-123',
+      });
+
+      expect(mockGetAccessToken).toHaveBeenCalledTimes(1);
+      expect(mockCreateSecret).toHaveBeenCalledWith(
+        'env-test-123',
+        expect.objectContaining({
+          metadata: {
+            name: 'test-build-registry-auth',
+            namespace: 'env-test-123',
+            labels: {
+              'app.kubernetes.io/managed-by': 'lifecycle',
+              'app.kubernetes.io/component': 'native-build-registry-auth',
+              lc_uuid: 'build-123',
+              deploy_uuid: 'deploy-123',
+            },
+          },
+          type: 'kubernetes.io/dockerconfigjson',
+          stringData: {
+            '.dockerconfigjson': buildGarDockerConfig(
+              [{ type: 'gar', registry: 'us-central1-docker.pkg.dev' }],
+              'gar-access-token'
+            ),
+          },
+        })
+      );
+    });
+
+    it('fails closed when ADC does not return a token', async () => {
+      mockGetAccessToken.mockResolvedValue(null);
+
+      await expect(
+        createNativeBuildRegistryAuthSecret({
+          namespace: 'env-test-123',
+          secretName: 'test-build-registry-auth',
+          registryAuth: [{ type: 'gar', registry: 'us-central1-docker.pkg.dev' }],
+          deployUuid: 'deploy-123',
+        })
+      ).rejects.toThrow(
+        'Build: GAR access token acquisition failed registries=us-central1-docker.pkg.dev verify=google_application_default_credentials'
+      );
+      expect(mockCreateSecret).not.toHaveBeenCalled();
+    });
+
+    it('fails closed without exposing credentials when Secret creation fails', async () => {
+      mockCreateSecret.mockRejectedValue(new Error('gar-access-token'));
+      // toThrow(string) only checks for a substring, so it cannot prove the token is absent.
+      const failure = await createNativeBuildRegistryAuthSecret({
+        namespace: 'env-test-123',
+        secretName: 'test-build-registry-auth',
+        registryAuth: [{ type: 'gar', registry: 'us-central1-docker.pkg.dev' }],
+        deployUuid: 'deploy-123',
+      }).catch((caught: unknown) => String(caught));
+      expect(failure).toContain(
+        'Build: registry auth Secret creation failed secretName=test-build-registry-auth namespace=env-test-123'
+      );
+      expect(failure).not.toContain('gar-access-token');
+    });
+  });
+
+  describe('deleteNativeBuildRegistryAuthSecret', () => {
+    it('does not fail the build when Secret cleanup fails', async () => {
+      mockDeleteSecret.mockRejectedValue(new Error('cleanup failed'));
+
+      // Cleanup is best-effort: the rejection must be swallowed and surfaced as a warning only.
+      const outcome = await deleteNativeBuildRegistryAuthSecret('env-test-123', 'test-build-registry-auth');
+
+      expect(outcome).toBe(undefined);
+      const warning = 'Build: registry auth cleanup failed secretName=test-build-registry-auth namespace=env-test-123';
+      expect(mockWarn).toHaveBeenCalledWith(warning);
+    });
+  });
+});
diff --git a/src/server/lib/nativeBuild/engines.ts b/src/server/lib/nativeBuild/engines.ts
index 2f9e59bd..84c7d987 100644
--- a/src/server/lib/nativeBuild/engines.ts
+++ b/src/server/lib/nativeBuild/engines.ts
@@ -28,6 +28,21 @@ import { createBuildJob } from '../kubernetes/jobFactory';
 import { buildNativeBuildJobName } from '../kubernetes/jobNames';
 import * as yaml from 'js-yaml';
 import { getLogArchivalService } from '../../services/logArchival';
+import {
+  buildNativeBuildRegistryAuthSecretName,
+  createKanikoRegistryAuthMergeInitContainer,
+  createNativeBuildRegistryAuthSecret,
+  createRegistryAuthCopyInitContainer,
+  createRegistryAuthVolumes,
+  deleteNativeBuildRegistryAuthSecret,
+  DOCKER_CONFIG_MOUNT_PATH,
+  DOCKER_CONFIG_VOLUME_NAME,
+  GarRegistryAuth,
+  getKanikoInsecureRegistries,
+  isConfiguredGarRegistry,
+  KANIKO_DOCKER_CONFIG_MOUNT_PATH,
+  normalizeNativeBuildRegistryAuth,
+} from './registryAuth';
 
 export interface NativeBuildOptions {
   ecrRepo: string;
@@ -64,7 +79,7 @@ interface BuildEngine {
   createArgs: (options: BuildArgOptions) => string[];
   envVars?: Record<string, string>;
   // eslint-disable-next-line no-unused-vars
-  getCacheRef: (cacheRegistry: string, ecrRepo: string) => string;
+  getCacheRef: (cacheRegistry: string | undefined, ecrRepo: string) => string;
 }
 
 interface BuildArgOptions {
@@ -74,6 +89,7 @@ interface BuildArgOptions {
   cacheRef: string;
   buildArgs: Record<string, string>;
   ecrDomain: string;
+  registryAuth?: GarRegistryAuth[];
   secretEnvKeys?: string[];
 }
 
@@ -94,7 +110,18 @@ const ENGINES: Record<string, BuildEngine> = {
     name: 'buildkit',
     image: 'moby/buildkit:v0.29.0',
     command: ['/bin/sh', '-c'],
-    createArgs: ({ contextPath, dockerfilePath, destination, cacheRef, buildArgs, ecrDomain, secretEnvKeys }) => {
+    createArgs: ({
+      contextPath,
+      dockerfilePath,
+      destination,
+      cacheRef,
+      buildArgs,
+      ecrDomain,
+      registryAuth = [],
+      secretEnvKeys,
+    }) => {
+      const outputInsecureOption = isConfiguredGarRegistry(destination, registryAuth) ? '' : ',registry.insecure=true';
+      const cacheInsecureOption = isConfiguredGarRegistry(cacheRef, registryAuth) ? '' : ',insecure=true';
       const buildctlArgs = [
         'build',
         '--frontend',
@@ -106,11 +133,11 @@ const ENGINES: Record<string, BuildEngine> = {
         '--opt',
         `filename=${dockerfilePath}`,
         '--output',
-        `type=image,name=${destination},push=true,registry.insecure=true,oci-mediatypes=false`,
+        `type=image,name=${destination},push=true${outputInsecureOption},oci-mediatypes=false`,
         '--export-cache',
-        `type=registry,ref=${cacheRef},mode=max,compression=zstd,oci-mediatypes=true,insecure=true`,
+        `type=registry,ref=${cacheRef},mode=max,compression=zstd,oci-mediatypes=true${cacheInsecureOption}`,
         '--import-cache',
-        `type=registry,ref=${cacheRef},insecure=true`,
+        `type=registry,ref=${cacheRef}${cacheInsecureOption}`,
       ];
 
       Object.entries(buildArgs).forEach(([key, value]) => {
@@ -176,14 +203,20 @@ buildctl ${buildctlArgs.join(' \\\n  ')} $SECRET_BUILD_ARGS
     name: 'kaniko',
     image: 'gcr.io/kaniko-project/executor:v1.9.2',
     command: ['/kaniko/executor'],
-    createArgs: ({ contextPath, dockerfilePath, destination, cacheRef, buildArgs }) => {
+    createArgs: ({ contextPath, dockerfilePath, destination, cacheRef, buildArgs, registryAuth = [] }) => {
+      const insecureRegistryArgs =
+        registryAuth.length === 0
+          ? ['--insecure-registry']
+          : getKanikoInsecureRegistries([destination, cacheRef], registryAuth).map(
+              (registry) => `--insecure-registry=${registry}`
+            );
       const args = [
         `--context=${contextPath}`,
         `--dockerfile=${contextPath}/${dockerfilePath}`,
         `--destination=${destination}`,
         '--cache=true',
         `--cache-repo=${cacheRef}`,
-        '--insecure-registry',
+        ...insecureRegistryArgs,
         '--push-retry=3',
         '--snapshot-mode=time',
       ];
@@ -216,7 +249,8 @@ function createBuildContainer(
   buildArgs: Record<string, string>,
   ecrDomain: string,
   secretRefs?: string[],
-  secretEnvKeys?: string[]
+  secretEnvKeys?: string[],
+  registryAuth: GarRegistryAuth[] = []
 ): any {
   const args = engine.createArgs({
     contextPath,
@@ -225,6 +259,7 @@ function createBuildContainer(
     cacheRef,
     buildArgs,
     ecrDomain,
+    registryAuth,
     secretEnvKeys,
   });
 
@@ -237,13 +272,22 @@ function createBuildContainer(
     },
   ];
 
-  if (engine.name === 'kaniko') {
+  if (engine.name === 'kaniko' && registryAuth.length === 0) {
     volumeMounts.push({
       name: 'workspace',
-      mountPath: '/kaniko/.docker',
+      mountPath: KANIKO_DOCKER_CONFIG_MOUNT_PATH,
       subPath: '.docker',
     } as any);
-    containerEnvVars['DOCKER_CONFIG'] = '/kaniko/.docker';
+    containerEnvVars['DOCKER_CONFIG'] = KANIKO_DOCKER_CONFIG_MOUNT_PATH;
+  } else if (registryAuth.length > 0) {
+    volumeMounts.push({
+      name: DOCKER_CONFIG_VOLUME_NAME,
+      mountPath: engine.name === 'kaniko' ? KANIKO_DOCKER_CONFIG_MOUNT_PATH : '/root/.docker',
+    } as any);
+
+    if (engine.name === 'kaniko') {
+      containerEnvVars['DOCKER_CONFIG'] = KANIKO_DOCKER_CONFIG_MOUNT_PATH;
+    }
   }
 
   const container: any = {
@@ -276,6 +320,7 @@ export async function buildWithEngine(
   const engine = ENGINES[engineName];
   const globalConfig = await GlobalConfigService.getInstance().getAllConfigs();
   const buildDefaults = globalConfig.buildDefaults || {};
+  const registryAuth = normalizeNativeBuildRegistryAuth(buildDefaults.registryAuth);
 
   const serviceAccount = options.serviceAccount || buildDefaults.serviceAccount || 'native-build-sa';
   const jobTimeout = options.jobTimeout || buildDefaults.jobTimeout || 2100;
@@ -297,6 +342,14 @@ export async function buildWithEngine(
     jobId,
     shortSha,
   });
+  const registryAuthSecretName =
+    registryAuth.length > 0
+      ? buildNativeBuildRegistryAuthSecretName({
+          deployUuid: options.deployUuid,
+          jobId,
+          shortSha,
+        })
+      : undefined;
   const contextPath = `/workspace/repo-${shortRepoName}`;
 
   getLogger().debug(`Build: preparing ${engine.name} job dockerfile=${options.dockerfilePath}`);
@@ -319,12 +372,15 @@ export async function buildWithEngine(
   const ecrMatch = registryDomain.match(ecrRegex);
   if (ecrMatch) {
     const region = ecrMatch[1] || 'us-west-2';
+    const dockerConfigDirectory = registryAuth.length > 0 ? DOCKER_CONFIG_MOUNT_PATH : '/workspace/.docker';
+    const dockerConfigPath =
+      registryAuth.length > 0 ? `${DOCKER_CONFIG_MOUNT_PATH}/ecr-config.json` : '/workspace/.docker/config.json';
     registryLoginScript = [
       'set -e',
       'export AWS_MAX_ATTEMPTS=5',
       'export AWS_RETRY_MODE=adaptive',
-      `aws ecr get-login-password --region ${region} | { read PASSWORD; mkdir -p /workspace/.docker && ` +
-        `echo '{"auths":{"${registryDomain}":{"auth":"'$(echo -n "AWS:$PASSWORD" | base64)'"}}}' > /workspace/.docker/config.json; }`,
+      `aws ecr get-login-password --region ${region} | { read PASSWORD; mkdir -p ${dockerConfigDirectory} && ` +
+        `echo '{"auths":{"${registryDomain}":{"auth":"'$(echo -n "AWS:$PASSWORD" | base64)'"}}}' > ${dockerConfigPath}; }`,
     ].join('\n');
   } else {
     registryLoginScript =
@@ -340,8 +396,8 @@ export async function buildWithEngine(
     env: [{ name: 'AWS_REGION', value: process.env.AWS_REGION || 'us-west-2' }],
     volumeMounts: [
       {
-        name: 'workspace',
-        mountPath: '/workspace',
+        name: registryAuth.length > 0 ? DOCKER_CONFIG_VOLUME_NAME : 'workspace',
+        mountPath: registryAuth.length > 0 ? DOCKER_CONFIG_MOUNT_PATH : '/workspace',
       },
     ],
   };
@@ -362,7 +418,7 @@ export async function buildWithEngine(
     };
   }
 
-  const containers = [];
+  const containers: any[] = [];
   let cacheRef = engine.getCacheRef(cacheRegistry, options.ecrRepo);
 
   // Scope cache per service + build-uuid to prevent concurrent PR builds from corrupting shared cache entries
@@ -384,7 +440,8 @@ export async function buildWithEngine(
       options.envVars,
       options.ecrDomain,
       options.secretRefs,
-      options.secretEnvKeys
+      options.secretEnvKeys,
+      registryAuth
     )
   );
 
@@ -403,7 +460,8 @@ export async function buildWithEngine(
         options.envVars,
         options.ecrDomain,
         options.secretRefs,
-        options.secretEnvKeys
+        options.secretEnvKeys,
+        registryAuth
       )
     );
     getLogger().debug('Build: including init image');
@@ -412,8 +470,23 @@ export async function buildWithEngine(
   await deploy.$fetchGraph('build');
   const isStatic = deploy.build?.isStatic || false;
 
-  // For buildkit, only git clone is needed. For kaniko, we need registry login too.
-  const initContainers = engineName === 'buildkit' ? [gitCloneContainer] : [gitCloneContainer, registryLoginContainer];
+  const registryAuthInitContainers = registryAuthSecretName ? [createRegistryAuthCopyInitContainer()] : [];
+  let initContainers;
+
+  if (engineName === 'buildkit') {
+    initContainers = [gitCloneContainer, ...registryAuthInitContainers];
+  } else if (registryAuth.length === 0) {
+    initContainers = [gitCloneContainer, registryLoginContainer];
+  } else if (ecrMatch) {
+    initContainers = [
+      gitCloneContainer,
+      ...registryAuthInitContainers,
+      registryLoginContainer,
+      createKanikoRegistryAuthMergeInitContainer(),
+    ];
+  } else {
+    initContainers = [gitCloneContainer, ...registryAuthInitContainers];
+  }
 
   const job = createBuildJob({
     jobName,
@@ -436,93 +509,115 @@ export async function buildWithEngine(
         name: 'workspace',
         emptyDir: {},
       },
+      ...(registryAuthSecretName ? createRegistryAuthVolumes(registryAuthSecretName) : []),
     ],
     podAnnotations,
   });
 
   const jobYaml = yaml.dump(job, { quotingType: '"', forceQuotes: true });
-  await shellPromise(`cat <<'EOF' | kubectl apply -f -
-${jobYaml}
-EOF`);
-  getLogger().debug(`Job: created ${jobName}`);
-
   const logArchivalEnabled = globalConfig.logArchival?.enabled;
+  let registryAuthSecretCreated = false;
 
   try {
-    const { logs, success, startedAt, completedAt, duration } = await waitForJobAndGetLogs(
-      jobName,
-      options.namespace,
-      jobTimeout
-    );
-
-    if (logArchivalEnabled) {
-      try {
-        const archivalService = getLogArchivalService();
-        await archivalService.archiveLogs(
-          {
-            jobName,
-            jobType: 'build',
-            serviceName,
-            namespace: options.namespace,
-            status: success ? 'Complete' : 'Failed',
-            sha: options.revision,
-            deployUuid: options.deployUuid,
-            buildUuid: options.buildId,
-            engine: engineName,
-            startedAt,
-            completedAt,
-            duration,
-            archivedAt: new Date().toISOString(),
-          },
-          logs
-        );
-      } catch (archiveError) {
-        getLogger().warn({ error: archiveError }, `LogArchival: failed to archive build logs jobName=${jobName}`);
-      }
+    if (registryAuthSecretName) {
+      await createNativeBuildRegistryAuthSecret({
+        namespace: options.namespace,
+        secretName: registryAuthSecretName,
+        registryAuth,
+        buildUuid: options.buildUuid,
+        deployUuid: options.deployUuid,
+      });
+      registryAuthSecretCreated = true;
     }
 
-    return { success, logs, jobName };
-  } catch (error) {
-    getLogger({ error }).error(`Job: log retrieval failed name=${jobName}`);
+    await shellPromise(`cat <<'EOF' | kubectl apply -f -
+${jobYaml}
+EOF`);
+    getLogger().debug(`Job: created ${jobName}`);
 
     try {
-      const jobStatus = await shellPromise(
-        `kubectl get job ${jobName} -n ${options.namespace} -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}'`
+      const { logs, success, startedAt, completedAt, duration } = await waitForJobAndGetLogs(
+        jobName,
+        options.namespace,
+        jobTimeout
       );
-      const jobSucceeded = jobStatus.trim() === 'True';
 
-      if (jobSucceeded) {
-        getLogger().debug(`Job: completed (logs unavailable) job=${jobName}`);
-        return { success: true, logs: 'Log retrieval failed but job completed successfully', jobName };
+      if (logArchivalEnabled) {
+        try {
+          const archivalService = getLogArchivalService();
+          await archivalService.archiveLogs(
+            {
+              jobName,
+              jobType: 'build',
+              serviceName,
+              namespace: options.namespace,
+              status: success ? 'Complete' : 'Failed',
+              sha: options.revision,
+              deployUuid: options.deployUuid,
+              buildUuid: options.buildId,
+              engine: engineName,
+              startedAt,
+              completedAt,
+              duration,
+              archivedAt: new Date().toISOString(),
+            },
+            logs
+          );
+        } catch (archiveError) {
+          getLogger().warn({ error: archiveError }, `LogArchival: failed to archive build logs jobName=${jobName}`);
+        }
       }
-    } catch (statusError) {
-      getLogger({ error: statusError }).error(`Job: status check failed name=${jobName}`);
-    }
 
-    if (logArchivalEnabled) {
+      return { success, logs, jobName };
+    } catch (error) {
+      getLogger({ error }).error(`Job: log retrieval failed name=${jobName}`);
+
       try {
-        const archivalService = getLogArchivalService();
-        await archivalService.archiveLogs(
-          {
-            jobName,
-            jobType: 'build',
-            serviceName,
-            namespace: options.namespace,
-            status: 'Failed',
-            sha: options.revision,
-            deployUuid: options.deployUuid,
-            buildUuid: options.buildId,
-            engine: engineName,
-            archivedAt: new Date().toISOString(),
-          },
-          `Build failed: ${error.message}`
+        const jobStatus = await shellPromise(
+          `kubectl get job ${jobName} -n ${options.namespace} -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}'`
         );
-      } catch (archiveError) {
-        getLogger().warn({ error: archiveError }, `LogArchival: failed to archive build error logs jobName=${jobName}`);
+        const jobSucceeded = jobStatus.trim() === 'True';
+
+        if (jobSucceeded) {
+          getLogger().debug(`Job: completed (logs unavailable) job=${jobName}`);
+          return { success: true, logs: 'Log retrieval failed but job completed successfully', jobName };
+        }
+      } catch (statusError) {
+        getLogger({ error: statusError }).error(`Job: status check failed name=${jobName}`);
       }
-    }
 
-    return { success: false, logs: `Build failed: ${error.message}`, jobName };
+      if (logArchivalEnabled) {
+        try {
+          const archivalService = getLogArchivalService();
+          await archivalService.archiveLogs(
+            {
+              jobName,
+              jobType: 'build',
+              serviceName,
+              namespace: options.namespace,
+              status: 'Failed',
+              sha: options.revision,
+              deployUuid: options.deployUuid,
+              buildUuid: options.buildId,
+              engine: engineName,
+              archivedAt: new Date().toISOString(),
+            },
+            `Build failed: ${error.message}`
+          );
+        } catch (archiveError) {
+          getLogger().warn(
+            { error: archiveError },
+            `LogArchival: failed to archive build error logs jobName=${jobName}`
+          );
+        }
+      }
+
+      return { success: false, logs: `Build failed: ${error.message}`, jobName };
+    }
+  } finally {
+    if (registryAuthSecretCreated && registryAuthSecretName) {
+      await deleteNativeBuildRegistryAuthSecret(options.namespace, registryAuthSecretName);
+    }
   }
 }
 
diff --git a/src/server/lib/nativeBuild/registryAuth.ts b/src/server/lib/nativeBuild/registryAuth.ts
new file mode 100644
index 00000000..8d7694d1
--- /dev/null
+++ b/src/server/lib/nativeBuild/registryAuth.ts
@@ -0,0 +1,280 @@
+/**
+ * Copyright 2026 GoodRx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import * as k8s from '@kubernetes/client-node';
+import { GoogleAuth } from 'google-auth-library';
+import { buildLifecycleLabels } from 'server/lib/kubernetes/labels';
+import { buildNativeBuildJobName, KUBERNETES_NAME_MAX_LENGTH } from 'server/lib/kubernetes/jobNames';
+import { getLogger } from 'server/lib/logger';
+
+export interface GarRegistryAuth { // one registryAuth entry; 'gar' is the only provider literal allowed
+  type: 'gar';
+  registry: string; // trimmed, lowercased hostname when produced by normalizeNativeBuildRegistryAuth
+}
+
+export const DOCKER_CONFIG_VOLUME_NAME = 'docker-config'; // emptyDir volume that holds the docker config file
+export const DOCKER_CONFIG_MOUNT_PATH = '/docker-config'; // where the init containers in this module mount that volume
+export const KANIKO_DOCKER_CONFIG_MOUNT_PATH = '/kaniko/.docker'; // exported only; not referenced inside this module
+
+const GAR_DOCKER_USERNAME = 'oauth2accesstoken'; // username half of the "<user>:<token>" pair in buildGarDockerConfig
+const GAR_REGISTRY_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*-docker\.pkg\.dev$/; // matches e.g. us-docker.pkg.dev, us-west1-docker.pkg.dev
+const ECR_REGISTRY_PATTERN = /^[0-9]+\.dkr\.ecr\.[a-z0-9-]+\.amazonaws\.com$/; // <digits>.dkr.ecr.<region>.amazonaws.com
+const REGISTRY_AUTH_SECRET_SUFFIX = '-registry-auth'; // appended to the job name to form the Secret name
+const REGISTRY_AUTH_SOURCE_VOLUME_NAME = 'registry-auth-source'; // Secret-backed volume
+const REGISTRY_AUTH_SOURCE_MOUNT_PATH = '/registry-auth'; // read-only mount point of the Secret-backed volume
+const GOOGLE_CLOUD_PLATFORM_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'; // scope passed to GoogleAuth
+
+function getCoreApi(): k8s.CoreV1Api {
+  const kubeConfig = new k8s.KubeConfig(); // built fresh on every call
+  kubeConfig.loadFromDefault();
+  return kubeConfig.makeApiClient(k8s.CoreV1Api);
+}
+
+function formatRegistryList(registryAuth: GarRegistryAuth[]): string {
+  return registryAuth.map((entry) => entry.registry).join(','); // comma-separated, no spaces
+}
+
+export function normalizeNativeBuildRegistryAuth(value: unknown): GarRegistryAuth[] {
+  // Missing configuration (undefined or null) yields an empty list.
+  if (value == null) {
+    return [];
+  }
+  if (!Array.isArray(value)) {
+    throw new Error('Build: invalid registryAuth configuration expected=array');
+  }
+
+  const acceptedHosts = new Set<string>();
+
+  // Validates one raw entry and throws on the first problem found.
+  const toGarEntry = (candidate: unknown, index: number): GarRegistryAuth => {
+    const isObjectEntry = Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate);
+    if (!isObjectEntry) {
+      throw new Error(`Build: invalid registryAuth entry index=${index} expected=object`);
+    }
+
+    const { registry: rawRegistry, type: provider } = candidate as Record<string, unknown>;
+    if (provider !== 'gar') {
+      throw new Error(`Build: unsupported registryAuth provider index=${index} type=${String(provider)}`);
+    }
+    if (typeof rawRegistry !== 'string') {
+      throw new Error(`Build: invalid GAR registry index=${index} expected=hostname`);
+    }
+
+    const host = rawRegistry.trim().toLowerCase();
+    if (!GAR_REGISTRY_PATTERN.test(host)) {
+      throw new Error(`Build: invalid GAR registry index=${index} registry=${host} expected=hostname`);
+    }
+    // The same host listed twice (after trimming and lowercasing) is rejected.
+    if (acceptedHosts.has(host)) {
+      throw new Error(`Build: duplicate GAR registry registry=${host}`);
+    }
+
+    acceptedHosts.add(host);
+    return { type: 'gar', registry: host };
+  };
+
+  return value.map(toGarEntry);
+}
+
+export function getRegistryHost(reference: string): string { // text before the first '/', trimmed and lowercased
+  return (reference.includes('/') ? reference.slice(0, reference.indexOf('/')) : reference).trim().toLowerCase();
+}
+
+export function isConfiguredGarRegistry(reference: string, registryAuth: GarRegistryAuth[]): boolean {
+  const configuredHosts = registryAuth.map((entry) => entry.registry); // compared verbatim against the reference host
+  return configuredHosts.includes(getRegistryHost(reference));
+}
+
+export function isEcrRegistry(reference: string): boolean { // true when the reference's host matches ECR_REGISTRY_PATTERN
+  return getRegistryHost(reference).match(ECR_REGISTRY_PATTERN) !== null;
+}
+
+export function getKanikoInsecureRegistries(references: string[], registryAuth: GarRegistryAuth[]): string[] {
+  const uniqueHosts = new Set<string>();
+  for (const reference of references) {
+    const host = getRegistryHost(reference);
+    // Skip empty hosts, the literal text 'undefined', and hosts that are configured GAR or ECR registries.
+    if (!host || host === 'undefined' || isConfiguredGarRegistry(host, registryAuth) || isEcrRegistry(host)) continue;
+    uniqueHosts.add(host);
+  }
+  return [...uniqueHosts];
+}
+
+/**
+ * Names the registry auth Secret: the native build job name followed by a fixed suffix.
+ */
+export function buildNativeBuildRegistryAuthSecretName({
+  deployUuid,
+  jobId,
+  shortSha,
+}: {
+  deployUuid: string;
+  jobId: string;
+  shortSha: string;
+}): string {
+  // Request a job name limit reduced by the suffix length, leaving room to append the suffix.
+  const maxLength = KUBERNETES_NAME_MAX_LENGTH - REGISTRY_AUTH_SECRET_SUFFIX.length;
+  const baseName = buildNativeBuildJobName({ deployUuid, jobId, shortSha, maxLength });
+
+  return `${baseName}${REGISTRY_AUTH_SECRET_SUFFIX}`;
+}
+
+/**
+ * Builds the two pod volumes used for registry auth.
+ *
+ * @param secretName Secret whose `.dockerconfigjson` key backs the source volume.
+ * @returns The Secret-backed source volume, then the `emptyDir` docker-config volume.
+ */
+export function createRegistryAuthVolumes(secretName: string): any[] {
+  // Only the `.dockerconfigjson` key is projected, under the same file name.
+  const dockerConfigItem = { key: '.dockerconfigjson', path: '.dockerconfigjson' };
+  const secretSourceVolume = {
+    name: REGISTRY_AUTH_SOURCE_VOLUME_NAME,
+    secret: { secretName, items: [dockerConfigItem] },
+  };
+  const dockerConfigVolume = {
+    name: DOCKER_CONFIG_VOLUME_NAME,
+    emptyDir: {},
+  };
+
+  return [secretSourceVolume, dockerConfigVolume];
+}
+
+/**
+ * Init container that copies the mounted `.dockerconfigjson` to `config.json` on the docker-config volume.
+ */
+export function createRegistryAuthCopyInitContainer(): any {
+  const sourceFile = `${REGISTRY_AUTH_SOURCE_MOUNT_PATH}/.dockerconfigjson`;
+  const targetFile = `${DOCKER_CONFIG_MOUNT_PATH}/config.json`;
+  const script = [
+    'set -e',
+    `mkdir -p ${DOCKER_CONFIG_MOUNT_PATH}`,
+    `cp ${sourceFile} ${targetFile}`,
+    `chmod 600 ${targetFile}`,
+  ].join('\n');
+
+  return {
+    name: 'registry-auth-copy',
+    image: 'alpine:3.18',
+    command: ['/bin/sh', '-c'],
+    args: [script],
+    volumeMounts: [
+      { name: REGISTRY_AUTH_SOURCE_VOLUME_NAME, mountPath: REGISTRY_AUTH_SOURCE_MOUNT_PATH, readOnly: true },
+      { name: DOCKER_CONFIG_VOLUME_NAME, mountPath: DOCKER_CONFIG_MOUNT_PATH },
+    ],
+  };
+}
+
+/** Init container that folds `ecr-config.json` into `config.json` and keeps the result at mode 600. */
+export function createKanikoRegistryAuthMergeInitContainer(): any {
+  const configFile = `${DOCKER_CONFIG_MOUNT_PATH}/config.json`;
+  const script = [
+    'set -e',
+    'apk add --no-cache jq',
+    `jq -s '.[0] * .[1]' ${configFile} ${DOCKER_CONFIG_MOUNT_PATH}/ecr-config.json > ${configFile}.tmp`,
+    // The redirect creates the temp file with the shell's default mode; restore 600 before it replaces config.json.
+    `chmod 600 ${configFile}.tmp`,
+    `mv ${configFile}.tmp ${configFile}`,
+    `rm ${DOCKER_CONFIG_MOUNT_PATH}/ecr-config.json`,
+  ].join('\n');
+  return {
+    name: 'registry-auth-merge',
+    image: 'alpine:3.18',
+    command: ['/bin/sh', '-c'],
+    args: [script],
+    volumeMounts: [{ name: DOCKER_CONFIG_VOLUME_NAME, mountPath: DOCKER_CONFIG_MOUNT_PATH }],
+  };
+}
+
+export function buildGarDockerConfig(registryAuth: GarRegistryAuth[], accessToken: string): string {
+  // Every registry gets the same value: base64 of "<username>:<token>".
+  const credentials = `${GAR_DOCKER_USERNAME}:${accessToken}`;
+  const auth = Buffer.from(credentials, 'utf8').toString('base64');
+  const authEntries = registryAuth.map(({ registry }): [string, { auth: string }] => [registry, { auth }]);
+  return JSON.stringify({ auths: Object.fromEntries(authEntries) });
+}
+
+/**
+ * Obtains a Google access token and stores it as a `kubernetes.io/dockerconfigjson` Secret.
+ * `buildUuid` and `deployUuid` are only passed to the Secret's lifecycle labels.
+ *
+ * @throws Error if no token is obtained or Secret creation fails; the underlying error is discarded.
+ */
+export async function createNativeBuildRegistryAuthSecret({
+  namespace,
+  secretName,
+  registryAuth,
+  buildUuid,
+  deployUuid,
+}: {
+  namespace: string;
+  secretName: string;
+  registryAuth: GarRegistryAuth[];
+  buildUuid?: string;
+  deployUuid: string;
+}): Promise<void> {
+  // One message covers both a rejected token request and an empty token.
+  const tokenFailure = (): Error =>
+    new Error(
+      `Build: GAR access token acquisition failed registries=${formatRegistryList(
+        registryAuth
+      )} verify=google_application_default_credentials`
+    );
+
+  let token: string | null | undefined;
+  try {
+    token = await new GoogleAuth({ scopes: [GOOGLE_CLOUD_PLATFORM_SCOPE] }).getAccessToken();
+  } catch {
+    throw tokenFailure();
+  }
+  if (!token) {
+    throw tokenFailure();
+  }
+
+  // Lifecycle labels plus a component label marking the Secret as native-build registry auth.
+  const labels = {
+    ...buildLifecycleLabels({ buildUuid, deployUuid }),
+    'app.kubernetes.io/component': 'native-build-registry-auth',
+  };
+  const secret: k8s.V1Secret = {
+    apiVersion: 'v1',
+    kind: 'Secret',
+    metadata: { name: secretName, namespace, labels },
+    type: 'kubernetes.io/dockerconfigjson',
+    stringData: { '.dockerconfigjson': buildGarDockerConfig(registryAuth, token) },
+  };
+
+  // The caught error is replaced by a generic message rather than rethrown.
+  try {
+    await getCoreApi().createNamespacedSecret(namespace, secret);
+    getLogger().info(`Build: registry auth prepared secretName=${secretName} namespace=${namespace}`);
+  } catch {
+    throw new Error(`Build: registry auth Secret creation failed secretName=${secretName} namespace=${namespace}`);
+  }
+}
+
+/** Deletes the registry auth Secret; a 404 is ignored and any other failure is only logged as a warning. */
+export async function deleteNativeBuildRegistryAuthSecret(namespace: string, secretName: string): Promise<void> {
+  try {
+    await getCoreApi().deleteNamespacedSecret(secretName, namespace);
+    getLogger().info(`Build: registry auth cleaned secretName=${secretName} namespace=${namespace}`);
+  } catch (error: unknown) {
+    if (error instanceof k8s.HttpError && error.response?.statusCode === 404) {
+      return;
+    }
+    getLogger().warn(`Build: registry auth cleanup failed secretName=${secretName} namespace=${namespace}`);
+  }
+}
diff --git a/src/server/services/types/globalConfig.ts b/src/server/services/types/globalConfig.ts
index d640ae6c..40822740 100644
--- a/src/server/services/types/globalConfig.ts
+++ b/src/server/services/types/globalConfig.ts
@@ -252,11 +252,17 @@ export type NativeHelmConfig = {
   postRenderer?: NativeHelmPostRendererConfig;
 };
 
+export type NativeBuildRegistryAuth = {
+  type: 'gar'; // the only provider literal this type allows
+  registry: string;
+};
+
 export type BuildDefaults = {
   engine?: BuilderEngine;
   jobTimeout?: number;
   serviceAccount?: string;
   cacheRegistry?: string;
+  registryAuth?: NativeBuildRegistryAuth[];
   podAnnotations?: Record<string, string>;
   resources?: {
     buildkit?: ResourceRequirements;