From 9c53fbc684dc7970ef50bd3201411a14fcc9402f Mon Sep 17 00:00:00 2001
From: Maksym Yezhov <maxim.ezhov@shopify.com>
Date: Wed, 27 May 2026 22:27:37 -0700
Subject: [PATCH] chore: v2 - ai assistant - unit tests

---
 src/agent/skills/loader.test.ts               | 141 +++++
 src/agent/tools/csomTools.test.ts             | 290 +++++++++
 src/agent/tools/debugTools.test.ts            |  94 +++
 src/agent/tools/runTools.test.ts              |  54 ++
 src/agent/tools/searchDocs.test.ts            | 235 ++++++++
 src/agent/tools/searchDocs.ts                 |   2 +-
 src/agent/util/truncate.test.ts               | 156 +++++
 .../AiChat/serializeSpecForAi.test.ts         | 189 ++++++
 .../components/AiChat/toolBridge.test.ts      | 570 ++++++++++++++++++
 9 files changed, 1730 insertions(+), 1 deletion(-)
 create mode 100644 src/agent/skills/loader.test.ts
 create mode 100644 src/agent/tools/csomTools.test.ts
 create mode 100644 src/agent/tools/debugTools.test.ts
 create mode 100644 src/agent/tools/runTools.test.ts
 create mode 100644 src/agent/tools/searchDocs.test.ts
 create mode 100644 src/agent/util/truncate.test.ts
 create mode 100644 src/routes/v2/pages/Editor/components/AiChat/serializeSpecForAi.test.ts
 create mode 100644 src/routes/v2/pages/Editor/components/AiChat/toolBridge.test.ts

diff --git a/src/agent/skills/loader.test.ts b/src/agent/skills/loader.test.ts
new file mode 100644
index 000000000..bbde5de2c
--- /dev/null
+++ b/src/agent/skills/loader.test.ts
@@ -0,0 +1,141 @@
+import "fake-indexeddb/auto";
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import { agentDb } from "../idb/agentDb";
+import { SkillsLoader } from "./loader";
+
+// Hoisted alongside `vi.mock` so the mock factories can close over them.
+// Plain module-level consts wouldn't be initialized in time because
+// `vi.mock` is hoisted to the top of the file.
+const { SKILLS_BASE_URL, TEST_GIT_COMMIT, TEST_VERSION, STALE_VERSION } =
+  vi.hoisted(() => {
+    const gitCommit = "abc1234567";
+    return {
+      SKILLS_BASE_URL: "/agent-skills",
+      TEST_GIT_COMMIT: gitCommit,
+      TEST_VERSION: gitCommit.substring(0, 6),
+      STALE_VERSION: "deadbeef",
+    };
+  });
+
+vi.mock("../config", () => ({
+  config: { skillsBaseUrl: SKILLS_BASE_URL },
+  requireSkillsBaseUrl: () => SKILLS_BASE_URL,
+}));
+
+vi.mock("@/utils/constants", () => ({
+  GIT_COMMIT: TEST_GIT_COMMIT,
+}));
+
+const fetchMock = vi.fn();
+const originalFetch = globalThis.fetch;
+
+/** Wraps `body` in a `text/markdown` Response carrying the given status. */
+function textResponse(body: string, status = 200): Response {
+  // Skill bodies are fetched from `SKILL.md`, hence the markdown type.
+  const headers = { "Content-Type": "text/markdown" };
+  return new Response(body, { status, headers });
+}
+
+beforeEach(() => {
+  globalThis.fetch = fetchMock as unknown as typeof fetch;
+  fetchMock.mockReset();
+});
+
+afterEach(async () => {
+  globalThis.fetch = originalFetch;
+  await agentDb.skills.clear();
+});
+
+describe("SkillsLoader", () => {
+  it("fetches on cold cache and writes IDB row tagged with the current version", async () => {
+    fetchMock.mockResolvedValue(
+      textResponse("# Tangle Best Practices\nBe concise."),
+    );
+
+    const loader = new SkillsLoader();
+    const result = await loader.getSkill("tangleBestPractices");
+
+    expect(result).toContain("Be concise.");
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    expect(fetchMock).toHaveBeenCalledWith(
+      `${SKILLS_BASE_URL}/tangleBestPractices/SKILL.md`,
+    );
+
+    const row = await agentDb.skills.get("tangleBestPractices");
+    expect(row?.version).toBe(TEST_VERSION);
+    expect(row?.content).toContain("Be concise.");
+  });
+
+  it("serves the IDB row without a network call when the version matches", async () => {
+    await agentDb.skills.put({
+      id: "tangleBestPractices",
+      version: TEST_VERSION,
+      content: "cached body",
+    });
+
+    const loader = new SkillsLoader();
+    const result = await loader.getSkill("tangleBestPractices");
+
+    expect(result).toBe("cached body");
+    expect(fetchMock).not.toHaveBeenCalled();
+  });
+
+  it("refetches and overwrites when the IDB row's version is stale", async () => {
+    await agentDb.skills.put({
+      id: "tangleBestPractices",
+      version: STALE_VERSION,
+      content: "old body",
+    });
+    fetchMock.mockResolvedValue(textResponse("new body"));
+
+    const loader = new SkillsLoader();
+    const result = await loader.getSkill("tangleBestPractices");
+
+    expect(result).toBe("new body");
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    const row = await agentDb.skills.get("tangleBestPractices");
+    expect(row?.content).toBe("new body");
+    expect(row?.version).toBe(TEST_VERSION);
+  });
+
+  it("falls back to the stale IDB row when fetch throws", async () => {
+    await agentDb.skills.put({
+      id: "tangleBestPractices",
+      version: STALE_VERSION,
+      content: "stale body",
+    });
+    fetchMock.mockRejectedValue(new Error("offline"));
+
+    const loader = new SkillsLoader();
+    const result = await loader.getSkill("tangleBestPractices");
+
+    expect(result).toBe("stale body");
+  });
+
+  it("resolves with empty string when fetch fails and no IDB row exists", async () => {
+    fetchMock.mockResolvedValue(
+      new Response("", { status: 404, statusText: "Not Found" }),
+    );
+
+    const loader = new SkillsLoader();
+    const result = await loader.getSkill("tangleBestPractices");
+
+    expect(result).toBe("");
+  });
+
+  it("dedupes concurrent calls for the same id into a single fetch", async () => {
+    fetchMock.mockResolvedValue(textResponse("body"));
+
+    const loader = new SkillsLoader();
+    const [a, b] = await Promise.all([
+      loader.getSkill("tangleBestPractices"),
+      loader.getSkill("tangleBestPractices"),
+    ]);
+
+    expect(a).toBe("body");
+    expect(b).toBe("body");
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/src/agent/tools/csomTools.test.ts b/src/agent/tools/csomTools.test.ts
new file mode 100644
index 000000000..69865aa52
--- /dev/null
+++ b/src/agent/tools/csomTools.test.ts
@@ -0,0 +1,290 @@
+import { RunContext } from "@openai/agents-core";
+import { describe, expect, it, vi } from "vitest";
+
+import type { ToolBridgeApi } from "../toolBridgeApi";
+import { createCsomTools } from "./csomTools";
+
+type FunctionTool = ReturnType<typeof createCsomTools>["allTools"][number];
+
+interface JsonSchemaNode {
+  type?: string | string[];
+  properties?: Record<string, JsonSchemaNode>;
+  anyOf?: JsonSchemaNode[];
+  additionalProperties?: boolean | JsonSchemaNode;
+  $ref?: string;
+}
+
+/**
+ * Lightweight stub bridge. Methods supplied via `overrides` are returned
+ * as-is; every other property resolves to one shared `vi.fn()`, so tools
+ * built by the same factory call never hit a missing bridge method.
+ */
+function makeBridge(overrides: Partial<ToolBridgeApi> = {}): ToolBridgeApi {
+  const fallback = vi.fn();
+  const handler: ProxyHandler<ToolBridgeApi> = {
+    // Proxy keys may be symbols as well as strings, so accept both.
+    get: (_target, key: string | symbol): unknown =>
+      key in overrides ? Reflect.get(overrides, key) : fallback,
+  };
+  // Reads never reach the empty target: `handler.get` answers all of them.
+  const emptyTarget = {} as ToolBridgeApi;
+  return new Proxy(emptyTarget, handler);
+}
+
+/** Returns the first tool called `name`; throws if `tools` has none. */
+function findTool(tools: readonly FunctionTool[], name: string): FunctionTool {
+  const [match] = tools.filter((candidate) => candidate.name === name);
+  if (match === undefined) {
+    throw new Error(`Tool not found: ${name}`);
+  }
+  return match;
+}
+
+/** Calls `tool` with a JSON payload and JSON-decodes string results. */
+async function invoke(tool: FunctionTool, payload: unknown): Promise<unknown> {
+  const output = await tool.invoke(new RunContext(), JSON.stringify(payload));
+  return typeof output === "string" ? JSON.parse(output) : output;
+}
+
+/**
+ * Walks `componentRef.spec` (its `type: "object"` branch) down to the
+ * `implementation.anyOf` list, yielding `[]` when any hop is missing.
+ */
+function getImplementationAnyOf(schema: JsonSchemaNode): JsonSchemaNode[] {
+  const specBranches = schema.properties?.componentRef?.properties?.spec?.anyOf;
+  const objectBranch = specBranches?.find(
+    (branch) => branch.type === "object",
+  );
+  const implementation = objectBranch?.properties?.implementation;
+  return implementation?.anyOf ?? [];
+}
+
+describe("createCsomTools", () => {
+  it("exposes the full 18-tool surface", () => {
+    const { allTools } = createCsomTools(makeBridge());
+    const names = allTools.map((t) => t.name).sort();
+    expect(names).toEqual(
+      [
+        "add_input",
+        "add_output",
+        "add_task",
+        "connect_nodes",
+        "create_subgraph",
+        "delete_edge",
+        "delete_input",
+        "delete_output",
+        "delete_task",
+        "get_pipeline_state",
+        "rename_input",
+        "rename_output",
+        "rename_task",
+        "set_pipeline_description",
+        "set_pipeline_name",
+        "set_task_argument",
+        "unpack_subgraph",
+        "validate_pipeline",
+      ].sort(),
+    );
+  });
+
+  it("get_pipeline_state JSON-stringifies the bridge result for the model", async () => {
+    const getPipelineState = vi.fn().mockResolvedValue({
+      name: "Pipe",
+      inputs: [],
+      outputs: [],
+      tasks: [],
+      bindings: [],
+    });
+    const { allTools } = createCsomTools(makeBridge({ getPipelineState }));
+
+    const result = await invoke(findTool(allTools, "get_pipeline_state"), {});
+    expect(result).toEqual({
+      name: "Pipe",
+      inputs: [],
+      outputs: [],
+      tasks: [],
+      bindings: [],
+    });
+    expect(getPipelineState).toHaveBeenCalledOnce();
+  });
+
+  it("validate_pipeline JSON-stringifies the validation result for the model", async () => {
+    const validatePipeline = vi.fn().mockResolvedValue({
+      valid: true,
+      issueCount: 0,
+      issues: [],
+    });
+    const { allTools } = createCsomTools(makeBridge({ validatePipeline }));
+
+    const result = await invoke(findTool(allTools, "validate_pipeline"), {});
+    expect(result).toEqual({ valid: true, issueCount: 0, issues: [] });
+  });
+
+  it("add_task strips null fields from the componentRef before calling the bridge", async () => {
+    const addTask = vi.fn().mockResolvedValue({
+      success: true,
+      taskId: "task_42",
+      name: "Loader",
+    });
+    const { allTools } = createCsomTools(makeBridge({ addTask }));
+
+    await invoke(findTool(allTools, "add_task"), {
+      name: "Loader",
+      componentRef: {
+        name: "loader",
+        url: null,
+        spec: {
+          name: "Loader",
+          description: null,
+          inputs: [{ name: "path", type: "String", description: null }],
+          outputs: null,
+          implementation: { container: { image: "loader:1" } },
+        },
+      },
+    });
+
+    expect(addTask).toHaveBeenCalledOnce();
+    const call = addTask.mock.calls[0][0];
+    expect(call.name).toBe("Loader");
+    expect(call.componentRef).toEqual({
+      name: "loader",
+      spec: {
+        name: "Loader",
+        inputs: [{ name: "path", type: "String" }],
+        implementation: { container: { image: "loader:1" } },
+      },
+    });
+  });
+
+  it("add_task implementation schema has typed anyOf branches", () => {
+    // Regression guard for OpenAI structured-outputs strict mode: every
+    // `anyOf` branch must declare a concrete `type` (or `$ref`), or tool
+    // registration fails before the model even runs.
+    const { allTools } = createCsomTools(makeBridge());
+    const addTaskTool = findTool(allTools, "add_task");
+
+    const implementationAnyOf = getImplementationAnyOf(
+      addTaskTool.parameters as JsonSchemaNode,
+    );
+
+    expect(implementationAnyOf).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ type: "object" }),
+        expect.objectContaining({ type: "null" }),
+      ]),
+    );
+
+    // "Typed" mirrors the rule above: a concrete `type` string or a `$ref`.
+    const isTyped = (node: JsonSchemaNode): boolean =>
+      typeof node.type === "string" || typeof node.$ref === "string";
+    expect(implementationAnyOf.every(isTyped)).toBe(true);
+
+    const objectBranch = implementationAnyOf.find(
+      (entry) => entry.type === "object",
+    );
+    expect(objectBranch).toBeDefined();
+    const additionalProperties = objectBranch?.additionalProperties;
+    expect(additionalProperties).not.toEqual({});
+    // A boolean (or absent) `additionalProperties` has no schema to inspect.
+    if (
+      additionalProperties &&
+      typeof additionalProperties === "object" &&
+      !Array.isArray(additionalProperties)
+    ) {
+      expect(isTyped(additionalProperties)).toBe(true);
+    }
+  });
+
+  it("rename_task forwards (entityId, newName) in the right order", async () => {
+    // Both args are strings — TypeScript can't catch a swap, so this
+    // pin is the only guard against a regression.
+    const renameTask = vi.fn().mockResolvedValue({ success: true });
+    const { allTools } = createCsomTools(makeBridge({ renameTask }));
+
+    await invoke(findTool(allTools, "rename_task"), {
+      entityId: "task_1",
+      newName: "Renamed",
+    });
+    expect(renameTask).toHaveBeenCalledWith("task_1", "Renamed");
+  });
+
+  it("rename_input forwards (entityId, newName) in the right order", async () => {
+    const renameInput = vi.fn().mockResolvedValue({ success: true });
+    const { allTools } = createCsomTools(makeBridge({ renameInput }));
+
+    await invoke(findTool(allTools, "rename_input"), {
+      entityId: "input_1",
+      newName: "renamed",
+    });
+    expect(renameInput).toHaveBeenCalledWith("input_1", "renamed");
+  });
+
+  it("rename_output forwards (entityId, newName) in the right order", async () => {
+    const renameOutput = vi.fn().mockResolvedValue({ success: true });
+    const { allTools } = createCsomTools(makeBridge({ renameOutput }));
+
+    await invoke(findTool(allTools, "rename_output"), {
+      entityId: "output_1",
+      newName: "renamed",
+    });
+    expect(renameOutput).toHaveBeenCalledWith("output_1", "renamed");
+  });
+
+  it("set_task_argument forwards (taskEntityId, inputName, value) in the right order", async () => {
+    // Three string positional args — TypeScript can't catch a swap.
+    const setTaskArgument = vi.fn().mockResolvedValue({ success: true });
+    const { allTools } = createCsomTools(makeBridge({ setTaskArgument }));
+
+    await invoke(findTool(allTools, "set_task_argument"), {
+      taskEntityId: "task_1",
+      inputName: "path",
+      value: "data.csv",
+    });
+    expect(setTaskArgument).toHaveBeenCalledWith("task_1", "path", "data.csv");
+  });
+
+  it("add_input normalizes null optional fields to undefined", async () => {
+    const addInput = vi.fn().mockResolvedValue({
+      success: true,
+      inputId: "input_42",
+      name: "threshold",
+    });
+    const { allTools } = createCsomTools(makeBridge({ addInput }));
+
+    await invoke(findTool(allTools, "add_input"), {
+      name: "threshold",
+      type: "Float",
+      description: null,
+      defaultValue: null,
+      optional: null,
+    });
+
+    expect(addInput).toHaveBeenCalledWith({
+      name: "threshold",
+      type: "Float",
+      description: undefined,
+      defaultValue: undefined,
+      optional: undefined,
+    });
+  });
+
+  it("add_output strips null optional fields to undefined", async () => {
+    const addOutput = vi.fn().mockResolvedValue({
+      success: true,
+      outputId: "output_42",
+      name: "metrics",
+    });
+    const { allTools } = createCsomTools(makeBridge({ addOutput }));
+
+    await invoke(findTool(allTools, "add_output"), {
+      name: "metrics",
+      type: null,
+      description: null,
+    });
+    expect(addOutput).toHaveBeenCalledWith({
+      name: "metrics",
+      type: undefined,
+      description: undefined,
+    });
+  });
+});
diff --git a/src/agent/tools/debugTools.test.ts b/src/agent/tools/debugTools.test.ts
new file mode 100644
index 000000000..6e48467d3
--- /dev/null
+++ b/src/agent/tools/debugTools.test.ts
@@ -0,0 +1,94 @@
+import { RunContext } from "@openai/agents-core";
+import { describe, expect, it, vi } from "vitest";
+
+import type { ToolBridgeApi } from "../toolBridgeApi";
+import { createDebugTools } from "./debugTools";
+
+type FunctionTool = ReturnType<typeof createDebugTools>["allTools"][number];
+
+function makeBridge(overrides: Partial<ToolBridgeApi> = {}): ToolBridgeApi {
+  const fallback = vi.fn();
+  const handler: ProxyHandler<ToolBridgeApi> = {
+    // Proxy keys may be symbols as well as strings, so accept both.
+    get: (_target, key: string | symbol): unknown =>
+      key in overrides ? Reflect.get(overrides, key) : fallback,
+  };
+  // Reads never reach the empty target: `handler.get` answers all of them.
+  const emptyTarget = {} as ToolBridgeApi;
+  return new Proxy(emptyTarget, handler);
+}
+
+/** Calls `tool` with a JSON payload and JSON-decodes string results. */
+async function invoke(tool: FunctionTool, payload: unknown): Promise<unknown> {
+  const output = await tool.invoke(new RunContext(), JSON.stringify(payload));
+  return typeof output === "string" ? JSON.parse(output) : output;
+}
+
+/**
+ * The truncation logic itself is tested in `src/agent/util/truncate.test.ts`.
+ * These tests only verify that each debug tool wires its corresponding
+ * `truncate*` helper into the bridge response — i.e. truncation is
+ * actually applied at the boundary, not silently bypassed.
+ */
+describe("createDebugTools", () => {
+  it("exposes the four read-only tool names", () => {
+    const { allTools } = createDebugTools(makeBridge());
+    expect(allTools.map((t) => t.name).sort()).toEqual([
+      "get_container_log",
+      "get_container_state",
+      "get_execution_details",
+      "get_execution_state",
+    ]);
+  });
+
+  it("get_execution_details applies artifact-map truncation to the bridge result", async () => {
+    const getExecutionDetails = vi.fn().mockResolvedValue({
+      id: "exec-1",
+      task_spec: {},
+      child_task_execution_ids: {},
+      input_artifacts: { in: { id: "art1" } },
+      output_artifacts: { out: { id: "art2" } },
+    });
+    const { getExecutionDetails: tool } = createDebugTools(
+      makeBridge({ getExecutionDetails }),
+    );
+
+    const result = (await invoke(tool, { executionId: "exec-1" })) as {
+      input_artifacts: Record<string, unknown>;
+      output_artifacts: Record<string, unknown>;
+    };
+    expect(result.input_artifacts).toEqual({});
+    expect(result.output_artifacts).toEqual({});
+  });
+
+  it("get_container_state applies debug_info truncation to the bridge result", async () => {
+    const debug_info: Record<string, unknown> = {};
+    for (let i = 0; i < 30; i++) debug_info[`k${i}`] = `v${i}`;
+    const getContainerState = vi
+      .fn()
+      .mockResolvedValue({ status: "FAILED", debug_info });
+    const { getContainerState: tool } = createDebugTools(
+      makeBridge({ getContainerState }),
+    );
+
+    const result = (await invoke(tool, { executionId: "exec-1" })) as {
+      debug_info: Record<string, unknown>;
+    };
+    expect(Object.keys(result.debug_info)).toHaveLength(20);
+  });
+
+  it("get_container_log applies log truncation to the bridge result", async () => {
+    const longLog = "L".repeat(20_000);
+    const getContainerLog = vi.fn().mockResolvedValue({ log_text: longLog });
+    const { getContainerLog: tool } = createDebugTools(
+      makeBridge({ getContainerLog }),
+    );
+
+    const result = (await invoke(tool, { executionId: "exec-1" })) as {
+      log_text: string;
+      truncated?: boolean;
+    };
+    expect(result.truncated).toBe(true);
+    expect(result.log_text.length).toBeLessThan(longLog.length);
+  });
+});
diff --git a/src/agent/tools/runTools.test.ts b/src/agent/tools/runTools.test.ts
new file mode 100644
index 000000000..a3e2b846f
--- /dev/null
+++ b/src/agent/tools/runTools.test.ts
@@ -0,0 +1,54 @@
+import { RunContext } from "@openai/agents-core";
+import { describe, expect, it, vi } from "vitest";
+
+import type { ToolBridgeApi } from "../toolBridgeApi";
+import { createRunTools } from "./runTools";
+
+type FunctionTool = ReturnType<typeof createRunTools>["allTools"][number];
+
+function makeBridge(overrides: Partial<ToolBridgeApi> = {}): ToolBridgeApi {
+  const fallback = vi.fn();
+  const handler: ProxyHandler<ToolBridgeApi> = {
+    // Proxy keys may be symbols as well as strings, so accept both.
+    get: (_target, key: string | symbol): unknown =>
+      key in overrides ? Reflect.get(overrides, key) : fallback,
+  };
+  // Reads never reach the empty target: `handler.get` answers all of them.
+  const emptyTarget = {} as ToolBridgeApi;
+  return new Proxy(emptyTarget, handler);
+}
+
+/** Calls `tool` with a JSON payload and JSON-decodes string results. */
+async function invoke(tool: FunctionTool, payload: unknown): Promise<unknown> {
+  const output = await tool.invoke(new RunContext(), JSON.stringify(payload));
+  return typeof output === "string" ? JSON.parse(output) : output;
+}
+
+describe("createRunTools", () => {
+  it("exposes allTools containing every tool by name", () => {
+    const { allTools } = createRunTools(makeBridge());
+    const names = allTools.map((t) => t.name).sort();
+    expect(names).toEqual([
+      "debug_pipeline_run",
+      "get_run_status",
+      "submit_pipeline_run",
+    ]);
+  });
+
+  it("get_run_status derives overall status from execution_status_stats", async () => {
+    const getRunDetails = vi.fn().mockResolvedValue({
+      id: "1",
+      root_execution_id: "r-1",
+      execution_status_stats: { SUCCEEDED: 2, FAILED: 1 },
+    });
+    const bridge = makeBridge({ getRunDetails });
+    const { getRunStatus } = createRunTools(bridge);
+
+    const result = (await invoke(getRunStatus, { runId: "1" })) as {
+      run: { id: string };
+      status: string;
+    };
+    expect(result.status).toBe("FAILED");
+    expect(result.run.id).toBe("1");
+  });
+});
diff --git a/src/agent/tools/searchDocs.test.ts b/src/agent/tools/searchDocs.test.ts
new file mode 100644
index 000000000..06b216bc8
--- /dev/null
+++ b/src/agent/tools/searchDocs.test.ts
@@ -0,0 +1,235 @@
+import "fake-indexeddb/auto";
+
+import type OpenAI from "openai";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import type { OpenAIProvider } from "../config";
+import {
+  agentDb,
+  type PersistedVectorStore,
+  TANGLE_ML_DOCS_VECTORS_KEY,
+} from "../idb/agentDb";
+
+// Hoisted so the `vi.mock` factory can never read an uninitialized binding.
+const { TEST_EMBEDDING_MODEL } = vi.hoisted(() => ({
+  TEST_EMBEDDING_MODEL: "text-embedding-3-small",
+}));
+const embeddingsCreateMock = vi.fn();
+vi.mock("../config", () => ({
+  config: { embeddingModel: TEST_EMBEDDING_MODEL },
+  requireEmbeddingModel: () => TEST_EMBEDDING_MODEL,
+}));
+const fakeProvider: OpenAIProvider = {
+  openai: {
+    embeddings: { create: embeddingsCreateMock },
+  } as unknown as OpenAI,
+};
+
+const fetchMock = vi.fn();
+const originalFetch = globalThis.fetch;
+
+/**
+ * Each vector lives on its own basis axis so cosine similarity against
+ * any query is just the query's component along that axis. This makes
+ * top-K assertions deterministic regardless of how many vectors we add
+ * to the fixture.
+ */
+const FIXTURE_TITLES = [
+  "Tasks",
+  "Components",
+  "Bindings",
+  "Pipelines",
+  "Inputs",
+  "Outputs",
+] as const;
+const FIXTURE_DIM = FIXTURE_TITLES.length;
+
+function basisVector(index: number): number[] {
+  return Array.from({ length: FIXTURE_DIM }, (_, i) => (i === index ? 1 : 0));
+}
+
+function makeVector(
+  title: string,
+  index: number,
+): PersistedVectorStore["vectors"][number] {
+  // The lower-cased title doubles as the URL slug and the id/hash prefix.
+  const slug = title.toLowerCase();
+  const metadata = {
+    id: `${slug}#intro`,
+    title,
+    sectionTitle: title,
+    url: `https://tangleml.com/docs/core-concepts/${slug}`,
+    contentHash: `${slug}-1`,
+  };
+  const content = `${title} content.`;
+  const embedding = basisVector(index);
+  return { content, embedding, metadata };
+}
+
+/**
+ * Six orthogonal vectors, one more than `DEFAULT_TOP_K = 5`, so the
+ * default cap actually drops a result (with five or fewer vectors the
+ * store size would bound the count and the assertion would be vacuous).
+ */
+function makeStore(): PersistedVectorStore {
+  return {
+    version: 2,
+    embeddingModel: TEST_EMBEDDING_MODEL,
+    vectors: FIXTURE_TITLES.map((title, i) => makeVector(title, i)),
+  };
+}
+
+/** Same envelope as `makeStore()`, but with no vectors indexed. */
+function makeEmptyStore(): PersistedVectorStore {
+  const version = 2;
+  const embeddingModel = TEST_EMBEDDING_MODEL;
+  const vectors: PersistedVectorStore["vectors"] = [];
+  return { version, embeddingModel, vectors };
+}
+
+/** Serializes `body` into a 200 Response labelled `application/json`. */
+function jsonResponse(body: unknown): Response {
+  const payload = JSON.stringify(body);
+  const headers = { "Content-Type": "application/json" };
+  return new Response(payload, { status: 200, headers });
+}
+
+beforeEach(() => {
+  globalThis.fetch = fetchMock as unknown as typeof fetch;
+  fetchMock.mockReset();
+  embeddingsCreateMock.mockReset();
+});
+
+afterEach(async () => {
+  globalThis.fetch = originalFetch;
+  await agentDb.vectors.clear();
+  vi.resetModules();
+});
+
+describe("executeSearchDocs", () => {
+  it("ranks vectors by cosine similarity and returns top-K", async () => {
+    fetchMock.mockResolvedValue(jsonResponse(makeStore()));
+    // Mostly aligned with Tasks (axis 0), small bias toward Components (axis 1).
+    embeddingsCreateMock.mockResolvedValue({
+      data: [{ embedding: [0.9, 0.1, 0, 0, 0, 0] }],
+    });
+
+    const { executeSearchDocs, DocsVectorStoreCache } =
+      await import("./searchDocs");
+    const raw = await executeSearchDocs(
+      { query: "what is a task?", topK: 2 },
+      fakeProvider,
+      new DocsVectorStoreCache(),
+    );
+    const parsed = JSON.parse(raw) as {
+      results: Array<{
+        title: string;
+        url: string;
+        citation: string;
+        score: number;
+      }>;
+      instruction?: string;
+      error?: string;
+    };
+
+    expect(parsed.error).toBeUndefined();
+    expect(parsed.results).toHaveLength(2);
+    expect(parsed.results[0].title).toBe("Tasks");
+    expect(parsed.results[1].title).toBe("Components");
+    expect(parsed.results[0].score).toBeGreaterThan(parsed.results[1].score);
+    expect(parsed.results[0].citation).toBe(
+      "[Tasks](https://tangleml.com/docs/core-concepts/tasks)",
+    );
+    expect(parsed.instruction).toContain("markdown link");
+  });
+
+  it("defaults topK to 5 when omitted", async () => {
+    // Fixture has 6 vectors so a 5-cap actually has something to drop.
+    fetchMock.mockResolvedValue(jsonResponse(makeStore()));
+    embeddingsCreateMock.mockResolvedValue({
+      data: [{ embedding: [1, 1, 1, 1, 1, 1] }],
+    });
+
+    const { executeSearchDocs, DocsVectorStoreCache } =
+      await import("./searchDocs");
+    const raw = await executeSearchDocs(
+      { query: "anything" },
+      fakeProvider,
+      new DocsVectorStoreCache(),
+    );
+    const parsed = JSON.parse(raw) as { results: unknown[] };
+
+    expect(parsed.results).toHaveLength(5);
+  });
+
+  it("returns the populate-instruction message when the index is empty", async () => {
+    fetchMock.mockResolvedValue(jsonResponse(makeEmptyStore()));
+
+    const { executeSearchDocs, DocsVectorStoreCache } =
+      await import("./searchDocs");
+    const raw = await executeSearchDocs(
+      { query: "anything" },
+      fakeProvider,
+      new DocsVectorStoreCache(),
+    );
+    const parsed = JSON.parse(raw) as {
+      results: unknown[];
+      message?: string;
+      error?: string;
+    };
+
+    expect(parsed.error).toBeUndefined();
+    expect(parsed.results).toEqual([]);
+    expect(parsed.message).toMatch(/agent:index-docs/);
+    // Empty index should short-circuit before we ever embed the query.
+    expect(embeddingsCreateMock).not.toHaveBeenCalled();
+  });
+
+  it("populates the IDB cache on first call and reuses it on the second", async () => {
+    fetchMock.mockResolvedValue(jsonResponse(makeStore()));
+    embeddingsCreateMock.mockResolvedValue({
+      data: [{ embedding: basisVector(0) }],
+    });
+
+    const { executeSearchDocs, DocsVectorStoreCache } =
+      await import("./searchDocs");
+
+    await executeSearchDocs(
+      { query: "first" },
+      fakeProvider,
+      new DocsVectorStoreCache(),
+    );
+    const cachedRow = await agentDb.vectors.get(TANGLE_ML_DOCS_VECTORS_KEY);
+    expect(cachedRow?.embeddingModel).toBe(TEST_EMBEDDING_MODEL);
+    expect(cachedRow?.payload.vectors).toHaveLength(6);
+
+    await executeSearchDocs(
+      { query: "second" },
+      fakeProvider,
+      new DocsVectorStoreCache(),
+    );
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    expect(embeddingsCreateMock).toHaveBeenCalledTimes(2);
+  });
+
+  it("returns an error payload when fetch fails", async () => {
+    fetchMock.mockResolvedValue(
+      new Response("", { status: 500, statusText: "Internal Server Error" }),
+    );
+    embeddingsCreateMock.mockResolvedValue({
+      data: [{ embedding: basisVector(0) }],
+    });
+
+    const { executeSearchDocs, DocsVectorStoreCache } =
+      await import("./searchDocs");
+    const raw = await executeSearchDocs(
+      { query: "hello" },
+      fakeProvider,
+      new DocsVectorStoreCache(),
+    );
+    const parsed = JSON.parse(raw) as { results: unknown[]; error?: string };
+
+    expect(parsed.results).toEqual([]);
+    expect(parsed.error).toContain("500");
+  });
+});
diff --git a/src/agent/tools/searchDocs.ts b/src/agent/tools/searchDocs.ts
index bffcb2f54..a42cf61b7 100644
--- a/src/agent/tools/searchDocs.ts
+++ b/src/agent/tools/searchDocs.ts
@@ -169,7 +169,7 @@ async function embedQuery(
   return first.embedding;
 }
 
-async function executeSearchDocs(
+export async function executeSearchDocs(
   params: {
     query: string;
     topK?: number | null;
diff --git a/src/agent/util/truncate.test.ts b/src/agent/util/truncate.test.ts
new file mode 100644
index 000000000..6cdc50ca2
--- /dev/null
+++ b/src/agent/util/truncate.test.ts
@@ -0,0 +1,156 @@
+import { describe, expect, it } from "vitest";
+
+import type { ContainerState, ExecutionDetails } from "@/agent/toolBridgeApi";
+
+import {
+  type ContainerLogInput,
+  truncateContainerLog,
+  truncateContainerState,
+  truncateExecutionDetails,
+} from "./truncate";
+
+const LOG_BYTE_BUDGET = 8_192;
+const ORCHESTRATION_ERROR_BUDGET = 2_048;
+const STRING_FIELD_BUDGET = 2_048;
+const MAX_DEBUG_INFO_KEYS = 20;
+
+function makeContainerState(
+  overrides: Partial<ContainerState> = {},
+): ContainerState {
+  return { status: "FAILED", ...overrides };
+}
+
+function makeExecutionDetails(
+  overrides: Partial<ExecutionDetails> = {},
+): ExecutionDetails {
+  return {
+    id: "exec-1",
+    task_spec: {
+      componentRef: { name: "noop" },
+    } as ExecutionDetails["task_spec"],
+    child_task_execution_ids: {},
+    ...overrides,
+  };
+}
+
+describe("truncateContainerLog", () => {
+  it("preserves log fields under their byte budget without flagging truncation", () => {
+    const result = truncateContainerLog({
+      log_text: "hi",
+      orchestration_error_message: "oops",
+    });
+
+    expect(result.log_text).toBe("hi");
+    expect(result.orchestration_error_message).toBe("oops");
+    expect(result.truncated).toBeUndefined();
+  });
+
+  it("keeps the trailing window of an oversized log_text and flags truncation", () => {
+    const oversized = "L".repeat(LOG_BYTE_BUDGET + 5_000);
+    const result = truncateContainerLog({ log_text: oversized });
+
+    expect(result.truncated).toBe(true);
+    // Trailing window preserved verbatim.
+    expect(result.log_text?.endsWith("L".repeat(LOG_BYTE_BUDGET))).toBe(true);
+    // Header reports how many chars were dropped from the front.
+    expect(result.log_text).toMatch(/truncated 5000 chars/);
+  });
+
+  it("applies the smaller orchestration error budget independently", () => {
+    const oversized = "X".repeat(ORCHESTRATION_ERROR_BUDGET + 1);
+    const result = truncateContainerLog({
+      log_text: "fine",
+      orchestration_error_message: oversized,
+    });
+
+    expect(result.truncated).toBe(true);
+    expect(result.log_text).toBe("fine");
+    expect(
+      result.orchestration_error_message?.endsWith(
+        "X".repeat(ORCHESTRATION_ERROR_BUDGET),
+      ),
+    ).toBe(true);
+  });
+
+  it("skips null and undefined fields rather than emitting them as empty strings", () => {
+    const input: ContainerLogInput = {
+      log_text: null,
+      system_error_exception_full: undefined,
+      orchestration_error_message: "kept",
+    };
+    const result = truncateContainerLog(input);
+
+    expect(result).toEqual({ orchestration_error_message: "kept" });
+    expect("log_text" in result).toBe(false);
+    expect("system_error_exception_full" in result).toBe(false);
+  });
+});
+
+describe("truncateContainerState", () => {
+  it("returns the state unchanged when debug_info is missing", () => {
+    const state = makeContainerState({ exit_code: 1 });
+    expect(truncateContainerState(state)).toBe(state);
+  });
+
+  it("caps debug_info to 20 keys, dropping the rest", () => {
+    const debug_info: Record<string, unknown> = {};
+    for (let i = 0; i < MAX_DEBUG_INFO_KEYS + 10; i++) {
+      debug_info[`k${i}`] = `v${i}`;
+    }
+    const result = truncateContainerState(makeContainerState({ debug_info }));
+
+    const keys = Object.keys(result.debug_info ?? {});
+    expect(keys).toHaveLength(MAX_DEBUG_INFO_KEYS);
+    // Insertion order is preserved — the cap drops the tail, not the head.
+    expect(keys[0]).toBe("k0");
+    expect(keys.at(-1)).toBe(`k${MAX_DEBUG_INFO_KEYS - 1}`);
+  });
+
+  it("truncates oversized string values per-key and leaves non-strings untouched", () => {
+    const long = "y".repeat(STRING_FIELD_BUDGET + 100);
+    const result = truncateContainerState(
+      makeContainerState({
+        debug_info: { long, short: "ok", count: 7, nested: { a: 1 } },
+      }),
+    );
+
+    const truncatedLong = result.debug_info?.long;
+    expect(typeof truncatedLong).toBe("string");
+    if (typeof truncatedLong === "string") {
+      expect(truncatedLong.endsWith("y".repeat(STRING_FIELD_BUDGET))).toBe(
+        true,
+      );
+      expect(truncatedLong).toMatch(/truncated 100 chars/);
+    }
+    expect(result.debug_info?.short).toBe("ok");
+    expect(result.debug_info?.count).toBe(7);
+    expect(result.debug_info?.nested).toEqual({ a: 1 });
+  });
+});
+
+describe("truncateExecutionDetails", () => {
+  it("collapses non-empty artifact maps to {} so the model knows they exist", () => {
+    const details = makeExecutionDetails({
+      input_artifacts: { in: { id: "art1" } },
+      output_artifacts: { out: { id: "art2" } },
+    });
+
+    const result = truncateExecutionDetails(details);
+    expect(result.input_artifacts).toEqual({});
+    expect(result.output_artifacts).toEqual({});
+    // Other fields untouched.
+    expect(result.id).toBe(details.id);
+    expect(result.task_spec).toBe(details.task_spec);
+  });
+
+  it("leaves empty / missing artifact maps as-is", () => {
+    const empty = makeExecutionDetails({
+      input_artifacts: {},
+      output_artifacts: undefined,
+    });
+    const result = truncateExecutionDetails(empty);
+
+    expect(result.input_artifacts).toEqual({});
+    expect(result.output_artifacts).toBeUndefined();
+  });
+});
diff --git a/src/routes/v2/pages/Editor/components/AiChat/serializeSpecForAi.test.ts b/src/routes/v2/pages/Editor/components/AiChat/serializeSpecForAi.test.ts
new file mode 100644
index 000000000..571585df9
--- /dev/null
+++ b/src/routes/v2/pages/Editor/components/AiChat/serializeSpecForAi.test.ts
@@ -0,0 +1,189 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  Binding,
+  ComponentSpec,
+  Input,
+  Output,
+  Task,
+} from "@/models/componentSpec";
+
+import { serializeSpecForAi } from "./serializeSpecForAi";
+
+function buildBasicSpec(): ComponentSpec {
+  const pipeline = new ComponentSpec({ $id: "spec_1", name: "MyPipeline" });
+  pipeline.setDescription("Loads data and transforms it.");
+
+  // First input populates every optional field; the second only the basics.
+  const rawPath = new Input({
+    $id: "in_1",
+    name: "raw_path",
+    type: "String",
+    description: "Path to the raw file",
+    defaultValue: "data.csv",
+    optional: false,
+  });
+  pipeline.addInput(rawPath);
+  pipeline.addInput(new Input({ $id: "in_2", name: "rows", type: "Integer" }));
+
+  // Single pipeline-level output, with a description.
+  const resultOutput = new Output({
+    $id: "out_1",
+    name: "result",
+    type: "String",
+    description: "Final artifact path",
+  });
+  pipeline.addOutput(resultOutput);
+
+  // Container-backed task whose component declares one input and one output.
+  const loadTask = new Task({
+    $id: "task_1",
+    name: "Load",
+    componentRef: {
+      name: "load",
+      url: "https://example.com/load.yaml",
+      spec: {
+        name: "Load",
+        inputs: [{ name: "path", type: "String" }],
+        outputs: [{ name: "table", type: "String" }],
+        implementation: { container: { image: "loader:1" } },
+      },
+    },
+    arguments: [{ name: "path", value: "data.csv" }],
+  });
+  pipeline.addTask(loadTask);
+
+  // Bind the first input (entity "in_1") to the task's "path" port.
+  const inputToTask = new Binding({
+    $id: "bind_1",
+    sourceEntityId: "in_1",
+    sourcePortName: "in_1",
+    targetEntityId: "task_1",
+    targetPortName: "path",
+  });
+  pipeline.addBinding(inputToTask);
+
+  return pipeline;
+}
+
+describe("serializeSpecForAi", () => {
+  it("serializes pipeline name and description", () => {
+    const spec = buildBasicSpec();
+    const ai = serializeSpecForAi(spec);
+
+    expect(ai.name).toBe("MyPipeline");
+    expect(ai.description).toBe("Loads data and transforms it.");
+  });
+
+  it("omits optional fields when empty", () => {
+    const spec = new ComponentSpec({ $id: "spec_1", name: "Empty" });
+    const ai = serializeSpecForAi(spec);
+
+    expect(ai.description).toBeUndefined();
+    expect(ai.activeSubgraphPath).toBeUndefined();
+    expect(ai.inputs).toEqual([]);
+    expect(ai.outputs).toEqual([]);
+    expect(ai.tasks).toEqual([]);
+    expect(ai.bindings).toEqual([]);
+  });
+
+  it("serializes inputs with all optional fields when present", () => {
+    const spec = buildBasicSpec();
+    const ai = serializeSpecForAi(spec);
+
+    expect(ai.inputs).toHaveLength(2);
+    expect(ai.inputs[0]).toEqual({
+      $id: "in_1",
+      name: "raw_path",
+      type: "String",
+      description: "Path to the raw file",
+      default: "data.csv",
+      optional: false,
+    });
+    expect(ai.inputs[1]).toEqual({
+      $id: "in_2",
+      name: "rows",
+      type: "Integer",
+    });
+  });
+
+  it("serializes outputs with optional fields omitted when missing", () => {
+    const spec = buildBasicSpec();
+    const ai = serializeSpecForAi(spec);
+
+    expect(ai.outputs).toEqual([
+      {
+        $id: "out_1",
+        name: "result",
+        type: "String",
+        description: "Final artifact path",
+      },
+    ]);
+  });
+
+  it("serializes tasks with componentRef, arguments, and isSubgraph flag", () => {
+    const spec = buildBasicSpec();
+    spec.addTask(
+      new Task({
+        $id: "task_2",
+        name: "Sub",
+        componentRef: {
+          name: "sub",
+          spec: {
+            name: "Sub",
+            implementation: { graph: { tasks: {} } },
+          },
+        },
+      }),
+    );
+
+    const ai = serializeSpecForAi(spec);
+    expect(ai.tasks).toHaveLength(2);
+    expect(ai.tasks[0]).toEqual({
+      $id: "task_1",
+      name: "Load",
+      componentRef: {
+        name: "load",
+        url: "https://example.com/load.yaml",
+        spec: {
+          name: "Load",
+          inputs: [{ name: "path", type: "String" }],
+          outputs: [{ name: "table", type: "String" }],
+        },
+      },
+      arguments: [{ name: "path", value: "data.csv" }],
+    });
+    expect(ai.tasks[1].isSubgraph).toBe(true);
+  });
+
+  it("serializes bindings", () => {
+    const spec = buildBasicSpec();
+    const ai = serializeSpecForAi(spec);
+
+    expect(ai.bindings).toEqual([
+      {
+        $id: "bind_1",
+        sourceEntityId: "in_1",
+        sourcePortName: "in_1",
+        targetEntityId: "task_1",
+        targetPortName: "path",
+      },
+    ]);
+  });
+
+  it("surfaces activeSubgraphPath when provided", () => {
+    const spec = buildBasicSpec();
+    const ai = serializeSpecForAi(spec, {
+      activeSubgraphPath: ["preprocess", "split"],
+    });
+
+    expect(ai.activeSubgraphPath).toEqual(["preprocess", "split"]);
+  });
+
+  it("omits activeSubgraphPath when empty array passed", () => {
+    const spec = buildBasicSpec();
+    const ai = serializeSpecForAi(spec, { activeSubgraphPath: [] });
+
+    expect(ai.activeSubgraphPath).toBeUndefined();
+  });
+});
diff --git a/src/routes/v2/pages/Editor/components/AiChat/toolBridge.test.ts b/src/routes/v2/pages/Editor/components/AiChat/toolBridge.test.ts
new file mode 100644
index 000000000..b4c92712c
--- /dev/null
+++ b/src/routes/v2/pages/Editor/components/AiChat/toolBridge.test.ts
@@ -0,0 +1,570 @@
+import type { QueryClient } from "@tanstack/react-query";
+import { describe, expect, it, vi } from "vitest";
+
+import {
+  Binding,
+  ComponentSpec,
+  Input,
+  Output,
+  Task,
+} from "@/models/componentSpec";
+import type { UndoGroupable } from "@/routes/v2/shared/nodes/types";
+
+vi.mock("@/services/componentService", () => ({
+  hydrateComponentReference: vi.fn(async (ref) => ref),
+}));
+
+const fetchPipelineRunMock = vi.fn();
+const fetchExecutionDetailsMock = vi.fn();
+const fetchExecutionStateMock = vi.fn();
+const fetchContainerExecutionStateMock = vi.fn();
+const fetchContainerLogMock = vi.fn();
+
+vi.mock("@/services/executionService", () => ({
+  fetchPipelineRun: (...args: unknown[]) => fetchPipelineRunMock(...args),
+  fetchExecutionDetails: (...args: unknown[]) =>
+    fetchExecutionDetailsMock(...args),
+  fetchExecutionState: (...args: unknown[]) => fetchExecutionStateMock(...args),
+  fetchContainerExecutionState: (...args: unknown[]) =>
+    fetchContainerExecutionStateMock(...args),
+  fetchContainerLog: (...args: unknown[]) => fetchContainerLogMock(...args),
+}));
+
+const submitPipelineRunHelperMock = vi.fn<
+  (
+    _spec: unknown,
+    _url: string,
+    options: {
+      authorizationToken?: string;
+      onSuccess?: (data: unknown) => void;
+      onError?: (error: Error) => void;
+    },
+  ) => void
+>();
+
+vi.mock("@/utils/submitPipeline", () => ({
+  submitPipelineRun: (...args: unknown[]) =>
+    submitPipelineRunHelperMock(
+      ...(args as Parameters<typeof submitPipelineRunHelperMock>),
+    ),
+}));
+
+import { createToolBridge } from "./toolBridge";
+
+/**
+ * Undo test double: logs each `withGroup` label for later assertions, then
+ * runs the callback synchronously so the wrapped (MobX) mutation still lands.
+ */
+class RecordingUndo implements UndoGroupable {
+  readonly labels: string[] = [];
+  withGroup<T>(label: string, fn: () => T): T {
+    this.labels.push(label);
+    const outcome = fn();
+    return outcome;
+  }
+}
+
+function buildSpec(): ComponentSpec {
+  // Smallest useful pipeline: one input, one output, one container task.
+  const pipe = new ComponentSpec({ $id: "spec_1", name: "Pipe" });
+  pipe.addInput(new Input({ $id: "input_1", name: "data", type: "String" }));
+  const out = new Output({ $id: "output_1", name: "result", type: "String" });
+  pipe.addOutput(out);
+  // The task's component declares ports named "input" and "output".
+  const transform = new Task({
+    $id: "task_1",
+    name: "Transform",
+    componentRef: {
+      name: "transform",
+      spec: {
+        name: "Transform",
+        inputs: [{ name: "input", type: "String" }],
+        outputs: [{ name: "output", type: "String" }],
+        implementation: { container: { image: "transform:1" } },
+      },
+    },
+  });
+  pipe.addTask(transform);
+  return pipe;
+}
+
+function makeBridge() {
+  // Spec-only wiring: no backend URL, auth token, or query client is supplied.
+  const fixture = { spec: buildSpec(), undo: new RecordingUndo() };
+  const bridge = createToolBridge({
+    undo: fixture.undo,
+    getSpec: () => fixture.spec,
+    getActiveSubgraphPath: () => [],
+  });
+  return { bridge, ...fixture };
+}
+
+function makeEmptyBridge() {
+  // Models "no pipeline open": getSpec always yields null, which is what the
+  // bridge's spec guard is expected to reject.
+  const undo = new RecordingUndo();
+  const getSpec = () => null;
+  const getActiveSubgraphPath = (): string[] => [];
+  const bridge = createToolBridge({ getSpec, getActiveSubgraphPath, undo });
+  return { bridge, undo };
+}
+
+const TEST_BACKEND_URL = "http://backend.test";
+
+function makeBackendBridge(
+  overrides: { authToken?: string; queryClient?: QueryClient } = {},
+) {
+  // Spec wiring plus the backend-facing dependencies: a fixed backend URL and
+  // whatever auth token / query client the caller supplied (both optional).
+  const spec = buildSpec();
+  const undo = new RecordingUndo();
+  const bridge = createToolBridge({
+    undo,
+    queryClient: overrides.queryClient,
+    getSpec: () => spec,
+    getActiveSubgraphPath: () => [],
+    getBackendUrl: () => TEST_BACKEND_URL,
+    getAuthToken: () => overrides.authToken,
+  });
+
+  return { bridge, spec };
+}
+
+describe("createToolBridge", () => {
+  describe("requireSpec guard", () => {
+    it("rejects both read and mutating calls when getSpec returns null", async () => {
+      const { bridge } = makeEmptyBridge();
+      await expect(bridge.getPipelineState()).rejects.toThrow(
+        /No pipeline is currently open/,
+      );
+      await expect(bridge.setPipelineName("X")).rejects.toThrow();
+      await expect(bridge.deleteTask("anything")).rejects.toThrow();
+    });
+  });
+
+  describe("getPipelineState", () => {
+    it("returns the serialized spec with the active subgraph path", async () => {
+      const spec = buildSpec();
+      const undo = new RecordingUndo();
+      const bridge = createToolBridge({
+        getSpec: () => spec,
+        getActiveSubgraphPath: () => ["preprocess"],
+        undo,
+      });
+
+      const state = await bridge.getPipelineState();
+      expect(state.name).toBe("Pipe");
+      expect(state.tasks).toHaveLength(1);
+      expect(state.activeSubgraphPath).toEqual(["preprocess"]);
+    });
+  });
+
+  describe("pipeline metadata", () => {
+    it("setPipelineName wraps in an undo group and renames the spec", async () => {
+      const { bridge, undo, spec } = makeBridge();
+      const result = await bridge.setPipelineName("NewName");
+      expect(result).toEqual({ success: true });
+      expect(spec.name).toBe("NewName");
+      expect(undo.labels).toContain("Rename pipeline");
+    });
+
+    it("setPipelineDescription updates the spec inside an undo group", async () => {
+      const { bridge, undo, spec } = makeBridge();
+      const result = await bridge.setPipelineDescription("hi");
+      expect(result).toEqual({ success: true });
+      expect(spec.description).toBe("hi");
+      expect(undo.labels).toContain("Update pipeline description");
+    });
+  });
+
+  describe("tasks", () => {
+    it("addTask adds the task and renames it when the requested name differs from the component name", async () => {
+      const { bridge, undo, spec } = makeBridge();
+      const result = await bridge.addTask({
+        name: "MyLoader",
+        componentRef: { name: "load" },
+      });
+      expect(result.success).toBe(true);
+      expect(result.taskId).toBeDefined();
+      const added = spec.tasks.find((t) => t.$id === result.taskId);
+      expect(added?.name).toBe("MyLoader");
+      expect(undo.labels.filter((l) => l === "Add task")).toHaveLength(1);
+    });
+
+    it("deleteTask returns success false for unknown id", async () => {
+      const { bridge } = makeBridge();
+      const result = await bridge.deleteTask("does-not-exist");
+      expect(result).toEqual({ success: false });
+    });
+
+    it("deleteTask removes an existing task", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.deleteTask("task_1");
+      expect(result.success).toBe(true);
+      expect(spec.tasks.find((t) => t.$id === "task_1")).toBeUndefined();
+    });
+
+    it("renameTask updates the task name", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.renameTask("task_1", "Renamed");
+      expect(result.success).toBe(true);
+      expect(spec.tasks[0].name).toBe("Renamed");
+    });
+  });
+
+  describe("inputs", () => {
+    it("addInput sets type, description, default, and optional in one chain", async () => {
+      const { bridge, undo, spec } = makeBridge();
+      const result = await bridge.addInput({
+        name: "threshold",
+        type: "Float",
+        description: "cutoff",
+        defaultValue: "0.5",
+        optional: true,
+      });
+      expect(result.success).toBe(true);
+      const added = spec.inputs.find((i) => i.$id === result.inputId);
+      expect(added?.type).toBe("Float");
+      expect(added?.description).toBe("cutoff");
+      expect(added?.defaultValue).toBe("0.5");
+      expect(added?.optional).toBe(true);
+      expect(undo.labels).toContain("Set input optional");
+    });
+
+    it("deleteInput removes an existing input", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.deleteInput("input_1");
+      expect(result.success).toBe(true);
+      expect(spec.inputs).toHaveLength(0);
+    });
+
+    it("renameInput updates the input name", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.renameInput("input_1", "renamed_input");
+      expect(result.success).toBe(true);
+      expect(spec.inputs[0].name).toBe("renamed_input");
+    });
+  });
+
+  describe("outputs", () => {
+    it("addOutput sets type and description", async () => {
+      const { bridge, undo, spec } = makeBridge();
+      const result = await bridge.addOutput({
+        name: "metrics",
+        type: "Json",
+        description: "summary",
+      });
+      expect(result.success).toBe(true);
+      const added = spec.outputs.find((o) => o.$id === result.outputId);
+      expect(added?.type).toBe("Json");
+      expect(added?.description).toBe("summary");
+      expect(undo.labels).toContain("Set output type");
+    });
+
+    it("deleteOutput removes an existing output", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.deleteOutput("output_1");
+      expect(result.success).toBe(true);
+      expect(spec.outputs).toHaveLength(0);
+    });
+
+    it("renameOutput updates the output name", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.renameOutput("output_1", "renamed_output");
+      expect(result.success).toBe(true);
+      expect(spec.outputs[0].name).toBe("renamed_output");
+    });
+  });
+
+  describe("connections", () => {
+    it("connectNodes returns the created binding id", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.connectNodes({
+        sourceEntityId: "input_1",
+        sourcePortName: "input_1",
+        targetEntityId: "task_1",
+        targetPortName: "input",
+      });
+      expect(result.success).toBe(true);
+      expect(result.bindingId).toBeDefined();
+      expect(spec.bindings).toHaveLength(1);
+    });
+
+    it("connectNodes refuses input → output direction", async () => {
+      const { bridge } = makeBridge();
+      const result = await bridge.connectNodes({
+        sourceEntityId: "input_1",
+        sourcePortName: "input_1",
+        targetEntityId: "output_1",
+        targetPortName: "output_1",
+      });
+      expect(result.success).toBe(false);
+      expect(result.error).toMatch(/invalid source\/target/);
+    });
+
+    it("deleteEdge removes the binding by id", async () => {
+      const spec = buildSpec();
+      spec.addBinding(
+        new Binding({
+          $id: "bind_1",
+          sourceEntityId: "input_1",
+          sourcePortName: "input_1",
+          targetEntityId: "task_1",
+          targetPortName: "input",
+        }),
+      );
+      const undo = new RecordingUndo();
+      const bridge = createToolBridge({
+        getSpec: () => spec,
+        getActiveSubgraphPath: () => [],
+        undo,
+      });
+
+      const result = await bridge.deleteEdge("bind_1");
+      expect(result.success).toBe(true);
+      expect(spec.bindings).toHaveLength(0);
+    });
+  });
+
+  describe("setTaskArgument", () => {
+    it("sets the literal value on the task", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.setTaskArgument("task_1", "input", "hello");
+      expect(result).toEqual({ success: true });
+      const task = spec.tasks.find((t) => t.$id === "task_1");
+      expect(task?.arguments).toEqual([{ name: "input", value: "hello" }]);
+    });
+  });
+
+  describe("subgraphs", () => {
+    it("createSubgraph returns the new subgraph task id", async () => {
+      const { bridge, spec } = makeBridge();
+      const result = await bridge.createSubgraph(["task_1"], "Group");
+      expect(result.success).toBe(true);
+      expect(result.subgraphTaskId).toBeDefined();
+      expect(spec.tasks.some((t) => t.$id === result.subgraphTaskId)).toBe(
+        true,
+      );
+    });
+
+    it("createSubgraph reports failure for empty selection", async () => {
+      const { bridge } = makeBridge();
+      const result = await bridge.createSubgraph([], "Group");
+      expect(result.success).toBe(false);
+      expect(result.error).toMatch(/Could not create subgraph/);
+    });
+  });
+
+  describe("validatePipeline", () => {
+    it("reports valid: true on a clean spec", async () => {
+      const spec = new ComponentSpec({ $id: "spec_1", name: "Pipe" });
+      spec.addTask(
+        new Task({
+          $id: "task_1",
+          name: "Op",
+          componentRef: {
+            name: "op",
+            spec: {
+              name: "Op",
+              implementation: { container: { image: "op:1" } },
+            },
+          },
+        }),
+      );
+      const undo = new RecordingUndo();
+      const bridge = createToolBridge({
+        getSpec: () => spec,
+        getActiveSubgraphPath: () => [],
+        undo,
+      });
+
+      const result = await bridge.validatePipeline();
+      expect(result.valid).toBe(true);
+      expect(result.issueCount).toBe(0);
+    });
+
+    it("maps validation issues into the wire shape", async () => {
+      const spec = new ComponentSpec({ $id: "spec_1", name: "" });
+      const undo = new RecordingUndo();
+      const bridge = createToolBridge({
+        getSpec: () => spec,
+        getActiveSubgraphPath: () => [],
+        undo,
+      });
+
+      const result = await bridge.validatePipeline();
+      expect(result.valid).toBe(false);
+      expect(result.issueCount).toBeGreaterThan(0);
+      for (const issue of result.issues) {
+        expect(typeof issue.type).toBe("string");
+        expect(typeof issue.severity).toBe("string");
+        expect(typeof issue.message).toBe("string");
+      }
+    });
+  });
+
+  describe("submitPipelineRun", () => {
+    it("returns error when backend is not configured", async () => {
+      const { bridge } = makeBridge();
+      const result = await bridge.submitPipelineRun();
+      expect(result.success).toBe(false);
+      expect(result.error).toMatch(/Backend is not configured/);
+    });
+
+    it("submits the spec, invalidates the cache, and returns ids", async () => {
+      submitPipelineRunHelperMock.mockClear(); // forget other tests' calls
+      const invalidate = vi.fn();
+      const queryClient = {
+        invalidateQueries: invalidate,
+      } as unknown as QueryClient;
+      submitPipelineRunHelperMock.mockImplementationOnce(
+        (_spec, _url, options) => {
+          options.onSuccess?.({
+            id: 42,
+            root_execution_id: 100,
+            created_at: "2025-01-01T00:00:00Z",
+            created_by: "tester",
+            pipeline_name: "Pipe",
+          });
+        },
+      );
+      const { bridge } = makeBackendBridge({
+        authToken: "auth-token",
+        queryClient,
+      });
+      const result = await bridge.submitPipelineRun();
+
+      expect(result).toEqual({
+        success: true,
+        runId: "42",
+        rootExecutionId: "100",
+      });
+      expect(submitPipelineRunHelperMock).toHaveBeenCalledTimes(1);
+      const [, urlArg, optionsArg] = submitPipelineRunHelperMock.mock.calls[0]!;
+      expect(urlArg).toBe(TEST_BACKEND_URL);
+      expect(optionsArg.authorizationToken).toBe("auth-token");
+      expect(invalidate).toHaveBeenCalledWith({ queryKey: ["pipelineRuns"] });
+    });
+
+    it("returns submission failure with the helper's error message", async () => {
+      submitPipelineRunHelperMock.mockImplementationOnce(
+        (_spec, _url, options) => {
+          options.onError?.(new Error("backend rejected"));
+        },
+      );
+
+      const { bridge } = makeBackendBridge();
+      const result = await bridge.submitPipelineRun();
+      expect(result.success).toBe(false);
+      expect(result.error).toBe("backend rejected");
+    });
+  });
+
+  describe("read-only run/debug bridge methods", () => {
+    it("getRunDetails delegates to fetchPipelineRun", async () => {
+      const fakeRun = { id: "1", root_execution_id: "2" };
+      fetchPipelineRunMock.mockResolvedValueOnce(fakeRun);
+      const { bridge } = makeBackendBridge();
+      const result = await bridge.getRunDetails("1");
+      expect(result).toBe(fakeRun);
+      expect(fetchPipelineRunMock).toHaveBeenCalledWith("1", TEST_BACKEND_URL);
+    });
+
+    it("throws when backend url is missing for read-only fetches", async () => {
+      const { bridge } = makeBridge();
+      await expect(bridge.getExecutionDetails("e1")).rejects.toThrow(
+        /Backend is not configured/,
+      );
+    });
+
+    it("getContainerLog drops null fields and returns the inner shape", async () => {
+      fetchContainerLogMock.mockResolvedValueOnce({
+        log_text: "hi",
+        system_error_exception_full: null,
+        orchestration_error_message: undefined,
+      });
+      const { bridge } = makeBackendBridge();
+      const log = await bridge.getContainerLog("e1");
+      expect(log).toEqual({ log_text: "hi" });
+    });
+  });
+
+  describe("debugPipelineRun", () => {
+    it("returns success: false with a clear error when backend is missing", async () => {
+      const { bridge } = makeBridge();
+      const result = await bridge.debugPipelineRun("run-1");
+      expect(result.success).toBe(false);
+      expect(result.error).toMatch(/Backend is not configured/);
+      expect(result.failedChildren).toEqual([]);
+    });
+
+    it("walks failed children and truncates payloads", async () => {
+      fetchPipelineRunMock.mockResolvedValueOnce({
+        id: "run-1",
+        root_execution_id: "root-1",
+      });
+      fetchExecutionDetailsMock.mockImplementation(
+        async (executionId: string) => {
+          if (executionId === "root-1") {
+            return {
+              id: "root-1",
+              task_spec: {},
+              child_task_execution_ids: {
+                taskA: "exec-A",
+                taskB: "exec-B",
+                taskC: "exec-C",
+              },
+            };
+          }
+          return {
+            id: executionId,
+            task_spec: {},
+            child_task_execution_ids: {},
+            input_artifacts: { in1: { id: "art1" } },
+            output_artifacts: { out1: { id: "art2" } },
+          };
+        },
+      );
+      fetchExecutionStateMock.mockResolvedValueOnce({
+        child_execution_status_stats: { taskA: { FAILED: 1 } },
+      });
+      fetchContainerExecutionStateMock.mockImplementation(
+        async (executionId: string) => {
+          if (executionId === "exec-A") {
+            return {
+              status: "FAILED",
+              exit_code: 1,
+              debug_info: { reason: "OOMKilled" },
+            };
+          }
+          if (executionId === "exec-B") {
+            return { status: "SUCCEEDED" };
+          }
+          throw new Error("no container record");
+        },
+      );
+      const longLog = "x".repeat(20_000);
+      fetchContainerLogMock.mockImplementation(async () => ({
+        log_text: longLog,
+        system_error_exception_full: null,
+        orchestration_error_message: null,
+      }));
+
+      const { bridge } = makeBackendBridge();
+      const result = await bridge.debugPipelineRun("run-1");
+
+      expect(result.success).toBe(true);
+      expect(result.run?.id).toBe("run-1");
+      expect(result.rootStatus).toBe("FAILED");
+      expect(result.failedChildren).toHaveLength(1);
+      const failed = result.failedChildren[0];
+      expect(failed.taskId).toBe("taskA");
+      expect(failed.executionId).toBe("exec-A");
+      expect(failed.status).toBe("FAILED");
+      expect(failed.log?.truncated).toBe(true);
+      expect(failed.log?.log_text?.length).toBeLessThan(longLog.length);
+      expect(failed.details?.input_artifacts).toEqual({});
+      expect(failed.details?.output_artifacts).toEqual({});
+    });
+  });
+});