pipecat-ai · aconchillo · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/api-reference/cli/eval.mdx b/api-reference/cli/eval.mdx
@@ -0,0 +1,170 @@
+---
+title: eval
+description: "Run behavioral evals against a Pipecat agent, individually or as a suite"
+---
+
+Run scenario-based behavioral evals. `pipecat eval run` tests scenarios against an already-running agent; `pipecat eval suite` spawns the agents listed in a manifest and runs their scenarios concurrently. Both exit `0` when everything passes and `1` otherwise.
+
+The same commands are also available as `python -m pipecat.evals`.
+
+See the [Pipecat Evals guide](/pipecat/evals/overview) for concepts, the scenario format, and manifests.
+
+## eval run
+
+Run one or more scenarios against an already-running agent (started with `-t eval`).
+
+**Usage:**
+
+```shell
+pipecat eval run [OPTIONS] SCENARIOS...
+```
+
+**Arguments:**
+
+<ParamField path="SCENARIOS..." type="path" required>
+  One or more scenario YAML files.
+</ParamField>
+
+**Options:**
+
+<ParamField path="--bot-url" type="string" default="ws://localhost:7860">
+  WebSocket URL of the agent's eval transport.
+</ParamField>
+
+<ParamField path="--verbose / -v" type="flag">
+  Print a line for each turn and expectation as it resolves.
+</ParamField>
+
+<ParamField path="--audio / -a" type="flag">
+  Record each scenario's conversation audio (audio-mode scenarios).
+</ParamField>
+
+<ParamField path="--record-dir" type="string" default="recordings">
+  Directory for `--audio` recordings: `<record-dir>/<scenario>.wav`.
+</ParamField>
+
+<ParamField path="--cache-dir" type="string">
+  Directory for cached synthesized user audio. Defaults to
+  `<user-cache-dir>/pipecat/tts`.
+</ParamField>
+
+<ParamField path="--no-cache" type="flag">
+  Disable the user-audio cache: re-synthesize every turn (no reads or writes).
+</ParamField>
+
+<ParamField path="--timeout / -t" type="integer" default="60">
+  Default per-expectation timeout in seconds, for expectations without their own
+  `within_ms`.
+</ParamField>
+
+<ParamField path="--logs-dir" type="string" default=".">
+  Directory for each scenario's logs: `<logs-dir>/<scenario>.eval.log` (plus
+  `<logs-dir>/<scenario>.debug.log` when `--debug` is set).
+</ParamField>
+
+<ParamField path="--debug / -d" type="flag">
+  Also save `<scenario>.debug.log` with the harness's full per-pipeline logs.
+</ParamField>
+
+<ParamField path="--stop-bot" type="flag">
+  Cancel the agent's pipeline after the run so that the agent exits. By default
+  the agent is left running so it can serve more scenarios.
+</ParamField>
+
+## eval suite
+
+Spawn the agents listed in a manifest and run their scenarios concurrently. Every manifest setting except the `suite:` list can also be overridden on the command line; when a setting is given in both places, the command line wins.
+
+**Usage:**
+
+```shell
+pipecat eval suite [OPTIONS] MANIFEST_PATH
+```
+
+**Arguments:**
+
+<ParamField path="MANIFEST_PATH" type="path" required>
+  Manifest YAML listing agents and their scenarios.
+</ParamField>
+
+**Options:**
+
+<ParamField path="--pattern / -p" type="string">
+  Only run bots whose path contains this substring.
+</ParamField>
+
+<ParamField path="--scenario / -s" type="string">
+  Only run the scenario with this name.
+</ParamField>
+
+<ParamField path="--name / -n" type="string">
+  Name of this run's subdirectory under `runs_dir`. Defaults to a timestamp.
+</ParamField>
+
+<ParamField path="--runs-dir" type="path">
+  Output base, overriding the manifest's `runs_dir`. A `<name>/` subdirectory
+  with `logs/` and `recordings/` is created under it. Defaults to `eval-runs`.
+</ParamField>
+
+<ParamField path="--bots-dir" type="path">
+  Override the manifest's `bots_dir` (bot paths are relative to it).
+</ParamField>
+
+<ParamField path="--scenarios-dir" type="path">
+  Override the manifest's `scenarios_dir`.
+</ParamField>
+
+<ParamField path="--concurrency / -c" type="integer">
+  Override the manifest's `concurrency` (how many runs execute at once).
+</ParamField>
+
+<ParamField path="--base-port" type="integer">
+  Override the manifest's `base_port` (default `7900`). Each run gets port
+  `base_port + index`.
+</ParamField>
+
+<ParamField path="--cache-dir" type="string">
+  Override the manifest's `cache_dir` for cached synthesized user audio.
+</ParamField>
+
+<ParamField path="--no-cache" type="flag">
+  Disable the user-audio cache: re-synthesize every turn (no reads or writes).
+</ParamField>
+
+<ParamField path="--timeout / -t" type="integer" default="60">
+  Default per-expectation timeout in seconds, for expectations without their own
+  `within_ms`.
+</ParamField>
+
+<ParamField path="--spawn" type="string">
+  Override the manifest's spawn template. Default:
+  `"{python} {bot} -t eval --port {port}"`.
+</ParamField>
+
+<ParamField path="--python" type="string">
+  Override the Python interpreter used to spawn each agent.
+</ParamField>
+
+<ParamField path="--audio / -a" type="flag">
+  Record conversation audio.
+</ParamField>
+
+<ParamField path="--debug / -d" type="flag">
+  Also save `<run>.debug.log` with the harness's full per-pipeline logs.
+</ParamField>
+
+## Examples
+
+```shell
+# Run one scenario against a running agent
+pipecat eval run scenarios/capital_question.yaml
+
+# Run a batch of scenarios, verbosely
+pipecat eval run scenarios/*.yaml -v
+
+# Run a full suite
+pipecat eval suite manifest.yaml
+
+# Only the support agent, 8 runs at a time, named output dir
+pipecat eval suite manifest.yaml -p support -c 8 -n nightly
+```
diff --git a/api-reference/cli/overview.mdx b/api-reference/cli/overview.mdx
@@ -19,11 +19,11 @@ description: "Command-line tool for scaffolding, deploying, and monitoring Pipec
     Push your bots to production with one command
   </Card>
   <Card
-    title="Monitor Live Bots"
-    icon="chart-line"
-    href="/api-reference/cli/tail"
+    title="Run Behavioral Evals"
+    icon="vial-circle-check"
+    href="/api-reference/cli/eval"
   >
-    Watch real-time logs, conversations, and metrics
+    Test your agents with scripted scenarios and an LLM judge
   </Card>
 </CardGroup>
 
@@ -51,7 +51,7 @@ pipecat --version
 
 **[`pipecat init`](/api-reference/cli/init)** - Scaffold new projects with interactive setup
 
-**[`pipecat tail`](/api-reference/cli/tail)** - Monitor sessions in real-time with a terminal dashboard
+**[`pipecat eval`](/api-reference/cli/eval)** - Run behavioral evals against your agents
 
 **[`pipecat cloud`](/api-reference/cli/cloud/auth)** - Deploy and manage bots on Pipecat Cloud
 
@@ -62,7 +62,7 @@ View help for any command:
 ```bash
 pipecat --help
 pipecat init --help
-pipecat tail --help
+pipecat eval --help
 pipecat cloud --help
 ```
 

diff --git a/api-reference/cli/tail.mdx b/api-reference/cli/tail.mdx
diff --git a/docs.json b/docs.json
@@ -80,15 +80,6 @@
               "pipecat/fundamentals/saving-transcripts",
               "pipecat/fundamentals/recording-audio",
               "pipecat/fundamentals/metrics",
-              {
-                "group": "Evaluations",
-                "pages": [
-                  "pipecat/fundamentals/evaluations/overview",
-                  "pipecat/fundamentals/evaluations/bluejay",
-                  "pipecat/fundamentals/evaluations/cekura",
-                  "pipecat/fundamentals/evaluations/coval"
-                ]
-              },
               "pipecat/fundamentals/voicemail",
               "pipecat/fundamentals/ivr",
               "pipecat/fundamentals/custom-frame-processor",
@@ -102,6 +93,25 @@
               }
             ]
           },
+          {
+            "group": "Evals",
+            "pages": [
+              "pipecat/evals/overview",
+              "pipecat/evals/quickstart",
+              "pipecat/evals/scenarios",
+              "pipecat/evals/suites",
+              "pipecat/evals/library",
+              "pipecat/evals/agent-self-improvement",
+              {
+                "group": "Third-party Platforms",
+                "pages": [
+                  "pipecat/evals/platforms/bluejay",
+                  "pipecat/evals/platforms/cekura",
+                  "pipecat/evals/platforms/coval"
+                ]
+              }
+            ]
+          },
           {
             "group": "Features",
             "pages": [
@@ -816,7 +826,7 @@
                 "group": "Commands",
                 "pages": [
                   "api-reference/cli/init",
-                  "api-reference/cli/tail",
+                  "api-reference/cli/eval",
                   {
                     "group": "cloud",
                     "pages": [
@@ -1345,11 +1355,27 @@
     },
     {
       "source": "/guides/fundamentals/evaluations/overview",
-      "destination": "/pipecat/fundamentals/evaluations/overview"
+      "destination": "/pipecat/evals/overview"
     },
     {
       "source": "/guides/fundamentals/evaluations/bluejay",
-      "destination": "/pipecat/fundamentals/evaluations/bluejay"
+      "destination": "/pipecat/evals/platforms/bluejay"
+    },
+    {
+      "source": "/pipecat/fundamentals/evaluations/overview",
+      "destination": "/pipecat/evals/overview"
+    },
+    {
+      "source": "/pipecat/fundamentals/evaluations/bluejay",
+      "destination": "/pipecat/evals/platforms/bluejay"
+    },
+    {
+      "source": "/pipecat/fundamentals/evaluations/cekura",
+      "destination": "/pipecat/evals/platforms/cekura"
+    },
+    {
+      "source": "/pipecat/fundamentals/evaluations/coval",
+      "destination": "/pipecat/evals/platforms/coval"
     },
     {
       "source": "/examples",
@@ -2309,7 +2335,7 @@
     },
     {
       "source": "/cli/tail",
-      "destination": "/api-reference/cli/tail"
+      "destination": "/api-reference/cli/overview"
     },
     {
       "source": "/cli/cloud/agent",