Azure · placerda · Jun 10, 2026 · Jun 10, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,27 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres
 
 ## [Unreleased]
 
+### Fixed
+- **`execution: azd` reports no longer ship empty `Dataset:` lines and empty
+  `## Rows` tables.** The `eval.yaml` parser now recognizes the `dataset_file:`
+  field that `azd ai agent eval init` emits, so `report.md` shows the actual
+  dataset path. When azd returns aggregate metrics only (the normal case), the reporter omits
+  the row tables entirely and instead emits a `## Per-row breakdown` section that links to the
+  Foundry run for the per-sample view, or gives the portal path when azd returns no report URL.
+- **`agentops eval run` prints a clickable Foundry deep link on success.**
+  After a successful azd run that returns a report URL, the CLI now emits a
+  `Foundry run: <url>` line alongside the `results.json`/`report.md` paths so users can jump
+  straight to the per-sample table and rubric drill-downs in the Foundry portal.
+
+### Changed
+- **Shorter azd backend log line.** Replaced the verbose `Running azd backend:
+  azd --no-prompt ai agent eval run --config <long path> --output json` line
+  with a concise `Running azd backend: azd ai agent eval run`; the full
+  command remains captured in the per-failure debug logs introduced in 0.3.18.
+- **`execution: azd` startup line uses a workspace-relative recipe path** (or just
+  the file name when the recipe lives outside the workspace) so the "delegating to
+  azd ai agent eval" message stays readable on long Windows paths.
+
 ## [0.3.18] - 2026-06-10
 
 ### Fixed

diff --git a/src/agentops/cli/app.py b/src/agentops/cli/app.py
@@ -2766,6 +2766,10 @@ def _run_flat_schema_eval(
     typer.echo(f"{_cli_label('report.md')}:    {_cli_path(output_dir / 'report.md')}")
     if latest_dir is not None:
         typer.echo(f"{_cli_label('latest/')}:      {_cli_path(latest_dir)}")
+    azd_eval = result.config.get("azd_evaluation") if isinstance(result.config, dict) else None
+    report_url = azd_eval.get("report_url") if isinstance(azd_eval, dict) else None
+    if isinstance(report_url, str) and report_url.strip():
+        typer.echo(f"{_cli_label('Foundry run')}:  {report_url.strip()}")
     if result.summary.overall_passed:
         typer.echo(f"{_cli_label('Threshold status')}: {style('PASSED', 'bold', 'green')}")
         return

diff --git a/src/agentops/core/azd_eval.py b/src/agentops/core/azd_eval.py
@@ -139,6 +139,7 @@ class EvalRecipe(BaseModel):
     name: Optional[str] = None
     agent: Optional[EvalAgent] = None
     dataset_reference: Optional[EvalDatasetReference] = None
+    dataset_file: Optional[str] = None
     evaluators: list[EvalEvaluator] = Field(default_factory=list)
     options: Optional[EvalOptions] = None
 

diff --git a/src/agentops/pipeline/azd_runner.py b/src/agentops/pipeline/azd_runner.py
@@ -118,7 +118,7 @@ def run_azd_eval(
         "--output",
         "json",
     ]
-    notify(f"Running azd backend: {' '.join(command)}")
+    notify("Running azd backend: azd ai agent eval run")
 
     started = time.perf_counter()
     completed = _run_command(
@@ -301,7 +301,9 @@ def normalize_to_results(
             "azd_evaluation": {
                 "recipe_path": str(azd_run.recipe_path),
                 "run_id": azd_run.run_id,
+                "eval_id": _extract_eval_id(azd_run.payload),
                 "status": azd_run.status,
+                "report_url": _extract_report_url(azd_run.payload),
                 "dataset": (
                     recipe.dataset_reference.model_dump(mode="json")
                     if recipe.dataset_reference
@@ -477,6 +479,14 @@ def _extract_status(payload: Dict[str, Any]) -> str:
     return "unknown"
 
 
+def _extract_report_url(payload: Dict[str, Any]) -> Optional[str]:  # first http(s) URL stored under a known report-URL key, else None
+    for key in ("report_url", "reportUrl", "report_uri", "url"):  # keys are tried in this order; the first valid one wins
+        value = payload.get(key)
+        if isinstance(value, str) and value.strip().lower().startswith(("http://", "https://")):
+            return value.strip()  # scheme was matched case-insensitively; the value is returned stripped with its casing intact
+    return None  # no candidate key held an http(s) string
+
+
 def _extract_item_count(payload: Dict[str, Any]) -> int:
     for key in ("items_total", "item_count", "samples", "max_samples", "row_count"):
         value = payload.get(key)
@@ -585,6 +595,11 @@ def _looks_like_metric_name(name: str) -> bool:
 
 
 def _recipe_dataset_path(recipe: EvalRecipe, recipe_path: Path) -> str:
+    if recipe.dataset_file:
+        dataset = Path(recipe.dataset_file)
+        if not dataset.is_absolute():
+            dataset = recipe_path.parent / dataset
+        return str(dataset)
     ref = recipe.dataset_reference
     if ref and ref.local_uri:
         dataset = Path(ref.local_uri)

diff --git a/src/agentops/pipeline/orchestrator.py b/src/agentops/pipeline/orchestrator.py
@@ -537,10 +537,14 @@ def _run_evaluation_azd(
 
     recipe_path = azd_runner.resolve_eval_recipe(workspace, config)
     recipe = load_eval_recipe(recipe_path)
+    try:
+        recipe_display = recipe_path.relative_to(workspace).as_posix()
+    except ValueError:
+        recipe_display = recipe_path.name
     progress(
         f"execution: {style('azd', 'bold')} - delegating to "
-        f"{style('azd ai agent eval', 'cyan')} with recipe "
-        f"{style(str(recipe_path), 'cyan')}."
+        f"{style('azd ai agent eval', 'cyan')} (recipe "
+        f"{style(recipe_display, 'cyan')})."
     )
 
     azd_run = azd_runner.run_azd_eval(

diff --git a/src/agentops/pipeline/reporter.py b/src/agentops/pipeline/reporter.py
@@ -24,7 +24,8 @@ def render(result: RunResult) -> str:
     lines.append(f"- **Target:** `{result.target.raw}` ({result.target.kind})")
     if result.target.protocol:
         lines.append(f"- **Protocol:** {result.target.protocol}")
-    lines.append(f"- **Dataset:** `{result.dataset_path}`")
+    if result.dataset_path:
+        lines.append(f"- **Dataset:** `{result.dataset_path}`")
     lines.append(f"- **Started:** {result.started_at}")
     lines.append(f"- **Duration:** {result.duration_seconds:.2f}s")
     lines.append(f"- **Rows:** {result.summary.items_total}")
@@ -62,22 +63,27 @@ def render(result: RunResult) -> str:
             lines.append(f"| {row.row_index} | {_short(row.error or '', 200)} |")
         lines.append("")
 
-    lines.append("## Rows")
-    lines.append("")
-    lines.append("| # | Latency (s) | Metrics |")
-    lines.append("| --- | --- | --- |")
-    for row in result.rows:
-        lines.append(_row_summary(row))
-    lines.append("")
-
     if result.rows:
+        lines.append("## Rows")
+        lines.append("")
+        lines.append("| # | Latency (s) | Metrics |")
+        lines.append("| --- | --- | --- |")
+        for row in result.rows:
+            lines.append(_row_summary(row))
+        lines.append("")
+
         lines.append("## Row Details")
         lines.append("")
         lines.append("| # | Input | Response | Expected |")
         lines.append("| --- | --- | --- | --- |")
         for row in result.rows:
             lines.append(_row_detail(row))
         lines.append("")
+    else:
+        azd_eval = result.config.get("azd_evaluation")
+        if isinstance(azd_eval, dict):
+            lines.extend(_render_azd_aggregate_note(azd_eval))
+            lines.append("")
 
     cloud = result.config.get("cloud_evaluation")
     if isinstance(cloud, dict):
@@ -120,6 +126,26 @@ def _short(text: str, limit: int) -> str:
     return text if len(text) <= limit else text[: limit - 1] + "…"
 
 
+def _render_azd_aggregate_note(azd: dict) -> List[str]:  # Markdown lines for the "## Per-row breakdown" section
+    lines = ["## Per-row breakdown", ""]
+    lines.append(
+        "`execution: azd` reports aggregate metrics only; per-row scores "
+        "are recorded by Foundry."
+    )
+    report_url = azd.get("report_url")  # may be missing or non-string; checked on the next line
+    if isinstance(report_url, str) and report_url.strip():  # only a non-blank string counts as a usable link
+        lines.append("")
+        lines.append(f"**Open the run in Foundry:** {report_url.strip()}")
+    else:  # no usable URL: fall back to portal navigation instructions
+        lines.append("")
+        lines.append(
+            "Open the latest run in the Foundry portal "
+            "(Agents → your agent → Evaluations) to see the per-sample table "
+            "and rubric drill-downs."
+        )
+    return lines  # no trailing blank line is appended here
+
+
 def _render_cloud_evaluation(cloud: dict) -> List[str]:
     lines = ["## Foundry Cloud Session", ""]
     status = str(cloud.get("status") or "unknown")