From 7dbd73bb4817f383c120bc21cfd8ec39996c2649 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:15:45 +0800 Subject: [PATCH 01/11] docs(speed-bench): add generated benchmark summary --- speed-bench/README.md | 16 ++++ speed-bench/update_summary.py | 136 ++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 speed-bench/update_summary.py diff --git a/speed-bench/README.md b/speed-bench/README.md index 32075fe18..823dc9685 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -26,3 +26,19 @@ python3 speed-bench/plot_speed.py speed-bench/m3_max.csv --title "M3 Max t/s" The script uses only the Python standard library. By default it writes a file next to the CSV using the `_ts.svg` suffix, such as `speed-bench/m3_max_ts.svg`. + + +## Generated Benchmark Summary + +Generated from the CSV files in this directory by `python3 speed-bench/update_summary.py`. + +`@ 32k ctx` means the row where `ctx_tokens` is `32768`. + +| Benchmark | Best gen | Gen @ 32k ctx | Avg gen | Best prefill | Prefill @ 32k ctx | Avg prefill | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| M4 Max | 26.76 t/s | 24.52 t/s | 24.57 t/s | 343.76 t/s | 247.91 t/s | 250.39 t/s | +| M2 Ultra | 23.22 t/s | 21.92 t/s | 21.85 t/s | 410.62 t/s | 325.77 t/s | 324.90 t/s | +| GB10 | 14.23 t/s | 12.98 t/s | 13.13 t/s | 402.88 t/s | 346.36 t/s | 343.02 t/s | +| PRO model M3 Ultra | 12.42 t/s | 9.56 t/s | 9.90 t/s | 183.06 t/s | 138.82 t/s | 149.28 t/s | + + diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py new file mode 100644 index 000000000..d2ab33fe7 --- /dev/null +++ b/speed-bench/update_summary.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +"""Update the generated benchmark summary in speed-bench/README.md.""" + +import csv +from dataclasses import dataclass +from pathlib import Path + + +BEGIN_MARKER = "" +END_MARKER = "" +README = Path(__file__).with_name("README.md") +BENCH_DIR = Path(__file__).resolve().parent +REQUIRED_COLUMNS = {"ctx_tokens", "prefill_tps", "gen_tps"} +TARGET_CTX = 32768 + + +@dataclass +class BenchSummary: + name: str + best_gen: float + gen_at_target_ctx: float | None + avg_gen: float + best_prefill: float + prefill_at_target_ctx: float | None + avg_prefill: float + + +def display_name(path: Path) -> str: + replacements = { + "gb10": "GB10", + "m2": "M2", + "m3": "M3", + "m4": "M4", + "m5": "M5", + "pro": "PRO", + "max": "Max", + "ultra": "Ultra", + } + words = path.stem.replace("-", "_").split("_") + return " ".join(replacements.get(word.lower(), word) for word in words) + + +def fmt_tps(value: float | None) -> str: + if value is None: + return "n/a" + return f"{value:.2f} t/s" + + +def read_summary(path: Path) -> BenchSummary: + rows = [] + with path.open("r", encoding="utf-8-sig", newline="") as fp: + reader = csv.DictReader(fp) + missing = REQUIRED_COLUMNS.difference(reader.fieldnames or ()) + if missing: + missing_list = ", ".join(sorted(missing)) + raise SystemExit(f"{path}: missing CSV column(s): {missing_list}") + + for row in reader: + rows.append( + { + "ctx_tokens": int(row["ctx_tokens"]), + "prefill_tps": float(row["prefill_tps"]), + "gen_tps": float(row["gen_tps"]), + } + ) + + if not rows: + raise SystemExit(f"{path}: no benchmark rows") + + target_row = next((row for row in rows if row["ctx_tokens"] == TARGET_CTX), None) + return BenchSummary( + name=display_name(path), + best_gen=max(row["gen_tps"] for row in rows), + gen_at_target_ctx=target_row["gen_tps"] if target_row else None, + avg_gen=sum(row["gen_tps"] for row in rows) / len(rows), + best_prefill=max(row["prefill_tps"] for row in rows), + prefill_at_target_ctx=target_row["prefill_tps"] if target_row else None, + avg_prefill=sum(row["prefill_tps"] for row in rows) / len(rows), + ) + + +def render_summary(summaries: list[BenchSummary]) -> str: + summaries = sorted(summaries, key=lambda item: item.best_gen, reverse=True) + lines = [ + BEGIN_MARKER, + "## Generated Benchmark Summary", + "", + "Generated from the CSV files in this directory by `python3 speed-bench/update_summary.py`.", + "", + f"`@ 32k ctx` means the row where `ctx_tokens` is `{TARGET_CTX}`.", + "", + "| Benchmark | Best gen | Gen @ 32k ctx | Avg gen | Best prefill | Prefill @ 32k ctx | Avg prefill |", + "| --- | ---: | ---: | ---: | ---: | ---: | ---: |", + ] + for summary in summaries: + lines.append( + "| " + + " | ".join( + [ + summary.name, + fmt_tps(summary.best_gen), + fmt_tps(summary.gen_at_target_ctx), + fmt_tps(summary.avg_gen), + fmt_tps(summary.best_prefill), + fmt_tps(summary.prefill_at_target_ctx), + fmt_tps(summary.avg_prefill), + ] + ) + + " |" + ) + lines.extend(["", END_MARKER, ""]) + return "\n".join(lines) + + +def replace_generated_section(readme: str, generated: str) -> str: + begin = readme.find(BEGIN_MARKER) + end = readme.find(END_MARKER) + if begin == -1 and end == -1: + return readme.rstrip() + "\n\n" + generated + if begin == -1 or end == -1 or end < begin: + raise SystemExit("README.md has mismatched generated summary markers") + end += len(END_MARKER) + return readme[:begin].rstrip() + "\n\n" + generated.rstrip() + readme[end:].rstrip() + "\n" + + +def main() -> None: + csv_paths = sorted(BENCH_DIR.glob("*.csv")) + if not csv_paths: + raise SystemExit(f"{BENCH_DIR}: no CSV files found") + summaries = [read_summary(path) for path in csv_paths] + generated = render_summary(summaries) + README.write_text(replace_generated_section(README.read_text(encoding="utf-8"), generated), encoding="utf-8") + + +if __name__ == "__main__": + main() From aa29a54a19fb3a87a5d3dcaf25c667794e20d0f4 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:20:44 +0800 Subject: [PATCH 02/11] docs(speed-bench): simplify summary heading --- speed-bench/README.md | 2 +- speed-bench/update_summary.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/speed-bench/README.md b/speed-bench/README.md index 823dc9685..f18cd78af 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -28,7 +28,7 @@ The script uses only the Python standard library. By default it writes a file next to the CSV using the `_ts.svg` suffix, such as `speed-bench/m3_max_ts.svg`. -## Generated Benchmark Summary +## Benchmark Summary Generated from the CSV files in this directory by `python3 speed-bench/update_summary.py`. diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index d2ab33fe7..e69cd8e26 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -83,7 +83,7 @@ def render_summary(summaries: list[BenchSummary]) -> str: summaries = sorted(summaries, key=lambda item: item.best_gen, reverse=True) lines = [ BEGIN_MARKER, - "## Generated Benchmark Summary", + "## Benchmark Summary", "", "Generated from the CSV files in this directory by `python3 speed-bench/update_summary.py`.", "", From 17ef23c1d0a69726581f1023c151fd3a3a370a17 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:22:06 +0800 Subject: [PATCH 03/11] docs(speed-bench): clarify benchmark labels --- speed-bench/README.md | 2 +- speed-bench/update_summary.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/speed-bench/README.md b/speed-bench/README.md index f18cd78af..162e3718d 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -38,7 +38,7 @@ Generated from the CSV files in this directory by `python3 speed-bench/update_su | --- | ---: | ---: | ---: | ---: | ---: | ---: | | M4 Max | 26.76 t/s | 24.52 t/s | 24.57 t/s | 343.76 t/s | 247.91 t/s | 250.39 t/s | | M2 Ultra | 23.22 t/s | 21.92 t/s | 21.85 t/s | 410.62 t/s | 325.77 t/s | 324.90 t/s | -| GB10 | 14.23 t/s | 12.98 t/s | 13.13 t/s | 402.88 t/s | 346.36 t/s | 343.02 t/s | +| DGX Spark / GB10 | 14.23 t/s | 12.98 t/s | 13.13 t/s | 402.88 t/s | 346.36 t/s | 343.02 t/s | | PRO model M3 Ultra | 12.42 t/s | 9.56 t/s | 9.90 t/s | 183.06 t/s | 138.82 t/s | 149.28 t/s | diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index e69cd8e26..c46456e52 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -26,6 +26,15 @@ class BenchSummary: def display_name(path: Path) -> str: + name_overrides = { + "gb10": "DGX Spark / GB10", + "m2_ultra": "M2 Ultra", + "m4_max": "M4 Max", + "pro_model_m3_ultra": "PRO model M3 Ultra", + } + if path.stem in name_overrides: + return name_overrides[path.stem] + replacements = { "gb10": "GB10", "m2": "M2", From 3339423194ab04f46be39e82300fa6a6985b177c Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:24:04 +0800 Subject: [PATCH 04/11] docs(speed-bench): spell out benchmark targets --- speed-bench/README.md | 8 ++++---- speed-bench/update_summary.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/speed-bench/README.md b/speed-bench/README.md index 162e3718d..37ea1e8a5 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -36,9 +36,9 @@ Generated from the CSV files in this directory by `python3 speed-bench/update_su | Benchmark | Best gen | Gen @ 32k ctx | Avg gen | Best prefill | Prefill @ 32k ctx | Avg prefill | | --- | ---: | ---: | ---: | ---: | ---: | ---: | -| M4 Max | 26.76 t/s | 24.52 t/s | 24.57 t/s | 343.76 t/s | 247.91 t/s | 250.39 t/s | -| M2 Ultra | 23.22 t/s | 21.92 t/s | 21.85 t/s | 410.62 t/s | 325.77 t/s | 324.90 t/s | -| DGX Spark / GB10 | 14.23 t/s | 12.98 t/s | 13.13 t/s | 402.88 t/s | 346.36 t/s | 343.02 t/s | -| PRO model M3 Ultra | 12.42 t/s | 9.56 t/s | 9.90 t/s | 183.06 t/s | 138.82 t/s | 149.28 t/s | +| Apple M4 Max (DeepSeek V4 Flash q2) | 26.76 t/s | 24.52 t/s | 24.57 t/s | 343.76 t/s | 247.91 t/s | 250.39 t/s | +| Apple M2 Ultra (DeepSeek V4 Flash q2) | 23.22 t/s | 21.92 t/s | 21.85 t/s | 410.62 t/s | 325.77 t/s | 324.90 t/s | +| NVIDIA DGX Spark / GB10 (DeepSeek V4 Flash q2) | 14.23 t/s | 12.98 t/s | 13.13 t/s | 402.88 t/s | 346.36 t/s | 343.02 t/s | +| Apple M3 Ultra (DeepSeek V4 PRO q2) | 12.42 t/s | 9.56 t/s | 9.90 t/s | 183.06 t/s | 138.82 t/s | 149.28 t/s | diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index c46456e52..1b08b6c99 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -27,10 +27,10 @@ class BenchSummary: def display_name(path: Path) -> str: name_overrides = { - "gb10": "DGX Spark / GB10", - "m2_ultra": "M2 Ultra", - "m4_max": "M4 Max", - "pro_model_m3_ultra": "PRO model M3 Ultra", + "gb10": "NVIDIA DGX Spark / GB10 (DeepSeek V4 Flash q2)", + "m2_ultra": "Apple M2 Ultra (DeepSeek V4 Flash q2)", + "m4_max": "Apple M4 Max (DeepSeek V4 Flash q2)", + "pro_model_m3_ultra": "Apple M3 Ultra (DeepSeek V4 PRO q2)", } if path.stem in name_overrides: return name_overrides[path.stem] From e06cba522fe1a8869b7d5a34b073054001cf47f5 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:25:04 +0800 Subject: [PATCH 05/11] docs(speed-bench): move units into table headers --- speed-bench/README.md | 10 +++++----- speed-bench/update_summary.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/speed-bench/README.md b/speed-bench/README.md index 37ea1e8a5..e22382f4b 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -34,11 +34,11 @@ Generated from the CSV files in this directory by `python3 speed-bench/update_su `@ 32k ctx` means the row where `ctx_tokens` is `32768`. -| Benchmark | Best gen | Gen @ 32k ctx | Avg gen | Best prefill | Prefill @ 32k ctx | Avg prefill | +| Benchmark | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) | | --- | ---: | ---: | ---: | ---: | ---: | ---: | -| Apple M4 Max (DeepSeek V4 Flash q2) | 26.76 t/s | 24.52 t/s | 24.57 t/s | 343.76 t/s | 247.91 t/s | 250.39 t/s | -| Apple M2 Ultra (DeepSeek V4 Flash q2) | 23.22 t/s | 21.92 t/s | 21.85 t/s | 410.62 t/s | 325.77 t/s | 324.90 t/s | -| NVIDIA DGX Spark / GB10 (DeepSeek V4 Flash q2) | 14.23 t/s | 12.98 t/s | 13.13 t/s | 402.88 t/s | 346.36 t/s | 343.02 t/s | -| Apple M3 Ultra (DeepSeek V4 PRO q2) | 12.42 t/s | 9.56 t/s | 9.90 t/s | 183.06 t/s | 138.82 t/s | 149.28 t/s | +| Apple M4 Max (DeepSeek V4 Flash q2) | 26.76 | 24.52 | 24.57 | 343.76 | 247.91 | 250.39 | +| Apple M2 Ultra (DeepSeek V4 Flash q2) | 23.22 | 21.92 | 21.85 | 410.62 | 325.77 | 324.90 | +| NVIDIA DGX Spark / GB10 (DeepSeek V4 Flash q2) | 14.23 | 12.98 | 13.13 | 402.88 | 346.36 | 343.02 | +| Apple M3 Ultra (DeepSeek V4 PRO q2) | 12.42 | 9.56 | 9.90 | 183.06 | 138.82 | 149.28 | diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index 1b08b6c99..a9e4ae127 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -52,7 +52,7 @@ def display_name(path: Path) -> str: def fmt_tps(value: float | None) -> str: if value is None: return "n/a" - return f"{value:.2f} t/s" + return f"{value:.2f}" def read_summary(path: Path) -> BenchSummary: @@ -98,7 +98,7 @@ def render_summary(summaries: list[BenchSummary]) -> str: "", f"`@ 32k ctx` means the row where `ctx_tokens` is `{TARGET_CTX}`.", "", - "| Benchmark | Best gen | Gen @ 32k ctx | Avg gen | Best prefill | Prefill @ 32k ctx | Avg prefill |", + "| Benchmark | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) |", "| --- | ---: | ---: | ---: | ---: | ---: | ---: |", ] for summary in summaries: From a28fdefb7df9bedf17b44f6557b47a438fa16a35 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:27:18 +0800 Subject: [PATCH 06/11] docs(speed-bench): group summary by model --- speed-bench/README.md | 17 ++++++--- speed-bench/update_summary.py | 72 ++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/speed-bench/README.md b/speed-bench/README.md index e22382f4b..92a8ce8fb 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -34,11 +34,18 @@ Generated from the CSV files in this directory by `python3 speed-bench/update_su `@ 32k ctx` means the row where `ctx_tokens` is `32768`. -| Benchmark | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) | +### DeepSeek V4 Flash q2 + +| Hardware | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| Apple M4 Max | 26.76 | 24.52 | 24.57 | 343.76 | 247.91 | 250.39 | +| Apple M2 Ultra | 23.22 | 21.92 | 21.85 | 410.62 | 325.77 | 324.90 | +| NVIDIA DGX Spark / GB10 | 14.23 | 12.98 | 13.13 | 402.88 | 346.36 | 343.02 | + +### DeepSeek V4 PRO q2 + +| Hardware | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) | | --- | ---: | ---: | ---: | ---: | ---: | ---: | -| Apple M4 Max (DeepSeek V4 Flash q2) | 26.76 | 24.52 | 24.57 | 343.76 | 247.91 | 250.39 | -| Apple M2 Ultra (DeepSeek V4 Flash q2) | 23.22 | 21.92 | 21.85 | 410.62 | 325.77 | 324.90 | -| NVIDIA DGX Spark / GB10 (DeepSeek V4 Flash q2) | 14.23 | 12.98 | 13.13 | 402.88 | 346.36 | 343.02 | -| Apple M3 Ultra (DeepSeek V4 PRO q2) | 12.42 | 9.56 | 9.90 | 183.06 | 138.82 | 149.28 | +| Apple M3 Ultra | 12.42 | 9.56 | 9.90 | 183.06 | 138.82 | 149.28 | diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index a9e4ae127..c56e25550 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -16,7 +16,8 @@ @dataclass class BenchSummary: - name: str + hardware: str + model: str best_gen: float gen_at_target_ctx: float | None avg_gen: float @@ -25,12 +26,12 @@ class BenchSummary: avg_prefill: float -def display_name(path: Path) -> str: +def benchmark_labels(path: Path) -> tuple[str, str]: name_overrides = { - "gb10": "NVIDIA DGX Spark / GB10 (DeepSeek V4 Flash q2)", - "m2_ultra": "Apple M2 Ultra (DeepSeek V4 Flash q2)", - "m4_max": "Apple M4 Max (DeepSeek V4 Flash q2)", - "pro_model_m3_ultra": "Apple M3 Ultra (DeepSeek V4 PRO q2)", + "gb10": ("NVIDIA DGX Spark / GB10", "DeepSeek V4 Flash q2"), + "m2_ultra": ("Apple M2 Ultra", "DeepSeek V4 Flash q2"), + "m4_max": ("Apple M4 Max", "DeepSeek V4 Flash q2"), + "pro_model_m3_ultra": ("Apple M3 Ultra", "DeepSeek V4 PRO q2"), } if path.stem in name_overrides: return name_overrides[path.stem] @@ -46,7 +47,7 @@ def display_name(path: Path) -> str: "ultra": "Ultra", } words = path.stem.replace("-", "_").split("_") - return " ".join(replacements.get(word.lower(), word) for word in words) + return " ".join(replacements.get(word.lower(), word) for word in words), "Unspecified model" def fmt_tps(value: float | None) -> str: @@ -77,8 +78,10 @@ def read_summary(path: Path) -> BenchSummary: raise SystemExit(f"{path}: no benchmark rows") target_row = next((row for row in rows if row["ctx_tokens"] == TARGET_CTX), None) + hardware, model = benchmark_labels(path) return BenchSummary( - name=display_name(path), + hardware=hardware, + model=model, best_gen=max(row["gen_tps"] for row in rows), gen_at_target_ctx=target_row["gen_tps"] if target_row else None, avg_gen=sum(row["gen_tps"] for row in rows) / len(rows), @@ -89,7 +92,14 @@ def read_summary(path: Path) -> BenchSummary: def render_summary(summaries: list[BenchSummary]) -> str: - summaries = sorted(summaries, key=lambda item: item.best_gen, reverse=True) + by_model = {} + for summary in summaries: + by_model.setdefault(summary.model, []).append(summary) + model_groups = sorted( + by_model.items(), + key=lambda item: max(summary.best_gen for summary in item[1]), + reverse=True, + ) lines = [ BEGIN_MARKER, "## Benchmark Summary", @@ -98,26 +108,34 @@ def render_summary(summaries: list[BenchSummary]) -> str: "", f"`@ 32k ctx` means the row where `ctx_tokens` is `{TARGET_CTX}`.", "", - "| Benchmark | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) |", - "| --- | ---: | ---: | ---: | ---: | ---: | ---: |", ] - for summary in summaries: - lines.append( - "| " - + " | ".join( - [ - summary.name, - fmt_tps(summary.best_gen), - fmt_tps(summary.gen_at_target_ctx), - fmt_tps(summary.avg_gen), - fmt_tps(summary.best_prefill), - fmt_tps(summary.prefill_at_target_ctx), - fmt_tps(summary.avg_prefill), - ] - ) - + " |" + for model, model_summaries in model_groups: + lines.extend( + [ + f"### {model}", + "", + "| Hardware | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) |", + "| --- | ---: | ---: | ---: | ---: | ---: | ---: |", + ] ) - lines.extend(["", END_MARKER, ""]) + for summary in sorted(model_summaries, key=lambda item: item.best_gen, reverse=True): + lines.append( + "| " + + " | ".join( + [ + summary.hardware, + fmt_tps(summary.best_gen), + fmt_tps(summary.gen_at_target_ctx), + fmt_tps(summary.avg_gen), + fmt_tps(summary.best_prefill), + fmt_tps(summary.prefill_at_target_ctx), + fmt_tps(summary.avg_prefill), + ] + ) + + " |" + ) + lines.append("") + lines.extend([END_MARKER, ""]) return "\n".join(lines) From 77fe03d81b0857e79c58dba01abc32e9fc15bfd9 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:30:31 +0800 Subject: [PATCH 07/11] docs(speed-bench): round summary to three significant figures --- speed-bench/README.md | 8 ++++---- speed-bench/update_summary.py | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/speed-bench/README.md b/speed-bench/README.md index 92a8ce8fb..a5c3b0bd3 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -38,14 +38,14 @@ Generated from the CSV files in this directory by `python3 speed-bench/update_su | Hardware | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) | | --- | ---: | ---: | ---: | ---: | ---: | ---: | -| Apple M4 Max | 26.76 | 24.52 | 24.57 | 343.76 | 247.91 | 250.39 | -| Apple M2 Ultra | 23.22 | 21.92 | 21.85 | 410.62 | 325.77 | 324.90 | -| NVIDIA DGX Spark / GB10 | 14.23 | 12.98 | 13.13 | 402.88 | 346.36 | 343.02 | +| Apple M4 Max | 26.8 | 24.5 | 24.6 | 344 | 248 | 250 | +| Apple M2 Ultra | 23.2 | 21.9 | 21.9 | 411 | 326 | 325 | +| NVIDIA DGX Spark / GB10 | 14.2 | 13.0 | 13.1 | 403 | 346 | 343 | ### DeepSeek V4 PRO q2 | Hardware | Best gen (t/s) | Gen @ 32k ctx (t/s) | Avg gen (t/s) | Best prefill (t/s) | Prefill @ 32k ctx (t/s) | Avg prefill (t/s) | | --- | ---: | ---: | ---: | ---: | ---: | ---: | -| Apple M3 Ultra | 12.42 | 9.56 | 9.90 | 183.06 | 138.82 | 149.28 | +| Apple M3 Ultra | 12.4 | 9.56 | 9.90 | 183 | 139 | 149 | diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index c56e25550..953dd580f 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -53,6 +53,10 @@ def benchmark_labels(path: Path) -> tuple[str, str]: def fmt_tps(value: float | None) -> str: if value is None: return "n/a" + if abs(value) >= 100: + return f"{value:.0f}" + if abs(value) >= 10: + return f"{value:.1f}" return f"{value:.2f}" From 738a042055f00f1bf5a701a7e53fc1f458e1d4f6 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:34:35 +0800 Subject: [PATCH 08/11] docs(speed-bench): add benchmark metadata manifest --- speed-bench/README.md | 1 + speed-bench/benchmarks.json | 57 ++++++++++++++++++++++++++++++ speed-bench/update_summary.py | 66 +++++++++++++++++++++-------------- 3 files changed, 97 insertions(+), 27 deletions(-) create mode 100644 speed-bench/benchmarks.json diff --git a/speed-bench/README.md b/speed-bench/README.md index a5c3b0bd3..2d0844c32 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -17,6 +17,7 @@ Run `ds4-bench` as: Provide PR including your numbers if your hardware was not already tested. Call the benchmark csv file something like `m3_max.csv` or alike, so that it is clear what hardware was used for the benchmark. +Record the machine, backend, model, and run parameters in `benchmarks.json`. To generate an SVG graph from a CSV file: diff --git a/speed-bench/benchmarks.json b/speed-bench/benchmarks.json new file mode 100644 index 000000000..5f93bc2a1 --- /dev/null +++ b/speed-bench/benchmarks.json @@ -0,0 +1,57 @@ +{ + "schema_version": 1, + "benchmarks": [ + { + "csv": "gb10.csv", + "hardware": "NVIDIA DGX Spark / GB10", + "backend": "CUDA", + "model": "DeepSeek V4 Flash", + "quant": "q2", + "model_label": "DeepSeek V4 Flash q2", + "prompt_file": "speed-bench/promessi_sposi.txt", + "ctx_start": 2048, + "ctx_max": 65536, + "step_incr": 2048, + "gen_tokens": 128 + }, + { + "csv": "m2_ultra.csv", + "hardware": "Apple M2 Ultra", + "backend": "Metal", + "model": "DeepSeek V4 Flash", + "quant": "q2", + "model_label": "DeepSeek V4 Flash q2", + "prompt_file": "speed-bench/promessi_sposi.txt", + "ctx_start": 2048, + "ctx_max": 65536, + "step_incr": 2048, + "gen_tokens": 128 + }, + { + "csv": "m4_max.csv", + "hardware": "Apple M4 Max", + "backend": "Metal", + "model": "DeepSeek V4 Flash", + "quant": "q2", + "model_label": "DeepSeek V4 Flash q2", + "prompt_file": "speed-bench/promessi_sposi.txt", + "ctx_start": 2048, + "ctx_max": 65536, + "step_incr": 2048, + "gen_tokens": 128 + }, + { + "csv": "pro_model_m3_ultra.csv", + "hardware": "Apple M3 Ultra", + "backend": "Metal", + "model": "DeepSeek V4 PRO", + "quant": "q2", + "model_label": "DeepSeek V4 PRO q2", + "prompt_file": "speed-bench/promessi_sposi.txt", + "ctx_start": 2048, + "ctx_max": 32768, + "step_incr": 2048, + "gen_tokens": 128 + } + ] +} diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index 953dd580f..2e68815cb 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -2,6 +2,7 @@ """Update the generated benchmark summary in speed-bench/README.md.""" import csv +import json from dataclasses import dataclass from pathlib import Path @@ -10,6 +11,7 @@ END_MARKER = "" README = Path(__file__).with_name("README.md") BENCH_DIR = Path(__file__).resolve().parent +METADATA = BENCH_DIR / "benchmarks.json" REQUIRED_COLUMNS = {"ctx_tokens", "prefill_tps", "gen_tps"} TARGET_CTX = 32768 @@ -26,28 +28,34 @@ class BenchSummary: avg_prefill: float -def benchmark_labels(path: Path) -> tuple[str, str]: - name_overrides = { - "gb10": ("NVIDIA DGX Spark / GB10", "DeepSeek V4 Flash q2"), - "m2_ultra": ("Apple M2 Ultra", "DeepSeek V4 Flash q2"), - "m4_max": ("Apple M4 Max", "DeepSeek V4 Flash q2"), - "pro_model_m3_ultra": ("Apple M3 Ultra", "DeepSeek V4 PRO q2"), - } - if path.stem in name_overrides: - return name_overrides[path.stem] - - replacements = { - "gb10": "GB10", - "m2": "M2", - "m3": "M3", - "m4": "M4", - "m5": "M5", - "pro": "PRO", - "max": "Max", - "ultra": "Ultra", - } - words = path.stem.replace("-", "_").split("_") - return " ".join(replacements.get(word.lower(), word) for word in words), "Unspecified model" +def read_metadata() -> dict[str, dict[str, object]]: + try: + data = json.loads(METADATA.read_text(encoding="utf-8")) + except FileNotFoundError: + raise SystemExit(f"{METADATA}: metadata file is required") from None + except json.JSONDecodeError as exc: + raise SystemExit(f"{METADATA}: invalid JSON: {exc}") from None + + benchmarks = data.get("benchmarks") + if not isinstance(benchmarks, list): + raise SystemExit(f"{METADATA}: expected a benchmarks list") + + by_csv: dict[str, dict[str, object]] = {} + required = {"csv", "hardware", "model_label"} + for item in benchmarks: + if not isinstance(item, dict): + raise SystemExit(f"{METADATA}: benchmark entries must be objects") + missing = required.difference(item) + if missing: + missing_list = ", ".join(sorted(missing)) + raise SystemExit(f"{METADATA}: benchmark entry missing {missing_list}") + csv_name = item["csv"] + if not isinstance(csv_name, str) or not csv_name: + raise SystemExit(f"{METADATA}: benchmark csv must be a non-empty string") + if csv_name in by_csv: + raise SystemExit(f"{METADATA}: duplicate benchmark metadata for {csv_name}") + by_csv[csv_name] = item + return by_csv def fmt_tps(value: float | None) -> str: @@ -60,7 +68,7 @@ def fmt_tps(value: float | None) -> str: return f"{value:.2f}" -def read_summary(path: Path) -> BenchSummary: +def read_summary(path: Path, metadata: dict[str, object]) -> BenchSummary: rows = [] with path.open("r", encoding="utf-8-sig", newline="") as fp: reader = csv.DictReader(fp) @@ -82,10 +90,9 @@ def read_summary(path: Path) -> BenchSummary: raise SystemExit(f"{path}: no benchmark rows") target_row = next((row for row in rows if row["ctx_tokens"] == TARGET_CTX), None) - hardware, model = benchmark_labels(path) return BenchSummary( - hardware=hardware, - model=model, + hardware=str(metadata["hardware"]), + model=str(metadata["model_label"]), best_gen=max(row["gen_tps"] for row in rows), gen_at_target_ctx=target_row["gen_tps"] if target_row else None, avg_gen=sum(row["gen_tps"] for row in rows) / len(rows), @@ -158,7 +165,12 @@ def main() -> None: csv_paths = sorted(BENCH_DIR.glob("*.csv")) if not csv_paths: raise SystemExit(f"{BENCH_DIR}: no CSV files found") - summaries = [read_summary(path) for path in csv_paths] + metadata = read_metadata() + missing = [path.name for path in csv_paths if path.name not in metadata] + if missing: + missing_list = ", ".join(missing) + raise SystemExit(f"{METADATA}: missing metadata for CSV file(s): {missing_list}") + summaries = [read_summary(path, metadata[path.name]) for path in csv_paths] generated = render_summary(summaries) README.write_text(replace_generated_section(README.read_text(encoding="utf-8"), generated), encoding="utf-8") From 365101f3346f528da2f8b505e9bed1d8b3453dfe Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:38:11 +0800 Subject: [PATCH 09/11] docs(speed-bench): parse benchmark metadata as dataclass --- speed-bench/update_summary.py | 75 ++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 18 deletions(-) diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index 2e68815cb..d154dc4d5 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -16,6 +16,21 @@ TARGET_CTX = 32768 +@dataclass(frozen=True) +class BenchmarkMetadata: + csv: str + hardware: str + backend: str + model: str + quant: str + model_label: str + prompt_file: str + ctx_start: int + ctx_max: int + step_incr: int + gen_tokens: int + + @dataclass class BenchSummary: hardware: str @@ -28,7 +43,40 @@ class BenchSummary: avg_prefill: float -def read_metadata() -> dict[str, dict[str, object]]: +def require_str(item: dict[str, object], field: str) -> str: + value = item.get(field) + if not isinstance(value, str) or not value: + raise SystemExit(f"{METADATA}: benchmark {field} must be a non-empty string") + return value + + +def require_int(item: dict[str, object], field: str) -> int: + value = item.get(field) + if not isinstance(value, int): + raise SystemExit(f"{METADATA}: benchmark {field} must be an integer") + return value + + +def parse_benchmark_metadata(item: object) -> BenchmarkMetadata: + if not isinstance(item, dict): + raise SystemExit(f"{METADATA}: benchmark entries must be objects") + + return BenchmarkMetadata( + csv=require_str(item, "csv"), + hardware=require_str(item, "hardware"), + backend=require_str(item, "backend"), + model=require_str(item, "model"), + quant=require_str(item, "quant"), + model_label=require_str(item, "model_label"), + prompt_file=require_str(item, "prompt_file"), + ctx_start=require_int(item, "ctx_start"), + ctx_max=require_int(item, "ctx_max"), + step_incr=require_int(item, "step_incr"), + gen_tokens=require_int(item, "gen_tokens"), + ) + + +def read_metadata() -> dict[str, BenchmarkMetadata]: try: data = json.loads(METADATA.read_text(encoding="utf-8")) except FileNotFoundError: @@ -40,21 +88,12 @@ def read_metadata() -> dict[str, dict[str, object]]: if not isinstance(benchmarks, list): raise SystemExit(f"{METADATA}: expected a benchmarks list") - by_csv: dict[str, dict[str, object]] = {} - required = {"csv", "hardware", "model_label"} + by_csv: dict[str, BenchmarkMetadata] = {} for item in benchmarks: - if not isinstance(item, dict): - raise SystemExit(f"{METADATA}: benchmark entries must be objects") - missing = required.difference(item) - if missing: - missing_list = ", ".join(sorted(missing)) - raise SystemExit(f"{METADATA}: benchmark entry missing {missing_list}") - csv_name = item["csv"] - if not isinstance(csv_name, str) or not csv_name: - raise SystemExit(f"{METADATA}: benchmark csv must be a non-empty string") - if csv_name in by_csv: - raise SystemExit(f"{METADATA}: duplicate benchmark metadata for {csv_name}") - by_csv[csv_name] = item + metadata = parse_benchmark_metadata(item) + if metadata.csv in by_csv: + raise SystemExit(f"{METADATA}: duplicate benchmark metadata for {metadata.csv}") + by_csv[metadata.csv] = metadata return by_csv @@ -68,7 +107,7 @@ def fmt_tps(value: float | None) -> str: return f"{value:.2f}" -def read_summary(path: Path, metadata: dict[str, object]) -> BenchSummary: +def read_summary(path: Path, metadata: BenchmarkMetadata) -> BenchSummary: rows = [] with path.open("r", encoding="utf-8-sig", newline="") as fp: reader = csv.DictReader(fp) @@ -91,8 +130,8 @@ def read_summary(path: Path, metadata: dict[str, object]) -> BenchSummary: target_row = next((row for row in rows if row["ctx_tokens"] == TARGET_CTX), None) return BenchSummary( - hardware=str(metadata["hardware"]), - model=str(metadata["model_label"]), + hardware=metadata.hardware, + model=metadata.model_label, best_gen=max(row["gen_tps"] for row in rows), gen_at_target_ctx=target_row["gen_tps"] if target_row else None, avg_gen=sum(row["gen_tps"] for row in rows) / len(rows), From 66253da1c5676ebc9cd1de098807aa78248d7f71 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:43:26 +0800 Subject: [PATCH 10/11] docs(speed-bench): use schema for benchmark metadata --- speed-bench/README.md | 2 + speed-bench/benchmarks.json | 1 + speed-bench/benchmarks.schema.json | 79 ++++++++++++++++++++++++++++++ speed-bench/update_summary.py | 51 +++++-------------- 4 files changed, 95 insertions(+), 38 deletions(-) create mode 100644 speed-bench/benchmarks.schema.json diff --git a/speed-bench/README.md b/speed-bench/README.md index 2d0844c32..03a6cad7a 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -18,6 +18,8 @@ Provide PR including your numbers if your hardware was not already tested. Call the benchmark csv file something like `m3_max.csv` or alike, so that it is clear what hardware was used for the benchmark. Record the machine, backend, model, and run parameters in `benchmarks.json`. +The summary updater validates it with `benchmarks.schema.json` and requires the +Python `jsonschema` package. To generate an SVG graph from a CSV file: diff --git a/speed-bench/benchmarks.json b/speed-bench/benchmarks.json index 5f93bc2a1..6ac690aca 100644 --- a/speed-bench/benchmarks.json +++ b/speed-bench/benchmarks.json @@ -1,4 +1,5 @@ { + "$schema": "./benchmarks.schema.json", "schema_version": 1, "benchmarks": [ { diff --git a/speed-bench/benchmarks.schema.json b/speed-bench/benchmarks.schema.json new file mode 100644 index 000000000..180597901 --- /dev/null +++ b/speed-bench/benchmarks.schema.json @@ -0,0 +1,79 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "required": [ + "schema_version", + "benchmarks" + ], + "additionalProperties": false, + "properties": { + "$schema": { + "type": "string" + }, + "schema_version": { + "const": 1 + }, + "benchmarks": { + "type": "array", + "items": { + "type": "object", + "required": [ + "csv", + "hardware", + "backend", + "model", + "quant", + "model_label", + "prompt_file", + "ctx_start", + "ctx_max", + "step_incr", + "gen_tokens" + ], + "additionalProperties": false, + "properties": { + "csv": { + "type": "string", + "minLength": 1 + }, + "hardware": { + "type": "string", + "minLength": 1 + }, + "backend": { + "type": "string", + "minLength": 1 + }, + "model": { + "type": "string", + "minLength": 1 + }, + "quant": { + "type": "string", + "minLength": 1 + }, + "model_label": { + "type": "string", + "minLength": 1 + }, + "prompt_file": { + "type": "string", + "minLength": 1 + }, + "ctx_start": { + "type": "integer" + }, + "ctx_max": { + "type": "integer" + }, + "step_incr": { + "type": "integer" + }, + "gen_tokens": { + "type": "integer" + } + } + } + } + } +} diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index d154dc4d5..04b1f148e 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -6,12 +6,18 @@ from dataclasses import dataclass from pathlib import Path +try: + import jsonschema +except ImportError as exc: + raise SystemExit("python3-jsonschema is required to validate benchmarks.json") from exc + BEGIN_MARKER = "" END_MARKER = "" README = Path(__file__).with_name("README.md") BENCH_DIR = Path(__file__).resolve().parent METADATA = BENCH_DIR / "benchmarks.json" +METADATA_SCHEMA = BENCH_DIR / "benchmarks.schema.json" REQUIRED_COLUMNS = {"ctx_tokens", "prefill_tps", "gen_tps"} TARGET_CTX = 32768 @@ -43,39 +49,6 @@ class BenchSummary: avg_prefill: float -def require_str(item: dict[str, object], field: str) -> str: - value = item.get(field) - if not isinstance(value, str) or not value: - raise SystemExit(f"{METADATA}: benchmark {field} must be a non-empty string") - return value - - -def require_int(item: dict[str, object], field: str) -> int: - value = item.get(field) - if not isinstance(value, int): - raise SystemExit(f"{METADATA}: benchmark {field} must be an integer") - return value - - -def parse_benchmark_metadata(item: object) -> BenchmarkMetadata: - if not isinstance(item, dict): - raise SystemExit(f"{METADATA}: benchmark entries must be objects") - - return BenchmarkMetadata( - csv=require_str(item, "csv"), - hardware=require_str(item, "hardware"), - backend=require_str(item, "backend"), - model=require_str(item, "model"), - quant=require_str(item, "quant"), - model_label=require_str(item, "model_label"), - prompt_file=require_str(item, "prompt_file"), - ctx_start=require_int(item, "ctx_start"), - ctx_max=require_int(item, "ctx_max"), - step_incr=require_int(item, "step_incr"), - gen_tokens=require_int(item, "gen_tokens"), - ) - - def read_metadata() -> dict[str, BenchmarkMetadata]: try: data = json.loads(METADATA.read_text(encoding="utf-8")) @@ -84,13 +57,15 @@ def read_metadata() -> dict[str, BenchmarkMetadata]: except json.JSONDecodeError as exc: raise SystemExit(f"{METADATA}: invalid JSON: {exc}") from None - benchmarks = data.get("benchmarks") - if not isinstance(benchmarks, list): - raise SystemExit(f"{METADATA}: expected a benchmarks list") + schema = json.loads(METADATA_SCHEMA.read_text(encoding="utf-8")) + try: + jsonschema.validate(data, schema) + except jsonschema.ValidationError as exc: + raise SystemExit(f"{METADATA}: invalid metadata: {exc.message}") from None by_csv: dict[str, BenchmarkMetadata] = {} - for item in benchmarks: - metadata = parse_benchmark_metadata(item) + for item in data["benchmarks"]: + metadata = BenchmarkMetadata(**item) if metadata.csv in by_csv: raise SystemExit(f"{METADATA}: duplicate benchmark metadata for {metadata.csv}") by_csv[metadata.csv] = metadata From 3c74bf6a57b813d4077ddb39288f4abdecdd753f Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 15 Jun 2026 14:46:39 +0800 Subject: [PATCH 11/11] docs(speed-bench): keep metadata parsing stdlib-only --- speed-bench/README.md | 2 - speed-bench/benchmarks.json | 1 - speed-bench/benchmarks.schema.json | 79 ------------------------------ speed-bench/update_summary.py | 22 ++++----- 4 files changed, 10 insertions(+), 94 deletions(-) delete mode 100644 speed-bench/benchmarks.schema.json diff --git a/speed-bench/README.md b/speed-bench/README.md index 03a6cad7a..2d0844c32 100644 --- a/speed-bench/README.md +++ b/speed-bench/README.md @@ -18,8 +18,6 @@ Provide PR including your numbers if your hardware was not already tested. Call the benchmark csv file something like `m3_max.csv` or alike, so that it is clear what hardware was used for the benchmark. Record the machine, backend, model, and run parameters in `benchmarks.json`. -The summary updater validates it with `benchmarks.schema.json` and requires the -Python `jsonschema` package. To generate an SVG graph from a CSV file: diff --git a/speed-bench/benchmarks.json b/speed-bench/benchmarks.json index 6ac690aca..5f93bc2a1 100644 --- a/speed-bench/benchmarks.json +++ b/speed-bench/benchmarks.json @@ -1,5 +1,4 @@ { - "$schema": "./benchmarks.schema.json", "schema_version": 1, "benchmarks": [ { diff --git a/speed-bench/benchmarks.schema.json b/speed-bench/benchmarks.schema.json deleted file mode 100644 index 180597901..000000000 --- a/speed-bench/benchmarks.schema.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "type": "object", - "required": [ - "schema_version", - "benchmarks" - ], - "additionalProperties": false, - "properties": { - "$schema": { - "type": "string" - }, - "schema_version": { - "const": 1 - }, - "benchmarks": { - "type": "array", - "items": { - "type": "object", - "required": [ - "csv", - "hardware", - "backend", - "model", - "quant", - "model_label", - "prompt_file", - "ctx_start", - "ctx_max", - "step_incr", - "gen_tokens" - ], - "additionalProperties": false, - "properties": { - "csv": { - "type": "string", - "minLength": 1 - }, - "hardware": { - "type": "string", - "minLength": 1 - }, - "backend": { - "type": "string", - "minLength": 1 - }, - "model": { - "type": "string", - "minLength": 1 - }, - "quant": { - "type": "string", - "minLength": 1 - }, - "model_label": { - "type": "string", - "minLength": 1 - }, - "prompt_file": { - "type": "string", - "minLength": 1 - }, - "ctx_start": { - "type": "integer" - }, - "ctx_max": { - "type": "integer" - }, - "step_incr": { - "type": "integer" - }, - "gen_tokens": { - "type": "integer" - } - } - } - } - } -} diff --git a/speed-bench/update_summary.py b/speed-bench/update_summary.py index 04b1f148e..6639e5ab1 100644 --- a/speed-bench/update_summary.py +++ b/speed-bench/update_summary.py @@ -6,18 +6,12 @@ from dataclasses import dataclass from pathlib import Path -try: - import jsonschema -except ImportError as exc: - raise SystemExit("python3-jsonschema is required to validate benchmarks.json") from exc - BEGIN_MARKER = "" END_MARKER = "" README = Path(__file__).with_name("README.md") BENCH_DIR = Path(__file__).resolve().parent METADATA = BENCH_DIR / "benchmarks.json" -METADATA_SCHEMA = BENCH_DIR / "benchmarks.schema.json" REQUIRED_COLUMNS = {"ctx_tokens", "prefill_tps", "gen_tps"} TARGET_CTX = 32768 @@ -57,15 +51,19 @@ def read_metadata() -> dict[str, BenchmarkMetadata]: except json.JSONDecodeError as exc: raise SystemExit(f"{METADATA}: invalid JSON: {exc}") from None - schema = json.loads(METADATA_SCHEMA.read_text(encoding="utf-8")) try: - jsonschema.validate(data, schema) - except jsonschema.ValidationError as exc: - raise SystemExit(f"{METADATA}: invalid metadata: {exc.message}") from None + benchmarks = data["benchmarks"] + except (KeyError, TypeError): + raise SystemExit(f"{METADATA}: expected a benchmarks list") from None + if not isinstance(benchmarks, list): + raise SystemExit(f"{METADATA}: expected a benchmarks list") by_csv: dict[str, BenchmarkMetadata] = {} - for item in data["benchmarks"]: - metadata = BenchmarkMetadata(**item) + for item in benchmarks: + try: + metadata = BenchmarkMetadata(**item) + except TypeError as exc: + raise SystemExit(f"{METADATA}: invalid benchmark metadata: {exc}") from None if metadata.csv in by_csv: raise SystemExit(f"{METADATA}: duplicate benchmark metadata for {metadata.csv}") by_csv[metadata.csv] = metadata