diff --git a/.github/workflows/performance-comment.yml b/.github/workflows/performance-comment.yml new file mode 100644 index 000000000..8e8d1c2e6 --- /dev/null +++ b/.github/workflows/performance-comment.yml @@ -0,0 +1,121 @@ +name: Performance Comment + +# Posts the perf comparison as a sticky PR comment. +# +# This runs in the *base* repository's context via `workflow_run`, which +# means it has a writable `GITHUB_TOKEN` even for pull requests opened +# from forks. The `Performance` workflow itself runs in the (potentially +# untrusted) PR context and only produces an artifact — it has no PR +# write permissions. +# +# Security model: +# * The artifact from the PR-context workflow is treated as untrusted +# input. We only consume the JSON benchmark outputs (`baseline.json` / +# `candidate.json`) and regenerate the comparison markdown here using +# the base repository's checked-out `comparePerf.mjs` so the markdown +# posted under the writable token is never attacker-supplied. +# * The PR number is resolved authoritatively from the `workflow_run` +# event payload (or a GitHub API lookup keyed off the workflow's +# untamperable `head_sha`), never from artifact contents. This +# prevents a malicious PR from writing a different PR number into the +# artifact and tricking us into commenting on an unrelated PR. + +on: + workflow_run: + workflows: ["Performance"] + types: + - completed + +permissions: + contents: read + pull-requests: write + actions: read + +jobs: + comment: + name: Post sticky PR comment + runs-on: ubuntu-latest + if: github.event.workflow_run.event == 'pull_request' + + steps: + - name: Check out base repository (trusted) + uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: '22.x' + + - name: Download perf-results artifact + id: download + # Don't fail this job if the upstream Performance run was cancelled + # or failed before producing the artifact — we just skip posting. + continue-on-error: true + uses: actions/download-artifact@v4 + with: + name: perf-results + path: perf-results + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Resolve PR number + id: pr + if: steps.download.outcome == 'success' + uses: actions/github-script@v7 + with: + script: | + const wr = context.payload.workflow_run; + // Prefer the PR list attached to the workflow_run event payload + // (populated for same-repo PRs). Fork PRs leave this empty, so + // fall back to an authoritative API lookup keyed off the + // workflow_run's head_sha, which the triggering PR cannot forge. + let prNumber = wr.pull_requests && wr.pull_requests[0] && wr.pull_requests[0].number; + if (!prNumber) { + const { data: prs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({ + owner: context.repo.owner, + repo: context.repo.repo, + commit_sha: wr.head_sha, + }); + const match = prs.find((p) => p.state === "open"); + prNumber = match && match.number; + } + if (!Number.isInteger(prNumber) || prNumber <= 0) { + core.info("Could not determine PR number from workflow_run event; skipping comment."); + core.setOutput("number", ""); + return; + } + core.setOutput("number", String(prNumber)); + + - name: Regenerate report from JSON (trusted code) + id: report + if: steps.pr.outputs.number != '' + run: | + set -euo pipefail + if [ ! -s perf-results/baseline.json ] || [ ! -s perf-results/candidate.json ]; then + echo "baseline.json or candidate.json missing from artifact; skipping comment." >&2 + echo "found=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Use the base repo's comparePerf.mjs (trusted) to render the + # markdown from the PR-supplied JSON data, so we never post + # attacker-supplied markdown under the base repo's writable token. + node test/performanceTests/comparePerf.mjs \ + perf-results/baseline.json \ + perf-results/candidate.json \ + report.md \ + || true + + if [ -s report.md ]; then + echo "found=true" >> "$GITHUB_OUTPUT" + else + echo "found=false" >> "$GITHUB_OUTPUT" + echo "report.md was not produced; skipping comment." >&2 + fi + + - name: Post sticky PR comment + if: steps.pr.outputs.number != '' && steps.report.outputs.found == 'true' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: perf-regression + number: ${{ steps.pr.outputs.number }} + path: report.md diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index ca9d117ae..8730fdb0a 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -8,17 +8,16 @@ on: concurrency: group: perf-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true - + permissions: contents: read - pull-requests: write jobs: perf-regression: runs-on: ubuntu-latest # Headroom for two full benchmark runs (candidate + baseline) plus npm # installs and tarball packing. Each benchmark run is bounded by - # samples * scenarios * (warmup + duration + per-child init). + # scenarios * (warmup + samples * duration + per-child init). timeout-minutes: 40 env: NODE_VERSION: '22.x' @@ -103,7 +102,9 @@ jobs: --duration "$PERF_DURATION" \ --warmup "$PERF_WARMUP" - # ---- Compare and publish ---- + # ---- Compare for gating; the artifact is consumed by the companion + # `performance-comment.yml` workflow which posts the PR comment under + # the base repository's trusted token. ---- - name: Compare results id: compare working-directory: pr/test/performanceTests @@ -115,7 +116,7 @@ jobs: "$GITHUB_WORKSPACE/perf-comparison.md" echo "exit=$?" >> "$GITHUB_OUTPUT" - - name: Upload raw results + - name: Upload perf results artifact if: always() uses: actions/upload-artifact@v4 with: @@ -124,14 +125,7 @@ jobs: baseline.json candidate.json perf-comparison.md - - - name: Comment on PR (best-effort) - if: always() && github.event.pull_request.head.repo.full_name == github.repository - uses: marocchino/sticky-pull-request-comment@v2 - continue-on-error: true - with: - header: perf-regression - path: perf-comparison.md + if-no-files-found: warn - name: Fail job on gating regression if: steps.compare.outputs.exit != '0' diff --git a/test/performanceTests/README.md b/test/performanceTests/README.md index 757c537a8..76a314e8e 100644 --- a/test/performanceTests/README.md +++ b/test/performanceTests/README.md @@ -42,5 +42,12 @@ repo, so they are never used for gate-fail decisions. the base branch as tarballs, installs each in turn under the PR's perf harness, runs the benchmark suite, and fails the job (blocking merge when set as a required check) if any gating scenario regresses beyond -`PERF_REGRESSION_THRESHOLD` percent (default 15%). A sticky comment with the -full comparison table is posted to the PR. +`PERF_REGRESSION_THRESHOLD` percent (default 15%). All benchmark results are +uploaded as the `perf-results` artifact. + +`.github/workflows/performance-comment.yml` then runs in the base repository's +trusted context via `workflow_run`, downloads the artifact, regenerates the +comparison markdown from the JSON inputs using the base branch's own +`comparePerf.mjs` (so attacker-supplied markdown is never posted under the +base repo's writable token), and posts a sticky PR comment. This split is +what allows fork PRs to receive a perf-comparison comment. diff --git a/test/performanceTests/bench.mjs b/test/performanceTests/bench.mjs index 12c204a60..d5be202d8 100644 --- a/test/performanceTests/bench.mjs +++ b/test/performanceTests/bench.mjs @@ -8,26 +8,41 @@ * process makes JSON output and result capture deterministic, and a fresh * Node child per scenario keeps OpenTelemetry global state isolated. * + * All requested samples for the scenario are taken inside this single + * process — warmup runs once, then `--samples` timed durations are + * collected back-to-back. Per-sample Node startup and module-load cost + * (importing `applicationinsights` and calling `useAzureMonitor()`) + * dominate wall-clock time on cold processes, so amortising them across + * samples is the single biggest perf-CI speedup. OpenTelemetry global + * state is still isolated *between scenarios* because runBenchmarks.mjs + * spawns a fresh child per scenario. + * * Usage: - * node bench.mjs --scenario --duration --warmup --out + * node bench.mjs --scenario --duration --warmup + * --samples --out */ import { writeFileSync } from "node:fs"; function parseArgs(argv) { - const a = { duration: 8, warmup: 2 }; + const a = { duration: 8, warmup: 2, samples: 1 }; for (let i = 0; i < argv.length; i++) { const k = argv[i]; const v = () => argv[++i]; if (k === "--scenario") a.scenario = v(); else if (k === "--duration") a.duration = Number(v()); else if (k === "--warmup") a.warmup = Number(v()); + else if (k === "--samples") a.samples = Number(v()); else if (k === "--out") a.out = v(); } if (!a.scenario || !a.out) { console.error("Required: --scenario --out "); process.exit(2); } + if (!Number.isInteger(a.samples) || a.samples < 1) { + console.error("--samples must be a positive integer"); + process.exit(2); + } return a; } @@ -73,34 +88,37 @@ async function main() { } const instance = new Cls(); - // Warmup (not counted) + // Warmup runs once for the whole process, not per sample. The JIT and + // hot caches we're warming up persist across the subsequent timed + // samples in this same V8 instance. if (args.warmup > 0) { await runLoop(instance, args.warmup * 1000); } - const startWall = Date.now(); - const ops = await runLoop(instance, args.duration * 1000); - const elapsedMs = Date.now() - startWall; - const opsPerSec = (ops / elapsedMs) * 1000; + const samples = []; + for (let i = 0; i < args.samples; i++) { + const startWall = Date.now(); + const ops = await runLoop(instance, args.duration * 1000); + const elapsedMs = Date.now() - startWall; + const opsPerSec = (ops / elapsedMs) * 1000; + samples.push({ opsPerSec, ops, elapsedMs }); + console.log( + `[bench] ${args.scenario} sample ${i + 1}/${args.samples}: ${ops} ops in ${elapsedMs}ms => ${opsPerSec.toFixed(0)} ops/s`, + ); + } writeFileSync( args.out, JSON.stringify( { scenario: args.scenario, - opsPerSec, - ops, - elapsedMs, + samples, timestamp: new Date().toISOString(), }, null, 2, ), ); - - console.log( - `[bench] ${args.scenario}: ${ops} ops in ${elapsedMs}ms => ${opsPerSec.toFixed(0)} ops/s`, - ); } main().catch((err) => { diff --git a/test/performanceTests/runBenchmarks.mjs b/test/performanceTests/runBenchmarks.mjs index a015eed8c..9776dcd99 100644 --- a/test/performanceTests/runBenchmarks.mjs +++ b/test/performanceTests/runBenchmarks.mjs @@ -4,8 +4,7 @@ /* * Runs each scenario via bench.mjs in a fresh Node child process to avoid - * OpenTelemetry global-state contamination across scenarios. Each scenario - * is sampled N times; median ops/s is recorded. + * OpenTelemetry global-state contamination across scenarios. Median ops/s is recorded. * * Usage: * node runBenchmarks.mjs --out results.json [--samples 5] [--duration 8] @@ -75,6 +74,8 @@ function runOne(scenario, args, outPath) { String(args.duration), "--warmup", String(args.warmup), + "--samples", + String(args.samples), "--out", outPath, ], @@ -95,10 +96,16 @@ function runOne(scenario, args, outPath) { throw new Error(`Scenario ${scenario} failed with exit code ${child.status}`); } const result = JSON.parse(readFileSync(outPath, "utf8")); - if (!isFinite(result.opsPerSec)) { - throw new Error(`Scenario ${scenario} produced no ops/s value`); + if (!Array.isArray(result.samples) || result.samples.length === 0) { + throw new Error(`Scenario ${scenario} produced no samples`); } - return result.opsPerSec; + const ops = result.samples.map((s) => s.opsPerSec); + for (const v of ops) { + if (!isFinite(v)) { + throw new Error(`Scenario ${scenario} produced a non-finite ops/s value`); + } + } + return ops; } function main() { @@ -112,13 +119,11 @@ function main() { try { for (const scenario of selected) { - const samples = []; - for (let i = 0; i < args.samples; i++) { - const out = join(tmp, `${scenario.name}-${i}.json`); - console.log(`\n[run] ${scenario.name} sample ${i + 1}/${args.samples}`); - const ops = runOne(scenario.name, args, out); - samples.push(ops); - } + const out = join(tmp, `${scenario.name}.json`); + console.log( + `\n[run] ${scenario.name}: ${args.samples} sample(s) x ${args.duration}s (warmup ${args.warmup}s)`, + ); + const samples = runOne(scenario.name, args, out); results.push({ name: scenario.name, tier: scenario.tier,