From 879ab8d6e112270b1f0939ed72e36f41a45969ab Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Mon, 15 Jun 2026 11:13:29 -0700 Subject: [PATCH 1/2] perf: amortize per-sample startup and post comments on fork PRs Run all --samples of a given scenario inside a single child Node process. OpenTelemetry global-state isolation is preserved because each scenario still gets a fresh process, but Node startup, importing applicationinsights' large dependency tree, and one-time SDK init (useAzureMonitor()) are now paid once per scenario instead of once per sample. On the perf CI runner this removes several minutes from each "Run X benchmarks" step. Split the perf workflow into producer/consumer to support fork PRs: - performance.yml now runs without pull-requests:write and uploads baseline.json, candidate.json, perf-comparison.md, and pr-number.txt as the perf-results artifact. The inline sticky-comment step (which was gated on head.repo.full_name == github.repository, so forks never got comments) is removed. - performance-comment.yml is new. It triggers on workflow_run after Performance completes, runs in the trusted base-repo context with pull-requests:write, downloads the artifact, validates the PR number is a positive integer, regenerates report.md from the JSON using the base branch's own trusted comparePerf.mjs (so attacker- supplied markdown is never posted under the writable token), and posts the sticky comment. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/performance-comment.yml | 110 ++++++++++++++++++++++ .github/workflows/performance.yml | 25 +++-- test/performanceTests/README.md | 11 ++- test/performanceTests/bench.mjs | 46 ++++++--- test/performanceTests/runBenchmarks.mjs | 29 +++--- 5 files changed, 180 insertions(+), 41 deletions(-) create mode 100644 .github/workflows/performance-comment.yml diff --git a/.github/workflows/performance-comment.yml b/.github/workflows/performance-comment.yml new file mode 100644 index 000000000..22e410f81 --- /dev/null +++ b/.github/workflows/performance-comment.yml @@ -0,0 +1,110 @@ +name: Performance Comment + +# Posts the perf comparison as a sticky PR comment. +# +# This runs in the *base* repository's context via `workflow_run`, which +# means it has a writable `GITHUB_TOKEN` even for pull requests opened +# from forks. The `Performance` workflow itself runs in the (potentially +# untrusted) PR context and only produces an artifact — it has no PR +# write permissions. +# +# Security model: +# * The artifact from the PR-context workflow is treated as untrusted +# input. We only consume the JSON benchmark outputs (`baseline.json` / +# `candidate.json`) and the PR number, and we regenerate the +# comparison markdown here using the base repository's checked-out +# `comparePerf.mjs` so the markdown posted under the writable token +# is never attacker-supplied. +# * The PR number is validated as a positive integer before being used. + +on: + workflow_run: + workflows: ["Performance"] + types: + - completed + +permissions: + contents: read + pull-requests: write + actions: read + +jobs: + comment: + name: Post sticky PR comment + runs-on: ubuntu-latest + if: github.event.workflow_run.event == 'pull_request' + + steps: + - name: Check out base repository (trusted) + uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: '22.x' + + - name: Download perf-results artifact + id: download + # Don't fail this job if the upstream Performance run was cancelled + # or failed before producing the artifact — we just skip posting. + continue-on-error: true + uses: actions/download-artifact@v4 + with: + name: perf-results + path: perf-results + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Resolve PR number + id: pr + if: steps.download.outcome == 'success' + run: | + set -euo pipefail + pr="" + if [ -f perf-results/pr-number.txt ]; then + pr="$(tr -d '[:space:]' < perf-results/pr-number.txt)" + fi + + if ! [[ "$pr" =~ ^[1-9][0-9]*$ ]]; then + echo "pr-number.txt missing or not a positive integer (got: '${pr}'); skipping comment." >&2 + echo "number=" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "number=$pr" >> "$GITHUB_OUTPUT" + + - name: Regenerate report from JSON (trusted code) + id: report + if: steps.pr.outputs.number != '' + env: + PERF_REGRESSION_THRESHOLD: '15' + run: | + set -euo pipefail + if [ ! -s perf-results/baseline.json ] || [ ! -s perf-results/candidate.json ]; then + echo "baseline.json or candidate.json missing from artifact; skipping comment." >&2 + echo "found=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Use the base repo's comparePerf.mjs (trusted) to render the + # markdown from the PR-supplied JSON data, so we never post + # attacker-supplied markdown under the base repo's writable token. + node test/performanceTests/comparePerf.mjs \ + perf-results/baseline.json \ + perf-results/candidate.json \ + report.md \ + || true + + if [ -s report.md ]; then + echo "found=true" >> "$GITHUB_OUTPUT" + else + echo "found=false" >> "$GITHUB_OUTPUT" + echo "report.md was not produced; skipping comment." >&2 + fi + + - name: Post sticky PR comment + if: steps.pr.outputs.number != '' && steps.report.outputs.found == 'true' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: perf-regression + number: ${{ steps.pr.outputs.number }} + path: report.md diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index ca9d117ae..fbe9a84c5 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -8,17 +8,16 @@ on: concurrency: group: perf-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true - + permissions: contents: read - pull-requests: write jobs: perf-regression: runs-on: ubuntu-latest # Headroom for two full benchmark runs (candidate + baseline) plus npm # installs and tarball packing. Each benchmark run is bounded by - # samples * scenarios * (warmup + duration + per-child init). + # scenarios * (warmup + samples * duration + per-child init). timeout-minutes: 40 env: NODE_VERSION: '22.x' @@ -103,7 +102,9 @@ jobs: --duration "$PERF_DURATION" \ --warmup "$PERF_WARMUP" - # ---- Compare and publish ---- + # ---- Compare for gating; the artifact is consumed by the companion + # `performance-comment.yml` workflow which posts the PR comment under + # the base repository's trusted token. ---- - name: Compare results id: compare working-directory: pr/test/performanceTests @@ -115,7 +116,11 @@ jobs: "$GITHUB_WORKSPACE/perf-comparison.md" echo "exit=$?" >> "$GITHUB_OUTPUT" - - name: Upload raw results + - name: Record PR number + if: always() && github.event_name == 'pull_request' + run: echo "${{ github.event.pull_request.number }}" > "$GITHUB_WORKSPACE/pr-number.txt" + + - name: Upload perf results artifact if: always() uses: actions/upload-artifact@v4 with: @@ -124,14 +129,8 @@ jobs: baseline.json candidate.json perf-comparison.md - - - name: Comment on PR (best-effort) - if: always() && github.event.pull_request.head.repo.full_name == github.repository - uses: marocchino/sticky-pull-request-comment@v2 - continue-on-error: true - with: - header: perf-regression - path: perf-comparison.md + pr-number.txt + if-no-files-found: warn - name: Fail job on gating regression if: steps.compare.outputs.exit != '0' diff --git a/test/performanceTests/README.md b/test/performanceTests/README.md index 757c537a8..76a314e8e 100644 --- a/test/performanceTests/README.md +++ b/test/performanceTests/README.md @@ -42,5 +42,12 @@ repo, so they are never used for gate-fail decisions. the base branch as tarballs, installs each in turn under the PR's perf harness, runs the benchmark suite, and fails the job (blocking merge when set as a required check) if any gating scenario regresses beyond -`PERF_REGRESSION_THRESHOLD` percent (default 15%). A sticky comment with the -full comparison table is posted to the PR. +`PERF_REGRESSION_THRESHOLD` percent (default 15%). All benchmark results are +uploaded as the `perf-results` artifact. + +`.github/workflows/performance-comment.yml` then runs in the base repository's +trusted context via `workflow_run`, downloads the artifact, regenerates the +comparison markdown from the JSON inputs using the base branch's own +`comparePerf.mjs` (so attacker-supplied markdown is never posted under the +base repo's writable token), and posts a sticky PR comment. This split is +what allows fork PRs to receive a perf-comparison comment. diff --git a/test/performanceTests/bench.mjs b/test/performanceTests/bench.mjs index 12c204a60..ab5c753e1 100644 --- a/test/performanceTests/bench.mjs +++ b/test/performanceTests/bench.mjs @@ -8,26 +8,41 @@ * process makes JSON output and result capture deterministic, and a fresh * Node child per scenario keeps OpenTelemetry global state isolated. * + * All requested samples for the scenario are taken inside this single + * process — warmup runs once, then `--samples` timed durations are + * collected back-to-back. Per-sample Node startup and module-load cost + * (importing `applicationinsights` and calling `useAzureMonitor()`) + * dominate wall-clock time on cold processes, so amortising them across + * samples is the single biggest perf-CI speedup. OpenTelemetry global + * state is still isolated *between scenarios* because runBenchmarks.mjs + * spawns a fresh child per scenario. + * * Usage: - * node bench.mjs --scenario --duration --warmup --out + * node bench.mjs --scenario --duration --warmup + * --samples --out */ import { writeFileSync } from "node:fs"; function parseArgs(argv) { - const a = { duration: 8, warmup: 2 }; + const a = { duration: 8, warmup: 2, samples: 1 }; for (let i = 0; i < argv.length; i++) { const k = argv[i]; const v = () => argv[++i]; if (k === "--scenario") a.scenario = v(); else if (k === "--duration") a.duration = Number(v()); else if (k === "--warmup") a.warmup = Number(v()); + else if (k === "--samples") a.samples = Number(v()); else if (k === "--out") a.out = v(); } if (!a.scenario || !a.out) { console.error("Required: --scenario --out "); process.exit(2); } + if (!Number.isFinite(a.samples) || a.samples < 1) { + console.error("--samples must be a positive integer"); + process.exit(2); + } return a; } @@ -73,34 +88,37 @@ async function main() { } const instance = new Cls(); - // Warmup (not counted) + // Warmup runs once for the whole process, not per sample. The JIT and + // hot caches we're warming up persist across the subsequent timed + // samples in this same V8 instance. if (args.warmup > 0) { await runLoop(instance, args.warmup * 1000); } - const startWall = Date.now(); - const ops = await runLoop(instance, args.duration * 1000); - const elapsedMs = Date.now() - startWall; - const opsPerSec = (ops / elapsedMs) * 1000; + const samples = []; + for (let i = 0; i < args.samples; i++) { + const startWall = Date.now(); + const ops = await runLoop(instance, args.duration * 1000); + const elapsedMs = Date.now() - startWall; + const opsPerSec = (ops / elapsedMs) * 1000; + samples.push({ opsPerSec, ops, elapsedMs }); + console.log( + `[bench] ${args.scenario} sample ${i + 1}/${args.samples}: ${ops} ops in ${elapsedMs}ms => ${opsPerSec.toFixed(0)} ops/s`, + ); + } writeFileSync( args.out, JSON.stringify( { scenario: args.scenario, - opsPerSec, - ops, - elapsedMs, + samples, timestamp: new Date().toISOString(), }, null, 2, ), ); - - console.log( - `[bench] ${args.scenario}: ${ops} ops in ${elapsedMs}ms => ${opsPerSec.toFixed(0)} ops/s`, - ); } main().catch((err) => { diff --git a/test/performanceTests/runBenchmarks.mjs b/test/performanceTests/runBenchmarks.mjs index a015eed8c..9776dcd99 100644 --- a/test/performanceTests/runBenchmarks.mjs +++ b/test/performanceTests/runBenchmarks.mjs @@ -4,8 +4,7 @@ /* * Runs each scenario via bench.mjs in a fresh Node child process to avoid - * OpenTelemetry global-state contamination across scenarios. Each scenario - * is sampled N times; median ops/s is recorded. + * OpenTelemetry global-state contamination across scenarios. Median ops/s is recorded. * * Usage: * node runBenchmarks.mjs --out results.json [--samples 5] [--duration 8] @@ -75,6 +74,8 @@ function runOne(scenario, args, outPath) { String(args.duration), "--warmup", String(args.warmup), + "--samples", + String(args.samples), "--out", outPath, ], @@ -95,10 +96,16 @@ function runOne(scenario, args, outPath) { throw new Error(`Scenario ${scenario} failed with exit code ${child.status}`); } const result = JSON.parse(readFileSync(outPath, "utf8")); - if (!isFinite(result.opsPerSec)) { - throw new Error(`Scenario ${scenario} produced no ops/s value`); + if (!Array.isArray(result.samples) || result.samples.length === 0) { + throw new Error(`Scenario ${scenario} produced no samples`); } - return result.opsPerSec; + const ops = result.samples.map((s) => s.opsPerSec); + for (const v of ops) { + if (!isFinite(v)) { + throw new Error(`Scenario ${scenario} produced a non-finite ops/s value`); + } + } + return ops; } function main() { @@ -112,13 +119,11 @@ function main() { try { for (const scenario of selected) { - const samples = []; - for (let i = 0; i < args.samples; i++) { - const out = join(tmp, `${scenario.name}-${i}.json`); - console.log(`\n[run] ${scenario.name} sample ${i + 1}/${args.samples}`); - const ops = runOne(scenario.name, args, out); - samples.push(ops); - } + const out = join(tmp, `${scenario.name}.json`); + console.log( + `\n[run] ${scenario.name}: ${args.samples} sample(s) x ${args.duration}s (warmup ${args.warmup}s)`, + ); + const samples = runOne(scenario.name, args, out); results.push({ name: scenario.name, tier: scenario.tier, From c43ec5d2077535d58c741d0d97fefc19a33febd4 Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Mon, 15 Jun 2026 12:32:48 -0700 Subject: [PATCH 2/2] perf: address review feedback on perf workflow split - bench.mjs: require Number.isInteger for --samples so 1.5 is rejected. - performance-comment.yml: resolve PR number from workflow_run event payload (or listPullRequestsAssociatedWithCommit keyed off the untamperable head_sha) instead of the untrusted pr-number.txt artifact, so a malicious fork PR cannot trick the writable token into commenting on an unrelated PR. - performance-comment.yml: drop the duplicated PERF_REGRESSION_THRESHOLD env var; comparePerf.mjs's built-in default of 15 now applies, removing the third source of truth. - performance.yml: drop the now-unused pr-number.txt writer/upload. --- .github/workflows/performance-comment.yml | 53 ++++++++++++++--------- .github/workflows/performance.yml | 5 --- test/performanceTests/bench.mjs | 2 +- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/.github/workflows/performance-comment.yml b/.github/workflows/performance-comment.yml index 22e410f81..8e8d1c2e6 100644 --- a/.github/workflows/performance-comment.yml +++ b/.github/workflows/performance-comment.yml @@ -11,11 +11,14 @@ name: Performance Comment # Security model: # * The artifact from the PR-context workflow is treated as untrusted # input. We only consume the JSON benchmark outputs (`baseline.json` / -# `candidate.json`) and the PR number, and we regenerate the -# comparison markdown here using the base repository's checked-out -# `comparePerf.mjs` so the markdown posted under the writable token -# is never attacker-supplied. -# * The PR number is validated as a positive integer before being used. +# `candidate.json`) and regenerate the comparison markdown here using +# the base repository's checked-out `comparePerf.mjs` so the markdown +# posted under the writable token is never attacker-supplied. +# * The PR number is resolved authoritatively from the `workflow_run` +# event payload (or a GitHub API lookup keyed off the workflow's +# untamperable `head_sha`), never from artifact contents. This +# prevents a malicious PR from writing a different PR number into the +# artifact and tricking us into commenting on an unrelated PR. on: workflow_run: @@ -57,26 +60,34 @@ jobs: - name: Resolve PR number id: pr if: steps.download.outcome == 'success' - run: | - set -euo pipefail - pr="" - if [ -f perf-results/pr-number.txt ]; then - pr="$(tr -d '[:space:]' < perf-results/pr-number.txt)" - fi - - if ! [[ "$pr" =~ ^[1-9][0-9]*$ ]]; then - echo "pr-number.txt missing or not a positive integer (got: '${pr}'); skipping comment." >&2 - echo "number=" >> "$GITHUB_OUTPUT" - exit 0 - fi - - echo "number=$pr" >> "$GITHUB_OUTPUT" + uses: actions/github-script@v7 + with: + script: | + const wr = context.payload.workflow_run; + // Prefer the PR list attached to the workflow_run event payload + // (populated for same-repo PRs). Fork PRs leave this empty, so + // fall back to an authoritative API lookup keyed off the + // workflow_run's head_sha, which the triggering PR cannot forge. + let prNumber = wr.pull_requests && wr.pull_requests[0] && wr.pull_requests[0].number; + if (!prNumber) { + const { data: prs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({ + owner: context.repo.owner, + repo: context.repo.repo, + commit_sha: wr.head_sha, + }); + const match = prs.find((p) => p.state === "open"); + prNumber = match && match.number; + } + if (!Number.isInteger(prNumber) || prNumber <= 0) { + core.info("Could not determine PR number from workflow_run event; skipping comment."); + core.setOutput("number", ""); + return; + } + core.setOutput("number", String(prNumber)); - name: Regenerate report from JSON (trusted code) id: report if: steps.pr.outputs.number != '' - env: - PERF_REGRESSION_THRESHOLD: '15' run: | set -euo pipefail if [ ! -s perf-results/baseline.json ] || [ ! -s perf-results/candidate.json ]; then diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index fbe9a84c5..8730fdb0a 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -116,10 +116,6 @@ jobs: "$GITHUB_WORKSPACE/perf-comparison.md" echo "exit=$?" >> "$GITHUB_OUTPUT" - - name: Record PR number - if: always() && github.event_name == 'pull_request' - run: echo "${{ github.event.pull_request.number }}" > "$GITHUB_WORKSPACE/pr-number.txt" - - name: Upload perf results artifact if: always() uses: actions/upload-artifact@v4 @@ -129,7 +125,6 @@ jobs: baseline.json candidate.json perf-comparison.md - pr-number.txt if-no-files-found: warn - name: Fail job on gating regression diff --git a/test/performanceTests/bench.mjs b/test/performanceTests/bench.mjs index ab5c753e1..d5be202d8 100644 --- a/test/performanceTests/bench.mjs +++ b/test/performanceTests/bench.mjs @@ -39,7 +39,7 @@ function parseArgs(argv) { console.error("Required: --scenario --out "); process.exit(2); } - if (!Number.isFinite(a.samples) || a.samples < 1) { + if (!Number.isInteger(a.samples) || a.samples < 1) { console.error("--samples must be a positive integer"); process.exit(2); }