From 3a3e47a78efadad080b64debbe017d6fe8a52a34 Mon Sep 17 00:00:00 2001 From: loothero Date: Sun, 3 May 2026 13:49:07 -0700 Subject: [PATCH] ci: factor Claude review extraction into a locally-testable script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the per-job Extract-review-output shell from pr-ci.yml into .github/scripts/extract-claude-review.sh so the extraction logic can be verified locally against fixture files before pushing — replacing the slow PR -> merge -> rebase-test-PR -> wait cycle that has been producing follow-up patches. New files: - .github/scripts/extract-claude-review.sh — extraction logic, env-driven (EXECUTION_FILE, HEADING, REVIEW_OUT_FILE), exits 1 with heading-prefixed fallback on any failure - .github/scripts/test-extract-claude-review.sh — local test runner - .github/fixtures/claude-execution-{success,success-with-high,error,empty,bad}.json — fixture inputs covering the regressions we hit on PRs #145/#146 Test cases (all asserted by the runner): 1. Multi-line .result success — full body preserved (catches the tail -1 regression that would silently drop bracketed [HIGH] markers) 2. .result containing [HIGH] — survives extraction so the downstream blocking-check still fires 3. SDKResultError (no .result, has assistant text) — falls back to assistant text concatenation 4. Empty JSON array — exit 1 with heading-prefixed fallback 5. Malformed JSON — exit 1 with heading-prefixed fallback (catches jq exit 5 propagating through bash -eo pipefail) 6. Missing file — exit 1 with heading-prefixed fallback pr-ci.yml changes: - Each of the four claude-review-* jobs replaces 22 lines of inline shell with `run: bash .github/scripts/extract-claude-review.sh` - Add `.github/scripts/**` to review_automation_changed so the security gate that disables AI reviews on workflow-modifying PRs also covers the extracted script Local usage: bash .github/scripts/test-extract-claude-review.sh Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/fixtures/claude-execution-bad.json | 1 + .github/fixtures/claude-execution-empty.json | 1 + .github/fixtures/claude-execution-error.json | 35 +++++ .../claude-execution-success-with-high.json | 17 +++ .../fixtures/claude-execution-success.json | 27 ++++ .github/scripts/extract-claude-review.sh | 59 +++++++++ .github/scripts/test-extract-claude-review.sh | 122 ++++++++++++++++++ .github/workflows/pr-ci.yml | 73 +---------- 8 files changed, 267 insertions(+), 68 deletions(-) create mode 100644 .github/fixtures/claude-execution-bad.json create mode 100644 .github/fixtures/claude-execution-empty.json create mode 100644 .github/fixtures/claude-execution-error.json create mode 100644 .github/fixtures/claude-execution-success-with-high.json create mode 100644 .github/fixtures/claude-execution-success.json create mode 100755 .github/scripts/extract-claude-review.sh create mode 100755 .github/scripts/test-extract-claude-review.sh diff --git a/.github/fixtures/claude-execution-bad.json b/.github/fixtures/claude-execution-bad.json new file mode 100644 index 00000000..2b1271ff --- /dev/null +++ b/.github/fixtures/claude-execution-bad.json @@ -0,0 +1 @@ +not valid json diff --git a/.github/fixtures/claude-execution-empty.json b/.github/fixtures/claude-execution-empty.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/.github/fixtures/claude-execution-empty.json @@ -0,0 +1 @@ +[] diff --git a/.github/fixtures/claude-execution-error.json b/.github/fixtures/claude-execution-error.json new file mode 100644 index 00000000..c282ae4f --- /dev/null +++ b/.github/fixtures/claude-execution-error.json @@ -0,0 +1,35 @@ +[ + { + "type": "system", + "subtype": "init", + "model": "claude-opus-4-7" + }, + { + "type": "assistant", + "message": { + "role": "assistant", + "content": [ + { "type": "text", "text": "Starting review of the contracts diff..." } + ] + } + }, + { + "type": "assistant", + "message": { + "role": "assistant", + "content": [ + { "type": "tool_use", "id": "tool_1", "name": "Bash", "input": { "command": "git diff" } }, + { "type": "text", "text": "Partial review before error" } + ] + } + }, + { + "type": "result", + "subtype": "error_during_execution", + "is_error": true, + "duration_ms": 3000, + "num_turns": 1, + "errors": ["rate_limit"], + "session_id": "session-error" + } +] diff --git a/.github/fixtures/claude-execution-success-with-high.json b/.github/fixtures/claude-execution-success-with-high.json new file mode 100644 index 00000000..8b3f6d79 --- /dev/null +++ b/.github/fixtures/claude-execution-success-with-high.json @@ -0,0 +1,17 @@ +[ + { + "type": "system", + "subtype": "init", + "model": "claude-opus-4-7" + }, + { + "type": "result", + "subtype": "success", + "is_error": false, + "duration_ms": 8000, + "num_turns": 2, + "result": "## Claude Review - Test Scope\n\n[HIGH] contracts/src/foo.cairo:42 - reentrancy: external call before state update\nImpact: an attacker could re-enter and drain.\nFix: move state update before the call.\n\n[LOW] contracts/src/bar.cairo:10 - missing comment\nImpact: minor readability.\nFix: add a doc comment.\n\nSummary: 0 CRITICAL, 1 HIGH, 0 MEDIUM, 1 LOW, 0 INFO", + "total_cost_usd": 0.04, + "session_id": "session-high" + } +] diff --git a/.github/fixtures/claude-execution-success.json b/.github/fixtures/claude-execution-success.json new file mode 100644 index 00000000..d19e7e77 --- /dev/null +++ b/.github/fixtures/claude-execution-success.json @@ -0,0 +1,27 @@ +[ + { + "type": "system", + "subtype": "init", + "model": "claude-opus-4-7" + }, + { + "type": "assistant", + "message": { + "role": "assistant", + "content": [ + { "type": "text", "text": "Looking at the diff..." }, + { "type": "tool_use", "id": "tool_1", "name": "Bash", "input": { "command": "git diff" } } + ] + } + }, + { + "type": "result", + "subtype": "success", + "is_error": false, + "duration_ms": 12500, + "num_turns": 3, + "result": "## Claude Review - Test Scope\n\nrun=1 attempt=1 sha=abc scope=test\n\nNo issues found.\n\nSummary: 0 CRITICAL, 0 HIGH, 0 MEDIUM, 0 LOW, 0 INFO", + "total_cost_usd": 0.05, + "session_id": "session-success" + } +] diff --git a/.github/scripts/extract-claude-review.sh b/.github/scripts/extract-claude-review.sh new file mode 100755 index 00000000..3f06a4da --- /dev/null +++ b/.github/scripts/extract-claude-review.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# +# Extract the final review text from a Claude Code action `execution_file` +# and write it to REVIEW_OUT_FILE. On any failure, write a heading-prefixed +# fallback message and exit 1 (with a `::error::` annotation for GitHub +# Actions log surfacing). +# +# Inputs (env): +# EXECUTION_FILE path to the action's execution_file +# (a JSON array of SDKMessage objects per +# base-action/src/run-claude-sdk.ts) +# HEADING markdown heading to prefix the fallback message with +# when extraction fails +# REVIEW_OUT_FILE output path (default: /tmp/review.txt) +# +# Exit codes: +# 0 review extracted successfully (REVIEW_OUT_FILE has the model output) +# 1 no extractable review (REVIEW_OUT_FILE has heading + fallback message) +# 2 invalid invocation (HEADING not set) + +set -eo pipefail + +REVIEW_OUT_FILE="${REVIEW_OUT_FILE:-/tmp/review.txt}" + +if [ -z "${HEADING:-}" ]; then + echo "extract-claude-review: HEADING env var is required" >&2 + exit 2 +fi + +write_fallback() { + printf '%s\n\nNo review output was produced.\n' "$HEADING" > "$REVIEW_OUT_FILE" +} + +if [ -z "${EXECUTION_FILE:-}" ] || [ ! -f "$EXECUTION_FILE" ]; then + write_fallback + echo "::error::Claude action did not produce an execution file" + exit 1 +fi + +# Primary path: SDKResultSuccess has `.result: string` with the final text. +# `[…] | last` collects matching messages into an array and takes the last +# (defensive against any future change that emits multiple result messages). +REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' \ + "$EXECUTION_FILE" 2>/dev/null || true) + +# Fallback: SDKResultError has no `.result` field. Concatenate every +# assistant text block so reviewers still see partial work in error cases. +if [ -z "$REVIEW" ]; then + REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' \ + "$EXECUTION_FILE" 2>/dev/null || true) +fi + +if [ -z "$REVIEW" ]; then + write_fallback + echo "::error::Claude review produced no extractable output" + exit 1 +fi + +printf '%s\n' "$REVIEW" > "$REVIEW_OUT_FILE" diff --git a/.github/scripts/test-extract-claude-review.sh b/.github/scripts/test-extract-claude-review.sh new file mode 100755 index 00000000..57dbbe7a --- /dev/null +++ b/.github/scripts/test-extract-claude-review.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# +# Local test runner for extract-claude-review.sh. +# +# Iterates over fixture files under .github/fixtures/ and asserts the +# script's exit code and the content of REVIEW_OUT_FILE for each scenario +# we care about end-to-end: +# +# 1. Multi-line .result success — full body preserved (catches the +# `tail -1` regression that would silently drop [HIGH] markers). +# 2. .result containing [HIGH] — survives extraction so the downstream +# `grep -qE '\[(CRITICAL|HIGH)\]'` blocking check still fires. +# 3. SDKResultError (no .result, has assistant text) — falls back to +# assistant text concatenation. +# 4. Empty array — exit 1 with heading-prefixed fallback. +# 5. Malformed JSON — exit 1 with heading-prefixed fallback (catches +# jq exit 5 propagating through `bash -eo pipefail`). +# 6. Missing file — exit 1 with heading-prefixed fallback. +# +# Run from the repo root: +# bash .github/scripts/test-extract-claude-review.sh + +set -eo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +FIXTURES_DIR="$REPO_ROOT/.github/fixtures" +EXTRACT="$SCRIPT_DIR/extract-claude-review.sh" + +if [ ! -x "$EXTRACT" ]; then + echo "extract-claude-review.sh not found or not executable at $EXTRACT" >&2 + exit 2 +fi + +PASS=0 +FAIL=0 +TMPDIR_ROOT="$(mktemp -d)" +trap 'rm -rf "$TMPDIR_ROOT"' EXIT + +# run_case