---
# Scenario Suite - End-to-End Pipeline Validation
# (workflow definition as shown for run #30)
# Jobs, in order: reset, commit-and-build, promote-staging, rollback-check,
# dispatch-inputs-check, pr-preview-check.
name: Scenario Suite - End-to-End Pipeline Validation
on:
  workflow_dispatch:
  schedule:
    - cron: '0 6 * * 1'  # Weekly Monday 6am UTC
# The suite rewrites manifest state on main and deletes releases/tags, so a
# manual dispatch overlapping the weekly cron run would corrupt both runs.
# Queue them instead of running concurrently; never cancel a run midway,
# because that would leave trunk in a half-reset state.
concurrency:
  group: scenario-suite
  cancel-in-progress: false
permissions:
  contents: write
  actions: write
  pull-requests: write
env:
  # Steps that set no GH_TOKEN of their own (the promote-staging job) rely on
  # this workflow-level GITHUB_TOKEN for their gh calls.
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  # NOTE(review): no step in this file reads CASCADE_STATE_TOKEN from the
  # environment (all uses go through the secrets context) - confirm whether
  # this workflow-wide export is still needed.
  CASCADE_STATE_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
jobs:
reset:
name: Reset manifest state
runs-on: ubuntu-latest
steps:
# Check out trunk with the state PAT so the later push to main is authored by it.
- name: Checkout
  uses: actions/checkout@v4
  with:
    token: ${{ secrets.CASCADE_STATE_TOKEN }}
    # Pin to main like every other job in this workflow: this job commits and
    # runs `git push origin main`, which must not start from whatever ref a
    # manual dispatch happened to select.
    ref: main
# Set the author identity used by the commit this job creates.
- name: Configure Git
  run: |
    git config user.email "scenario-suite@users.noreply.github.com"
    git config user.name "scenario-suite"
- name: Reset manifest state
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    # Clear the recorded pipeline state in .github/manifest.yaml.
    # .ci.state is the subtree every assertion later in this workflow reads
    # (.ci.state.staging.*, .ci.state.release.*), so it is the one that must
    # start empty.
    # NOTE(review): the .config.state reset is kept from the original step;
    # confirm whether the manifest really has that key and drop it if not.
    # Touches the manifest only (not src/**), so it does not match
    # orchestrate's path filter and correctly does not fire orchestrate.
    yq eval '.config.state = {} | .ci.state = {}' -i .github/manifest.yaml
    git add .github/manifest.yaml
    # --allow-empty: when the state is already empty there is nothing staged
    # and a plain commit would fail the step; an empty marker commit keeps the
    # reset idempotent and still gives the verify step a commit to find.
    git commit --no-gpg-sign --allow-empty -m "chore: reset state for scenario test [skip ci]"
    git push origin main
- name: Clean slate - delete leftover releases and tags
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    # Best-effort cleanup: every individual delete is allowed to fail.
    # 1) Delete up to 200 listed releases, removing each release's tag too.
    gh release list --repo "$GITHUB_REPOSITORY" --limit 200 --json tagName --jq '.[].tagName' \
      | while read -r release_tag; do
          gh release delete "$release_tag" --repo "$GITHUB_REPOSITORY" --yes --cleanup-tag 2>/dev/null || true
        done
    # 2) Delete any v* / rel-* tags that remain, from the remote.
    git fetch --tags --quiet || true
    for stale_tag in $(git tag -l 'v*' 'rel-*'); do
      git push origin --delete "$stale_tag" 2>/dev/null || true
    done
- name: Verify push succeeded
  run: |
    set -euo pipefail
    git log -1 --oneline
    # The reset commit must be the local tip ...
    if ! git log -1 --format=%s | grep -q "reset state for scenario test"; then
      echo "::error::HEAD is not the scenario reset commit"
      exit 1
    fi
    # ... and it must actually be on the remote trunk. Grepping local history
    # alone would pass even if the commit never left this runner.
    git fetch origin main --quiet
    if ! git merge-base --is-ancestor HEAD origin/main; then
      echo "::error::reset commit $(git rev-parse --short HEAD) is not on origin/main"
      exit 1
    fi
    echo "✓ Reset commit is on origin/main"
commit-and-build:
name: Commit dummy change and validate build output
runs-on: ubuntu-latest
needs: reset
steps:
# Check out trunk with the state PAT; pushes made later in this job use it.
- name: Checkout
  uses: actions/checkout@v4
  with:
    ref: main
    token: ${{ secrets.CASCADE_STATE_TOKEN }}
# Set the author identity for the scenario commit created in the next step.
- name: Configure Git
  run: |
    git config user.email "scenario-suite@users.noreply.github.com"
    git config user.name "scenario-suite"
- name: Open and merge PR with src change
  id: seed
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    # Emulate a real user: land the src change through branch -> PR ->
    # squash-merge, authored by the PAT so the merge push fires orchestrate.
    BRANCH="scenario/src-$(date +%s)-$RANDOM"
    git fetch origin main --quiet
    git checkout -B "$BRANCH" origin/main
    mkdir -p src
    echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" > src/version.txt
    git add src/version.txt
    # The squashed commit subject is the PR title, so the conventional
    # type must live there. No [skip ci]/[ci skip] anywhere, or GitHub
    # cancels the orchestrate trigger.
    git commit --no-gpg-sign -m "feat: dummy src change for scenario test"
    git push origin "$BRANCH"
    # Keep the PR URL so the merge commit can be looked up after the branch
    # has been deleted.
    PR_URL="$(gh pr create --base main --head "$BRANCH" \
      --title "feat: dummy src change for scenario test" \
      --body "Automated scenario run; drives orchestrate on merge.")"
    echo "Opened $PR_URL"
    gh pr merge "$BRANCH" --squash --delete-branch
    # Capture the merge SHA from the PR itself; it keys the orchestrate wait.
    # Reading the tip of origin/main instead would pick up the wrong commit if
    # anything else landed on trunk between the merge and the fetch.
    MERGE_SHA="$(gh pr view "$PR_URL" --json mergeCommit --jq '.mergeCommit.oid // ""')"
    if [ -z "$MERGE_SHA" ]; then
      # Fallback: the PR did not report a merge commit; use the trunk tip.
      git fetch origin main --quiet
      MERGE_SHA="$(git rev-parse origin/main)"
    fi
    echo "merge_sha=$MERGE_SHA" >> "$GITHUB_OUTPUT"
- name: Wait for orchestrate.yaml workflow to complete
  id: wait_orchestrate
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    MERGE_SHA="${{ steps.seed.outputs.merge_sha }}"
    MAX_ATTEMPTS=5
    SLEEP_DURATION=60
    RUN_ID=""
    # Look for the orchestrate run on main whose headSha equals the merge
    # commit (not simply the newest run), polling up to MAX_ATTEMPTS times.
    for ((try = 1; try <= MAX_ATTEMPTS; try++)); do
      RUN_ID=$(gh run list \
        --workflow=orchestrate.yaml \
        --branch=main \
        --json=databaseId,headSha \
        --jq=".[] | select(.headSha==\"$MERGE_SHA\") | .databaseId" 2>/dev/null | head -n1 || echo "")
      if [ -n "$RUN_ID" ]; then
        echo "Found orchestrate run for $MERGE_SHA: $RUN_ID"
        break
      fi
      # Pause between polls, but not after the final one.
      if [ "$try" -lt "$MAX_ATTEMPTS" ]; then
        sleep "$SLEEP_DURATION"
      fi
    done
    if [ -z "$RUN_ID" ]; then
      echo "::error::orchestrate.yaml run for $MERGE_SHA did not appear within timeout"
      exit 1
    fi
    # Block until the run finishes; --exit-status fails this step if it failed.
    gh run watch "$RUN_ID" --exit-status --interval 60
    echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
- name: Assert per-leg matrix artifacts exist (artifacts API)
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -euo pipefail
    RUN_ID="${{ steps.wait_orchestrate.outputs.run_id }}"
    # Each build-image matrix leg uploads an artifact whose name carries the
    # os-arch dimension (build-image.yaml: name=image-${os}-${arch}). The
    # artifacts API is the deterministic per-leg signal: assert BOTH leg
    # names are present on this exact run, keyed by artifact name (not logs).
    # --paginate: read every page so a leg is not missed on a run that has
    # more artifacts than fit in one response.
    ARTIFACTS=$(gh api --paginate "repos/$GITHUB_REPOSITORY/actions/runs/$RUN_ID/artifacts" \
      --jq '.artifacts[].name')
    echo "Run $RUN_ID artifacts:"
    echo "$ARTIFACTS"
    # Check every leg before failing so one run reports all missing legs.
    # -F/-x: exact whole-line match. The here-string avoids an `echo | grep -q`
    # pipeline, whose writer can fail under pipefail when grep exits early.
    MISSING=""
    for LEG in image-linux-amd64 image-linux-arm64; do
      if ! grep -Fqx -- "$LEG" <<<"$ARTIFACTS"; then
        echo "::error::matrix leg artifact '$LEG' missing from run $RUN_ID"
        MISSING=1
      fi
    done
    if [ -n "$MISSING" ]; then
      exit 1
    fi
    echo "✓ Per-leg matrix artifacts present: image-linux-amd64, image-linux-arm64"
- name: Refresh manifest from trunk
  run: |
    # The state update arrives on main as its own "chore: update state"
    # commit, pushed by orchestrate's finalize job after this job's checkout.
    # Move the working tree to the current origin/main so the assertions
    # below read the updated manifest instead of racing that push.
    git fetch --quiet origin main
    git reset --hard origin/main
- name: Assert manifest state shape (staging populated)
  run: |
    # Read manifest and assert staging version exists and matches semver.
    # In a [staging, prod] repo, staging is the first env, so orchestrate
    # deploys to it and commits state.staging on merge.
    STAGING_VERSION=$(yq eval '.ci.state.staging.version // ""' .github/manifest.yaml)
    if [ -z "$STAGING_VERSION" ]; then
      echo "::error::.ci.state.staging.version is empty or missing"
      exit 1
    fi
    # Semver with an optional leading "v" and an optional pre-release/build
    # suffix. Anchored at both ends: an unanchored pattern would accept any
    # string that merely contains something shaped like 1.2.3.
    if ! grep -qE '^v?[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.+-]+)?$' <<<"$STAGING_VERSION"; then
      echo "::error::.ci.state.staging.version does not match semver pattern: $STAGING_VERSION"
      exit 1
    fi
    echo "✓ Staging version matches semver: $STAGING_VERSION"
- name: Assert manifest state shape (consolidated image build recorded)
  run: |
    # The matrix build is recorded in one consolidated slot,
    # state.staging.builds.image.artifact_id (there is no per-leg state
    # slot). Require the value to be present and to have the image-linux-*
    # shape produced by the build-image legs.
    ARTIFACT_ID=$(yq eval '.ci.state.staging.builds.image.artifact_id // ""' .github/manifest.yaml)
    if [ -z "$ARTIFACT_ID" ]; then
      echo "::error::.ci.state.staging.builds.image.artifact_id is empty or missing"
      exit 1
    fi
    # Glob match on the prefix; anything else is an unexpected shape.
    if [[ "$ARTIFACT_ID" != image-linux-* ]]; then
      echo "::error::consolidated image artifact_id has unexpected shape: $ARTIFACT_ID (want image-linux-*)"
      exit 1
    fi
    echo "✓ Consolidated image build recorded: artifact_id=$ARTIFACT_ID"
- name: Assert GitHub release created
  run: |
    # The push-triggered orchestrate Finalize calls manage-release with
    # action=update, which cascade defines as a DRAFT RC release
    # (draft=true, prerelease=false). The prerelease and published states
    # are reached later by promote (asserted in the promote-staging job),
    # so at this stage the correct invariant is a draft RC, not a
    # prerelease. Poll trunk's release list until the draft RC lands rather
    # than reading once, so the assertion never races the Finalize commit.
    MAX_ATTEMPTS=2
    ATTEMPT=0
    RELEASE_TAG=""
    IS_DRAFT=""
    IS_PRERELEASE=""
    while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
      # One list call per poll returns tag, draft and prerelease together.
      # Prefer the first draft in the list; fall back to the first release of
      # any kind so the "not a draft" error below can still name a tag.
      ROW=$(gh release list --json tagName,isDraft,isPrerelease \
        --jq '([.[] | select(.isDraft)] | .[0]) // .[0] // empty | "\(.tagName) \(.isDraft) \(.isPrerelease)"' \
        || echo "")
      read -r RELEASE_TAG IS_DRAFT IS_PRERELEASE <<<"$ROW" || true
      if [ "$IS_DRAFT" = "true" ]; then
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      # Wait between polls only; sleeping after the last one just delays the
      # failure report.
      if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
        sleep 60
      fi
    done
    if [ -z "$RELEASE_TAG" ]; then
      echo "::error::No GitHub release found"
      exit 1
    fi
    if ! echo "$RELEASE_TAG" | grep -qE 'v?[0-9]+\.[0-9]+\.[0-9]+'; then
      echo "::error::Release tag does not match semver: $RELEASE_TAG"
      exit 1
    fi
    if [ "$IS_DRAFT" != "true" ]; then
      echo "::error::Orchestrate RC release is not a draft (draft=$IS_DRAFT, prerelease=$IS_PRERELEASE)"
      exit 1
    fi
    echo "✓ GitHub draft RC release created: $RELEASE_TAG (draft=$IS_DRAFT, prerelease=$IS_PRERELEASE)"
- name: Write assertions summary
  if: always()
  env:
    JOB_STATUS: ${{ job.status }}
  run: |
    # This step runs even after an earlier step failed (if: always()), so the
    # result column must follow the job status rather than a hard-coded pass.
    if [ "$JOB_STATUS" = "success" ]; then
      MARK="✓"
    else
      MARK="✗ or not run"
    fi
    {
      echo "## commit-and-build Assertions"
      echo ""
      if [ "$JOB_STATUS" != "success" ]; then
        echo "Job status: **$JOB_STATUS** - at least one step did not pass; see the step logs for which one."
        echo ""
      fi
      echo "| Assertion | Result |"
      echo "|-----------|--------|"
      echo "| Orchestrate run completed | $MARK |"
      echo "| Per-leg matrix artifacts (amd64, arm64) | $MARK |"
      echo "| Staging version (semver) | $MARK |"
      echo "| Consolidated image build recorded | $MARK |"
      echo "| GitHub release created | $MARK |"
    } >> "$GITHUB_STEP_SUMMARY"
promote-staging:
name: Promote staging to prod
runs-on: ubuntu-latest
needs: commit-and-build
steps:
# Check out trunk with the default workflow token.
- name: Checkout
  uses: actions/checkout@v4
  with:
    ref: main
    token: ${{ secrets.GITHUB_TOKEN }}
- name: Dispatch promote.yaml workflow
  id: dispatch_promote
  run: |
    # Record the time just before dispatching. The wait step filters promote
    # runs by this timestamp so it follows the run created here rather than
    # an older one that happens to be newest.
    DISPATCHED_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo "dispatch_ts=$DISPATCHED_AT" >> "$GITHUB_OUTPUT"
    gh workflow run promote.yaml --ref main -f mode=default
- name: Wait for promote.yaml workflow to complete
  id: wait_promote
  run: |
    TS="${{ steps.dispatch_promote.outputs.dispatch_ts }}"
    MAX_ATTEMPTS=4
    SLEEP_DURATION=60
    RUN_ID=""
    # Poll for the newest promote run on main created at or after the
    # dispatch timestamp, up to MAX_ATTEMPTS times.
    for ((try = 1; try <= MAX_ATTEMPTS; try++)); do
      RUN_ID=$(gh run list \
        --workflow=promote.yaml \
        --branch=main \
        --created=">=$TS" \
        --limit=1 \
        --json=databaseId \
        --jq='.[0].databaseId // empty' 2>/dev/null || echo "")
      if [ -n "$RUN_ID" ]; then
        echo "Found promote run for dispatch $TS: $RUN_ID"
        break
      fi
      # Pause between polls, but not after the final one.
      if [ "$try" -lt "$MAX_ATTEMPTS" ]; then
        sleep "$SLEEP_DURATION"
      fi
    done
    if [ -z "$RUN_ID" ]; then
      echo "::error::promote.yaml run for dispatch $TS did not appear within timeout"
      exit 1
    fi
    # Follow exactly that run; --exit-status fails this step unless it succeeds.
    gh run watch "$RUN_ID" --exit-status --interval 60
    echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
- name: Assert manifest state shape (release marker populated)
  run: |
    # A terminal promotion publishes the release and records the version
    # under the .ci.state.release marker (the published-version marker),
    # not under a per-environment deploy slot. The final env emits no
    # deploy jobs, so .ci.state.prod stays empty by design; assert the
    # release marker carries the published semver instead.
    #
    # finalize pushes its state commit shortly after the promote run
    # reports completed, so the wait above can return before the release
    # marker lands on main. Poll-pull until the marker is present.
    MAX_ATTEMPTS=3
    ATTEMPT=0
    RELEASE_VERSION=""
    while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
      git pull origin main --quiet || true
      RELEASE_VERSION=$(yq eval '.ci.state.release.version // ""' .github/manifest.yaml)
      if [ -n "$RELEASE_VERSION" ]; then
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      # Wait between polls only; a sleep after the last poll just delays the
      # failure report.
      if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
        sleep 60
      fi
    done
    if [ -z "$RELEASE_VERSION" ]; then
      echo "::error::.ci.state.release.version is empty or missing"
      exit 1
    fi
    # Semver with an optional leading "v" and an optional pre-release/build
    # suffix, anchored at both ends so a string that merely contains a
    # 1.2.3-shaped fragment is rejected.
    if ! grep -qE '^v?[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.+-]+)?$' <<<"$RELEASE_VERSION"; then
      echo "::error::.ci.state.release.version does not match semver pattern: $RELEASE_VERSION"
      exit 1
    fi
    echo "✓ Release version matches semver: $RELEASE_VERSION"
- name: Assert GitHub release is published (not RC)
  run: |
    # Read the latest release as JSON. The human-readable table must not be
    # parsed: its first column is the release title, not the tag.
    ROW=$(gh release list --limit=1 --json tagName,isDraft,isPrerelease \
      --jq '.[0] // empty | "\(.tagName) \(.isDraft) \(.isPrerelease)"')
    read -r RELEASE_TAG IS_DRAFT IS_PRERELEASE <<<"$ROW" || true
    if [ -z "$RELEASE_TAG" ]; then
      echo "::error::No release found"
      exit 1
    fi
    # The RC that orchestrate creates is a draft with prerelease=false, so a
    # prerelease check alone would pass on an RC that was never published.
    # Published means neither draft nor prerelease.
    if [ "$IS_DRAFT" != "false" ]; then
      echo "::error::Release is still a draft, should be published: $RELEASE_TAG"
      exit 1
    fi
    if [ "$IS_PRERELEASE" != "false" ]; then
      echo "::error::Release is still marked as prerelease, should be published: $RELEASE_TAG"
      exit 1
    fi
    echo "✓ Release published (not draft, not prerelease): $RELEASE_TAG"
- name: Write assertions summary
  if: always()
  env:
    JOB_STATUS: ${{ job.status }}
  run: |
    # This step runs even after an earlier step failed (if: always()), so the
    # result column must follow the job status rather than a hard-coded pass.
    if [ "$JOB_STATUS" = "success" ]; then
      MARK="✓"
    else
      MARK="✗ or not run"
    fi
    {
      echo "## promote-staging Assertions"
      echo ""
      if [ "$JOB_STATUS" != "success" ]; then
        echo "Job status: **$JOB_STATUS** - at least one step did not pass; see the step logs for which one."
        echo ""
      fi
      echo "| Assertion | Result |"
      echo "|-----------|--------|"
      echo "| Promote run completed | $MARK |"
      # The job asserts .ci.state.release.version (the release marker), not a
      # prod slot, so the row is labelled accordingly.
      echo "| Release marker version (semver) | $MARK |"
      echo "| Release published (not RC) | $MARK |"
    } >> "$GITHUB_STEP_SUMMARY"
rollback-check:
name: Rollback staging to prior version
runs-on: ubuntu-latest
needs: promote-staging
steps:
# Check out trunk with the state PAT; pushes made later in this job use it.
- name: Checkout
  uses: actions/checkout@v4
  with:
    ref: main
    token: ${{ secrets.CASCADE_STATE_TOKEN }}
# Set the author identity for the second scenario commit created below.
- name: Configure Git
  run: |
    git config user.email "scenario-suite@users.noreply.github.com"
    git config user.name "scenario-suite"
- name: Land a second staging version so a prior ring entry exists
  id: second
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    # A rollback can only resolve a prior target once staging has carried two
    # distinct deploy SHAs: the second deploy records the displaced state in
    # staging's deploy-history ring. staging is the deployable env in this
    # [staging, prod] model (prod is the publish boundary and emits no deploy
    # jobs), so it is the env the manual rollback re-deploys. Land a second
    # src change through branch -> PR -> squash-merge so orchestrate deploys a
    # new SHA to staging.
    git pull origin main --quiet
    # Remember the SHA staging carries now; the wait step polls until it moves.
    PRIOR_SHA="$(yq eval '.ci.state.staging.sha // ""' .github/manifest.yaml)"
    echo "prior_sha=$PRIOR_SHA" >> "$GITHUB_OUTPUT"
    BRANCH="scenario/rollback-src-$(date +%s)-$RANDOM"
    git checkout -B "$BRANCH" origin/main
    mkdir -p src
    echo "rollback-marker=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" > src/rollback-marker.txt
    git add src/rollback-marker.txt
    git commit --no-gpg-sign -m "feat: second staging version for rollback test"
    git push origin "$BRANCH"
    # Keep the PR URL so the merge commit can be looked up after the branch
    # has been deleted.
    PR_URL="$(gh pr create --base main --head "$BRANCH" \
      --title "feat: second staging version for rollback test" \
      --body "Automated scenario run; drives orchestrate on merge.")"
    echo "Opened $PR_URL"
    gh pr merge "$BRANCH" --squash --delete-branch
    # Take the merge SHA from the PR itself. The tip of origin/main would be
    # the wrong commit if anything else landed on trunk between the merge and
    # the fetch, and the orchestrate wait is keyed on this value.
    MERGE_SHA="$(gh pr view "$PR_URL" --json mergeCommit --jq '.mergeCommit.oid // ""')"
    if [ -z "$MERGE_SHA" ]; then
      # Fallback: the PR did not report a merge commit; use the trunk tip.
      git fetch origin main --quiet
      MERGE_SHA="$(git rev-parse origin/main)"
    fi
    echo "merge_sha=$MERGE_SHA" >> "$GITHUB_OUTPUT"
- name: Wait for orchestrate to deploy the second staging version
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    MERGE_SHA="${{ steps.second.outputs.merge_sha }}"
    PRIOR_SHA="${{ steps.second.outputs.prior_sha }}"
    MAX_ATTEMPTS=5
    ATTEMPT=0
    RUN_ID=""
    # Find the orchestrate run whose headSha is the second merge commit.
    while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
      RUN_ID=$(gh run list \
        --workflow=orchestrate.yaml \
        --branch=main \
        --json=databaseId,headSha \
        --jq=".[] | select(.headSha==\"$MERGE_SHA\") | .databaseId" 2>/dev/null | head -n1 || echo "")
      if [ -n "$RUN_ID" ]; then
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      # Wait between polls only, not after the last one.
      if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
        sleep 60
      fi
    done
    if [ -z "$RUN_ID" ]; then
      echo "::error::orchestrate run for $MERGE_SHA did not appear within timeout"
      exit 1
    fi
    gh run watch "$RUN_ID" --exit-status --interval 60
    # Poll trunk until staging's sha moves off the prior value, so the ring
    # carries a distinct previous entry for the rollback to resolve.
    SHA_ATTEMPTS=3
    ATTEMPT=0
    CUR_SHA=""
    MOVED=""
    while [ "$ATTEMPT" -lt "$SHA_ATTEMPTS" ]; do
      # Sync with fetch + hard reset rather than pull, so the read does not
      # depend on which branch is checked out or whether it can fast-forward.
      if git fetch origin main --quiet; then
        git reset --hard origin/main
      else
        echo "::warning::git fetch origin main failed; retrying"
      fi
      CUR_SHA="$(yq eval '.ci.state.staging.sha // ""' .github/manifest.yaml)"
      if [ -n "$CUR_SHA" ] && [ "$CUR_SHA" != "$PRIOR_SHA" ]; then
        MOVED=1
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      if [ "$ATTEMPT" -lt "$SHA_ATTEMPTS" ]; then
        sleep 60
      fi
    done
    # Fail here instead of falling through: if the sha never moved, the
    # rollback that follows has no distinct prior entry to resolve.
    if [ -z "$MOVED" ]; then
      echo "::error::.ci.state.staging.sha did not move off '$PRIOR_SHA' (now '$CUR_SHA')"
      exit 1
    fi
    echo "✓ staging sha moved: $PRIOR_SHA -> $CUR_SHA"
- name: Dispatch cascade-rollback.yaml for staging
  id: dispatch_rollback
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    # Capture the version staging will roll back FROM, and the prior version
    # it should land ON (the previous ring entry), read from the manifest the
    # same way the promote job reads release state (yq on .github/manifest.yaml).
    # Sync with fetch + hard reset rather than pull, so the read does not
    # depend on which branch is checked out or whether it can fast-forward.
    git fetch origin main --quiet
    git reset --hard origin/main
    CUR_VER="$(yq eval '.ci.state.staging.version // ""' .github/manifest.yaml)"
    PRIOR_VER="$(yq eval '.ci.state.staging.previous[0].version // ""' .github/manifest.yaml)"
    # Without a current version the later "version moved" assertion would be
    # vacuous (any value differs from an empty string), so stop here.
    if [ -z "$CUR_VER" ]; then
      echo "::error::.ci.state.staging.version is empty; nothing to roll back from"
      exit 1
    fi
    if [ -z "$PRIOR_VER" ]; then
      echo "::warning::.ci.state.staging.previous[0].version is empty; the rollback target cannot be verified exactly"
    fi
    echo "cur_ver=$CUR_VER" >> "$GITHUB_OUTPUT"
    echo "prior_ver=$PRIOR_VER" >> "$GITHUB_OUTPUT"
    # Stamp the dispatch time so the wait correlates the run it created.
    TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo "dispatch_ts=$TS" >> "$GITHUB_OUTPUT"
    gh workflow run cascade-rollback.yaml \
      -f environment=staging \
      -f dry_run=false \
      --ref main
- name: Wait for cascade-rollback.yaml to complete
  id: wait_rollback
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    TS="${{ steps.dispatch_rollback.outputs.dispatch_ts }}"
    MAX_ATTEMPTS=4
    ATTEMPT=0
    RUN_ID=""
    # Poll for the newest cascade-rollback run on main created at or after
    # the dispatch timestamp.
    until [ "$ATTEMPT" -ge "$MAX_ATTEMPTS" ]; do
      RUN_ID=$(gh run list \
        --workflow=cascade-rollback.yaml \
        --branch=main \
        --created=">=$TS" \
        --limit=1 \
        --json=databaseId \
        --jq='.[0].databaseId // empty' 2>/dev/null || echo "")
      if [ -n "$RUN_ID" ]; then
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      sleep 60
    done
    if [ -z "$RUN_ID" ]; then
      echo "::error::cascade-rollback.yaml run for $TS did not appear within timeout"
      exit 1
    fi
    # Follow that run; --exit-status fails this step unless it succeeds.
    gh run watch "$RUN_ID" --exit-status --interval 60
    echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
- name: Assert staging rolled back to the prior version and marked diverged
  run: |
    set -euo pipefail
    CUR_VER="${{ steps.dispatch_rollback.outputs.cur_ver }}"
    PRIOR_VER="${{ steps.dispatch_rollback.outputs.prior_ver }}"
    # Rollback re-writes staging, which already carried a version, so poll
    # trunk until the rollback finalize state commit lands (staging version
    # moves back to PRIOR_VER), then assert the divergence ref. Read via yq
    # on the manifest, mirroring the promote job's read idiom.
    MAX_ATTEMPTS=3
    ATTEMPT=0
    AFTER_VER=""
    while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
      # Sync with fetch + hard reset rather than pull, so the read does not
      # depend on which branch is checked out or whether it can fast-forward.
      if git fetch origin main --quiet; then
        git reset --hard origin/main
      else
        echo "::warning::git fetch origin main failed; retrying"
      fi
      AFTER_VER="$(yq eval '.ci.state.staging.version // ""' .github/manifest.yaml)"
      if [ -n "$AFTER_VER" ] && [ "$AFTER_VER" != "$CUR_VER" ]; then
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      # Wait between polls only, not after the last one.
      if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
        sleep 60
      fi
    done
    AFTER_REF="$(yq eval '.ci.state.staging.ref // ""' .github/manifest.yaml)"
    # An empty version is not a successful rollback; without this guard it
    # would slip past the "did not move" comparison below.
    if [ -z "$AFTER_VER" ]; then
      echo "::error::.ci.state.staging.version is empty after rollback"
      exit 1
    fi
    if [ "$AFTER_VER" = "$CUR_VER" ]; then
      echo "::error::staging version did not move back after rollback (still $CUR_VER)"
      exit 1
    fi
    if [ -z "$PRIOR_VER" ]; then
      # No prior ring entry was captured before the dispatch, so only the
      # "moved off CUR_VER" check applies; say so instead of skipping silently.
      echo "::warning::prior version was not captured; exact rollback target not verified"
    elif [ "$AFTER_VER" != "$PRIOR_VER" ]; then
      echo "::error::staging rolled back to '$AFTER_VER', expected prior '$PRIOR_VER'"
      exit 1
    fi
    if [ "$AFTER_REF" != "rollback/staging" ]; then
      echo "::error::staging not marked diverged: ref='$AFTER_REF', want 'rollback/staging'"
      exit 1
    fi
    echo "✓ staging rolled back $CUR_VER -> $AFTER_VER, ref=$AFTER_REF"
- name: Write assertions summary
  if: always()
  env:
    JOB_STATUS: ${{ job.status }}
  run: |
    # This step runs even after an earlier step failed (if: always()), so the
    # result column must follow the job status rather than a hard-coded pass.
    if [ "$JOB_STATUS" = "success" ]; then
      MARK="✓"
    else
      MARK="✗ or not run"
    fi
    {
      echo "## rollback-check Assertions"
      echo ""
      if [ "$JOB_STATUS" != "success" ]; then
        echo "Job status: **$JOB_STATUS** - at least one step did not pass; see the step logs for which one."
        echo ""
      fi
      echo "| Assertion | Result |"
      echo "|-----------|--------|"
      echo "| Rollback run completed | $MARK |"
      echo "| Staging version moved back to prior | $MARK |"
      echo "| Staging marked diverged (ref=rollback/staging) | $MARK |"
    } >> "$GITHUB_STEP_SUMMARY"
dispatch-inputs-check:
name: Dispatch input takes effect
runs-on: ubuntu-latest
needs: rollback-check
steps:
# Check out trunk with the state PAT.
- name: Checkout
  uses: actions/checkout@v4
  with:
    ref: main
    token: ${{ secrets.CASCADE_STATE_TOKEN }}
- name: Dispatch orchestrate.yaml with a distinctive reason
  id: dispatch_inputs
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -euo pipefail
    # The reason travels operator -> orchestrate -> build-bundle callback ->
    # consolidated state: cascade forwards `reason` because build-bundle
    # declares a matching `reason` input, and the stub echoes it into
    # artifact_id as bundle-<reason>. A unique, identifier-safe sentinel makes
    # the exact expected value knowable downstream.
    REASON="dispatch-sentinel-${GITHUB_RUN_ID}-${RANDOM}"
    # Timestamp taken before the dispatch; the wait step uses it to pick the
    # run created here rather than an older orchestrate run.
    TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    {
      echo "reason=$REASON"
      echo "expected_artifact_id=bundle-$REASON"
      echo "dispatch_ts=$TS"
    } >> "$GITHUB_OUTPUT"
    gh workflow run orchestrate.yaml \
      --ref main \
      -f environment=staging \
      -f reason="$REASON" \
      -f force=true
- name: Wait for dispatched orchestrate run to complete
  id: wait_dispatch
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -euo pipefail
    TS="${{ steps.dispatch_inputs.outputs.dispatch_ts }}"
    MAX_ATTEMPTS=5
    SLEEP_DURATION=60
    RUN_ID=""
    # Poll for the newest workflow_dispatch orchestrate run on main created
    # at or after the dispatch timestamp.
    for ((try = 1; try <= MAX_ATTEMPTS; try++)); do
      RUN_ID=$(gh run list \
        --workflow=orchestrate.yaml \
        --branch=main \
        --event=workflow_dispatch \
        --created=">=$TS" \
        --limit=1 \
        --json=databaseId \
        --jq='.[0].databaseId // empty' 2>/dev/null || echo "")
      if [ -n "$RUN_ID" ]; then
        echo "Found dispatched orchestrate run: $RUN_ID"
        break
      fi
      # Pause between polls, but not after the final one.
      if [ "$try" -lt "$MAX_ATTEMPTS" ]; then
        sleep "$SLEEP_DURATION"
      fi
    done
    if [ -z "$RUN_ID" ]; then
      echo "::error::dispatched orchestrate.yaml run for $TS did not appear within timeout"
      exit 1
    fi
    # Follow exactly that run: if orchestrate fails, the input cannot have
    # reached state, so --exit-status fails this step too.
    gh run watch "$RUN_ID" --exit-status --interval 60
    echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
- name: Assert dispatched reason reached consolidated state
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    EXPECTED="${{ steps.dispatch_inputs.outputs.expected_artifact_id }}"
    # finalize pushes the state commit shortly after the run reports
    # completed; poll-pull trunk until the bundle slot lands, then assert the
    # consolidated artifact_id equals the exact dispatched value. This proves
    # the operator input TOOK EFFECT (input -> callback -> state), not merely
    # that a run existed.
    MAX_ATTEMPTS=3
    ATTEMPT=0
    ACTUAL=""
    while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
      git pull origin main --quiet || true
      ACTUAL=$(yq eval '.ci.state.staging.builds.bundle.artifact_id // ""' .github/manifest.yaml)
      if [ "$ACTUAL" = "$EXPECTED" ]; then
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      # Wait between polls only; a sleep after the last poll just delays the
      # failure report by a minute.
      if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
        sleep 60
      fi
    done
    if [ "$ACTUAL" != "$EXPECTED" ]; then
      echo "::error::dispatched reason did not reach state.staging.builds.bundle.artifact_id"
      echo "::error::expected '$EXPECTED', got '$ACTUAL'"
      exit 1
    fi
    echo "✓ Dispatch input took effect: bundle.artifact_id == $ACTUAL"
- name: Write assertions summary
  if: always()
  env:
    JOB_STATUS: ${{ job.status }}
  run: |
    # This step runs even after an earlier step failed (if: always()), so the
    # result column must follow the job status rather than a hard-coded pass.
    if [ "$JOB_STATUS" = "success" ]; then
      MARK="✓"
    else
      MARK="✗ or not run"
    fi
    {
      echo "## dispatch-inputs-check Assertions"
      echo ""
      if [ "$JOB_STATUS" != "success" ]; then
        echo "Job status: **$JOB_STATUS** - at least one step did not pass; see the step logs for which one."
        echo ""
      fi
      echo "| Assertion | Result |"
      echo "|-----------|--------|"
      echo "| Dispatched orchestrate completed | $MARK |"
      echo "| Reason reached consolidated state (exact value) | $MARK |"
    } >> "$GITHUB_STEP_SUMMARY"
pr-preview-check:
name: PR preview comment validation
runs-on: ubuntu-latest
needs: dispatch-inputs-check
steps:
# Check out trunk with the state PAT; the test branch is pushed with it.
- name: Checkout
  uses: actions/checkout@v4
  with:
    ref: main
    token: ${{ secrets.CASCADE_STATE_TOKEN }}
# Set the author identity for the test commit created in the next step.
- name: Configure Git
  run: |
    git config user.email "scenario-suite@users.noreply.github.com"
    git config user.name "scenario-suite"
- name: Create and push test PR branch
  id: branch
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    BRANCH_NAME="test/cascade-preview-$(date +%s)-$RANDOM"
    git checkout -b "$BRANCH_NAME"
    # Change a file under src/** so the preview's change detection has a
    # callback to plan.
    mkdir -p src
    echo "preview-$(date -u +%s)" > src/preview-marker.txt
    git add src/preview-marker.txt
    # The message must not contain [skip ci]/[ci skip]: GitHub suppresses
    # workflow runs for such commits, so cascade-pr-preview would never fire
    # on this PR head and the wait step would time out.
    git commit --no-gpg-sign -m "test: dummy src change for preview check"
    git push origin "$BRANCH_NAME"
    # Publish the branch name and its head SHA for the following steps.
    HEAD_SHA="$(git rev-parse HEAD)"
    {
      echo "branch=$BRANCH_NAME"
      echo "head_sha=$HEAD_SHA"
    } >> "$GITHUB_OUTPUT"
- name: Create test PR
  id: create_pr
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    set -euo pipefail
    # gh pr create prints the new PR's URL; capture it.
    PR_URL=$(gh pr create \
      --base main \
      --head "${{ steps.branch.outputs.branch }}" \
      --title "Test PR for cascade preview" \
      --body "Scenario test PR to validate cascade-pr-preview.yaml functionality.")
    # The PR number is the run of digits at the end of that URL.
    PR_NUMBER=$(grep -oE '[0-9]+$' <<<"$PR_URL")
    echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
- name: Wait for cascade-pr-preview run to succeed
  id: wait_preview
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -euo pipefail
    HEAD_SHA="${{ steps.branch.outputs.head_sha }}"
    MAX_ATTEMPTS=5
    SLEEP_DURATION=60
    RUN_ID=""
    # Look for the preview run whose headSha is this PR's head commit (not
    # simply the newest preview run), polling up to MAX_ATTEMPTS times.
    for ((try = 1; try <= MAX_ATTEMPTS; try++)); do
      RUN_ID=$(gh run list \
        --workflow=cascade-pr-preview.yaml \
        --json=databaseId,headSha \
        --jq=".[] | select(.headSha==\"$HEAD_SHA\") | .databaseId" 2>/dev/null | head -n1 || echo "")
      if [ -n "$RUN_ID" ]; then
        echo "Found cascade-pr-preview run for $HEAD_SHA: $RUN_ID"
        break
      fi
      # Pause between polls, but not after the final one.
      if [ "$try" -lt "$MAX_ATTEMPTS" ]; then
        sleep "$SLEEP_DURATION"
      fi
    done
    if [ -z "$RUN_ID" ]; then
      echo "::error::cascade-pr-preview.yaml run for $HEAD_SHA did not appear within timeout"
      exit 1
    fi
    # --exit-status makes this step fail unless the preview run succeeds.
    gh run watch "$RUN_ID" --exit-status --interval 60
    echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"
- name: Assert preview comment was posted (hard fail)
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -euo pipefail
    PR_NUMBER="${{ steps.create_pr.outputs.pr_number }}"
    # The preview comment is posted by actions/github-script (authored by
    # github-actions[bot]); its body's first line is the stable marker
    # "<!-- cascade-pr-preview -->". Hard-assert a bot comment carrying that
    # exact marker exists. The comment can lag the run conclusion slightly,
    # so poll briefly.
    MARKER='<!-- cascade-pr-preview -->'
    MAX_ATTEMPTS=2
    ATTEMPT=0
    FOUND=""
    while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
      # Count the bot-authored comments on the PR whose body contains the
      # marker; an API error counts as zero and is retried.
      FOUND=$(gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" \
        --jq "[.[] | select(.user.login == \"github-actions[bot]\") | select(.body | contains(\"$MARKER\"))] | length" \
        2>/dev/null || echo 0)
      if [ "${FOUND:-0}" -gt 0 ]; then
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      # Wait between polls only; a sleep after the last poll just delays the
      # failure report by a minute.
      if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
        sleep 60
      fi
    done
    if [ "${FOUND:-0}" -lt 1 ]; then
      echo "::error::no github-actions[bot] comment containing '$MARKER' on PR #$PR_NUMBER"
      exit 1
    fi
    echo "✓ Preview comment posted by github-actions[bot] with marker on PR #$PR_NUMBER"
- name: Clean up test PR
  if: always()
  env:
    GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
  run: |
    # Runs after failures too (if: always()), so either output may be empty
    # when the branch or PR step never completed. Only clean up what exists,
    # and report what was actually done. Each delete stays best-effort.
    PR_NUMBER="${{ steps.create_pr.outputs.pr_number }}"
    BRANCH_NAME="${{ steps.branch.outputs.branch }}"
    if [ -n "$PR_NUMBER" ]; then
      gh pr close "$PR_NUMBER" --delete-branch || true
      echo "Cleaned up PR #$PR_NUMBER"
    else
      echo "No test PR was created; nothing to close"
    fi
    if [ -n "$BRANCH_NAME" ]; then
      # Also delete the remote branch directly, for the case where the PR
      # close did not remove it (or no PR existed).
      git push origin --delete "$BRANCH_NAME" || true
      echo "Cleaned up branch $BRANCH_NAME"
    else
      echo "No test branch was created; nothing to delete"
    fi
- name: Write assertions summary
  if: always()
  env:
    JOB_STATUS: ${{ job.status }}
  run: |
    # This step runs even after an earlier step failed (if: always()), so the
    # result column must follow the job status rather than a hard-coded pass.
    if [ "$JOB_STATUS" = "success" ]; then
      MARK="✓"
    else
      MARK="✗ or not run"
    fi
    {
      echo "## pr-preview-check Assertions"
      echo ""
      if [ "$JOB_STATUS" != "success" ]; then
        echo "Job status: **$JOB_STATUS** - at least one step did not pass; see the step logs for which one."
        echo ""
      fi
      echo "| Assertion | Result |"
      echo "|-----------|--------|"
      echo "| Test PR created | $MARK |"
      echo "| cascade-pr-preview run succeeded | $MARK |"
      echo "| Preview comment posted by github-actions[bot] (marker) | $MARK |"
    } >> "$GITHUB_STEP_SUMMARY"