Scenario Suite - End-to-End Pipeline Validation #30
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scenario suite: validates the pipeline end to end through the jobs defined
# under `jobs`. Runs on manual dispatch and on a weekly schedule.
name: Scenario Suite - End-to-End Pipeline Validation

on:
  workflow_dispatch:
  schedule:
    - cron: '0 6 * * 1'  # Weekly Monday 6am UTC

# The jobs in this suite push to main, delete releases and tags, and merge
# PRs. Two overlapping runs (for example the weekly schedule plus a manual
# dispatch) would invalidate each other's assertions, so serialize runs
# instead of letting them interleave. An in-flight run is never cancelled.
concurrency:
  group: scenario-suite
  cancel-in-progress: false

permissions:
  contents: write
  actions: write
  pull-requests: write

# Workflow-level environment. Steps that call `gh` without their own GH_TOKEN
# fall back to GITHUB_TOKEN from here.
env:
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  CASCADE_STATE_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
| jobs: | |
# Job: reset
# Puts the repository into a known starting state before the scenario jobs
# run: resets the manifest state, pushes that as a commit to main, and deletes
# leftover releases and tags.
reset:
  name: Reset manifest state
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.CASCADE_STATE_TOKEN }}

    - name: Configure Git
      run: |
        git config user.name "scenario-suite"
        git config user.email "scenario-suite@users.noreply.github.com"

    - name: Reset manifest state
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
      run: |
        set -euo pipefail
        # Reset .github/manifest.yaml config.state to {}.
        # Touches the manifest only (not src/**), so it does not match
        # orchestrate's path filter and correctly does not fire orchestrate.
        # NOTE(review): every later job in this workflow reads state from
        # `.ci.state.*`, while this resets `.config.state`. Confirm against the
        # manifest schema that this is the path that actually needs clearing.
        yq eval '.config.state = {}' -i .github/manifest.yaml
        git add .github/manifest.yaml
        # --allow-empty: when the manifest is already in the reset state there
        # is nothing staged, and a plain `git commit` exits non-zero and fails
        # the job. Always producing the commit keeps the suite re-runnable and
        # keeps the verification step below meaningful.
        git commit --no-gpg-sign --allow-empty \
          -m "chore: reset state for scenario test [skip ci]"
        git push origin main

    - name: Clean slate - delete leftover releases and tags
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
      run: |
        # Best effort throughout: a release or tag that is already gone must
        # not fail the reset, so individual delete failures are ignored.
        gh release list --repo "$GITHUB_REPOSITORY" --limit 200 --json tagName --jq '.[].tagName' \
          | while read -r t; do
              gh release delete "$t" --repo "$GITHUB_REPOSITORY" --yes --cleanup-tag 2>/dev/null || true
            done
        # Sweep tags that had no release attached.
        git fetch --tags --quiet || true
        for t in $(git tag -l 'v*' 'rel-*'); do
          git push origin --delete "$t" 2>/dev/null || true
        done

    - name: Verify push succeeded
      run: |
        set -euo pipefail
        git log -1 --oneline
        # The reset commit must be the local HEAD ...
        git log -1 --format=%s | grep -F "reset state for scenario test"
        # ... and must actually be on the remote trunk. The ancestor check
        # (rather than strict equality) tolerates a later push to main.
        git fetch origin main --quiet
        if ! git merge-base --is-ancestor HEAD origin/main; then
          echo "::error::reset commit $(git rev-parse HEAD) is not on origin/main"
          exit 1
        fi
# Job: commit-and-build
# Lands a dummy src change on main through a squash-merged PR, waits for the
# orchestrate run keyed to that merge commit, and then asserts on the run's
# artifacts, the manifest state on main, and the GitHub release.
commit-and-build:
  name: Commit dummy change and validate build output
  runs-on: ubuntu-latest
  needs: reset
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.CASCADE_STATE_TOKEN }}
        ref: main

    - name: Configure Git
      run: |
        git config user.name "scenario-suite"
        git config user.email "scenario-suite@users.noreply.github.com"

    - name: Open and merge PR with src change
      id: seed
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
      run: |
        set -euo pipefail
        # Emulate a real user: land the src change through branch -> PR ->
        # squash-merge, authored by the PAT so the merge push fires orchestrate.
        BRANCH="scenario/src-$(date +%s)-$RANDOM"
        git fetch origin main --quiet
        git checkout -B "$BRANCH" origin/main
        mkdir -p src
        echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" > src/version.txt
        git add src/version.txt
        # The squashed commit subject is the PR title, so the conventional
        # type must live there. No [skip ci]/[ci skip] anywhere, or GitHub
        # cancels the orchestrate trigger.
        git commit --no-gpg-sign -m "feat: dummy src change for scenario test"
        git push origin "$BRANCH"
        gh pr create --base main --head "$BRANCH" \
          --title "feat: dummy src change for scenario test" \
          --body "Automated scenario run; drives orchestrate on merge."
        gh pr merge "$BRANCH" --squash --delete-branch
        # Capture the merge SHA on trunk; it keys the orchestrate wait.
        git fetch origin main --quiet
        MERGE_SHA="$(git rev-parse origin/main)"
        echo "merge_sha=$MERGE_SHA" >> "$GITHUB_OUTPUT"

    - name: Wait for orchestrate.yaml workflow to complete
      id: wait_orchestrate
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
        # Passed through env rather than interpolated into the script text.
        MERGE_SHA: ${{ steps.seed.outputs.merge_sha }}
      run: |
        set -euo pipefail
        MAX_ATTEMPTS=5
        SLEEP_DURATION=60
        ATTEMPT=0
        RUN_ID=""
        # Wait for the orchestrate run whose headSha is THIS merge commit,
        # never a stale "latest" run.
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          RUN_ID=$(gh run list \
            --workflow=orchestrate.yaml \
            --branch=main \
            --json=databaseId,headSha \
            --jq=".[] | select(.headSha==\"$MERGE_SHA\") | .databaseId" 2>/dev/null | head -n1 || echo "")
          if [ -n "$RUN_ID" ]; then
            echo "Found orchestrate run for $MERGE_SHA: $RUN_ID"
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep "$SLEEP_DURATION"
          fi
        done
        if [ -z "$RUN_ID" ]; then
          echo "::error::orchestrate.yaml run for $MERGE_SHA did not appear within timeout"
          exit 1
        fi
        # --exit-status makes a failed orchestrate run fail this step.
        gh run watch "$RUN_ID" --exit-status --interval 60
        echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"

    - name: Assert per-leg matrix artifacts exist (artifacts API)
      id: assert_artifacts
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        RUN_ID: ${{ steps.wait_orchestrate.outputs.run_id }}
      run: |
        set -euo pipefail
        # Each build-image matrix leg uploads an artifact whose name carries the
        # os-arch dimension (build-image.yaml: name=image-${os}-${arch}). The
        # artifacts API is the deterministic per-leg signal: assert BOTH leg
        # names are present on this exact run, keyed by artifact name (not logs).
        ARTIFACTS=$(gh api "repos/$GITHUB_REPOSITORY/actions/runs/$RUN_ID/artifacts" \
          --jq '.artifacts[].name')
        echo "Run $RUN_ID artifacts:"
        echo "$ARTIFACTS"
        for LEG in image-linux-amd64 image-linux-arm64; do
          # -x: whole-line match, so a longer name cannot satisfy the check.
          if ! echo "$ARTIFACTS" | grep -qx "$LEG"; then
            echo "::error::matrix leg artifact '$LEG' missing from run $RUN_ID"
            exit 1
          fi
        done
        echo "✓ Per-leg matrix artifacts present: image-linux-amd64, image-linux-arm64"

    - name: Refresh manifest from trunk
      run: |
        # Orchestrate's finalize job pushes the state update as a separate
        # "chore: update state" commit on main. The checkout above predates
        # that commit, so sync the local manifest to origin/main before
        # reading it back. Without this the assertions race the state push.
        git fetch origin main --quiet
        git reset --hard origin/main

    - name: Assert manifest state shape (staging populated)
      id: assert_staging_version
      run: |
        set -euo pipefail
        # Read manifest and assert staging version exists and matches semver.
        # In a [staging, prod] repo, staging is the first env, so orchestrate
        # deploys to it and commits state.staging on merge.
        STAGING_VERSION=$(yq eval '.ci.state.staging.version // ""' .github/manifest.yaml)
        if [ -z "$STAGING_VERSION" ]; then
          echo "::error::.ci.state.staging.version is empty or missing"
          exit 1
        fi
        # Anchored semver: optional leading v, MAJOR.MINOR.PATCH, optional
        # pre-release/build suffix. Unanchored, any string merely containing
        # "1.2.3" would pass and the `v?` would be meaningless.
        if ! echo "$STAGING_VERSION" | grep -qE '^v?[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.-]+)*$'; then
          echo "::error::.ci.state.staging.version does not match semver pattern: $STAGING_VERSION"
          exit 1
        fi
        echo "✓ Staging version matches semver: $STAGING_VERSION"

    - name: Assert manifest state shape (consolidated image build recorded)
      id: assert_image_build
      run: |
        set -euo pipefail
        # Orchestrate consolidates the matrix build into a single
        # state.staging.builds.image.artifact_id slot (there is no per-leg state
        # slot). Assert it carries the known consolidated shape image-linux-*
        # produced by the build-image legs, not merely a non-empty value.
        ARTIFACT_ID=$(yq eval '.ci.state.staging.builds.image.artifact_id // ""' .github/manifest.yaml)
        if [ -z "$ARTIFACT_ID" ]; then
          echo "::error::.ci.state.staging.builds.image.artifact_id is empty or missing"
          exit 1
        fi
        case "$ARTIFACT_ID" in
          image-linux-*) ;;
          *)
            echo "::error::consolidated image artifact_id has unexpected shape: $ARTIFACT_ID (want image-linux-*)"
            exit 1
            ;;
        esac
        echo "✓ Consolidated image build recorded: artifact_id=$ARTIFACT_ID"

    - name: Assert GitHub release created
      id: assert_release
      env:
        # Same token `gh` previously picked up from the workflow-level
        # GITHUB_TOKEN; made explicit to match the other steps.
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        set -euo pipefail
        # The push-triggered orchestrate Finalize calls manage-release with
        # action=update, which cascade defines as a DRAFT RC release
        # (draft=true, prerelease=false). The prerelease and published states
        # are reached later by promote (asserted in the promote-staging job),
        # so at this stage the correct invariant is a draft RC, not a
        # prerelease. Poll the release list until the newest release is a draft
        # rather than reading once.
        MAX_ATTEMPTS=2
        ATTEMPT=0
        RELEASE_TAG=""
        IS_DRAFT=""
        IS_PRERELEASE=""
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          # One call returns tag + both flags for the newest release; a
          # transient gh failure just counts as "not there yet".
          ROW="$(gh release list --limit 1 --json tagName,isDraft,isPrerelease \
            --jq '.[0] // empty | [.tagName, .isDraft, .isPrerelease] | @tsv' 2>/dev/null || true)"
          if [ -n "$ROW" ]; then
            IFS=$'\t' read -r RELEASE_TAG IS_DRAFT IS_PRERELEASE <<< "$ROW"
            if [ "$IS_DRAFT" = "true" ]; then
              break
            fi
          fi
          ATTEMPT=$((ATTEMPT + 1))
          # No pointless sleep after the final attempt.
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep 60
          fi
        done
        if [ -z "$RELEASE_TAG" ]; then
          echo "::error::No GitHub release found"
          exit 1
        fi
        # Anchored semver. An optional "rel-" prefix is accepted because the
        # reset job treats both v* and rel-* as release tags.
        if ! echo "$RELEASE_TAG" | grep -qE '^(rel-)?v?[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.-]+)*$'; then
          echo "::error::Release tag does not match semver: $RELEASE_TAG"
          exit 1
        fi
        if [ "$IS_DRAFT" != "true" ]; then
          echo "::error::Orchestrate RC release is not a draft (draft=$IS_DRAFT, prerelease=$IS_PRERELEASE)"
          exit 1
        fi
        echo "✓ GitHub draft RC release created: $RELEASE_TAG (draft=$IS_DRAFT, prerelease=$IS_PRERELEASE)"

    - name: Write assertions summary
      if: always()
      env:
        # This step runs even after a failure, so report each assertion's real
        # outcome instead of an unconditional tick.
        R_ORCHESTRATE: ${{ steps.wait_orchestrate.outcome }}
        R_ARTIFACTS: ${{ steps.assert_artifacts.outcome }}
        R_STAGING: ${{ steps.assert_staging_version.outcome }}
        R_IMAGE: ${{ steps.assert_image_build.outcome }}
        R_RELEASE: ${{ steps.assert_release.outcome }}
      run: |
        # Render a step outcome: ✓ on success, otherwise ✗ plus the outcome
        # (failure / skipped / cancelled, or "not run" when empty).
        mark() {
          if [ "$1" = "success" ]; then echo "✓"; else echo "✗ (${1:-not run})"; fi
        }
        {
          echo "## commit-and-build Assertions"
          echo ""
          echo "| Assertion | Result |"
          echo "|-----------|--------|"
          echo "| Orchestrate run completed | $(mark "$R_ORCHESTRATE") |"
          echo "| Per-leg matrix artifacts (amd64, arm64) | $(mark "$R_ARTIFACTS") |"
          echo "| Staging version (semver) | $(mark "$R_STAGING") |"
          echo "| Consolidated image build recorded | $(mark "$R_IMAGE") |"
          echo "| GitHub release created | $(mark "$R_RELEASE") |"
        } >> "$GITHUB_STEP_SUMMARY"
# Job: promote-staging
# Dispatches promote.yaml, waits for the run that dispatch created, then
# asserts that the manifest's release marker carries a semver version and that
# the newest GitHub release is published (neither draft nor prerelease).
promote-staging:
  name: Promote staging to prod
  runs-on: ubuntu-latest
  needs: commit-and-build
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GITHUB_TOKEN }}
        ref: main

    - name: Dispatch promote.yaml workflow
      id: dispatch_promote
      env:
        # Same token `gh` previously picked up from the workflow-level
        # GITHUB_TOKEN; made explicit to match the other jobs.
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        set -euo pipefail
        # Stamp the dispatch time so the wait below can correlate the run it
        # created, never an older promote run that happens to be newest.
        TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "dispatch_ts=$TS" >> "$GITHUB_OUTPUT"
        gh workflow run promote.yaml \
          -f mode=default \
          --ref main

    - name: Wait for promote.yaml workflow to complete
      id: wait_promote
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        TS: ${{ steps.dispatch_promote.outputs.dispatch_ts }}
      run: |
        set -euo pipefail
        MAX_ATTEMPTS=4
        SLEEP_DURATION=60
        ATTEMPT=0
        RUN_ID=""
        # Find the promote run created at or after our dispatch timestamp.
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          RUN_ID=$(gh run list \
            --workflow=promote.yaml \
            --branch=main \
            --created=">=$TS" \
            --limit=1 \
            --json=databaseId \
            --jq='.[0].databaseId // empty' 2>/dev/null || echo "")
          if [ -n "$RUN_ID" ]; then
            echo "Found promote run for dispatch $TS: $RUN_ID"
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep "$SLEEP_DURATION"
          fi
        done
        if [ -z "$RUN_ID" ]; then
          echo "::error::promote.yaml run for dispatch $TS did not appear within timeout"
          exit 1
        fi
        # Block on THIS run and require a successful conclusion.
        gh run watch "$RUN_ID" --exit-status --interval 60
        echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"

    - name: Assert manifest state shape (release marker populated)
      id: assert_release_marker
      run: |
        set -euo pipefail
        # A terminal promotion publishes the release and records the version
        # under the .ci.state.release marker (the published-version marker),
        # not under a per-environment deploy slot. The final env emits no
        # deploy jobs, so .ci.state.prod stays empty by design; assert the
        # release marker carries the published semver instead.
        #
        # finalize pushes its state commit shortly after the promote run
        # reports completed, so the wait above can return before the release
        # marker lands on main. Poll-pull until the marker is present.
        MAX_ATTEMPTS=3
        ATTEMPT=0
        RELEASE_VERSION=""
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          git pull origin main --quiet || true
          RELEASE_VERSION=$(yq eval '.ci.state.release.version // ""' .github/manifest.yaml)
          if [ -n "$RELEASE_VERSION" ]; then
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          # No pointless sleep after the final attempt.
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep 60
          fi
        done
        if [ -z "$RELEASE_VERSION" ]; then
          echo "::error::.ci.state.release.version is empty or missing"
          exit 1
        fi
        # Anchored semver: optional leading v, MAJOR.MINOR.PATCH, optional
        # pre-release/build suffix. Unanchored, any string merely containing
        # "1.2.3" would pass.
        if ! echo "$RELEASE_VERSION" | grep -qE '^v?[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.-]+)*$'; then
          echo "::error::.ci.state.release.version does not match semver pattern: $RELEASE_VERSION"
          exit 1
        fi
        echo "✓ Release version matches semver: $RELEASE_VERSION"

    - name: Assert GitHub release is published (not RC)
      id: assert_published
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        set -euo pipefail
        # Read the newest release as structured JSON. Parsing the tabular
        # `gh release list` output with awk returns the first word of the TITLE
        # column, which is the tag only when title and tag happen to coincide.
        ROW="$(gh release list --limit 1 --json tagName,isDraft,isPrerelease \
          --jq '.[0] // empty | [.tagName, .isDraft, .isPrerelease] | @tsv')"
        if [ -z "$ROW" ]; then
          echo "::error::No release found"
          exit 1
        fi
        IFS=$'\t' read -r RELEASE_TAG IS_DRAFT IS_PRERELEASE <<< "$ROW"
        # The RC produced by orchestrate is a draft with prerelease=false, so
        # checking the prerelease flag alone would pass on a release that was
        # never published. Require draft=false as well.
        if [ "$IS_DRAFT" != "false" ]; then
          echo "::error::Release is still a draft, should be published: $RELEASE_TAG"
          exit 1
        fi
        if [ "$IS_PRERELEASE" != "false" ]; then
          echo "::error::Release is still marked as prerelease, should be published: $RELEASE_TAG"
          exit 1
        fi
        echo "✓ Release published (not draft, not prerelease): $RELEASE_TAG"

    - name: Write assertions summary
      if: always()
      env:
        # This step runs even after a failure, so report each assertion's real
        # outcome instead of an unconditional tick.
        R_PROMOTE: ${{ steps.wait_promote.outcome }}
        R_MARKER: ${{ steps.assert_release_marker.outcome }}
        R_PUBLISHED: ${{ steps.assert_published.outcome }}
      run: |
        # Render a step outcome: ✓ on success, otherwise ✗ plus the outcome
        # (failure / skipped / cancelled, or "not run" when empty).
        mark() {
          if [ "$1" = "success" ]; then echo "✓"; else echo "✗ (${1:-not run})"; fi
        }
        {
          echo "## promote-staging Assertions"
          echo ""
          echo "| Assertion | Result |"
          echo "|-----------|--------|"
          echo "| Promote run completed | $(mark "$R_PROMOTE") |"
          # Label matches what is asserted: the release marker, not a prod slot.
          echo "| Release marker version (semver) | $(mark "$R_MARKER") |"
          echo "| Release published (not RC) | $(mark "$R_PUBLISHED") |"
        } >> "$GITHUB_STEP_SUMMARY"
# Job: rollback-check
# Lands a second src change so staging has carried two deploys, dispatches
# cascade-rollback.yaml for staging, and asserts that staging's version moved
# back and that its ref is marked rollback/staging.
rollback-check:
  name: Rollback staging to prior version
  runs-on: ubuntu-latest
  needs: promote-staging
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.CASCADE_STATE_TOKEN }}
        ref: main

    - name: Configure Git
      run: |
        git config user.name "scenario-suite"
        git config user.email "scenario-suite@users.noreply.github.com"

    - name: Land a second staging version so a prior ring entry exists
      id: second
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
      run: |
        set -euo pipefail
        # A rollback can only resolve a prior target once staging has carried two
        # distinct deploy SHAs: the second deploy records the displaced state in
        # staging's deploy-history ring. staging is the deployable env in this
        # [staging, prod] model (prod is the publish boundary and emits no deploy
        # jobs), so it is the env the manual rollback re-deploys. Land a second
        # src change through branch -> PR -> squash-merge so orchestrate deploys a
        # new SHA to staging.
        git pull origin main --quiet
        # Remember the sha staging carries now; the wait step polls until the
        # manifest shows a different one.
        PRIOR_SHA="$(yq eval '.ci.state.staging.sha // ""' .github/manifest.yaml)"
        echo "prior_sha=$PRIOR_SHA" >> "$GITHUB_OUTPUT"
        BRANCH="scenario/rollback-src-$(date +%s)-$RANDOM"
        git checkout -B "$BRANCH" origin/main
        mkdir -p src
        echo "rollback-marker=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" > src/rollback-marker.txt
        git add src/rollback-marker.txt
        git commit --no-gpg-sign -m "feat: second staging version for rollback test"
        git push origin "$BRANCH"
        gh pr create --base main --head "$BRANCH" \
          --title "feat: second staging version for rollback test" \
          --body "Automated scenario run; drives orchestrate on merge."
        gh pr merge "$BRANCH" --squash --delete-branch
        git fetch origin main --quiet
        MERGE_SHA="$(git rev-parse origin/main)"
        echo "merge_sha=$MERGE_SHA" >> "$GITHUB_OUTPUT"

    - name: Wait for orchestrate to deploy the second staging version
      id: wait_second
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
        # Passed through env rather than interpolated into the script text.
        MERGE_SHA: ${{ steps.second.outputs.merge_sha }}
        PRIOR_SHA: ${{ steps.second.outputs.prior_sha }}
      run: |
        set -euo pipefail
        MAX_ATTEMPTS=5
        ATTEMPT=0
        RUN_ID=""
        # Correlate on the merge commit, never a stale "latest" run.
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          RUN_ID=$(gh run list \
            --workflow=orchestrate.yaml \
            --branch=main \
            --json=databaseId,headSha \
            --jq=".[] | select(.headSha==\"$MERGE_SHA\") | .databaseId" 2>/dev/null | head -n1 || echo "")
          if [ -n "$RUN_ID" ]; then
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep 60
          fi
        done
        if [ -z "$RUN_ID" ]; then
          echo "::error::orchestrate run for $MERGE_SHA did not appear within timeout"
          exit 1
        fi
        gh run watch "$RUN_ID" --exit-status --interval 60
        # Poll trunk until staging's sha moves off the prior value, so the ring
        # carries a distinct previous entry for the rollback to resolve.
        MAX_POLLS=3
        ATTEMPT=0
        CUR_SHA=""
        while [ "$ATTEMPT" -lt "$MAX_POLLS" ]; do
          git pull origin main --quiet || true
          CUR_SHA="$(yq eval '.ci.state.staging.sha // ""' .github/manifest.yaml)"
          if [ -n "$CUR_SHA" ] && [ "$CUR_SHA" != "$PRIOR_SHA" ]; then
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_POLLS" ]; then
            sleep 60
          fi
        done
        # Fail here when the sha never moved. Falling through would dispatch a
        # rollback with no distinct prior entry and surface as a confusing
        # failure several steps later.
        if [ -z "$CUR_SHA" ] || [ "$CUR_SHA" = "$PRIOR_SHA" ]; then
          echo "::error::staging sha did not move off '$PRIOR_SHA' after the second deploy (now '$CUR_SHA')"
          exit 1
        fi
        echo "staging sha moved: $PRIOR_SHA -> $CUR_SHA"

    - name: Dispatch cascade-rollback.yaml for staging
      id: dispatch_rollback
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
      run: |
        set -euo pipefail
        # Capture the version staging will roll back FROM, and the prior version
        # it should land ON (the previous ring entry), read from the manifest the
        # same way the promote job reads release state (yq on .github/manifest.yaml).
        git pull origin main --quiet
        CUR_VER="$(yq eval '.ci.state.staging.version // ""' .github/manifest.yaml)"
        PRIOR_VER="$(yq eval '.ci.state.staging.previous[0].version // ""' .github/manifest.yaml)"
        echo "cur_ver=$CUR_VER" >> "$GITHUB_OUTPUT"
        echo "prior_ver=$PRIOR_VER" >> "$GITHUB_OUTPUT"
        # Stamp the dispatch time so the wait correlates the run it created.
        TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "dispatch_ts=$TS" >> "$GITHUB_OUTPUT"
        gh workflow run cascade-rollback.yaml \
          -f environment=staging \
          -f dry_run=false \
          --ref main

    - name: Wait for cascade-rollback.yaml to complete
      id: wait_rollback
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
        TS: ${{ steps.dispatch_rollback.outputs.dispatch_ts }}
      run: |
        set -euo pipefail
        MAX_ATTEMPTS=4
        ATTEMPT=0
        RUN_ID=""
        # Find the rollback run created at or after our dispatch timestamp.
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          RUN_ID=$(gh run list \
            --workflow=cascade-rollback.yaml \
            --branch=main \
            --created=">=$TS" \
            --limit=1 \
            --json=databaseId \
            --jq='.[0].databaseId // empty' 2>/dev/null || echo "")
          if [ -n "$RUN_ID" ]; then
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep 60
          fi
        done
        if [ -z "$RUN_ID" ]; then
          echo "::error::cascade-rollback.yaml run for $TS did not appear within timeout"
          exit 1
        fi
        gh run watch "$RUN_ID" --exit-status --interval 60
        echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"

    - name: Assert staging rolled back to the prior version and marked diverged
      id: assert_rollback
      env:
        CUR_VER: ${{ steps.dispatch_rollback.outputs.cur_ver }}
        PRIOR_VER: ${{ steps.dispatch_rollback.outputs.prior_ver }}
      run: |
        set -euo pipefail
        # Rollback re-writes staging, which already carried a version, so poll
        # trunk until the rollback finalize state commit lands (staging version
        # moves off CUR_VER), then assert the divergence ref. Read via yq
        # on the manifest, mirroring the promote job's read idiom.
        MAX_ATTEMPTS=3
        ATTEMPT=0
        AFTER_VER=""
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          git pull origin main --quiet || true
          AFTER_VER="$(yq eval '.ci.state.staging.version // ""' .github/manifest.yaml)"
          if [ -n "$AFTER_VER" ] && [ "$AFTER_VER" != "$CUR_VER" ]; then
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep 60
          fi
        done
        AFTER_REF="$(yq eval '.ci.state.staging.ref // ""' .github/manifest.yaml)"
        # An empty version is not a rollback either; report it explicitly
        # instead of letting it slip through to the ref check.
        if [ -z "$AFTER_VER" ]; then
          echo "::error::.ci.state.staging.version is empty or missing after rollback"
          exit 1
        fi
        if [ "$AFTER_VER" = "$CUR_VER" ]; then
          echo "::error::staging version did not move back after rollback (still $CUR_VER)"
          exit 1
        fi
        # The exact-target check only applies when a prior version was
        # readable from the manifest before the dispatch.
        if [ -n "$PRIOR_VER" ] && [ "$AFTER_VER" != "$PRIOR_VER" ]; then
          echo "::error::staging rolled back to '$AFTER_VER', expected prior '$PRIOR_VER'"
          exit 1
        fi
        if [ "$AFTER_REF" != "rollback/staging" ]; then
          echo "::error::staging not marked diverged: ref='$AFTER_REF', want 'rollback/staging'"
          exit 1
        fi
        echo "✓ staging rolled back $CUR_VER -> $AFTER_VER, ref=$AFTER_REF"

    - name: Write assertions summary
      if: always()
      env:
        # This step runs even after a failure, so report each assertion's real
        # outcome instead of an unconditional tick.
        R_ROLLBACK: ${{ steps.wait_rollback.outcome }}
        R_ASSERT: ${{ steps.assert_rollback.outcome }}
      run: |
        # Render a step outcome: ✓ on success, otherwise ✗ plus the outcome
        # (failure / skipped / cancelled, or "not run" when empty).
        mark() {
          if [ "$1" = "success" ]; then echo "✓"; else echo "✗ (${1:-not run})"; fi
        }
        {
          echo "## rollback-check Assertions"
          echo ""
          echo "| Assertion | Result |"
          echo "|-----------|--------|"
          echo "| Rollback run completed | $(mark "$R_ROLLBACK") |"
          # Both rows below are established by the same assertion step.
          echo "| Staging version moved back to prior | $(mark "$R_ASSERT") |"
          echo "| Staging marked diverged (ref=rollback/staging) | $(mark "$R_ASSERT") |"
        } >> "$GITHUB_STEP_SUMMARY"
# Job: dispatch-inputs-check
# Dispatches orchestrate.yaml with a unique `reason` input, waits for the run
# that dispatch created, and asserts the manifest's staging bundle artifact_id
# equals bundle-<reason>.
dispatch-inputs-check:
  name: Dispatch input takes effect
  runs-on: ubuntu-latest
  needs: rollback-check
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.CASCADE_STATE_TOKEN }}
        ref: main

    - name: Dispatch orchestrate.yaml with a distinctive reason
      id: dispatch_inputs
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        set -euo pipefail
        # The reason flows operator -> orchestrate -> build-bundle callback ->
        # consolidated state. cascade forwards `reason` to build-bundle because
        # that callback declares a matching `reason` input; the stub echoes it
        # into artifact_id (bundle-<reason>). Use a unique, identifier-safe
        # sentinel so the exact value is knowable downstream.
        REASON="dispatch-sentinel-${GITHUB_RUN_ID}-${RANDOM}"
        echo "reason=$REASON" >> "$GITHUB_OUTPUT"
        echo "expected_artifact_id=bundle-$REASON" >> "$GITHUB_OUTPUT"
        # Stamp the dispatch time so the wait below correlates the run it
        # created, never an older orchestrate run that happens to be newest.
        TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "dispatch_ts=$TS" >> "$GITHUB_OUTPUT"
        gh workflow run orchestrate.yaml \
          -f environment=staging \
          -f reason="$REASON" \
          -f force=true \
          --ref main

    - name: Wait for dispatched orchestrate run to complete
      id: wait_dispatch
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Passed through env rather than interpolated into the script text.
        TS: ${{ steps.dispatch_inputs.outputs.dispatch_ts }}
      run: |
        set -euo pipefail
        MAX_ATTEMPTS=5
        SLEEP_DURATION=60
        ATTEMPT=0
        RUN_ID=""
        # --event=workflow_dispatch keeps push-triggered orchestrate runs from
        # being mistaken for the dispatched one.
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          RUN_ID=$(gh run list \
            --workflow=orchestrate.yaml \
            --branch=main \
            --event=workflow_dispatch \
            --created=">=$TS" \
            --limit=1 \
            --json=databaseId \
            --jq='.[0].databaseId // empty' 2>/dev/null || echo "")
          if [ -n "$RUN_ID" ]; then
            echo "Found dispatched orchestrate run: $RUN_ID"
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep "$SLEEP_DURATION"
          fi
        done
        if [ -z "$RUN_ID" ]; then
          echo "::error::dispatched orchestrate.yaml run for $TS did not appear within timeout"
          exit 1
        fi
        # Block on THIS run; a failed orchestrate means the input never reached state.
        gh run watch "$RUN_ID" --exit-status --interval 60
        echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"

    - name: Assert dispatched reason reached consolidated state
      id: assert_reason
      env:
        # No GH_TOKEN here: the script only runs git and yq, so the PAT this
        # step used to receive was never read.
        EXPECTED: ${{ steps.dispatch_inputs.outputs.expected_artifact_id }}
      run: |
        set -euo pipefail
        # finalize pushes the state commit shortly after the run reports
        # completed; poll-pull trunk until the bundle slot lands, then assert the
        # consolidated artifact_id equals the exact dispatched value. This proves
        # the operator input TOOK EFFECT (input -> callback -> state), not merely
        # that a run existed.
        MAX_ATTEMPTS=3
        ATTEMPT=0
        ACTUAL=""
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          git pull origin main --quiet || true
          ACTUAL=$(yq eval '.ci.state.staging.builds.bundle.artifact_id // ""' .github/manifest.yaml)
          if [ "$ACTUAL" = "$EXPECTED" ]; then
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          # No pointless sleep after the final attempt.
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep 60
          fi
        done
        if [ "$ACTUAL" != "$EXPECTED" ]; then
          echo "::error::dispatched reason did not reach state.staging.builds.bundle.artifact_id"
          echo "::error::expected '$EXPECTED', got '$ACTUAL'"
          exit 1
        fi
        echo "✓ Dispatch input took effect: bundle.artifact_id == $ACTUAL"

    - name: Write assertions summary
      if: always()
      env:
        # This step runs even after a failure, so report each assertion's real
        # outcome instead of an unconditional tick.
        R_DISPATCH: ${{ steps.wait_dispatch.outcome }}
        R_REASON: ${{ steps.assert_reason.outcome }}
      run: |
        # Render a step outcome: ✓ on success, otherwise ✗ plus the outcome
        # (failure / skipped / cancelled, or "not run" when empty).
        mark() {
          if [ "$1" = "success" ]; then echo "✓"; else echo "✗ (${1:-not run})"; fi
        }
        {
          echo "## dispatch-inputs-check Assertions"
          echo ""
          echo "| Assertion | Result |"
          echo "|-----------|--------|"
          echo "| Dispatched orchestrate completed | $(mark "$R_DISPATCH") |"
          echo "| Reason reached consolidated state (exact value) | $(mark "$R_REASON") |"
        } >> "$GITHUB_STEP_SUMMARY"
# Job: pr-preview-check
# Opens a throwaway PR that touches src/**, waits for the cascade-pr-preview
# run on that PR head to succeed, asserts that a github-actions[bot] comment
# carrying the preview marker was posted, and then closes the PR.
pr-preview-check:
  name: PR preview comment validation
  runs-on: ubuntu-latest
  needs: dispatch-inputs-check
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.CASCADE_STATE_TOKEN }}
        ref: main

    - name: Configure Git
      run: |
        git config user.name "scenario-suite"
        git config user.email "scenario-suite@users.noreply.github.com"

    - name: Create and push test PR branch
      id: branch
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
      run: |
        set -euo pipefail
        BRANCH_NAME="test/cascade-preview-$(date +%s)-$RANDOM"
        git checkout -b "$BRANCH_NAME"
        # Touch src/** so the preview's change detection has a callback to plan.
        # No [skip ci]/[ci skip] in the message: GitHub suppresses workflow runs
        # for such commits, which would stop cascade-pr-preview from ever firing
        # on this PR head and make the wait below time out.
        mkdir -p src
        echo "preview-$(date -u +%s)" > src/preview-marker.txt
        git add src/preview-marker.txt
        git commit --no-gpg-sign -m "test: dummy src change for preview check"
        git push origin "$BRANCH_NAME"
        # The head SHA keys the preview-run lookup in the wait step.
        HEAD_SHA="$(git rev-parse HEAD)"
        echo "branch=$BRANCH_NAME" >> "$GITHUB_OUTPUT"
        echo "head_sha=$HEAD_SHA" >> "$GITHUB_OUTPUT"

    - name: Create test PR
      id: create_pr
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
        # Passed through env rather than interpolated into the script text.
        BRANCH_NAME: ${{ steps.branch.outputs.branch }}
      run: |
        set -euo pipefail
        PR_URL=$(gh pr create \
          --title "Test PR for cascade preview" \
          --body "Scenario test PR to validate cascade-pr-preview.yaml functionality." \
          --head "$BRANCH_NAME" \
          --base main)
        # `gh pr create` prints the PR URL; the trailing digits are the number.
        PR_NUMBER=$(echo "$PR_URL" | grep -oE '[0-9]+$')
        echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"

    - name: Wait for cascade-pr-preview run to succeed
      id: wait_preview
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        HEAD_SHA: ${{ steps.branch.outputs.head_sha }}
      run: |
        set -euo pipefail
        MAX_ATTEMPTS=5
        SLEEP_DURATION=60
        ATTEMPT=0
        RUN_ID=""
        # Correlate on THIS PR head SHA, never a stale "latest" preview run.
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          RUN_ID=$(gh run list \
            --workflow=cascade-pr-preview.yaml \
            --json=databaseId,headSha \
            --jq=".[] | select(.headSha==\"$HEAD_SHA\") | .databaseId" 2>/dev/null | head -n1 || echo "")
          if [ -n "$RUN_ID" ]; then
            echo "Found cascade-pr-preview run for $HEAD_SHA: $RUN_ID"
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep "$SLEEP_DURATION"
          fi
        done
        if [ -z "$RUN_ID" ]; then
          echo "::error::cascade-pr-preview.yaml run for $HEAD_SHA did not appear within timeout"
          exit 1
        fi
        # Hard fail unless the preview run concludes success.
        gh run watch "$RUN_ID" --exit-status --interval 60
        echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT"

    - name: Assert preview comment was posted (hard fail)
      id: assert_comment
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ steps.create_pr.outputs.pr_number }}
      run: |
        set -euo pipefail
        # The preview comment is posted by actions/github-script (authored by
        # github-actions[bot]); its body's first line is the stable marker
        # "<!-- cascade-pr-preview -->". Hard-assert a bot comment carrying that
        # exact marker exists. The comment can lag the run conclusion slightly,
        # so poll briefly.
        MARKER='<!-- cascade-pr-preview -->'
        MAX_ATTEMPTS=2
        ATTEMPT=0
        FOUND=""
        while [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; do
          # Count matching comments; an API failure counts as zero and retries.
          FOUND=$(gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" \
            --jq "[.[] | select(.user.login == \"github-actions[bot]\") | select(.body | contains(\"$MARKER\"))] | length" \
            2>/dev/null || echo 0)
          if [ "${FOUND:-0}" -gt 0 ]; then
            break
          fi
          ATTEMPT=$((ATTEMPT + 1))
          # No pointless sleep after the final attempt.
          if [ "$ATTEMPT" -lt "$MAX_ATTEMPTS" ]; then
            sleep 60
          fi
        done
        if [ "${FOUND:-0}" -lt 1 ]; then
          echo "::error::no github-actions[bot] comment containing '$MARKER' on PR #$PR_NUMBER"
          exit 1
        fi
        echo "✓ Preview comment posted by github-actions[bot] with marker on PR #$PR_NUMBER"

    - name: Clean up test PR
      if: always()
      env:
        GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
        PR_NUMBER: ${{ steps.create_pr.outputs.pr_number }}
        BRANCH_NAME: ${{ steps.branch.outputs.branch }}
      run: |
        # Best-effort cleanup that runs even after a failure. Either output can
        # be empty when an earlier step never ran, so only act on what exists
        # and only report what was attempted.
        if [ -n "$PR_NUMBER" ]; then
          gh pr close "$PR_NUMBER" --delete-branch || true
          echo "Closed PR #$PR_NUMBER"
        else
          echo "No PR was created; nothing to close"
        fi
        if [ -n "$BRANCH_NAME" ]; then
          # `gh pr close --delete-branch` may already have removed the remote
          # branch; a second delete failing is expected and ignored.
          git push origin --delete "$BRANCH_NAME" 2>/dev/null || true
          echo "Removed branch $BRANCH_NAME (if it still existed)"
        else
          echo "No branch was pushed; nothing to delete"
        fi

    - name: Write assertions summary
      if: always()
      env:
        # This step runs even after a failure, so report each assertion's real
        # outcome instead of an unconditional tick.
        R_CREATE: ${{ steps.create_pr.outcome }}
        R_PREVIEW: ${{ steps.wait_preview.outcome }}
        R_COMMENT: ${{ steps.assert_comment.outcome }}
      run: |
        # Render a step outcome: ✓ on success, otherwise ✗ plus the outcome
        # (failure / skipped / cancelled, or "not run" when empty).
        mark() {
          if [ "$1" = "success" ]; then echo "✓"; else echo "✗ (${1:-not run})"; fi
        }
        {
          echo "## pr-preview-check Assertions"
          echo ""
          echo "| Assertion | Result |"
          echo "|-----------|--------|"
          echo "| Test PR created | $(mark "$R_CREATE") |"
          echo "| cascade-pr-preview run succeeded | $(mark "$R_PREVIEW") |"
          echo "| Preview comment posted by github-actions[bot] (marker) | $(mark "$R_COMMENT") |"
        } >> "$GITHUB_STEP_SUMMARY"