From a4f9f14fec8faf8d5aec48cd983933ff4be42f0e Mon Sep 17 00:00:00 2001 From: Venkumahanti Subhankar Date: Tue, 23 Jun 2026 13:22:16 +0000 Subject: [PATCH] [smoke-test] fix a flake: retry tag validation to tolerate Docker Hub rate limiting validate-tags.sh ran a single anonymous `docker manifest inspect` per tag, so on shared-IP CI runners any non-zero exit (including HTTP 429 rate limiting) was misreported as "tag does not exist" and failed the whole smoke-test job with a false "Invalid variants" error, even though the tag exists upstream. - Retry the registry query up to 3 times with a 10s pause before marking a tag invalid, so a transient failure isn't mistaken for a missing tag. - After all retries fail, flag the one unambiguous transient cause, Docker Hub rate limiting (HTTP 429); every other case keeps the original "tag does not exist" message unchanged. - Echo the raw registry response to aid debugging. Fail-closed behaviour is unchanged: a genuinely missing tag still exits 1. --- .github/actions/smoke-test/validate-tags.sh | 37 ++++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/.github/actions/smoke-test/validate-tags.sh b/.github/actions/smoke-test/validate-tags.sh index 7187f8797e..c8551db1e4 100755 --- a/.github/actions/smoke-test/validate-tags.sh +++ b/.github/actions/smoke-test/validate-tags.sh @@ -31,17 +31,44 @@ INVALID_TAGS=() VALID_TAGS=() # Function to check if a Docker image tag exists +# The registry query is retried several times so a transient Docker Hub problem +# (e.g. rate limiting) is not mistaken for a missing tag. It is only reported as +# invalid after every attempt has failed. 
#######################################
# Check whether a Docker image tag exists in its registry.
#
# The registry query is retried so that a transient failure (for example
# Docker Hub rate limiting) is not mistaken for a missing tag. The tag is only
# reported as invalid after every attempt has failed (fail-closed).
#
# Globals:
#   TAG_CHECK_MAX_ATTEMPTS  (read, optional) attempts before giving up;
#                           positive integer, defaults to 3.
#   TAG_CHECK_RETRY_DELAY   (read, optional) seconds to pause between
#                           attempts; non-negative integer, defaults to 10.
# Arguments:
#   $1 - image reference to verify, e.g. "repo/name:tag"
# Outputs:
#   Progress lines, the verdict and (on failure) the last registry response,
#   all on stdout.
# Returns:
#   0 if `docker manifest inspect` succeeded on any attempt, 1 otherwise.
#######################################
check_image_exists() {
    local image_tag="$1"
    local max_attempts="${TAG_CHECK_MAX_ATTEMPTS:-3}"
    local retry_delay="${TAG_CHECK_RETRY_DELAY:-10}"
    local attempt=1
    local output=""
    # A bare "429" is deliberately NOT matched: the registry echoes the image
    # reference back in "not found" errors, so a tag or digest containing 429
    # would otherwise be misreported as rate limiting. The status code only
    # counts when it appears in a status context.
    local rate_limit_re='toomanyrequests|too many requests|rate limit|(status|http)( code)?[: ]+429([^0-9]|$)'

    # Ignore malformed overrides rather than breaking the arithmetic below.
    [[ "$max_attempts" =~ ^[1-9][0-9]*$ ]] || max_attempts=3
    [[ "$retry_delay" =~ ^[0-9]+$ ]] || retry_delay=10

    echo " Checking: $image_tag"

    while (( attempt <= max_attempts )); do
        # Capture stdout+stderr so the last failure can be classified and
        # shown; the assignment's status is the status of `docker`.
        if output=$(docker manifest inspect "$image_tag" 2>&1); then
            echo " ✓ Valid"
            return 0
        fi

        echo " ! Attempt ${attempt}/${max_attempts} could not verify tag"
        attempt=$(( attempt + 1 ))
        # No point in pausing after the final attempt.
        if (( attempt <= max_attempts )); then
            sleep "$retry_delay"
        fi
    done

    # All retries failed. Rate limiting is the only failure with an
    # unambiguous signature, so it is flagged specifically; everything else
    # keeps the default "does not exist" message.
    # A here-string (not `echo | grep -q`) avoids a spurious SIGPIPE failure
    # when the caller runs with `set -o pipefail`.
    if grep -qiE -- "$rate_limit_re" <<<"$output"; then
        echo " ✗ Invalid - Docker Hub rate limiting (HTTP 429)"
    else
        echo " ✗ Invalid - tag does not exist"
    fi

    if [[ -n "$output" ]]; then
        echo " Registry response: ${output}"
    fi

    return 1
}

# Check if this image has variants