Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/5-bug-fixes/pipeline-fixes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed: Update the docker pull logic to retry if the initial pull fails, and wait when connecting to fresh VMs in Hetzner for CD
86 changes: 62 additions & 24 deletions nix/scripts/create-container-dump.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,72 @@ export REGISTRY_TIMEOUT=600 # Registry specific timeout

output_dir=$1
mkdir -p $1

# Download all the docker images into $1, and append its name to an index.txt
# If this errors out for you, copy default-policy.json from the skopeo repo to
# /etc/containers/policy.json
# Read one image reference per line from stdin. Each image is copied with
# skopeo into a docker-archive tarball under $1, its file name is appended to
# $1/index.txt, and create-build-entry is invoked for it. The script exits
# with status 1 as soon as one image cannot be fetched after all attempts.
max_attempts=5

while IFS= read -r image; do
  # Sanitize the image file name: replace colons and slashes with
  # underscores, suffix with .tar.
  image_filename=$(sed -r "s/[:\/]/_/g" <<< "$image")
  image_dir=$(realpath "$1")
  image_path="${image_dir}/${image_filename}.tar"

  # -s (exists and is non-empty) instead of -e, so that a zero-byte tarball
  # is fetched again rather than skipped.
  if [[ -s "$image_path" ]]; then
    echo "Skipping $image_filename…"
    continue
  fi

  echo "Fetching $image_filename…"

  # All of these images should be publicly fetchable, especially given we
  # ship public tarballs containing these images.
  # ci.sh already honors DOCKER_LOGIN, so do the same here, otherwise
  # fallback to unauthorized fetching.

  # If an image has both a tag and digest, remove the tag. Return the original if there is no match.
  image_trimmed=$(echo "$image" | sed -E 's/(.+)(:.+(@.+))/\1\3/')

  # Download to a temporary name and rename only after a verified fetch, so
  # the final path never holds a partially written archive.
  tmp_path="${image_path}.tmp"
  rm -f "$tmp_path"

  success=false

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    echo "Attempt $attempt/$max_attempts for $image_trimmed"

    # Reset on every attempt and capture the status directly on the skopeo
    # command. Reading $? after the enclosing `if … fi` would always yield 0,
    # because `cmd || rc=$?` itself finishes with status 0.
    rc=0
    if [[ -n "${DOCKER_LOGIN:-}" && "$image" =~ quay.io/wire ]]; then
      skopeo copy --insecure-policy \
        --src-creds "$DOCKER_LOGIN" \
        --retry-times 10 \
        "docker://$image_trimmed" \
        "docker-archive:${tmp_path}" \
        --additional-tag "$image" || rc=$?
    else
      skopeo copy --insecure-policy \
        --retry-times 10 \
        "docker://$image_trimmed" \
        "docker-archive:${tmp_path}" \
        --additional-tag "$image" || rc=$?
    fi

    if [[ $rc -eq 0 && -s "$tmp_path" ]]; then
      mv "$tmp_path" "$image_path"
      success=true
      break
    fi

    rm -f "$tmp_path"

    if ((attempt < max_attempts)); then
      echo "Fetch failed for $image_trimmed with rc=$rc; retrying…"
      # Linear back-off: 20s, 40s, 60s, …
      sleep $((attempt * 20))
    else
      # No further attempt follows, so do not sleep before giving up.
      echo "Fetch failed for $image_trimmed with rc=$rc" >&2
    fi
  done

  if [[ "$success" != true ]]; then
    echo "ERROR: failed to fetch $image after retries" >&2
    exit 1
  fi

  echo "${image_filename}.tar" >> "${image_dir}/index.txt"
  # passing image and $output_dir
  create-build-entry "$image" "$output_dir"
done
4 changes: 2 additions & 2 deletions terraform/examples/wiab-staging-hetzner/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ output "static-inventory" {
}
}
vars = {
ansible_ssh_common_args = "-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3"
ansible_ssh_common_args = "-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3 -o ConnectTimeout=10"
}
}
private = {
Expand All @@ -66,7 +66,7 @@ output "static-inventory" {
adminhost_local = {}
}
vars = {
ansible_ssh_common_args = "-o ProxyCommand=\"ssh -i ssh_private_key -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -W %h:%p -q root@${hcloud_server.adminhost.ipv4_address}\" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3"
ansible_ssh_common_args = "-o ProxyCommand=\"ssh -i ssh_private_key -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -W %h:%p -q root@${hcloud_server.adminhost.ipv4_address}\" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3 -o ConnectTimeout=10"
}
}
adminhost_local = {
Expand Down
16 changes: 16 additions & 0 deletions terraform/examples/wiab-staging-hetzner/setup_nodes.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
---
# Pre-flight play, placed ahead of the "Setup adminhost with dnsmasq and
# Docker" play: it blocks until the adminhost accepts connections and the
# adminhost_local address answers on port 22.
- name: Wait for adminhost private SSH
hosts: adminhost
# Fact gathering is switched off for this play.
gather_facts: no
tasks:
# Per the Ansible module docs: wait `delay` seconds before the first probe
# and give up after `timeout` seconds.
- name: Wait for SSH on public adminhost
wait_for_connection:
timeout: 300
delay: 5

# Runs on the `adminhost` group; the address probed is the ansible_host
# inventory variable of `adminhost_local`, on TCP port 22.
- name: Wait until adminhost private IP is reachable from public adminhost
wait_for:
host: "{{ hostvars['adminhost_local'].ansible_host }}"
port: 22
timeout: 300
delay: 5

- name: Setup adminhost with dnsmasq and Docker
hosts: adminhost_local
become: yes
Expand Down
Loading