From c8ff9ce682da0accec3b90f034e39f4b6303d5df Mon Sep 17 00:00:00 2001
From: gabriel engvall
Date: Fri, 5 Jun 2026 21:33:23 +0200
Subject: [PATCH 1/2] fix(build): watchdog must not qm start a template

The build-VM watchdog restarts the VM when it has reported status "stopped"
for 20s. A template also reports "stopped", so once packer finishes and
converts the VM to a template the watchdog calls qm start and gets the
error "you can't start a vm if it's a template", repeatedly, until it burns
through its restart limit. The build still passes (the error is swallowed)
but the log is alarming and the watchdog wastes time.

Check the template flag before restarting and exit cleanly instead.
---
 src/build.ts | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/build.ts b/src/build.ts
index e1bc572..97e1cd2 100644
--- a/src/build.ts
+++ b/src/build.ts
@@ -68,6 +68,13 @@ const buildVmWatchdog = (
     sleep 20
     _s=$(qm status ${vmid} 2>/dev/null | awk 'NR==1{print $2}') || continue
     [ "$_s" = "stopped" ] || continue
+    # A template also reports "stopped". Never qm start it — that fails with
+    # "you can't start a vm if it's a template". A template at this point means
+    # packer already finished and converted the VM, so there's nothing to
+    # restart: exit cleanly instead of burning restart attempts on an error.
+    if qm config ${vmid} 2>/dev/null | grep -q '^template:'; then
+      echo "[watchdog] VM ${vmid} is now a template — exiting"; exit 0
+    fi
     _n=$((_n + 1))
     if [ "$_n" -gt "$_max" ]; then
       echo "[watchdog] VM ${vmid}: restart limit reached, giving up" >&2; exit 1

From 111b2069063869e71aff05a8c3db4fc7259471ce Mon Sep 17 00:00:00 2001
From: gabriel engvall
Date: Sat, 6 Jun 2026 18:57:29 +0200
Subject: [PATCH 2/2] fix(build): watchdog waits for a stable communicator port before standing down

The watchdog exits the moment the communicator port (SSH 22 / WinRM 5985)
first responds.
On Windows this races with Setup: WinRM opens briefly during OOBE, the
watchdog exits, then Windows reboots or powers off for a later phase. With
the watchdog gone the powered-off VM is never restarted and Packer waits
out its full 4h timeout with "no route to host", finishing with no
artifact.

Require the port to stay up for several consecutive checks (~30s) before
standing down; a transient blip resets the counter and the watchdog keeps
restarting the VM on power-off until the port is stably up (Packer
connected and provisioning).

Fixes the intermittent Windows builds that produced no template.
---
 src/build.ts | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/build.ts b/src/build.ts
index 97e1cd2..17d87e2 100644
--- a/src/build.ts
+++ b/src/build.ts
@@ -57,12 +57,22 @@ const buildVmWatchdog = (
   communicatorPort: number
 ): string => `
 (
-  _n=0 _max=5
+  _n=0 _max=5 _up=0 _need=3
   while true; do
     sleep 10
     if timeout 3 bash -c "echo >/dev/tcp/${buildIp}/${communicatorPort}" 2>/dev/null; then
-      echo "[watchdog] port ${communicatorPort} up on ${buildIp} — exiting"; exit 0
+      # Require the communicator port to stay up across several checks before
+      # standing down. Windows Setup opens WinRM briefly during OOBE then may
+      # reboot/power off for a later phase; exiting on the first hit leaves that
+      # shutdown unhandled and Packer waits out its full timeout ("no route to
+      # host"). Once the port is *stably* up Packer has connected — safe to exit.
+      _up=$((_up + 1))
+      if [ "$_up" -ge "$_need" ]; then
+        echo "[watchdog] port ${communicatorPort} up on ${buildIp} (stable) — exiting"; exit 0
+      fi
+      continue
     fi
+    _up=0
     _s=$(qm status ${vmid} 2>/dev/null | awk 'NR==1{print $2}') || continue
     [ "$_s" = "stopped" ] || continue
     sleep 20