From fd57e75cbf3c88189caaa9f4a1a074fd67f07433 Mon Sep 17 00:00:00 2001 From: Bug1 Repro Date: Fri, 29 May 2026 10:22:53 -0700 Subject: [PATCH 1/2] Bug #1 repro: crash-loop start to test supervisor port drain Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- build.yaml | 3 ++- flaky-startup.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100755 flaky-startup.sh diff --git a/build.yaml b/build.yaml index 4aea474..52447b5 100644 --- a/build.yaml +++ b/build.yaml @@ -2,4 +2,5 @@ version: 1 platform: python platformVersion: "3.14" run: - port: 8080 \ No newline at end of file + port: 8080 + startCommand: "bash /output/flaky-startup.sh" diff --git a/flaky-startup.sh b/flaky-startup.sh new file mode 100755 index 0000000..0f1ab1f --- /dev/null +++ b/flaky-startup.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set +e +TAG="${EMBR_DEPLOYMENT_ID:-unknown}" +echo "[flaky-startup ${TAG}] starting at $(date -u)" +echo "[flaky-startup ${TAG}] PID=$$, PPID=$PPID" +echo "[flaky-startup ${TAG}] ports in use BEFORE bind:" +ss -ltnp 2>&1 | grep -E ':8080|LISTEN' | head -10 + +python -c " +import socket, sys, time, os +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +try: + s.bind(('0.0.0.0', 8080)) + s.listen(8) + print(f'[flaky-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] BOUND :8080 successfully', flush=True) +except OSError as e: + print(f'[flaky-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] BIND FAILED: {e}', flush=True) + sys.exit(98) +time.sleep(12) +print(f'[flaky-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] exiting non-zero now', flush=True) +sys.exit(1) +" +EXITCODE=$? +echo "[flaky-startup ${TAG}] python exited ${EXITCODE} at $(date -u); supervisor will restart" +sleep 1 +exit $EXITCODE From e0f22e4709155dc6e3110893429585c43a200220 Mon Sep 17 00:00:00 2001 From: Bug1 Repro Date: Fri, 29 May 2026 10:27:12 -0700 Subject: [PATCH 2/2] Bug #1 repro rev 2: healthy startup with bind-retry instrumentation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- flaky-startup.sh | 52 ++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/flaky-startup.sh b/flaky-startup.sh index 0f1ab1f..590ee0e 100755 --- a/flaky-startup.sh +++ b/flaky-startup.sh @@ -1,27 +1,41 @@ #!/bin/bash set +e TAG="${EMBR_DEPLOYMENT_ID:-unknown}" -echo "[flaky-startup ${TAG}] starting at $(date -u)" -echo "[flaky-startup ${TAG}] PID=$$, PPID=$PPID" -echo "[flaky-startup ${TAG}] ports in use BEFORE bind:" +echo "[healthy-startup ${TAG}] starting at $(date -u)" +echo "[healthy-startup ${TAG}] PID=$$, PPID=$PPID" +echo "[healthy-startup ${TAG}] ports in use BEFORE bind:" ss -ltnp 2>&1 | grep -E ':8080|LISTEN' | head -10 +echo "[healthy-startup ${TAG}] my parent process tree:" +ps -ef --forest 2>&1 | head -30 python -c " import socket, sys, time, os -s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) -try: - s.bind(('0.0.0.0', 8080)) - s.listen(8) - print(f'[flaky-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] BOUND :8080 successfully', flush=True) -except OSError as e: - print(f'[flaky-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] BIND FAILED: {e}', flush=True) +print(f'[healthy-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] attempting bind on :8080', flush=True) +attempts = 0 +while attempts < 30: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + s.bind(('0.0.0.0', 8080)) + s.listen(8) + print(f'[healthy-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] BOUND :8080 successfully after {attempts} attempts', flush=True) + break + except OSError as e: + attempts += 1 + print(f'[healthy-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] BIND FAILED attempt {attempts}: {e}', flush=True) + s.close() + time.sleep(1) +else: + print(f'[healthy-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] GAVE UP after 30 attempts', flush=True) sys.exit(98) -time.sleep(12) -print(f'[flaky-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] exiting non-zero now', flush=True) -sys.exit(1) -" -EXITCODE=$? -echo "[flaky-startup ${TAG}] python exited ${EXITCODE} at $(date -u); supervisor will restart" -sleep 1 -exit $EXITCODE +print(f'[healthy-startup {os.environ.get(\"EMBR_DEPLOYMENT_ID\",\"?\")}] serving forever now', flush=True) +# minimal HTTP responder +from http.server import HTTPServer, BaseHTTPRequestHandler +class H(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200); self.end_headers(); self.wfile.write(b'ok') + def log_message(self, *a): pass +HTTPServer.allow_reuse_address = True +HTTPServer(('0.0.0.0', 8080), H, bind_and_activate=False).serve_forever() +" || true +sleep 60