Skip to content

Commit 3f3e53a

Browse files
committed
Trim bisection probes; keep full-main reproducer with better gdb watchdog
Previous attempt exhausted the 30-min job budget on subset probes. All subsets pass — only the full main run with Translation_Tests unskipped reproduces the hang, confirmed at testReconstructTable start.

Drop the subset probes. Keep one probe that reproduces 4c4f491's main-run state with:
- longer timeout (420s) so we sit *in* the hang, not at its start
- pgrep -x php to target PHP itself (not the timeout wrapper we captured last time)
- /proc/<pid>/stack + /proc/<pid>/wchan for kernel-side picture
- two gdb snapshots (T+360s, T+400s) in case one detaches early
1 parent 9088f60 commit 3f3e53a

1 file changed

Lines changed: 37 additions & 90 deletions

File tree

.github/workflows/phpunit-tests-turso.yml

Lines changed: 37 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -896,109 +896,56 @@ jobs:
896896
-ex "bt 40" \
897897
--args "$(command -v php)" || true
898898
899-
- name: Probe testReconstructTable in isolation
900-
continue-on-error: true
901-
env:
902-
LD_PRELOAD: ${{ steps.preload.outputs.value }}
903-
working-directory: packages/mysql-on-sqlite
904-
# Run just this one test by itself. Previous runs show it hangs at
905-
# ~10 min when executed after the other tests. If it passes here in
906-
# ~1 s, the hang is caused by accumulated process state from the
907-
# preceding tests (likely leaked FuncSlot p_app refs).
908-
run: |
909-
set +e
910-
timeout --kill-after=10 60 \
911-
php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
912-
--filter '^WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$'
913-
echo "testReconstructTable isolated exit: $?"
914-
915-
- name: Probe testReconstructTable after Translation_Tests
916-
continue-on-error: true
917-
env:
918-
LD_PRELOAD: ${{ steps.preload.outputs.value }}
919-
working-directory: packages/mysql-on-sqlite
920-
# Run Translation_Tests then testReconstructTable in one process.
921-
# If this hangs, Translation_Tests specifically are leaving Turso in
922-
# a bad state. If it passes, the trigger is something earlier.
923-
run: |
924-
set +e
925-
timeout --kill-after=10 180 \
926-
php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
927-
--filter '^(WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable)'
928-
echo "Translation+reconstruct exit: $?"
929-
930-
- name: Probe Driver_Tests + Translation + testReconstructTable
931-
continue-on-error: true
932-
env:
933-
LD_PRELOAD: ${{ steps.preload.outputs.value }}
934-
working-directory: packages/mysql-on-sqlite
935-
# Bisecting: is the pollution coming from WP_SQLite_Driver_Tests?
936-
# Prints first-failure marker if testReconstructTable doesn't end.
937-
run: |
938-
set +e
939-
timeout --kill-after=10 600 \
940-
php ./vendor/bin/phpunit -c ./phpunit.xml.dist --debug \
941-
--filter '^(WP_SQLite_Driver_Tests|WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$)' \
942-
2>&1 | grep -E "(testReconstructTable|^Time:|^OK|FAILURES|^Tests:|^ERRORS|test.*started|test.*ended)" | tail -40
943-
echo "Driver+Translation+reconstruct exit: $?"
944-
945-
- name: Probe Metadata_Tests + Translation + testReconstructTable
946-
continue-on-error: true
947-
env:
948-
LD_PRELOAD: ${{ steps.preload.outputs.value }}
949-
working-directory: packages/mysql-on-sqlite
950-
# Bisecting: is the pollution coming from Metadata_Tests?
951-
run: |
952-
set +e
953-
timeout --kill-after=10 300 \
954-
php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
955-
--filter '^(WP_SQLite_Driver_Metadata_Tests|WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$)'
956-
echo "Metadata+Translation+reconstruct exit: $?"
957-
958-
- name: Probe PDO_API + Translation + testReconstructTable
959-
continue-on-error: true
960-
env:
961-
LD_PRELOAD: ${{ steps.preload.outputs.value }}
962-
working-directory: packages/mysql-on-sqlite
963-
# Bisecting: is the pollution coming from PDO_API_Tests?
964-
run: |
965-
set +e
966-
timeout --kill-after=10 300 \
967-
php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
968-
--filter '^(WP_PDO_MySQL_On_SQLite_PDO_API_Tests|WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$)'
969-
echo "PDO_API+Translation+reconstruct exit: $?"
970-
971899
- name: Probe full main run with Translation unskipped + gdb watchdog
972900
continue-on-error: true
973901
env:
974902
LD_PRELOAD: ${{ steps.preload.outputs.value }}
975903
working-directory: packages/mysql-on-sqlite
976-
# Reproduce the 4c4f491 main-run state (Translation_Tests unskipped).
977-
# Previous runs hang here at testReconstructTable for 10 min; install a
978-
# watchdog that snapshots the PHP process with gdb before killing it.
904+
# Reproduce the 4c4f491 main-run state (Translation_Tests unskipped)
905+
# and capture what PHP is actually doing during the hang.
906+
#
907+
# Timeline budget (~7 min total):
908+
# 0-30s: build testcases, run PDO_API + Driver_Tests (fast, ~5k tests)
909+
# 30-60s: Metadata_Tests + Translation_Tests (completed at 60s in 45)
910+
# 60s: testReconstructTable starts and hangs
911+
# 360s: first gdb snapshot (5 min in)
912+
# 400s: second gdb snapshot (in case first detached/crashed)
913+
# 420s: timeout kills php
979914
run: |
980915
set +e
981916
skip_regex='^(?!WP_MySQL_Server_Suite_).+'
982917
983-
# Watchdog: after 150s, grab a backtrace of the hanging PHP.
984-
(
985-
sleep 150
986-
PHP_PID=$(pgrep -f 'phpunit.*--filter' | head -1)
987-
if [ -n "$PHP_PID" ]; then
988-
echo "=== watchdog: attaching gdb to php pid $PHP_PID ==="
989-
sudo gdb -p "$PHP_PID" -batch \
990-
-ex 'set pagination off' \
991-
-ex 'info threads' \
992-
-ex 'thread apply all bt 40' \
993-
2>&1 | head -400
994-
echo "=== watchdog: done ==="
995-
else
996-
echo "=== watchdog: no php pid found ==="
918+
dump_backtraces() {
919+
local label=$1
920+
# Target the PHP process (not the timeout wrapper). Use exact name.
921+
local PHP_PID
922+
PHP_PID=$(pgrep -x php | head -1)
923+
if [ -z "$PHP_PID" ]; then
924+
echo "=== watchdog ($label): no php pid found ==="
925+
return
997926
fi
927+
echo "=== watchdog ($label): attaching gdb to php pid $PHP_PID ==="
928+
# /proc/PID/stack shows what the kernel thinks PHP is waiting for
929+
# — free even without ptrace and cheap to read.
930+
echo "--- /proc/$PHP_PID/wchan: $(cat /proc/$PHP_PID/wchan 2>/dev/null) ---"
931+
echo "--- /proc/$PHP_PID/stack ---"
932+
sudo cat /proc/$PHP_PID/stack 2>/dev/null | head -30
933+
echo "--- gdb bt ---"
934+
sudo gdb -p "$PHP_PID" -batch \
935+
-ex 'set pagination off' \
936+
-ex 'info threads' \
937+
-ex 'thread apply all bt 40' \
938+
2>&1 | head -400
939+
echo "=== watchdog ($label): done ==="
940+
}
941+
942+
(
943+
sleep 360 && dump_backtraces "T+360s"
944+
sleep 40 && dump_backtraces "T+400s"
998945
) &
999946
WATCHDOG=$!
1000947
1001-
timeout --kill-after=10 180 \
948+
timeout --kill-after=10 420 \
1002949
php ./vendor/bin/phpunit -c ./phpunit.xml.dist --debug \
1003950
--filter "$skip_regex" 2>&1 | tail -80
1004951
echo "full-main+Translation exit: $?"

0 commit comments

Comments
 (0)