@@ -896,109 +896,56 @@ jobs:
896896 -ex "bt 40" \
897897 --args "$(command -v php)" || true
898898
899- - name : Probe testReconstructTable in isolation
900- continue-on-error : true
901- env :
902- LD_PRELOAD : ${{ steps.preload.outputs.value }}
903- working-directory : packages/mysql-on-sqlite
904- # Run just this one test by itself. Previous runs show it hangs at
905- # ~10 min when executed after the other tests. If it passes here in
906- # ~1 s, the hang is caused by accumulated process state from the
907- # preceding tests (likely leaked FuncSlot p_app refs).
908- run : |
909- set +e
910- timeout --kill-after=10 60 \
911- php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
912- --filter '^WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$'
913- echo "testReconstructTable isolated exit: $?"
914-
915- - name : Probe testReconstructTable after Translation_Tests
916- continue-on-error : true
917- env :
918- LD_PRELOAD : ${{ steps.preload.outputs.value }}
919- working-directory : packages/mysql-on-sqlite
920- # Run Translation_Tests then testReconstructTable in one process.
921- # If this hangs, Translation_Tests specifically are leaving Turso in
922- # a bad state. If it passes, the trigger is something earlier.
923- run : |
924- set +e
925- timeout --kill-after=10 180 \
926- php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
927- --filter '^(WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable)'
928- echo "Translation+reconstruct exit: $?"
929-
930- - name : Probe Driver_Tests + Translation + testReconstructTable
931- continue-on-error : true
932- env :
933- LD_PRELOAD : ${{ steps.preload.outputs.value }}
934- working-directory : packages/mysql-on-sqlite
935- # Bisecting: is the pollution coming from WP_SQLite_Driver_Tests?
936- # Prints first-failure marker if testReconstructTable doesn't end.
937- run : |
938- set +e
939- timeout --kill-after=10 600 \
940- php ./vendor/bin/phpunit -c ./phpunit.xml.dist --debug \
941- --filter '^(WP_SQLite_Driver_Tests|WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$)' \
942- 2>&1 | grep -E "(testReconstructTable|^Time:|^OK|FAILURES|^Tests:|^ERRORS|test.*started|test.*ended)" | tail -40
943- echo "Driver+Translation+reconstruct exit: $?"
944-
945- - name : Probe Metadata_Tests + Translation + testReconstructTable
946- continue-on-error : true
947- env :
948- LD_PRELOAD : ${{ steps.preload.outputs.value }}
949- working-directory : packages/mysql-on-sqlite
950- # Bisecting: is the pollution coming from Metadata_Tests?
951- run : |
952- set +e
953- timeout --kill-after=10 300 \
954- php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
955- --filter '^(WP_SQLite_Driver_Metadata_Tests|WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$)'
956- echo "Metadata+Translation+reconstruct exit: $?"
957-
958- - name : Probe PDO_API + Translation + testReconstructTable
959- continue-on-error : true
960- env :
961- LD_PRELOAD : ${{ steps.preload.outputs.value }}
962- working-directory : packages/mysql-on-sqlite
963- # Bisecting: is the pollution coming from PDO_API_Tests?
964- run : |
965- set +e
966- timeout --kill-after=10 300 \
967- php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
968- --filter '^(WP_PDO_MySQL_On_SQLite_PDO_API_Tests|WP_SQLite_Driver_Translation_Tests|WP_SQLite_Information_Schema_Reconstructor_Tests::testReconstructTable$)'
969- echo "PDO_API+Translation+reconstruct exit: $?"
970-
- name: Probe full main run with Translation unskipped + gdb watchdog
  continue-on-error: true
  env:
    LD_PRELOAD: ${{ steps.preload.outputs.value }}
  working-directory: packages/mysql-on-sqlite
  # Reproduce the 4c4f491 main-run state (Translation_Tests unskipped)
  # and capture what PHP is actually doing during the hang.
  #
  # Timeline budget (~7 min total):
  #   0-30s:  build testcases, run PDO_API + Driver_Tests (fast, ~5k tests)
  #   30-60s: Metadata_Tests + Translation_Tests (completed at 60s in 45)
  #   60s:    testReconstructTable starts and hangs
  #   360s:   first gdb snapshot (5 min in)
  #   ~400s:  second gdb snapshot (in case first detached/crashed); it
  #           starts 40s after the first snapshot *finishes*, so it drifts
  #           later by however long the first dump took
  #   420s:   timeout kills php
  run: |
    # Best-effort diagnostics: never let a failing probe command abort
    # the script before the remaining output is printed.
    set +e
    skip_regex='^(?!WP_MySQL_Server_Suite_).+'

    # dump_backtraces LABEL
    #   Print kernel-side wait info and a gdb backtrace of every thread
    #   of the running PHP process, tagging the output with LABEL.
    #   Prints a marker and returns if no PHP process is found.
    dump_backtraces() {
      local label=$1
      # Target the PHP process (not the timeout wrapper): -x matches the
      # exact process name, -o picks the oldest match so that any php
      # children started later are not chosen instead of the main run.
      local PHP_PID
      PHP_PID=$(pgrep -o -x php)
      if [ -z "$PHP_PID" ]; then
        echo "=== watchdog ($label): no php pid found ==="
        return
      fi
      echo "=== watchdog ($label): attaching gdb to php pid $PHP_PID ==="
      # wchan and the kernel stack show what the kernel thinks PHP is
      # waiting on. Neither needs a ptrace attach and both are cheap to
      # read; the stack file is read through sudo because it is
      # root-only.
      echo "--- /proc/$PHP_PID/wchan: $(cat "/proc/$PHP_PID/wchan" 2>/dev/null) ---"
      echo "--- /proc/$PHP_PID/stack ---"
      sudo cat "/proc/$PHP_PID/stack" 2>/dev/null | head -30
      echo "--- gdb bt ---"
      # -batch makes gdb run the commands and exit, detaching from PHP.
      # Output is capped so one snapshot cannot flood the job log.
      sudo gdb -p "$PHP_PID" -batch \
        -ex 'set pagination off' \
        -ex 'info threads' \
        -ex 'thread apply all bt 40' \
        2>&1 | head -400
      echo "=== watchdog ($label): done ==="
    }

    # Background watchdog: two snapshots shortly before the timeout
    # below fires. The subshell inherits dump_backtraces.
    (
      sleep 360 && dump_backtraces "T+360s"
      sleep 40 && dump_backtraces "T+400s"
    ) &
    # PID of the watchdog subshell, kept for any cleanup that follows.
    WATCHDOG=$!

    timeout --kill-after=10 420 \
      php ./vendor/bin/phpunit -c ./phpunit.xml.dist --debug \
      --filter "$skip_regex" 2>&1 | tail -80
    # Capture the status of `timeout` (first pipeline stage) right away.
    # Plain $? here would be the status of `tail`, which hides a
    # 124/137 timeout kill behind a 0. PIPESTATUS requires bash, the
    # default `run` shell on Linux runners.
    status=${PIPESTATUS[0]}
    echo "full-main+Translation exit: $status"
0 commit comments