Skip to content

Commit 61e2bb7

Browse files
committed
Fix re-entrant deadlock in Turso's sqlite3_finalize
Root cause captured by gdb: during a UDF callback fired from sqlite3_step (which holds db.inner mutex), PHP's cycle GC fires a PDO statement destructor. The destructor calls sqlite3_finalize, which blocks on the same non-reentrant std::sync::Mutex. #5 sqlite3_finalize <- waits on db.inner #7 php_pdo_free_statement #12 zend_gc_collect_cycles #13-39 execute_ex <- UDF callback running user PHP Patch Turso's sqlite3_finalize and stmt_run_to_completion to use try_lock. On contention (re-entrant path), skip the drain and the stmt_list unlink. The list is only walked by sqlite3_next_stmt (unused by pdo_sqlite) and dropped on close, so a stale entry is harmless; the stmt Box is still freed. Unskip Translation_Tests in the main run now that the hang is fixed.
1 parent 3f3e53a commit 61e2bb7

1 file changed

Lines changed: 108 additions & 9 deletions

File tree

.github/workflows/phpunit-tests-turso.yml

Lines changed: 108 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,110 @@ jobs:
205205
print('patched slot-reuse destroy invocation')
206206
PY_FIX_DESTROY
207207
208+
# sqlite3_finalize uses a non-reentrant std::sync::Mutex on the db.
209+
# PHP's cycle GC can fire a PDO statement destructor while another
210+
# statement's sqlite3_step is in progress (i.e., from inside a UDF
211+
# callback whose bridge re-enters PHP). The outer step holds the
212+
# mutex, the inner finalize blocks on it, and we deadlock.
213+
#
214+
# Fix: use try_lock; on contention, skip the stmt_list unlink and
215+
# the drain. The list is only traversed by sqlite3_next_stmt (which
216+
# pdo_sqlite doesn't call) and dropped on sqlite3_close, so a stale
217+
# entry is harmless; the stmt's own Box is still freed below.
218+
python3 - <<'PY_FIX_FINALIZE'
219+
p = 'sqlite3/src/lib.rs'
220+
s = open(p).read()
221+
old = (
222+
" if !stmt_ref.db.is_null() {\n"
223+
" let db = &mut *stmt_ref.db;\n"
224+
" let mut db_inner = db.inner.lock().unwrap();\n"
225+
"\n"
226+
" if db_inner.stmt_list == stmt {\n"
227+
" db_inner.stmt_list = stmt_ref.next;\n"
228+
" } else {\n"
229+
" let mut current = db_inner.stmt_list;\n"
230+
" while !current.is_null() {\n"
231+
" let current_ref = &mut *current;\n"
232+
" if current_ref.next == stmt {\n"
233+
" current_ref.next = stmt_ref.next;\n"
234+
" break;\n"
235+
" }\n"
236+
" current = current_ref.next;\n"
237+
" }\n"
238+
" }\n"
239+
" }\n"
240+
)
241+
new = (
242+
" if !stmt_ref.db.is_null() {\n"
243+
" let db = &mut *stmt_ref.db;\n"
244+
" // try_lock to avoid deadlock when finalize is invoked\n"
245+
" // re-entrantly (GC destructor during UDF callback).\n"
246+
" if let Ok(mut db_inner) = db.inner.try_lock() {\n"
247+
" if db_inner.stmt_list == stmt {\n"
248+
" db_inner.stmt_list = stmt_ref.next;\n"
249+
" } else {\n"
250+
" let mut current = db_inner.stmt_list;\n"
251+
" while !current.is_null() {\n"
252+
" let current_ref = &mut *current;\n"
253+
" if current_ref.next == stmt {\n"
254+
" current_ref.next = stmt_ref.next;\n"
255+
" break;\n"
256+
" }\n"
257+
" current = current_ref.next;\n"
258+
" }\n"
259+
" }\n"
260+
" }\n"
261+
" }\n"
262+
)
263+
assert old in s, 'sqlite3_finalize stmt_list block not found'
264+
s = s.replace(old, new, 1)
265+
266+
# stmt_run_to_completion (called at the top of sqlite3_finalize)
267+
# invokes sqlite3_step, which also locks db.inner. Under the same
268+
# re-entrant deadlock, this blocks before we even reach the patch
269+
# above. Make the drain loop bail out if stepping can't acquire
270+
# the mutex.
271+
old_src = (
272+
"unsafe fn stmt_run_to_completion(stmt: *mut sqlite3_stmt) -> ffi::c_int {\n"
273+
" let stmt_ref = &mut *stmt;\n"
274+
" while stmt_ref.stmt.execution_state().is_running() {\n"
275+
" let result = sqlite3_step(stmt);\n"
276+
" if result != SQLITE_DONE && result != SQLITE_ROW {\n"
277+
" return result;\n"
278+
" }\n"
279+
" }\n"
280+
" SQLITE_OK\n"
281+
"}\n"
282+
)
283+
new_src = (
284+
"unsafe fn stmt_run_to_completion(stmt: *mut sqlite3_stmt) -> ffi::c_int {\n"
285+
" let stmt_ref = &mut *stmt;\n"
286+
" // Skip drain if we can't acquire the db mutex: we're\n"
287+
" // re-entering from a UDF callback's GC destructor, and\n"
288+
" // sqlite3_step would block forever. The stmt will be\n"
289+
" // freed anyway by the caller.\n"
290+
" if !stmt_ref.db.is_null() {\n"
291+
" let db = &*stmt_ref.db;\n"
292+
" if db.inner.try_lock().is_err() {\n"
293+
" return SQLITE_OK;\n"
294+
" }\n"
295+
" }\n"
296+
" while stmt_ref.stmt.execution_state().is_running() {\n"
297+
" let result = sqlite3_step(stmt);\n"
298+
" if result != SQLITE_DONE && result != SQLITE_ROW {\n"
299+
" return result;\n"
300+
" }\n"
301+
" }\n"
302+
" SQLITE_OK\n"
303+
"}\n"
304+
)
305+
assert old_src in s, 'stmt_run_to_completion block not found'
306+
s = s.replace(old_src, new_src, 1)
307+
308+
open(p, 'w').write(s)
309+
print('patched sqlite3_finalize + stmt_run_to_completion for GC re-entry')
310+
PY_FIX_FINALIZE
311+
208312
# Turso's custom-function registry is capped at 32 pre-generated
209313
# bridge trampolines; the driver registers 44 UDFs, so the last 12
210314
# silently fail. Bump to 64 by adding 32 more func_bridge!/FUNC_BRIDGES
@@ -964,15 +1068,10 @@ jobs:
9641068
# any <error> or <failure>, the step fails.
9651069
run: |
9661070
set +e
967-
# Currently skipped:
968-
# - WP_MySQL_Server_Suite_*: tokenise/parse a 5.7 MB fixture in a
969-
# single loop, runs well over 10 min under LD_PRELOAD (not a
970-
# Turso issue; pure PHP work).
971-
# - WP_SQLite_Driver_Translation_Tests: all 56 tests complete,
972-
# but leave Turso in a state where the next test hangs
973-
# (testReconstructTable 10-min timeout). Re-skip until the
974-
# state-leak is understood.
975-
skip_regex='^(?!WP_MySQL_Server_Suite_|WP_SQLite_Driver_Translation_Tests).+'
1071+
# Only the two CSV-driven server-suite tests remain skipped —
1072+
# they tokenise/parse a 5.7 MB fixture in a single loop and run
1073+
# well over 10 min under LD_PRELOAD (not a Turso issue).
1074+
skip_regex='^(?!WP_MySQL_Server_Suite_).+'
9761075
timeout --kill-after=10 600 \
9771076
php ./vendor/bin/phpunit -c ./phpunit.xml.dist \
9781077
--debug \

0 commit comments

Comments
 (0)