Skip to content

Commit fb470bf

Browse files
Do not clear _in_use from _invalidate
The flag's contract is "owned by the task that claimed it between claim-site and clear-site". Clearing it from an out-of-band path (call_soon_threadsafe from the dbapi sync-timeout arm, heartbeat invalidations, transaction()'s rollback-failure arm) lets a concurrent task enter a _run_protocol-protected critical section while the original claimant is still mid-await — and the claimant's own finally then clobbers the new claim. The in-flight task observes the nulled _protocol on its next read/write and clears the flag via its own finally. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cf79cb1 commit fb470bf

3 files changed

Lines changed: 136 additions & 18 deletions

File tree

src/dqliteclient/connection.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2323,14 +2323,17 @@ def _invalidate(self, cause: BaseException | None = None) -> None:
23232323
If ``cause`` is provided, it is remembered so a later caller that
23242324
hits "Not connected" can chain it as ``__cause__`` for diagnostics.
23252325
2326-
Also clears the ``_in_use`` slot: invalidation can be invoked
2327-
out-of-band (e.g. scheduled from the dbapi sync-timeout path
2328-
via ``call_soon_threadsafe``), bypassing ``_run_protocol``'s
2329-
finally that normally resets the flag. An invalidated connection
2330-
is no longer holding a meaningful in-flight operation, so the
2331-
flag and the liveness state must stay consistent — otherwise
2332-
the next call deterministically raises "another operation is
2333-
in progress" on a connection that is in fact dead.
2326+
Does NOT touch the ``_in_use`` slot: the flag's contract is
2327+
"owned by the task that claimed it between claim-site and
2328+
clear-site". Clearing it here — when ``_invalidate`` was
2329+
invoked out-of-band (e.g. ``call_soon_threadsafe`` from the
2330+
dbapi sync-timeout path) — would let a second task into a
2331+
``_run_protocol``-protected critical section while the original
2332+
in-flight task is still mid-``await``. The in-flight task
2333+
unblocks promptly once we null ``self._protocol`` below (its
2334+
next read raises a transport error), and its own
2335+
``finally`` then clears ``_in_use`` — restoring the
2336+
"transient" wait the next caller would see anyway.
23342337
23352338
Synchronous writer-close + async bounded drain: ``protocol.close()``
23362339
is synchronous (writer.close()), but ``wait_closed()`` is a
@@ -2432,7 +2435,13 @@ async def _bounded_drain() -> None:
24322435
self._pending_drain = None
24332436
self._protocol = None
24342437
self._db_id = None
2435-
self._in_use = False
2438+
# NOTE: ``_in_use`` is intentionally NOT cleared here. See the
2439+
# method docstring — the flag's task-ownership contract requires
2440+
# that only the task that claimed it clears it (typically via
2441+
# ``_run_protocol``'s finally at line ~2728 or ``connect()``'s
2442+
# finally arms). Clearing it from this out-of-band path can let
2443+
# a concurrent task enter ``_run_protocol`` while the original
2444+
# claimant is still mid-await.
24362445
# Atomic clear of transaction-state flags: an external
24372446
# invalidation (heartbeat path, KeyboardInterrupt mid-yield,
24382447
# ``call_soon_threadsafe``) lands here without going through

tests/test_connection.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -422,19 +422,24 @@ async def test_connect_emits_debug_logs_on_handshake_and_open(
422422
assert any("handshake ok" in m and "localhost:9001" in m for m in messages)
423423
assert any("db opened" in m and "localhost:9001" in m for m in messages)
424424

425-
async def test_invalidate_clears_in_use_flag(self, connected_connection) -> None:
426-
"""_invalidate must reset _in_use alongside dropping the protocol.
427-
428-
If an out-of-band invalidation (e.g. scheduled from a sync-side
429-
timeout path) fires while _in_use=True, leaving the flag set
430-
deterministically blocks the next call on this connection with
431-
"another operation is in progress" — a misleading error for a
432-
connection that is in fact dead and needs reconnecting.
425+
async def test_invalidate_preserves_in_use_flag(self, connected_connection) -> None:
426+
"""_invalidate must NOT clear _in_use.
427+
428+
The flag's contract is "owned by the task that claimed it
429+
between claim-site and clear-site". Clearing it from an
430+
out-of-band path (``call_soon_threadsafe``, heartbeat,
431+
``transaction()`` rollback-failure arm) would let a second task
432+
enter a ``_run_protocol``-protected critical section while the
433+
original claimant is still mid-``await``. The in-flight task
434+
observes the nulled ``_protocol`` and runs its own ``finally``
435+
to clear the flag; the next caller transiently sees "another
436+
operation is in progress" until that finally runs — which is
437+
the correct behaviour for an in-flight claim.
433438
"""
434439
conn, _, _ = connected_connection
435440
conn._in_use = True
436441
conn._invalidate(OperationalError("synthetic", 0))
437-
assert conn._in_use is False
442+
assert conn._in_use is True
438443
assert conn._protocol is None
439444

440445
async def test_invalidate_closes_transport(self, connected_connection) -> None:
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""Pin: ``_invalidate`` does NOT clear ``_in_use``.
2+
3+
The flag's contract is "owned by the task that claimed it between
4+
claim-site and clear-site". Clearing it from an out-of-band path
5+
(``call_soon_threadsafe`` from the dbapi sync-timeout arm,
6+
heartbeat invalidations, ``transaction()`` rollback-failure arm)
7+
would let a second task enter a ``_run_protocol``-protected
8+
critical section while the original claimant is still mid-``await``.
9+
10+
The in-flight task observes the nulled ``_protocol`` at its next
11+
read / write and runs its own ``finally`` to clear ``_in_use`` —
12+
the post-fix shape never lets a concurrent caller see a
13+
spuriously-cleared flag mid-await.
14+
"""
15+
16+
from __future__ import annotations
17+
18+
import weakref
19+
20+
import pytest
21+
22+
from dqliteclient.connection import DqliteConnection
23+
from dqliteclient.exceptions import InterfaceError
24+
25+
26+
@pytest.mark.asyncio
27+
async def test_invalidate_preserves_in_use_so_concurrent_claimant_is_rejected() -> None:
28+
"""Task A holds ``_in_use=True`` (the in-flight claimant). A
29+
concurrent ``_invalidate`` lands. The flag must NOT be cleared
30+
here — a second task hitting ``_check_in_use`` must still see
31+
the in-progress claim and raise.
32+
"""
33+
import asyncio
34+
35+
loop = asyncio.get_running_loop()
36+
37+
conn = DqliteConnection.__new__(DqliteConnection)
38+
conn._in_use = True # simulate Task A's mid-_run_protocol claim
39+
conn._protocol = object() # type: ignore[assignment]
40+
conn._db_id = 5
41+
conn._in_transaction = False
42+
conn._tx_owner = None
43+
conn._savepoint_stack = []
44+
conn._savepoint_implicit_begin = False
45+
conn._has_untracked_savepoint = False
46+
conn._invalidation_cause = None
47+
conn._bound_loop_ref = weakref.ref(loop)
48+
conn._close_timeout = 5.0
49+
conn._pending_drain = None
50+
conn._address = "127.0.0.1:1"
51+
import os
52+
53+
conn._creator_pid = os.getpid()
54+
conn._pool_released = False
55+
56+
# Out-of-band _invalidate (e.g. from call_soon_threadsafe).
57+
conn._invalidate(RuntimeError("simulated heartbeat invalidation"))
58+
59+
# Load-bearing: the flag was NOT cleared. A second task hitting
60+
# ``_check_in_use`` must observe the still-claimed state.
61+
assert conn._in_use is True, (
62+
"_invalidate must not clear _in_use; otherwise a concurrent "
63+
"task can enter _run_protocol's critical section while the "
64+
"original claimant is still mid-await"
65+
)
66+
67+
# ``_check_in_use`` rejects with InterfaceError.
68+
with pytest.raises(InterfaceError, match="another operation is in progress"):
69+
conn._check_in_use()
70+
71+
72+
@pytest.mark.asyncio
73+
async def test_invalidate_with_no_active_claimant_leaves_in_use_false() -> None:
74+
"""When no task holds the flag (``_in_use=False`` going in),
75+
``_invalidate`` must not flip it to True. The fix is a delete, not
76+
a swap.
77+
"""
78+
import asyncio
79+
80+
loop = asyncio.get_running_loop()
81+
82+
conn = DqliteConnection.__new__(DqliteConnection)
83+
conn._in_use = False
84+
conn._protocol = object() # type: ignore[assignment]
85+
conn._db_id = 5
86+
conn._in_transaction = False
87+
conn._tx_owner = None
88+
conn._savepoint_stack = []
89+
conn._savepoint_implicit_begin = False
90+
conn._has_untracked_savepoint = False
91+
conn._invalidation_cause = None
92+
conn._bound_loop_ref = weakref.ref(loop)
93+
conn._close_timeout = 5.0
94+
conn._pending_drain = None
95+
conn._address = "127.0.0.1:1"
96+
import os
97+
98+
conn._creator_pid = os.getpid()
99+
conn._pool_released = False
100+
101+
conn._invalidate(RuntimeError("simulated"))
102+
103+
# The flag was untouched — and is still safely false.
104+
assert conn._in_use is False

0 commit comments

Comments
 (0)