From 75d361b2309ea01b1650dadfc98bf0beda9d8aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Tue, 28 Apr 2026 19:24:43 +0200 Subject: [PATCH 1/3] policies.py: Remove max reconnect attempts This was kept this way to preserve legacy behavior, but I think changing the behavior will be less of a problem than what the current behavior causes. The policy is used for reconnections (for example, reconnecting control connection). If reconnect policy finishes generation (it will do so after 64 attempts before my change), then the reconnector finish and the driver won't attempt reconnection anymore. This would be a terrible situation. --- cassandra/policies.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cassandra/policies.py b/cassandra/policies.py index ceb5ebdc45..2cb2625baa 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -773,7 +773,7 @@ class ConstantReconnectionPolicy(ReconnectionPolicy): in-between each reconnection attempt. """ - def __init__(self, delay, max_attempts=64): + def __init__(self, delay, max_attempts=None): """ `delay` should be a floating point number of seconds to wait in-between each attempt. @@ -807,10 +807,7 @@ class ExponentialReconnectionPolicy(ReconnectionPolicy): trying to reconnect at exactly the same time. """ - # TODO: max_attempts is 64 to preserve legacy default behavior - # consider changing to None in major release to prevent the policy - # giving up forever - def __init__(self, base_delay, max_delay, max_attempts=64): + def __init__(self, base_delay, max_delay, max_attempts=None): """ `base_delay` and `max_delay` should be in floating point units of seconds. From 7fb09eb444531919a98e3ca0f905b1172fc92b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Tue, 28 Apr 2026 19:58:38 +0200 Subject: [PATCH 2/3] connection.py: Rename timeout to timeout_left This better conveys what this is: not a timeut duration from config, but how much of this timeout is left right now. --- cassandra/connection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index 08501d0a2b..e45a696f41 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1888,13 +1888,13 @@ def run(self): self._raise_if_stopped() # Wait max `self._timeout` seconds for all HeartbeatFutures to complete - timeout = self._timeout + timeout_left = self._timeout start_time = time.time() for f in futures: self._raise_if_stopped() connection = f.connection try: - f.wait(timeout) + f.wait(timeout_left) # TODO: move this, along with connection locks in pool, down into Connection with connection.lock: connection.in_flight -= 1 @@ -1904,7 +1904,7 @@ def run(self): id(connection), connection.endpoint) failed_connections.append((f.connection, f.owner, e)) - timeout = self._timeout - (time.time() - start_time) + timeout_left = self._timeout - (time.time() - start_time) for connection, owner, exc in failed_connections: self._raise_if_stopped() From 4e2e9bb986dee42207606197ae155ba8c6ca932c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Tue, 28 Apr 2026 20:00:46 +0200 Subject: [PATCH 3/3] HearbeatFuture: Use correct timeout in error message The timeout argument in `wait` tells how much we need to wait taking into consideration that we already waited for some other futures. The total wait time that this future had available to complete is different: it includes time we spent waiting for other futures. This created confusing hearbeat messages, that could even show negative wait times. I fixed it by putting both timeouts in the error message. The `timeout` parameter of `OperationTimedOut` I changed to the original timeout because I think it is more useful and relevant here. --- cassandra/connection.py | 8 ++++---- tests/unit/test_connection.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index e45a696f41..da18f4103d 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1821,15 +1821,15 @@ def __init__(self, connection, owner): self._exception = Exception("Failed to send heartbeat because connection 'in_flight' exceeds threshold") self._event.set() - def wait(self, timeout): + def wait(self, timeout, original_timeout): self._event.wait(timeout) if self._event.is_set(): if self._exception: raise self._exception else: - raise OperationTimedOut("Connection heartbeat timeout after %s seconds" % (timeout,), + raise OperationTimedOut("Connection heartbeat timeout (total wait=%s seconds, this wait call=%s seconds)" % (original_timeout, timeout), self.connection.endpoint, - timeout=timeout, + timeout=original_timeout, in_flight=self.connection.in_flight) def _options_callback(self, response): @@ -1894,7 +1894,7 @@ def run(self): self._raise_if_stopped() connection = f.connection try: - f.wait(timeout_left) + f.wait(timeout_left, self._timeout) # TODO: move this, along with connection locks in pool, down into Connection with connection.lock: connection.in_flight -= 1 diff --git a/tests/unit/test_connection.py b/tests/unit/test_connection.py index 2fa7c71196..a1b005544a 100644 --- a/tests/unit/test_connection.py +++ b/tests/unit/test_connection.py @@ -518,7 +518,7 @@ def send_msg(msg, req_id, msg_callback): connection.defunct.assert_has_calls([call(ANY)] * get_holders.call_count) exc = connection.defunct.call_args_list[0][0][0] assert isinstance(exc, OperationTimedOut) - assert exc.errors == 'Connection heartbeat timeout after 0.05 seconds' + assert exc.errors == 'Connection heartbeat timeout (total wait=0.05 seconds, this wait call=0.05 seconds)' assert exc.last_host == DefaultEndPoint('localhost') assert exc.timeout == 0.05 assert isinstance(exc.in_flight, int)