From b23457faf1c291d0c182f80e68271106f8c5e1d3 Mon Sep 17 00:00:00 2001 From: James Jeffries Date: Wed, 13 May 2026 13:25:45 +0100 Subject: [PATCH 1/4] feat(OBS-2873): configure gRPC HTTP/2 keepalive on client channels Extend shared channel options with keepalive time/timeout, permit pings without calls, and max_pings_without_data parity with netbox-assurance-plugin. Co-authored-by: Cursor --- netboxlabs/diode/sdk/client.py | 39 +++++++++++++++++++++++----------- tests/test_client.py | 19 ++++++++--------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/netboxlabs/diode/sdk/client.py b/netboxlabs/diode/sdk/client.py index 9ece94d..4bf09b1 100644 --- a/netboxlabs/diode/sdk/client.py +++ b/netboxlabs/diode/sdk/client.py @@ -50,6 +50,13 @@ _INGEST_SCOPE = "diode:ingest" _LOGGER = logging.getLogger(__name__) _MAX_RETRIES_ENVVAR_NAME = "DIODE_MAX_AUTH_RETRIES" +# HTTP/2 keepalive: align with netbox-assurance-plugin (ENGHLP-1220) and diode-pro +# server policy (MinTime 10s so client pings must be >= 10s, e.g. 30s interval). +_GRPC_KEEPALIVE_TIME_MS = 30_000 +_GRPC_KEEPALIVE_TIMEOUT_MS = 10_000 +_GRPC_KEEPALIVE_PERMIT_WITHOUT_CALLS = 1 +# 0 = no cap on keepalive pings without data (matches reconciler Python client). +_GRPC_HTTP2_MAX_PINGS_WITHOUT_DATA = 0 def load_dryrun_entities(file_path: str | Path) -> Iterable[Entity]: @@ -138,6 +145,20 @@ def _get_optional_config_value( return value +def _base_grpc_channel_options(primary_user_agent_value: str) -> list[tuple[str, Any]]: + """grpc channel options shared by Diode clients: user-agent and keepalive.""" + return [ + ("grpc.primary_user_agent", primary_user_agent_value), + ("grpc.keepalive_time_ms", _GRPC_KEEPALIVE_TIME_MS), + ("grpc.keepalive_timeout_ms", _GRPC_KEEPALIVE_TIMEOUT_MS), + ( + "grpc.keepalive_permit_without_calls", + _GRPC_KEEPALIVE_PERMIT_WITHOUT_CALLS, + ), + ("grpc.http2.max_pings_without_data", _GRPC_HTTP2_MAX_PINGS_WITHOUT_DATA), + ] + + def _get_proxy_env_var(var_name: str) -> str | None: """Get proxy environment variable (case-insensitive).""" value = os.getenv(var_name.upper()) @@ -335,12 +356,9 @@ def __init__( self._authenticate(_INGEST_SCOPE) - channel_opts = [ - ( - "grpc.primary_user_agent", - f"{self._name}/{self._version} {self._app_name}/{self._app_version}", - ), - ] + channel_opts = _base_grpc_channel_options( + f"{self._name}/{self._version} {self._app_name}/{self._app_version}" + ) proxy_url = _get_grpc_proxy_url(self._target, self._tls_verify) if proxy_url: @@ -631,12 +649,9 @@ def __init__( else None ) - channel_opts = [ - ( - "grpc.primary_user_agent", - f"{self._name}/{self._version} {self._app_name}/{self._app_version}", - ), - ] + channel_opts = _base_grpc_channel_options( + f"{self._name}/{self._version} {self._app_name}/{self._app_version}" + ) proxy_url = _get_grpc_proxy_url(self._target, self._tls_verify) if proxy_url: diff --git a/tests/test_client.py b/tests/test_client.py index 1f60717..4d092d5 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -18,6 +18,7 @@ DiodeMethodClientInterceptor, DiodeOTLPClient, _ClientCallDetails, + _base_grpc_channel_options, _DiodeAuthentication, _get_sentry_dsn, _load_certs, @@ -268,11 +269,10 @@ def test_insecure_channel_options_with_primary_user_agent(mock_diode_authenticat mock_insecure_channel.assert_called_once() _, kwargs = mock_insecure_channel.call_args - assert kwargs["options"] == ( - ( - "grpc.primary_user_agent", - f"{client.name}/{client.version} {client.app_name}/{client.app_version}", - ), + assert kwargs["options"] == tuple( + _base_grpc_channel_options( + f"{client.name}/{client.version} {client.app_name}/{client.app_version}" + ) ) @@ -289,11 +289,10 @@ def test_secure_channel_options_with_primary_user_agent(mock_diode_authenticatio mock_secure_channel.assert_called_once() _, kwargs = mock_secure_channel.call_args - assert kwargs["options"] == ( - ( - "grpc.primary_user_agent", - f"{client.name}/{client.version} {client.app_name}/{client.app_version}", - ), + assert kwargs["options"] == tuple( + _base_grpc_channel_options( + f"{client.name}/{client.version} {client.app_name}/{client.app_version}" + ) ) From 3e0a8d47326c31837b8700474a49ffee2882602b Mon Sep 17 00:00:00 2001 From: James Jeffries Date: Wed, 13 May 2026 13:39:11 +0100 Subject: [PATCH 2/4] fix(OBS-2873): keep Diode ingester keepalive off generic OTLP channel Codex/Copilot review: OTLP exporters target arbitrary collectors; aggressive HTTP/2 keepalive can provoke GOAWAY on idle connections. Restrict keepalive plus max_pings_without_data to DiodeClient only; OTLP keeps user-agent-only options plus existing proxy/ssl behavior. Co-authored-by: Cursor --- netboxlabs/diode/sdk/client.py | 19 +++++++++++++++---- tests/test_client.py | 31 ++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/netboxlabs/diode/sdk/client.py b/netboxlabs/diode/sdk/client.py index 4bf09b1..8ba9fa6 100644 --- a/netboxlabs/diode/sdk/client.py +++ b/netboxlabs/diode/sdk/client.py @@ -145,10 +145,21 @@ def _get_optional_config_value( return value -def _base_grpc_channel_options(primary_user_agent_value: str) -> list[tuple[str, Any]]: - """grpc channel options shared by Diode clients: user-agent and keepalive.""" +def _otlp_grpc_channel_options(primary_user_agent_value: str) -> list[tuple[str, Any]]: + """gRPC channel options for generic OTLP collectors (user-agent only). + + Avoid aggressive HTTP/2 keepalive here: many OTLP backends enforce strict ping + limits and may GOAWAY idle exporters when permit-without-stream or unlimited + pings are enabled. + """ return [ ("grpc.primary_user_agent", primary_user_agent_value), + ] + + +def _diode_ingest_grpc_channel_options(primary_user_agent_value: str) -> list[tuple[str, Any]]: + """gRPC channel options for the Diode ingester API (keepalive-friendly servers).""" + return _otlp_grpc_channel_options(primary_user_agent_value) + [ ("grpc.keepalive_time_ms", _GRPC_KEEPALIVE_TIME_MS), ("grpc.keepalive_timeout_ms", _GRPC_KEEPALIVE_TIMEOUT_MS), ( @@ -356,7 +367,7 @@ def __init__( self._authenticate(_INGEST_SCOPE) - channel_opts = _base_grpc_channel_options( + channel_opts = _diode_ingest_grpc_channel_options( f"{self._name}/{self._version} {self._app_name}/{self._app_version}" ) @@ -649,7 +660,7 @@ def __init__( else None ) - channel_opts = _base_grpc_channel_options( + channel_opts = _otlp_grpc_channel_options( f"{self._name}/{self._version} {self._app_name}/{self._app_version}" ) diff --git a/tests/test_client.py b/tests/test_client.py index 4d092d5..a23ed41 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -18,7 +18,8 @@ DiodeMethodClientInterceptor, DiodeOTLPClient, _ClientCallDetails, - _base_grpc_channel_options, + _diode_ingest_grpc_channel_options, + _otlp_grpc_channel_options, _DiodeAuthentication, _get_sentry_dsn, _load_certs, @@ -270,7 +271,7 @@ def test_insecure_channel_options_with_primary_user_agent(mock_diode_authenticat mock_insecure_channel.assert_called_once() _, kwargs = mock_insecure_channel.call_args assert kwargs["options"] == tuple( - _base_grpc_channel_options( + _diode_ingest_grpc_channel_options( f"{client.name}/{client.version} {client.app_name}/{client.app_version}" ) ) @@ -290,7 +291,7 @@ def test_secure_channel_options_with_primary_user_agent(mock_diode_authenticatio mock_secure_channel.assert_called_once() _, kwargs = mock_secure_channel.call_args assert kwargs["options"] == tuple( - _base_grpc_channel_options( + _diode_ingest_grpc_channel_options( f"{client.name}/{client.version} {client.app_name}/{client.app_version}" ) ) @@ -883,6 +884,30 @@ def test_otlp_client_grpcs_uses_secure_channel(): base_channel.close.assert_called_once() +def test_otlp_insecure_channel_options_exclude_diode_keepalive(): + """OTLP targets arbitrary collectors; only user-agent is forced (Codex/OBS-2873).""" + with ( + patch("netboxlabs.diode.sdk.client.grpc.insecure_channel") as mock_insecure, + patch("netboxlabs.diode.sdk.client.logs_service_pb2_grpc.LogsServiceStub"), + ): + client = DiodeOTLPClient( + target="grpc://collector:4317", + app_name="orb-producer", + app_version="1.2.3", + ) + + mock_insecure.assert_called_once() + _, kwargs = mock_insecure.call_args + ua = ( + f"{client.name}/{client.version} " + f"{client.app_name}/{client.app_version}" + ) + assert kwargs["options"] == tuple(_otlp_grpc_channel_options(ua)) + assert all( + opt[0] != "grpc.keepalive_time_ms" for opt in kwargs["options"] + ) + + def test_diode_authentication_with_custom_certificates(): """Test _DiodeAuthentication with custom certificates - covers SSL context creation.""" # Create test certificate content From 3b042467e4f47cd6f4d6d245afe7e213bf9cb1a0 Mon Sep 17 00:00:00 2001 From: James Jeffries Date: Wed, 13 May 2026 14:15:22 +0100 Subject: [PATCH 3/4] fix: satisfy Ruff docstring and import-sort rules (PR 90 feedback) Reformat OTLP/ingester helper docstrings for D213/D403 and reorder test_client imports for I001. Co-authored-by: Cursor --- netboxlabs/diode/sdk/client.py | 5 +++-- tests/test_client.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/netboxlabs/diode/sdk/client.py b/netboxlabs/diode/sdk/client.py index 8ba9fa6..c050ca8 100644 --- a/netboxlabs/diode/sdk/client.py +++ b/netboxlabs/diode/sdk/client.py @@ -146,7 +146,8 @@ def _get_optional_config_value( def _otlp_grpc_channel_options(primary_user_agent_value: str) -> list[tuple[str, Any]]: - """gRPC channel options for generic OTLP collectors (user-agent only). + """ + Build gRPC channel argument list for generic OTLP collectors (user-agent only). Avoid aggressive HTTP/2 keepalive here: many OTLP backends enforce strict ping limits and may GOAWAY idle exporters when permit-without-stream or unlimited @@ -158,7 +159,7 @@ def _otlp_grpc_channel_options(primary_user_agent_value: str) -> list[tuple[str, def _diode_ingest_grpc_channel_options(primary_user_agent_value: str) -> list[tuple[str, Any]]: - """gRPC channel options for the Diode ingester API (keepalive-friendly servers).""" + """Build gRPC channel argument list for the Diode ingester API (with keepalive).""" return _otlp_grpc_channel_options(primary_user_agent_value) + [ ("grpc.keepalive_time_ms", _GRPC_KEEPALIVE_TIME_MS), ("grpc.keepalive_timeout_ms", _GRPC_KEEPALIVE_TIMEOUT_MS), diff --git a/tests/test_client.py b/tests/test_client.py index a23ed41..622711c 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -19,10 +19,10 @@ DiodeOTLPClient, _ClientCallDetails, _diode_ingest_grpc_channel_options, - _otlp_grpc_channel_options, _DiodeAuthentication, _get_sentry_dsn, _load_certs, + _otlp_grpc_channel_options, load_dryrun_entities, parse_target, ) From c779fec0a9332b425226ca7f33c1dbe39aa99e20 Mon Sep 17 00:00:00 2001 From: James Jeffries Date: Fri, 15 May 2026 09:09:09 +0100 Subject: [PATCH 4/4] remove comment Co-authored-by: Micah Parks <66095735+MicahParks@users.noreply.github.com> --- netboxlabs/diode/sdk/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/netboxlabs/diode/sdk/client.py b/netboxlabs/diode/sdk/client.py index c050ca8..1334366 100644 --- a/netboxlabs/diode/sdk/client.py +++ b/netboxlabs/diode/sdk/client.py @@ -50,7 +50,6 @@ _INGEST_SCOPE = "diode:ingest" _LOGGER = logging.getLogger(__name__) _MAX_RETRIES_ENVVAR_NAME = "DIODE_MAX_AUTH_RETRIES" -# HTTP/2 keepalive: align with netbox-assurance-plugin (ENGHLP-1220) and diode-pro # server policy (MinTime 10s so client pings must be >= 10s, e.g. 30s interval). _GRPC_KEEPALIVE_TIME_MS = 30_000 _GRPC_KEEPALIVE_TIMEOUT_MS = 10_000