From f7c56a69e9abdaa49b8c005466ceefd0c78db723 Mon Sep 17 00:00:00 2001 From: Morgan Wowk Date: Thu, 28 May 2026 11:35:46 -0700 Subject: [PATCH] bugsnag: strip JSON objects from grouping strings, normalize LauncherError Everything from the first embedded JSON/dict literal (starting with `{"` or `{'`) to the end of the message is now replaced with `{...}` before error messages are used as grouping keys. This prevents Kubernetes pod specs and similar runtime blobs from fragmenting what is structurally the same error into many groups. A dedicated normalizer for LauncherError is also added: it drops everything after the first colon (the serialized pod spec) so that "Failed to create pod: <pod spec>" normalizes to just "LauncherError: Failed to create pod". --- .../instrumentation/error_normalization.py | 19 ++++++++ .../test_error_normalization.py | 45 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/cloud_pipelines_backend/instrumentation/error_normalization.py b/cloud_pipelines_backend/instrumentation/error_normalization.py index 41d2977..06b74b7 100644 --- a/cloud_pipelines_backend/instrumentation/error_normalization.py +++ b/cloud_pipelines_backend/instrumentation/error_normalization.py @@ -16,9 +16,14 @@ r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.IGNORECASE ) _LONG_ALNUM_ID_PATTERN = re.compile(r"\b[a-zA-Z0-9]{16,}\b") +# Matches from the first `{"` or `{'` (an embedded JSON object or Python dict +# literal) to the end of the message. Replaced in grouping strings because such +# blobs hold variable runtime data (e.g. Kubernetes pod specs) that fragments groups. 
+_JSON_OBJECT_PATTERN = re.compile(r"\{['\"].*", re.DOTALL) def _strip_generic(*, message: str) -> str: + message = _JSON_OBJECT_PATTERN.sub("{...}", message) message = _OBJECT_REPR_PATTERN.sub("{object}", message) message = _HEX_ADDRESS_PATTERN.sub("{addr}", message) message = _UUID_PATTERN.sub("{uuid}", message) @@ -85,6 +90,19 @@ def _normalize_orchestrator_error(*, exception: BaseException) -> str | None: return f"OrchestratorError: {message}" +def _normalize_launcher_error(*, exception: BaseException) -> str | None: + try: + from ..launchers.interfaces import LauncherError + except ImportError: + return None + if not isinstance(exception, LauncherError): + return None + # Take only the verb phrase before the first colon to drop any embedded + # serialized data (e.g. the full Kubernetes pod spec appended after ": "). + head = str(exception).split(":", 1)[0].strip() + return f"LauncherError: {head}" + + def normalize_error_message(*, exception: BaseException) -> str: """Return a stable normalized string for error grouping.""" for normalizer in ( @@ -92,6 +110,7 @@ def normalize_error_message(*, exception: BaseException) -> str: _normalize_max_retry_error, _normalize_unicode_decode_error, _normalize_orchestrator_error, + _normalize_launcher_error, ): result = normalizer(exception=exception) if result is not None: diff --git a/tests/instrumentation/test_error_normalization.py b/tests/instrumentation/test_error_normalization.py index dc01b55..f12489f 100644 --- a/tests/instrumentation/test_error_normalization.py +++ b/tests/instrumentation/test_error_normalization.py @@ -184,6 +184,41 @@ def test_strips_object_repr(self): ) +class TestNormalizeLauncherError: + def _make_launcher_error( + self, message: str, cause: BaseException | None = None + ) -> Exception: + try: + from cloud_pipelines_backend.launchers.interfaces import LauncherError + except ImportError: + pytest.skip("LauncherError not importable") + if cause: + try: + raise LauncherError(message) from cause 
+ except LauncherError as exc: + return exc + return LauncherError(message) + + def test_strips_pod_spec_json(self): + pod_spec = ( + "{'apiVersion': 'v1', 'kind': 'Pod', 'metadata': {'name': 'task-abc-xyz'}}" + ) + exc = self._make_launcher_error(f"Failed to create pod: {pod_spec}") + result = error_normalization.normalize_error_message(exception=exc) + assert result == "LauncherError: Failed to create pod" + + def test_with_timeout_cause(self): + cause = TimeoutError("The read operation timed out") + exc = self._make_launcher_error("Failed to create pod: {big spec}", cause=cause) + result = error_normalization.normalize_error_message(exception=exc) + assert result == "LauncherError: Failed to create pod" + + def test_no_colon_in_message(self): + exc = self._make_launcher_error("launch failed") + result = error_normalization.normalize_error_message(exception=exc) + assert result == "LauncherError: launch failed" + + class TestFallback: def test_strips_hex_address(self): exc = ValueError("object at 0xdeadbeef failed") @@ -204,3 +239,13 @@ def test_stable_message_unchanged(self): exc = AttributeError("'NoneType' object has no attribute 'encode'") result = error_normalization.normalize_error_message(exception=exc) assert result == "AttributeError: 'NoneType' object has no attribute 'encode'" + + def test_strips_json_object(self): + exc = RuntimeError("operation failed: {'key': 'value', 'nested': {'a': 1}}") + result = error_normalization.normalize_error_message(exception=exc) + assert result == "RuntimeError: operation failed: {...}" + + def test_strips_json_object_double_quotes(self): + exc = RuntimeError('operation failed: {"key": "value"}') + result = error_normalization.normalize_error_message(exception=exc) + assert result == "RuntimeError: operation failed: {...}"