Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions cloud_pipelines_backend/instrumentation/error_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,14 @@
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.IGNORECASE
)
# Matches runs of 16 or more ASCII letters/digits delimited by word boundaries.
_LONG_ALNUM_ID_PATTERN = re.compile(r"\b[a-zA-Z0-9]{16,}\b")
# Matches from the first `{"` or `{'` (the start of an embedded JSON object or
# Python dict literal) through the END of the string: `.*` is greedy and
# re.DOTALL lets it cross newlines, so any text following the object is
# consumed by the match as well.
# These are stripped from grouping strings because they contain highly variable
# runtime data (e.g. full Kubernetes pod specs) that would fragment error groups.
_JSON_OBJECT_PATTERN = re.compile(r"\{['\"].*", re.DOTALL)


def _strip_generic(*, message: str) -> str:
message = _JSON_OBJECT_PATTERN.sub("{...}", message)
message = _OBJECT_REPR_PATTERN.sub("{object}", message)
message = _HEX_ADDRESS_PATTERN.sub("{addr}", message)
message = _UUID_PATTERN.sub("{uuid}", message)
Expand Down Expand Up @@ -85,13 +90,27 @@ def _normalize_orchestrator_error(*, exception: BaseException) -> str | None:
return f"OrchestratorError: {message}"


def _normalize_launcher_error(*, exception: BaseException) -> str | None:
try:
from ..launchers.interfaces import LauncherError
except ImportError:
return None
if not isinstance(exception, LauncherError):
return None
# Take only the verb phrase before the first colon to drop any embedded
# serialized data (e.g. the full Kubernetes pod spec appended after ": ").
head = str(exception).split(":", 1)[0].strip()
return f"LauncherError: {head}"


def normalize_error_message(*, exception: BaseException) -> str:
"""Return a stable normalized string for error grouping."""
for normalizer in (
_normalize_k8s_api_exception,
_normalize_max_retry_error,
_normalize_unicode_decode_error,
_normalize_orchestrator_error,
_normalize_launcher_error,
):
result = normalizer(exception=exception)
if result is not None:
Expand Down
45 changes: 45 additions & 0 deletions tests/instrumentation/test_error_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,41 @@ def test_strips_object_repr(self):
)


class TestNormalizeLauncherError:
    """Checks how ``normalize_error_message`` groups ``LauncherError`` instances."""

    def _make_launcher_error(
        self, message: str, cause: BaseException | None = None
    ) -> Exception:
        """Create a ``LauncherError`` carrying ``message``.

        When ``cause`` is given, the error is raised ``from cause`` and caught
        again so the returned instance has ``__cause__`` set the same way a
        real chained raise would. The calling test is skipped if
        ``LauncherError`` cannot be imported.
        """
        try:
            from cloud_pipelines_backend.launchers.interfaces import LauncherError
        except ImportError:
            pytest.skip("LauncherError not importable")
        # Compare against None explicitly: the parameter is optional, and the
        # decision should not depend on the truthiness of the cause object.
        if cause is None:
            return LauncherError(message)
        try:
            raise LauncherError(message) from cause
        except LauncherError as exc:
            return exc

    def test_strips_pod_spec_json(self):
        """A dict-literal pod spec after the colon is dropped from the key."""
        pod_spec = (
            "{'apiVersion': 'v1', 'kind': 'Pod', 'metadata': {'name': 'task-abc-xyz'}}"
        )
        exc = self._make_launcher_error(f"Failed to create pod: {pod_spec}")
        result = error_normalization.normalize_error_message(exception=exc)
        assert result == "LauncherError: Failed to create pod"

    def test_with_timeout_cause(self):
        """A chained cause does not change the normalized key."""
        cause = TimeoutError("The read operation timed out")
        exc = self._make_launcher_error("Failed to create pod: {big spec}", cause=cause)
        result = error_normalization.normalize_error_message(exception=exc)
        assert result == "LauncherError: Failed to create pod"

    def test_no_colon_in_message(self):
        """A message without a colon is kept whole."""
        exc = self._make_launcher_error("launch failed")
        result = error_normalization.normalize_error_message(exception=exc)
        assert result == "LauncherError: launch failed"


class TestFallback:
def test_strips_hex_address(self):
exc = ValueError("object at 0xdeadbeef failed")
Expand All @@ -204,3 +239,13 @@ def test_stable_message_unchanged(self):
exc = AttributeError("'NoneType' object has no attribute 'encode'")
result = error_normalization.normalize_error_message(exception=exc)
assert result == "AttributeError: 'NoneType' object has no attribute 'encode'"

def test_strips_json_object(self):
exc = RuntimeError("operation failed: {'key': 'value', 'nested': {'a': 1}}")
result = error_normalization.normalize_error_message(exception=exc)
assert result == "RuntimeError: operation failed: {...}"

def test_strips_json_object_double_quotes(self):
exc = RuntimeError('operation failed: {"key": "value"}')
result = error_normalization.normalize_error_message(exception=exc)
assert result == "RuntimeError: operation failed: {...}"
Loading