diff --git a/CHANGELOG.md b/CHANGELOG.md index f751d7e..1f0b1e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,66 @@ and this file MUST be updated together whenever `__version__` changes. --- +## [0.8.0-dev9] — Fix webhook HMAC verify silently failing open (security) + +**Pre-existing bug caught by the dev8 deploy.** While verifying the +new Meraki webhook publisher end-to-end on `cpn-ful-netcortex1`, an +adapter introspection revealed that both +`webhooks/meraki.py::_get_shared_secret` and +`webhooks/catalyst_center.py::_get_shared_secret` called +`backend.get_secret(...)` — which is **not** a real method on +`AwsSecretsManagerBackend` (the actual method is +`backend.get(path, required=True)`). + +The bare `except Exception` swallowed the resulting `AttributeError`, +`_SECRET_CACHE` stayed empty, and both webhook handlers fell through +to their "no-secret-configured" branch — which **accepts the webhook +without HMAC verification**, with only a warning log. + +In other words: every Meraki and Catalyst Center webhook received +since these modules shipped has bypassed signature verification. +With dev8 publishing `:ReflexEvent` nodes from those webhooks, any +unauthenticated POST to `/webhooks/meraki/` or +`/webhooks/catalyst_center/` could now produce graph +artifacts. Fixing immediately. + +### Changed +- `webhooks/meraki.py::_get_shared_secret`: now calls + `await backend.get(path, required=False)` (the correct API) and + distinguishes "no secret stored for this tenant" (return None) + from "backend error" (log loudly + return None). Cache still + populates on success. +- `webhooks/catalyst_center.py::_get_shared_secret`: same fix. + +### Tests +- New `tests/webhooks/test_secret_backend_integration.py` with 6 + cases asserting that both webhook modules use the correct + `backend.get(path, required=False)` call, handle missing tenants + cleanly, swallow backend errors, and cache the result. +- These tests would have caught the original bug. The pre-existing + route tests bypass `_get_shared_secret` entirely (they write to + `_SECRET_CACHE` directly via fixture), which is exactly how the + bug hid for so long. Lesson learned in `conftest.py`'s long + docstring for future webhook receivers (dev10/11/12). + +### Operational notes +- Existing operators who relied on the bypass behavior to test + webhook ingestion without secrets configured will see 401s after + this deploys. The "no secret configured" warning path still works + if `_SECRET_CACHE` is empty AND the backend returns nothing for + the tenant — that's distinct from the actual bug, which was + AttributeError-from-wrong-API-method. +- Verify after deploy: + - Send an unsigned POST to `/webhooks/meraki/` + → expect 401 (was 200 before this fix) + - Send a correctly-signed POST → expect 200 and a published event +- Search logs for `webhook.meraki.secret_fetch_failed` — should be + zero (was previously firing on every webhook for tenants whose + secret was actually present in AWS SM, masked as + AttributeError). + +--- + ## [0.8.0-dev8] — Second sensory publisher: Meraki webhook → NATS First webhook-side sensory publisher. The web process now publishes diff --git a/netcortex/__init__.py b/netcortex/__init__.py index 43231f5..ed2c263 100644 --- a/netcortex/__init__.py +++ b/netcortex/__init__.py @@ -22,4 +22,4 @@ ``CHANGELOG.md`` MUST be kept in sync whenever ``__version__`` changes. """ -__version__ = "0.8.0-dev8" +__version__ = "0.8.0-dev9" diff --git a/netcortex/webhooks/catalyst_center.py b/netcortex/webhooks/catalyst_center.py index af25105..a867d4e 100644 --- a/netcortex/webhooks/catalyst_center.py +++ b/netcortex/webhooks/catalyst_center.py @@ -24,19 +24,33 @@ async def _get_shared_secret(instance_name: str) -> str | None: + """Fetch the Catalyst Center webhook shared token from the backend. + + See ``netcortex/webhooks/meraki.py::_get_shared_secret`` for the + full story on the pre-0.8.0-dev9 ``AttributeError``-silently- + swallowed-by-bare-except bug that this function shared. + """ if instance_name in _SECRET_CACHE: return _SECRET_CACHE[instance_name] + path = f"netcortex/webhooks/catalyst_center/{instance_name}" try: from netcortex.secrets import get_secret_backend backend = get_secret_backend() - data = await backend.get_secret(f"netcortex/webhooks/catalyst_center/{instance_name}") - secret = data.get("shared_secret") - if secret: - _SECRET_CACHE[instance_name] = secret - return secret + data = await backend.get(path, required=False) except Exception as exc: - log.warning("webhook.catc.secret_fetch_failed", instance=instance_name, error=str(exc)) + log.warning( + "webhook.catc.secret_fetch_failed", + instance=instance_name, + path=path, + error=str(exc), + ) + return None + if not data: return None + secret = data.get("shared_secret") + if secret: + _SECRET_CACHE[instance_name] = secret + return secret async def handle_catalyst_center_webhook( diff --git a/netcortex/webhooks/meraki.py b/netcortex/webhooks/meraki.py index b79c9da..b06e19c 100644 --- a/netcortex/webhooks/meraki.py +++ b/netcortex/webhooks/meraki.py @@ -28,20 +28,49 @@ async def _get_shared_secret(instance_name: str) -> str | None: - """Fetch the Meraki webhook shared secret from the secret backend.""" + """Fetch the Meraki webhook shared secret from the secret backend. + + Returns ``None`` when the secret is not configured for this tenant. + Note: returning ``None`` causes the route to accept the webhook + with a loud warning (see :func:`handle_meraki_webhook`). That is + intentional for bootstrapping (operators can validate the receive + path before plumbing secrets), **but** it means a misconfigured + backend silently disables HMAC verification — so this function + distinguishes "secret not configured" (return None) from "backend + unreachable / wrong API" (log loudly + return None — same outcome + so we don't crash the request, but operators can spot the + mismatch in logs). + + Historical note: prior to 0.8.0-dev9 this function called + ``backend.get_secret(...)`` which is not a real method on + :class:`AwsSecretsManagerBackend` — the bare ``except Exception`` + swallowed the resulting ``AttributeError`` and HMAC verification + silently fell open for every tenant. Fixed in dev9 to use the + correct ``backend.get(path, required=False)`` API. + """ if instance_name in _SECRET_CACHE: return _SECRET_CACHE[instance_name] + path = f"netcortex/webhooks/meraki/{instance_name}" try: from netcortex.secrets import get_secret_backend backend = get_secret_backend() - data = await backend.get_secret(f"netcortex/webhooks/meraki/{instance_name}") - secret = data.get("shared_secret") - if secret: - _SECRET_CACHE[instance_name] = secret - return secret + data = await backend.get(path, required=False) except Exception as exc: - log.warning("webhook.meraki.secret_fetch_failed", instance=instance_name, error=str(exc)) + log.warning( + "webhook.meraki.secret_fetch_failed", + instance=instance_name, + path=path, + error=str(exc), + ) + return None + if not data: + # No secret stored for this tenant. Distinct from a backend + # error — the path is well-formed, the secret just isn't set. return None + secret = data.get("shared_secret") + if secret: + _SECRET_CACHE[instance_name] = secret + return secret def _verify_signature(body: bytes, signature_header: str | None, shared_secret: str) -> bool: diff --git a/pyproject.toml b/pyproject.toml index d873398..24feb7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "netcortex" -version = "0.8.0.dev8" +version = "0.8.0.dev9" description = "The intelligence layer for your network — multi-dimensional graph of the network bridging Meraki, Catalyst Center, Intersight, and more with NetBox as SoT" readme = "README.md" requires-python = ">=3.12" diff --git a/tests/webhooks/test_secret_backend_integration.py b/tests/webhooks/test_secret_backend_integration.py new file mode 100644 index 0000000..4477c7b --- /dev/null +++ b/tests/webhooks/test_secret_backend_integration.py @@ -0,0 +1,189 @@ +"""Regression tests for the webhook → secret-backend call paths. + +The bug fixed in 0.8.0-dev9 was that ``webhooks/meraki.py`` and +``webhooks/catalyst_center.py`` called ``backend.get_secret(...)``, +which is **not** a real method on :class:`AwsSecretsManagerBackend`. +The bare ``except Exception`` swallowed the resulting +``AttributeError``, leaving ``_SECRET_CACHE`` empty and causing +:func:`handle_meraki_webhook` to fall through to the +"no-secret-configured" branch — which accepts the webhook **without +HMAC verification**, just with a warning. + +The bug was not caught by existing route tests because those tests +inject the secret directly into ``_SECRET_CACHE`` via fixture, +bypassing the buggy fetch path. + +These tests stand up a fake backend that exposes the real backend's +shape (``get(path, required=False) -> dict | None``) and assert that +the webhook modules: + +1. Call the correct method (``get``, not ``get_secret``). +2. Pass ``required=False`` so a missing tenant returns ``None`` + without raising. +3. Return the extracted ``shared_secret`` when the backend has one. +4. Return ``None`` (gracefully) when the backend has nothing. +5. Log and return ``None`` on a backend exception (don't 5xx). +""" + +from __future__ import annotations + +from typing import Any + +import pytest + + +class FakeBackend: + """Mimics the real ``AwsSecretsManagerBackend.get`` signature. + + The shape is intentionally drawn from + ``inspect.signature(backend.get)`` on a live backend: + ``get(path: str, required: bool = True) -> dict[str, Any]``. + + We track calls so tests can assert how the webhook module invoked + us — in particular, that it used ``get`` (not ``get_secret``) and + that it passed ``required=False``. + """ + + def __init__(self, data: dict[str, dict[str, Any]] | None = None) -> None: + self._data = data or {} + self.calls: list[tuple[str, bool]] = [] + self.raise_on_get: Exception | None = None + + async def get(self, path: str, required: bool = True) -> dict[str, Any] | None: + self.calls.append((path, required)) + if self.raise_on_get is not None: + raise self.raise_on_get + if path in self._data: + return self._data[path] + if required: + # Mimics the real backend's SecretNotFoundError behavior. + raise KeyError(f"secret not found: {path}") + return None + + +@pytest.fixture +def reset_caches(monkeypatch: pytest.MonkeyPatch) -> None: + """Clear the module-level secret caches so each test sees a cold path.""" + from netcortex.webhooks import meraki as meraki_module + from netcortex.webhooks import catalyst_center as catc_module + + monkeypatch.setattr(meraki_module, "_SECRET_CACHE", {}) + monkeypatch.setattr(catc_module, "_SECRET_CACHE", {}) + + +# --------------------------------------------------------------------------- +# Meraki +# --------------------------------------------------------------------------- + + +async def test_meraki_secret_fetch_uses_correct_backend_method( + monkeypatch: pytest.MonkeyPatch, reset_caches: None +) -> None: + fake = FakeBackend({ + "netcortex/webhooks/meraki/PROD_TENANT": {"shared_secret": "test-secret-meraki"}, + }) + monkeypatch.setattr( + "netcortex.secrets.get_secret_backend", lambda: fake + ) + + from netcortex.webhooks.meraki import _get_shared_secret + + secret = await _get_shared_secret("PROD_TENANT") + assert secret == "test-secret-meraki" + # Confirms the dev9 fix: must call ``get(path, required=False)``, + # NOT ``get_secret(path)``. + assert fake.calls == [("netcortex/webhooks/meraki/PROD_TENANT", False)] + + +async def test_meraki_secret_fetch_missing_tenant_returns_none( + monkeypatch: pytest.MonkeyPatch, reset_caches: None +) -> None: + """A tenant with no stored secret returns None without raising — + the route then accepts (with a loud warning) so operators can + bootstrap the integration.""" + fake = FakeBackend({}) + monkeypatch.setattr( + "netcortex.secrets.get_secret_backend", lambda: fake + ) + + from netcortex.webhooks.meraki import _get_shared_secret + + secret = await _get_shared_secret("UNCONFIGURED_TENANT") + assert secret is None + assert fake.calls == [("netcortex/webhooks/meraki/UNCONFIGURED_TENANT", False)] + + +async def test_meraki_secret_fetch_backend_failure_logged_and_none( + monkeypatch: pytest.MonkeyPatch, reset_caches: None +) -> None: + """A transient backend error must not 5xx the webhook route — + log and return None, route degrades to no-secret-configured path.""" + fake = FakeBackend({}) + fake.raise_on_get = RuntimeError("AWS Secrets Manager unreachable") + monkeypatch.setattr( + "netcortex.secrets.get_secret_backend", lambda: fake + ) + + from netcortex.webhooks.meraki import _get_shared_secret + + secret = await _get_shared_secret("PROD_TENANT") + assert secret is None + + +async def test_meraki_secret_fetch_caches_result( + monkeypatch: pytest.MonkeyPatch, reset_caches: None +) -> None: + """Second call hits the cache, not the backend.""" + fake = FakeBackend({ + "netcortex/webhooks/meraki/PROD_TENANT": {"shared_secret": "test-secret-meraki"}, + }) + monkeypatch.setattr( + "netcortex.secrets.get_secret_backend", lambda: fake + ) + + from netcortex.webhooks.meraki import _get_shared_secret + + s1 = await _get_shared_secret("PROD_TENANT") + s2 = await _get_shared_secret("PROD_TENANT") + assert s1 == s2 == "test-secret-meraki" + assert len(fake.calls) == 1 # second call cached + + +# --------------------------------------------------------------------------- +# Catalyst Center (same bug, same fix, same test surface) +# --------------------------------------------------------------------------- + + +async def test_catalyst_center_secret_fetch_uses_correct_backend_method( + monkeypatch: pytest.MonkeyPatch, reset_caches: None +) -> None: + fake = FakeBackend({ + "netcortex/webhooks/catalyst_center/cpn-ful-catc1": { + "shared_secret": "test-secret-catc", + }, + }) + monkeypatch.setattr( + "netcortex.secrets.get_secret_backend", lambda: fake + ) + + from netcortex.webhooks.catalyst_center import _get_shared_secret + + secret = await _get_shared_secret("cpn-ful-catc1") + assert secret == "test-secret-catc" + assert fake.calls == [ + ("netcortex/webhooks/catalyst_center/cpn-ful-catc1", False) + ] + + +async def test_catalyst_center_secret_fetch_missing_returns_none( + monkeypatch: pytest.MonkeyPatch, reset_caches: None +) -> None: + fake = FakeBackend({}) + monkeypatch.setattr( + "netcortex.secrets.get_secret_backend", lambda: fake + ) + + from netcortex.webhooks.catalyst_center import _get_shared_secret + + secret = await _get_shared_secret("UNCONFIGURED_TENANT") + assert secret is None