Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,49 @@ and this file MUST be updated together whenever `__version__` changes.

---

## [0.8.0-dev6] — Sanitize whitespace in NATS subject target parts

Hot-fix to dev5, caught within seconds of the dev5 deploy on
`cpn-ful-netcortex1`. Meraki MS switches expose interface names like
`"Port 3"`, `"Port 4"` with literal spaces — NATS subjects forbid
whitespace, so the publisher's validator (correctly) rejected every
such event and the worker log filled with:

```
snmp.link_state.publish_failed device=cpn-arlington-ms1 interface=Port 3
error=sensory subject target part 0='cpn-arlington-ms1|Port 3'
contains whitespace; NATS subjects must be whitespace-free
```

The fix sanitizes whitespace at the `SensoryPublisher` boundary so every
future publisher (Meraki webhook, SNMP trap, gNMI dial-out) inherits
the behavior without each one re-implementing it.

### Changed
- `SensoryPublisher.publish` now collapses any whitespace run in each
target part to a single `_` before building the subject. The original
identifier is preserved verbatim in the payload's `interface` /
`device` / `target` fields, so consumers that need the human-readable
name still have it. Example: `Port 3` → subject token `Port_3`,
payload `interface: "Port 3"`.

### Unchanged on purpose
- Dots in target parts remain a hard error (they are the NATS token
separator; silently splitting would mask a programmer bug).
- The `sensory_subject()` validator and all other Protocol-level
rejections are unchanged.

### Tests
- Added 3 cases to `tests/thalamus/test_sensory_publisher.py`:
whitespace sanitization, multi-whitespace-run collapse, dots-still-reject.

### Operational note
Once deployed, the previously-suppressed Meraki MS port events will
start flowing through the bus on the next SNMP poll cycle and (if any
are actually down) will produce `:ReflexEvent` nodes in Neo4j.

---

## [0.8.0-dev5] — First sensory publisher: SNMP link-state → NATS → :ReflexEvent

Closes the loop on the dev1–dev4 foundation. The bus has been live in
Expand Down
2 changes: 1 addition & 1 deletion netcortex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
``CHANGELOG.md`` MUST be kept in sync whenever ``__version__`` changes.
"""

__version__ = "0.8.0-dev5"
__version__ = "0.8.0-dev6"
33 changes: 31 additions & 2 deletions netcortex/thalamus/sensory_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
fails at startup not at runtime
* tag the payload with ``recorded_at`` and ``source`` for downstream
fusion / dedup without each publisher reinventing the same code
* sanitize whitespace in target parts — vendor identifiers like Meraki's
``"Port 3"`` interface names contain spaces, which NATS subjects
forbid. We replace each whitespace run with ``_`` so the subject is
valid and the original identifier is preserved in the payload's
``interface``/``target`` field.
* emit a structured ``bus.published`` log line so an operator can see the
full event flow without a debugger
* swallow-and-log errors so a transient NATS hiccup doesn't crash the
Expand All @@ -25,6 +30,7 @@
from __future__ import annotations

import logging
import re
from datetime import datetime, timezone
from typing import Any

Expand All @@ -33,6 +39,25 @@

_LOG = logging.getLogger(__name__)

# Matches one-or-more whitespace characters. Whitespace in NATS subjects
# is invalid; we collapse any run (regular space, tab, NBSP, etc.) to a
# single ``_`` so vendor identifiers like "Port 3" or "Tunnel 1" become
# valid subject tokens without losing the structure of the original
# identifier (operators can still read "Port_3" and recognize "Port 3").
_WHITESPACE_RUN = re.compile(r"\s+")


def _sanitize_target_part(part: str) -> str:
"""Make one target part NATS-subject-safe.

Currently only collapses whitespace runs to ``_``. Dots are a hard
programmer error (they are the NATS token separator and would split
the target across tokens), so we deliberately do NOT silently
replace them — the :func:`sensory_subject` validator catches them
and raises, which is what we want.
"""
return _WHITESPACE_RUN.sub("_", part)


class SensoryPublisher:
"""Adapter-friendly facade over an :class:`EventBus`.
Expand Down Expand Up @@ -76,9 +101,13 @@ async def publish(

Programmer errors (unknown event_class, malformed target) raise
immediately so they surface in unit tests rather than in
production logs.
production logs. Whitespace in target parts is **not** an error
— vendor identifiers commonly include spaces and we sanitize
them transparently. The original (un-sanitized) identifier is
preserved in the payload by the caller.
"""
subject = sensory_subject(event_class, self._source, *target_parts)
sanitized_parts = tuple(_sanitize_target_part(p) for p in target_parts)
subject = sensory_subject(event_class, self._source, *sanitized_parts)
full_payload = dict(payload or {})
# `source` echoed into the payload so downstream consumers that
# don't parse the subject (or that re-emit on a derived subject)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "netcortex"
version = "0.8.0.dev5"
version = "0.8.0.dev6"
description = "The intelligence layer for your network — multi-dimensional graph of the network bridging Meraki, Catalyst Center, Intersight, and more with NetBox as SoT"
readme = "README.md"
requires-python = ">=3.12"
Expand Down
72 changes: 72 additions & 0 deletions tests/thalamus/test_sensory_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,78 @@ async def broken_publish(*args: Any, **kwargs: Any) -> None:
await pub.publish("link_down", "r1|Gi0/1")


@pytest.mark.asyncio
async def test_publish_sanitizes_whitespace_in_target_parts() -> None:
"""Vendor identifiers like Meraki's 'Port 3' must round-trip cleanly.

Real bug caught in the dev5 deploy: NATS subjects forbid whitespace,
so without sanitization every Meraki MS port (Port 1, Port 2, ...)
drops its link-state events. The publisher collapses whitespace runs
to '_' so the subject is valid; the payload retains the original
interface name for consumers that care about the human-readable form.
"""
bus = InMemoryEventBus()
pub = SensoryPublisher(bus, source="snmp_poll")
seen: list[Any] = []

async def consume() -> None:
async for msg in bus.subscribe("sensory.link_down.>"):
seen.append(msg)
break

import asyncio
task = asyncio.create_task(consume())
await asyncio.sleep(0.01)

await pub.publish(
"link_down", "cpn-arlington-ms1|Port 3",
payload={"device": "cpn-arlington-ms1", "interface": "Port 3"},
)
await asyncio.wait_for(task, timeout=1.0)

assert seen[0].subject == "sensory.link_down.snmp_poll.cpn-arlington-ms1|Port_3"
# Original whitespace preserved in payload for human-readable display.
assert seen[0].payload["interface"] == "Port 3"


@pytest.mark.asyncio
async def test_publish_collapses_multiple_whitespace_runs() -> None:
"""Tabs, NBSP, and multi-space runs all collapse to a single '_'."""
bus = InMemoryEventBus()
pub = SensoryPublisher(bus, source="snmp_poll")
seen: list[Any] = []

async def consume() -> None:
async for msg in bus.subscribe("sensory.link_down.>"):
seen.append(msg)
break

import asyncio
task = asyncio.create_task(consume())
await asyncio.sleep(0.01)

await pub.publish(
"link_down", "r1|MS130- 12X\tport",
)
await asyncio.wait_for(task, timeout=1.0)

assert seen[0].subject == "sensory.link_down.snmp_poll.r1|MS130-_12X_port"


@pytest.mark.asyncio
async def test_publish_still_rejects_dots_in_target_parts() -> None:
"""Dots are the NATS token separator — they must remain a hard error.

Whitespace gets sanitized, dots do NOT. A caller passing 'a.b' as
one target part almost certainly meant 'a' and 'b' as two parts
and silently merging would mask the bug.
"""
bus = InMemoryEventBus()
pub = SensoryPublisher(bus, source="snmp_poll")
with pytest.raises(ValueError, match="contains '.'"):
await pub.publish("link_down", "device.with.dots")


@pytest.mark.asyncio
async def test_publish_preserves_caller_supplied_source_and_recorded_at() -> None:
"""Caller-supplied source/recorded_at win over the defaults.
Expand Down
Loading