diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ee0277..2e31997 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,48 @@ and this file MUST be updated together whenever `__version__` changes. --- +## [0.8.0-dev7] — :AFFECTS edge resolution + target/subject consistency + +Second small polish to the dev5 pipeline, surfaced by deployment +verification. The dev5 deploy produced 31 `:ReflexEvent` nodes (real +data, including 28 from Meraki MS switches after the dev6 fix landed), +but none of them connected to their `:Device` nodes via `:AFFECTS`. + +Root cause: the `link_down` handler was preferring `payload["device_id"]` +over `payload["device"]`, which produced targets like +`'meraki:Q4CD-Y6FW-EKVS|Port 9'` — a Neo4j node-id form that no Device +node had as its `name` or `platform_id`. + +### Changed +- `link_down` handler now prefers `payload["device"]` (human name) over + `payload["device_id"]` so the target string matches the subject token + built by the publisher. Falls back to `device_id` when `device` is + absent (legacy publishers, traps). +- `Neo4jReflexEventSink._merge_reflex_event_tx` AFFECTS-edge MATCH now + resolves devices on three identifiers: `d.name`, `d.platform_id`, + AND `d.id` — the latter catches node-id-form targets if a future + publisher emits them. + +### Tests +- Two new cases in `tests/reflex/test_handlers.py` covering both + ordering preferences (device > device_id, fallback when device absent). + +### Operational note +After deploying, new `:ReflexEvent` nodes will get `:AFFECTS` edges to +their devices. 
The 31 existing nodes can be backfilled with this +Cypher once dev7 is live: + +```cypher +MATCH (e:ReflexEvent) WHERE NOT (e)-[:AFFECTS]->(:Device) +WITH e, split(e.target, '|')[0] AS device_key +MATCH (d:Device) +WHERE d.name = device_key OR d.platform_id = device_key OR d.id = device_key +MERGE (e)-[:AFFECTS]->(d) +RETURN count(*) AS backfilled +``` + +--- + ## [0.8.0-dev6] — Sanitize whitespace in NATS subject target parts Hot-fix to dev5, caught within seconds of the dev5 deploy on diff --git a/netcortex/__init__.py b/netcortex/__init__.py index 6536f62..75abd62 100644 --- a/netcortex/__init__.py +++ b/netcortex/__init__.py @@ -22,4 +22,4 @@ ``CHANGELOG.md`` MUST be kept in sync whenever ``__version__`` changes. """ -__version__ = "0.8.0-dev6" +__version__ = "0.8.0-dev7" diff --git a/netcortex/episodic/reflex_event_sink.py b/netcortex/episodic/reflex_event_sink.py index 6760108..bdaae96 100644 --- a/netcortex/episodic/reflex_event_sink.py +++ b/netcortex/episodic/reflex_event_sink.py @@ -302,10 +302,20 @@ async def _merge_reflex_event_tx(tx: Any, props: dict[str, Any], device_name: st # Best-effort AFFECTS edge. We MATCH-only (not MERGE) the device # so we don't create stub nodes from a typo'd device name in a # reflex event — the live state graph stays canonical. + # + # We match on three identifiers: human name, platform_id (vendor + # serial / network id), and node id. The dev6 deploy revealed that + # SNMP-sourced events sometimes carry the node-id form + # (e.g. ``meraki:Q4CD-Y6FW-EKVS``) as the device half of the + # target. Adding ``d.id`` to the OR catches those without needing + # the publisher to know which form Neo4j keyed the device on. 
await tx.run( """ MATCH (e:ReflexEvent {id: $event_id}) - MATCH (d:Device) WHERE d.name = $device_name OR d.platform_id = $device_name + MATCH (d:Device) + WHERE d.name = $device_name + OR d.platform_id = $device_name + OR d.id = $device_name MERGE (e)-[:AFFECTS]->(d) """, event_id=event_id, diff --git a/netcortex/reflex/handlers/link_down.py b/netcortex/reflex/handlers/link_down.py index 3b22b58..f0faeed 100644 --- a/netcortex/reflex/handlers/link_down.py +++ b/netcortex/reflex/handlers/link_down.py @@ -57,9 +57,16 @@ async def handle( event_class, source, _target_from_subject = parse_sensory_subject( event.subject ) + # Prefer the human-readable device name when present so the target + # string matches the subject token built by the publisher (which + # also uses the name). Falling back to device_id is fine for graph + # lookups — the Neo4j sink matches on Device.name, Device.platform_id, + # and Device.id when wiring the :AFFECTS edge. Caught in the dev6 + # deploy: handler was preferring device_id, producing targets like + # 'meraki:Q4CD-Y6FW-EKVS|Port 9' that matched no device's name. 
device = ( - payload.get("device_id") - or payload.get("device") + payload.get("device") + or payload.get("device_id") or payload.get("target") ) interface = payload.get("interface") or payload.get("if_name") diff --git a/pyproject.toml b/pyproject.toml index 1d68e86..f04fcea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "netcortex" -version = "0.8.0.dev6" +version = "0.8.0.dev7" description = "The intelligence layer for your network — multi-dimensional graph of the network bridging Meraki, Catalyst Center, Intersight, and more with NetBox as SoT" readme = "README.md" requires-python = ">=3.12" diff --git a/tests/reflex/test_handlers.py b/tests/reflex/test_handlers.py index 5661841..3852ce5 100644 --- a/tests/reflex/test_handlers.py +++ b/tests/reflex/test_handlers.py @@ -101,6 +101,49 @@ async def test_link_down_extracts_device_and_interface() -> None: assert outcome.outcome == "logged" +async def test_link_down_prefers_device_over_device_id() -> None: + """Caught in the dev6 deploy verification: when both ``device`` (human + name) and ``device_id`` (Neo4j node id) are present in the payload, + the handler must prefer ``device`` so the target string matches the + subject token built by the publisher. + + Concretely: the SNMP publisher emits + ``sensory.link_down.snmp_poll.cpn-nash-ms130-1|Port_9`` (subject) with + payload ``{device: 'cpn-nash-ms130-1', device_id: 'meraki:Q4CD-...'}``. + If the handler picks ``device_id``, the resulting target is + ``'meraki:Q4CD-...|Port 9'`` which doesn't line up with the subject + and confuses the AFFECTS edge resolver in the Neo4j sink. 
+ """ + h = get_handler("link_down") + outcome = await h.handle( + _event( + "sensory.link_down.snmp_poll.cpn-nash-ms130-1|Port_9", + { + "device": "cpn-nash-ms130-1", + "device_id": "meraki:Q4CD-Y6FW-EKVS", + "interface": "Port 9", + }, + ), + _empty_ctx(), + ) + assert outcome is not None + assert outcome.target == "cpn-nash-ms130-1|Port 9" + + +async def test_link_down_falls_back_to_device_id_when_device_absent() -> None: + """When only ``device_id`` is supplied (legacy publishers, traps), use it.""" + h = get_handler("link_down") + outcome = await h.handle( + _event( + "sensory.link_down.snmp_trap.r1", + {"device_id": "r1", "interface": "Gi0/1"}, + ), + _empty_ctx(), + ) + assert outcome is not None + assert outcome.target == "r1|Gi0/1" + + async def test_link_down_handles_missing_target_field() -> None: """No device field at all — outcome.target is None.""" h = get_handler("link_down")