diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 100644 index 0000000..775f55e --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1,60 @@ +## vexp - Context-Aware AI Coding + +### MANDATORY: use vexp pipeline - do NOT grep or glob the codebase +For every task - bug fixes, features, refactors, debugging: +**call `run_pipeline` FIRST**. It executes context search + impact analysis + +memory recall in a single call, returning compressed results. + +Do NOT use grep, glob, Bash, or cat to search/explore the codebase. +vexp returns pre-indexed, graph-ranked context that is more relevant and +uses fewer tokens than manual searching. Prefer `get_skeleton` over Read to +inspect files (detail: minimal/standard/detailed, 70-90% token savings). +Only use Read when you need exact raw content to edit a specific line. + +### Primary Tool +- `run_pipeline` - **USE THIS FOR EVERYTHING**. Single call that runs + capsule + impact + memory server-side. Returns compressed results. + Auto-detects intent (debug/modify/refactor/explore) from your task. + Includes full file content for pivots. + Examples: + - `run_pipeline({ "task": "fix JWT validation bug" })` - auto-detect + - `run_pipeline({ "task": "refactor db layer", "preset": "refactor" })` - explicit + - `run_pipeline({ "task": "add auth", "observation": "using JWT" })` - save insight in same call + +### Other MCP tools (use only when run_pipeline is insufficient) +- `get_skeleton` - **preferred over Read** for inspecting files (minimal/standard/detailed detail levels, 70-90% token savings) +- `index_status` - indexing status and health check +- `expand_vexp_ref` - expand V-REF hash placeholders in v2 compact output + +### Workflow +1. `run_pipeline("your task")` - ALWAYS FIRST. Returns pivots + impact + memories in 1 call +2. Need more detail on a file? Use `get_skeleton({ files: [...], detail: "detailed" })` - avoid Read unless editing +3. Make targeted changes based on the context returned +4. 
`run_pipeline` again ONLY if you need more context during implementation +5. Do NOT chain multiple vexp calls - one `run_pipeline` replaces capsule + impact + memory + observation + +### Subagent / Explore / Plan mode +- Subagents CAN and MUST call `run_pipeline` - always include the task description +- The PreToolUse hook blocks Grep/Glob when vexp daemon is running +- Do NOT spawn Agent(Explore) to freely search - call `run_pipeline` first, + then pass the returned context into the agent prompt if needed +- Always: `run_pipeline` -> get context -> spawn agent with context + +### Smart Features (automatic - no action needed) +- **Intent Detection**: auto-detects from your task keywords. "fix bug" -> Debug, "refactor" -> blast-radius, "add" -> Modify +- **Hybrid Search**: keyword + semantic + graph centrality ranking +- **Session Memory**: auto-captures observations; memories auto-surfaced in results +- **LSP Bridge**: VS Code captures type-resolved call edges +- **Change Coupling**: co-changed files included as related context + +### Advanced Parameters +- `preset: "debug"` - forces debug mode (capsule+tests+impact+memory) +- `preset: "refactor"` - deep impact analysis (depth 5) +- `max_tokens: 12000` - increase total budget for complex tasks +- `include_tests: true` - include test files in results +- `include_file_content: false` - omit full file content (lighter response) + +### Multi-Repo Workspaces +`run_pipeline` auto-queries all indexed repos. Use `repos: ["alias"]` to scope. +Use `index_status` to discover available repo aliases. + \ No newline at end of file diff --git a/.claude/hooks/vexp-guard.sh b/.claude/hooks/vexp-guard.sh new file mode 100644 index 0000000..9cefc1d --- /dev/null +++ b/.claude/hooks/vexp-guard.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# vexp-guard: block Grep/Glob when vexp daemon is running AND index is healthy. +# Fast path: if socket file or healthy marker doesn't exist, allow immediately. 
+# PID check: verify daemon process is alive (handles stale files after kill -9). +VEXP_DIR="${CLAUDE_PROJECT_DIR:-.}/.vexp" +SOCK="$VEXP_DIR/daemon.sock" +HEALTHY="$VEXP_DIR/healthy" +PID_FILE="$VEXP_DIR/daemon.pid" +if [ -S "$SOCK" ] && [ -f "$HEALTHY" ] && [ -f "$PID_FILE" ] && kill -0 "$(cat "$PID_FILE" 2>/dev/null)" 2>/dev/null; then + printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"deny","permissionDecisionReason":"vexp daemon is running. Use run_pipeline instead of Grep/Glob."}}' +else + printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"allow","permissionDecisionReason":"vexp index not ready, allowing direct search fallback."}}' +fi +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index d2a4469..b594539 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -14,7 +14,23 @@ "Bash(docker run *)", "Bash(python -c *)", "Bash(pip install *)", - "Bash(ruff *)" + "Bash(ruff *)", + "Bash(git add *)", + "Bash(git commit -m ' *)" + ] + }, + "hooks": { + "PreToolUse": [ + { + "matcher": "Grep|Glob|Regex", + "hooks": [ + { + "type": "command", + "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/vexp-guard.sh", + "timeout": 3000 + } + ] + } ] } } diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..294f512 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,28 @@ +## vexp context tools + +**MANDATORY: use `run_pipeline` - do NOT grep, glob, or read files manually.** +vexp returns pre-indexed, graph-ranked context in a single call. + +### Workflow +1. `run_pipeline` with your task description - ALWAYS FIRST (replaces all other tools) +2. Make targeted changes based on the context returned +3. `run_pipeline` again only if you need more context + +### Available MCP tools +- `run_pipeline` - **PRIMARY TOOL**. Runs capsule + impact + memory in 1 call. + Auto-detects intent. Includes file content. 
Example: `run_pipeline({ "task": "fix auth bug" })` +- `get_skeleton` - compact file structure +- `index_status` - indexing status +- `expand_vexp_ref` - expand V-REF placeholders in v2 output + +### Agentic search +- Do NOT use built-in file search, grep, or codebase indexing - always call `run_pipeline` first +- If you spawn sub-agents or background tasks, pass them the context from `run_pipeline` + rather than letting them search the codebase independently + +### Smart Features +Intent auto-detection, hybrid ranking, session memory, auto-expanding budget. + +### Multi-Repo +`run_pipeline` auto-queries all indexed repos. Use `repos: ["alias"]` to scope. Run `index_status` to see aliases. + \ No newline at end of file diff --git a/.gitignore b/.gitignore index a8723b9..8059cac 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,7 @@ htmlcov/ .pytest_cache/ # YAML backups if generated -/config/backups/ \ No newline at end of file +/config/backups/ + +# vexp +.vexp/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 969f247..d1c76d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.4.1] — 2026-05-29 + +### Fixed +- DNS-only deployments no longer flood the log with a "Service Tracker + Dashboard not enabled" line on every container, every refresh cycle. + When `STD_URL` or `STD_API_TOKEN` is missing, the notifier now logs a + single line at startup that STD integration is disabled, skips the + periodic refresh loop entirely (it only ever served STD), and does + not attempt STD dispatch on subsequent events. + +### Added +- New env var `HOST_NAME_OVERRIDE`. Sets the host name used as the DNS + CNAME target (`<host_name>.<dockerdomain>`), overriding the auto-detected + hostname. Useful in WSL or other environments where the host's + hostname differs from the name your DNS entries should point at + (e.g. 
Docker Desktop reports the LinuxKit/WSL VM name rather than the + real host). Unset preserves the existing auto-detection behavior. + ## [0.4.0] — 2026-05-14 ### Added @@ -168,7 +186,8 @@ Released. Initial public release. -[Unreleased]: https://github.com/crzykidd/docker-api-notifier/compare/v0.4.0...HEAD +[Unreleased]: https://github.com/crzykidd/docker-api-notifier/compare/v0.4.1...HEAD +[0.4.1]: https://github.com/crzykidd/docker-api-notifier/releases/tag/v0.4.1 [0.4.0]: https://github.com/crzykidd/docker-api-notifier/releases/tag/v0.4.0 [0.3.0]: https://github.com/crzykidd/docker-api-notifier/releases/tag/v0.3.0 [0.2.3]: https://github.com/crzykidd/docker-api-notifier/releases/tag/v0.2.3 diff --git a/CLAUDE.md b/CLAUDE.md index 4ad6082..b7e1d6b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,12 +1,30 @@ # docker-api-notifier — Claude Code Instructions -## Always - -- After any change that affects architecture, dependencies, supported - notifier targets, or the wire contract with downstream consumers, - update `docs/PRD.md` and `README.md` accordingly. -- After completing a phase, update `README.md` with what has been built. -- Never leave PRD or README out of sync with the codebase. +## Always — Definition of Done + +No change is complete until its documentation is updated **in the same +commit as the code**. This is not optional and not deferrable: + +1. **CHANGELOG.md — every change, no exceptions.** Add an entry under + `[Unreleased]` describing what changed for the operator. A code + change with no CHANGELOG entry is an incomplete change. +2. **PRD (`docs/PRD.md`) — confirm on every change.** Before + finishing, explicitly check whether the change touches anything the + PRD documents (architecture, the wire contract with STD, supported + notifier targets, env vars, labels, the interpreter YAML format). + If yes, update the PRD and bump its revision-history table. 
If no, + confirm that in your summary ("PRD reviewed, no change needed") + rather than silently skipping it. +3. **README.md — when operator-facing behavior changes.** Env vars, + labels, deployment, interpreters: keep the README tables current. +4. Never leave CHANGELOG, PRD, or README out of sync with the code. + +> **Known debt:** the PRDs and CLAUDE.md files in both this repo and +> STD have drifted from shipped reality before. A full audit of both +> repos' `docs/` against actual shipped state is a pending task (see +> the matching note in STD's `CLAUDE.md`). Until that audit happens, +> trust the code and CHANGELOG over the PRD where they disagree, and +> flag any contradiction you notice rather than propagating it. ## Commit style @@ -42,20 +60,19 @@ ## Build Status -Current shipped release: **v0.2.3** (latest tag on `main`) +Current shipped release: **v0.4.1** (latest tag on `main`). -Next release target: **v0.3.0** — cleanup release. Cannot ship until -**STD v0.5.0** is released (v0.3.0 emits canonical keys against -`/api/v1/register`, which STD v0.5.0 introduces). +Nothing currently in flight (`[Unreleased]` in `CHANGELOG.md` is +empty). v0.4.1 is a DNS/logging fix release (DNS host-name override, +STD-unconfigured log-flood fix) and does not change the STD wire +contract — the paired STD release for the capture/interpreter features +remains STD v0.6.0+. -- Phase 1 — Documentation baseline: IN PROGRESS -- Phase 2 — Logging consolidation: NOT STARTED -- Phase 3 — Shared retry helper: NOT STARTED -- Phase 4 — Stack-name fallback fix: NOT STARTED -- Phase 5 — `watched_actions` / `NOTIFIER_TRIGGERS` cleanup: NOT STARTED -- Phase 6 — Drop `trigger_reason` param: NOT STARTED -- Phase 7 — Switch to `/api/v1/register` + canonical keys: NOT STARTED - *(blocked until STD v0.5.0 is released)* +> Do not maintain a per-phase checklist here — it rots (this section +> was stale by two minor releases before this note was added). 
The +> CHANGELOG `[Unreleased]` section is the live record of work in +> flight; git tags are the record of what shipped. On release, update +> only the "Current shipped release" line above. ## Git Workflow @@ -97,14 +114,59 @@ Next release target: **v0.3.0** — cleanup release. Cannot ship until ## Cross-Repo Coordination This project is paired with -[service-tracker-dashboard](https://github.com/crzykidd/service-tracker-dashboard). -The contract is: - -- **STD** owns the wire contract for the register endpoint. -- **Notifier** is a producer — it sends what STD documents. -- Wire-format changes start in STD. The notifier follows. -- For v0.3.0 specifically: STD v0.5.0 must ship first; this notifier - release switches to canonical keys + `/api/v1/register` after. +[service-tracker-dashboard](https://github.com/crzykidd/service-tracker-dashboard) +(STD). They are **two independent apps** — separate version lines, +separate Docker images, separate release cadences. The notifier is not +an STD component: it also drives Technitium DNS and can run with DNS +only, STD only, or both. + +The two are coupled at exactly one seam: STD's `/api/v1/register` wire +contract. + +- **STD owns the contract.** Its pydantic schemas define a valid + payload and use `extra="forbid"` — unknown keys get rejected with + HTTP 422, not ignored. +- **This notifier is the producer.** It sends what STD documents. The + wire format lives in `notifiers/service_tracker_dashboard.py` + (`_to_canonical`). + +### The ordering rule + +Because STD's validator rejects unknown keys, **the schema-accepting +side (STD) must ship before the schema-sending side (this notifier).** + +- Adding a field this notifier will send → STD must accept it first. + Ship STD's schema change, *then* ship the notifier that emits the + field. Shipping the notifier first means STD 422s every payload from + upgraded hosts. 
+- A field STD wants to consume → STD can add the column/UI, but it + stays empty until a notifier version populates it. Feature isn't live + until both ship, notifier last. + +**Consumer leads, producer follows. STD may lead; this notifier may +lag; this notifier must not lead the schema.** + +### Safe-degradation guarantee (don't break it) + +The notifier must keep older STD versions working: + +- Capture fields (`networks` / `exposed_ports` / `published_ports`) + and `exposure_observations` require STD v0.6.0+. They are documented + as such in the README. +- `exposure_observations` semantics STD relies on: omit the field + entirely when no interpreters are loaded (STD reads absence as "no + update, preserve existing exposure rows"); send `[]` when + interpreters ran but nothing matched (STD reads that as "clear the + rows"). Do not collapse these two cases — `interpreter_loader` + returning `None` vs. `[]` is the distinction, and + `_run_interpreters` in `main.py` preserves it. + +### Version pairing history (informational, not the rule) + +- Notifier v0.3.0 switched to canonical keys + `/api/v1/register`; + requires STD v0.5.0+. +- Notifier v0.4.0 added capture fields + the interpreter mechanism + (`exposure_observations`); requires STD v0.6.0+. ## Notifier Module Conventions diff --git a/README.md b/README.md index f4a531c..651e256 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,9 @@ every Docker host without it touching things you didn't ask it to touch. 2. Subscribes to the Docker event stream for ongoing changes (`start`, `stop`, `die`, `pause`, `unpause`, `destroy`, `kill`, `update`). 3. Re-scans every running container on a periodic interval as a - self-healing measure (default every 60 seconds). + self-healing measure (default every 60 seconds). This loop only runs + when STD is configured — it exists to re-report containers to STD, + and does nothing for DNS-only deployments. 
For each event, it reads the container's labels and dispatches to whichever notifiers the container has opted in to via `dockernotifier.notifiers`. @@ -66,6 +68,7 @@ notifier targets can be added without touching the core event loop. | `TZ` | No | `UTC` | Timezone for log timestamps. | | `STD_REFRESH_SECONDS` | No | `60` | Periodic re-scan interval in **seconds**. | | `NOTIFIER_LOG_TO_STDOUT` | No | `1` | Set to `0` to silence console output. Logs still go to `/config/notifier.log`. Replaces the per-notifier `DNS_LOG_TO_STDOUT` and `STD_LOG_TO_STDOUT` vars, which are no longer recognized. | +| `HOST_NAME_OVERRIDE` | No | *host hostname* | Overrides the host name used as the DNS CNAME target (`<host_name>.<dockerdomain>`). If unset, the notifier reads the host's hostname from the `/etc/hostname` mount (falling back to the container's own hostname). Set this if running in WSL or other environments where the host has a different name than your DNS entries expect. | ### Technitium DNS @@ -95,9 +98,12 @@ notifier targets can be added without touching the core event loop. | `STD_REPORT_ALL_CONTAINERS` | No | `false` | When truthy (`true`, `1`, `yes` — case-insensitive), report **every running container on this host** to STD regardless of whether it has the `dockernotifier.notifiers=service-tracker-dashboard` opt-in label. Default off preserves per-container opt-in behavior. **Only affects STD** — the DNS notifier still requires explicit per-container opt-in via labels. Unrecognized values log a warning at startup and are treated as off. | | `INTERPRETER_RELOAD_ON_EACH_EVENT` | No | `false` | Debug-only. When truthy, re-reads YAML interpreters from disk on every dispatch instead of once at startup. Use while iterating on a new YAML; do not leave on in production. | -If a notifier's required env vars are missing, that notifier silently -no-ops — the container won't fail to start. This is intentional so you -can run the same image with only DNS, only STD, or both. 
+If a notifier's required env vars are missing, that notifier no-ops — +the container won't fail to start. This is intentional so you can run +the same image with only DNS, only STD, or both. When STD's env vars +(`STD_URL` / `STD_API_TOKEN`) are absent the notifier logs a single +line at startup noting STD is disabled, skips the periodic refresh +loop, and does not attempt STD dispatch on any event. --- @@ -237,6 +243,12 @@ services: - STD_API_TOKEN=TOKENFROMSTDSERVER - TZ=America/Los_Angeles - STD_REFRESH_SECONDS=60 + # Override the host name used as the DNS CNAME target (<host_name>.<dockerdomain>). + # If unset, the notifier uses the host's hostname (read from the + # /etc/hostname mount below). Set this if running in WSL or other + # environments where the host has a different name than your DNS + # entries expect. +# - HOST_NAME_OVERRIDE=wsl-host volumes: - /var/run/docker.sock:/var/run/docker.sock - /etc/hostname:/etc/host_hostname:ro diff --git a/docker-compose.yml b/docker-compose.yml index f6e3cc6..6688198 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,9 +10,22 @@ services: - STD_API_TOKEN=${STD_API_TOKEN} - TZ=America/Los_Angeles - STD_REFRESH_SECONDS=120 + # Override the host name used as the DNS CNAME target (<host_name>.<dockerdomain>). + # If unset, the notifier uses the host's hostname (read from the + # /etc/hostname mount below). Set this if running in WSL or other + # environments where the host has a different name than your DNS + # entries expect. +# - HOST_NAME_OVERRIDE=wsl-host volumes: - /var/run/docker.sock:/var/run/docker.sock - /etc/hostname:/etc/host_hostname:ro - /var/docker/docker-api-notifier:/config - .:/app restart: unless-stopped + labels: + # dockernotifier + - "dockernotifier.notifiers=dns,service-tracker-dashboard" #which services to run. 
+ - "dockernotifier.dns.containerhostname=container_name" #put the host name you want to use for the cname entry + - "dockernotifier.dns.containerzone=containerzone" # This is the domain or zone name you want to add the container entry to + - "dockernotifier.dns.dockerdomain=home.arpa" #this is the domain your docker host is a member of + \ No newline at end of file diff --git a/docs/HISTORY.md b/docs/HISTORY.md index 2ed5d39..ad60db3 100644 --- a/docs/HISTORY.md +++ b/docs/HISTORY.md @@ -67,3 +67,55 @@ documented in STD's PRD and validated by a pydantic schema. via STD's compat shim until STD v0.6.0. - Notifier v0.3.0+ deployments are required before upgrading any STD instance to v0.6.0. + +--- + +## On-disk configuration introduced (v0.4.0) + +Through v0.3.x, the notifier had no on-disk inputs other than the +Docker socket itself — configuration was entirely env vars and +container labels. v0.4.0 introduces the first on-disk inputs: YAML +**interpreter files** loaded at startup from +`/app/interpreters/builtin/` (baked into the image) and +`/app/interpreters/user/` (operator-mounted, optional). The image +ships two builtins (`traefik.yml`, `dockflare.yml`) that fire +automatically for any container reported to STD. + +### Why + +Translating third-party label schemes (Traefik routers, Dockflare +hostnames, ...) into STD's `exposure_observations` shape in hard-coded +Python would have required a notifier fork per supported tool. +Expressing the match/extract/emit logic as small YAML files keeps +operator-facing extension out of the Python codebase and lets the +community contribute interpreters without rebuilding the image. + +### What changed structurally + +- New module `interpreter_loader.py` with `load_interpreters()` + (called once at startup) and `evaluate()` (called per dispatch). +- New on-disk locations: `/app/interpreters/builtin/`, + `/app/interpreters/user/`. 
+- New community-reference directory `docs/community-interpreters/` + holding example YAMLs (explicitly non-curated; PRs welcome). +- PRD §1.3 design principles softened: "no state" became "no runtime + state" (interpreter YAML is configuration, not per-event memory); + "all configuration via env vars" became "env vars + labels + YAML + for interpreters." + +### Coordination with STD + +The interpreter mechanism emits `exposure_observations` on every +STD payload. STD's strict pydantic validator rejects unknown keys, +so **STD v0.6.0 must ship before notifier v0.4.0**. The same +ordering applies to the network/port capture fields (`networks`, +`exposed_ports`, `published_ports`) which also shipped in v0.4.0. + +### Note on version numbering + +The v0.4.0 release bundled work originally scoped for three separate +releases — v0.3.1 (STD opt-out env var), v0.3.2 (network/port +capture), and v0.4.0 (interpreters). v0.3.1 and v0.3.2 were never +cut as git tags; `git tag --list` jumps directly from `v0.3.0` to +`v0.4.0`. PRD revision-history rows 0.2 and 0.3 document the planning +work; row 0.4 documents the consolidation. diff --git a/docs/PRD.md b/docs/PRD.md index 3384dba..87592e2 100644 --- a/docs/PRD.md +++ b/docs/PRD.md @@ -12,6 +12,9 @@ | 0.2 | 2026-05-13 | v0.3.1 — STD reporting opt-out mode via `STD_REPORT_ALL_CONTAINERS` env var. §1.3 softened to reflect per-host opt-out scope. | | 0.3 | 2026-05-13 | v0.3.2 — capture container network membership and port information from the Docker API and forward to STD. §3.3 base kwargs contract grows three rows (`networks`, `exposed_ports`, `published_ports`). | | 0.4 | 2026-05-14 | v0.4.0 — YAML interpreter mechanism, STD opt-out env var (`STD_REPORT_ALL_CONTAINERS`), network/ports capture, and design-principle softening. Originally planned as v0.3.1 / v0.3.2 / v0.4.0; consolidated into a single v0.4.0 release. §1.3 softens "no state" and "env vars only" to reflect YAML configuration. 
§3 architecture grows an interpreter component. §4 documents the interpreter loader paths and volume-mount convention. §11 fully documents the YAML format and wire emission. | +| 0.5 | 2026-05-29 | Optional `HOST_NAME_OVERRIDE` env var for the DNS CNAME target. No PRD section changes — env vars are documented in the README per §4. Addresses environments (e.g. WSL/Docker Desktop) where the detected host name differs from the DNS name. | +| 0.6 | 2026-05-29 | STD dispatch and the periodic refresh loop are now gated on STD being configured. §3.2 event-flow notes the loop starts only when `STD_URL`/`STD_API_TOKEN` are set; DNS-only deployments log the disabled state once instead of per-event. | +| 0.7 | 2026-05-29 | Release prep for v0.4.1. §5 "Current State" bumped to v0.4.1; no behavioral content change — rows 0.5/0.6 already documented the shipped fixes. | --- @@ -21,7 +24,7 @@ 2. [Scope](#2-scope) 3. [Architecture](#3-architecture) 4. [Configuration Model](#4-configuration-model) -5. [Current State (v0.4.0)](#5-current-state-v040) +5. [Current State (v0.4.1)](#5-current-state-v041) 6. [v0.3.0 — Cleanup Release](#6-v030--cleanup-release) 7. [Delivered in v0.4.0](#7-delivered-in-v040) 8. [Versioning, Branches, and Releases](#8-versioning-branches-and-releases) @@ -148,7 +151,11 @@ startup (see §1.3 and §11). authentication, and payload shape. - **Common concerns** that should live outside individual notifier modules: logging configuration, retry helpers, label-to-payload - mapping. (Today these are partially duplicated; see §5.) + mapping. Logging (`logging_setup.py`) and retry (`retry.py`) were + unified in v0.3.0 and are consumed by both notifier modules. + Label-to-payload translation still lives inside each notifier + module — see `notifiers/service_tracker_dashboard.py:_to_canonical` + for STD's mapping. - **`interpreter_loader.py`** — loads and evaluates the YAML interpreters introduced in v0.4.0. 
Runs once at startup to load YAMLs from `/app/interpreters/builtin/` and `/app/interpreters/user/` @@ -163,11 +170,17 @@ startup (see §1.3 and §11). 2. Docker event subscription — events whose `Action` is in `watched_actions` are processed live. 3. Periodic loop — every `STD_REFRESH_SECONDS` (default 60s), every - running container is reprocessed with `action="refresh"`. + running container is reprocessed with `action="refresh"`. Started + only when STD is configured (see below). The periodic loop exists for resilience: if the notifier missed an event (network blip, container crash mid-event), the next refresh pass -catches it. +catches it. Because `refresh` is an STD-only trigger (DNS fires only on +`boot`/`start`), the loop is started only when STD is configured +(`STD_URL` and `STD_API_TOKEN` both set). On a DNS-only deployment the +notifier logs one line at startup that STD is disabled, does not start +the loop, and skips STD dispatch on every event — rather than logging a +"not enabled" line per container per pass. ### 3.3 Notifier Module Contract @@ -307,12 +320,15 @@ See §11 for the YAML format and emission semantics. --- -## 5. Current State (v0.4.0) +## 5. Current State (v0.4.1) -Tags shipped on `main`: v0.1.0 → v0.4.0. v0.3.0 (2026-05-12) resolved +Tags shipped on `main`: v0.1.0 → v0.4.1. v0.3.0 (2026-05-12) resolved every issue listed in §5.2 below. v0.4.0 (2026-05-14) shipped the work originally scoped across three separate releases (v0.3.1 / v0.3.2 / v0.4.0); the consolidation is summarized in §7. +v0.4.1 (2026-05-29) is a DNS/logging fix release — a DNS host-name +override (`HOST_NAME_OVERRIDE`) and a fix for STD-unconfigured log +flooding. It does not change the STD wire contract. ### 5.1 What works today diff --git a/interpreter_loader.py b/interpreter_loader.py index 5366450..7099bfa 100644 --- a/interpreter_loader.py +++ b/interpreter_loader.py @@ -3,7 +3,7 @@ Interpreters read labels written by third-party tools (Traefik, Dockflare, etc.) 
and emit structured exposure observations that the -notifier forwards to STD. See PRD §12 for the full design and YAML +notifier forwards to STD. See PRD §11 for the full design and YAML format reference. Public surface: diff --git a/main.py b/main.py index d49339d..df71079 100644 --- a/main.py +++ b/main.py @@ -41,6 +41,18 @@ def _parse_bool_env(name, default=False): "will be reported to STD regardless of opt-in label" ) +# Whether the STD notifier has the env vars it needs. Checked once at +# startup so STD dispatch and the periodic refresh loop (which exists +# only to re-report containers to STD) can be skipped entirely when STD +# is not configured, instead of logging a "not enabled" line on every +# event. DNS-only deployments are the common case for this. +STD_CONFIGURED = service_tracker_dashboard.is_configured() +if not STD_CONFIGURED: + logger.info( + "STD_URL or STD_API_TOKEN is missing — disabling STD integration " + "(no STD reporting, no periodic refresh loop)" + ) + # Debug-only: re-load interpreters on every event instead of once at # startup. Not for production use; intended for iterating on YAML # files without bouncing the notifier. 
@@ -83,6 +95,9 @@ def periodic_update_loop(docker_host): def get_host_name(): + override = os.environ.get("HOST_NAME_OVERRIDE") + if override and override.strip(): + return override.strip() try: with open("/etc/host_hostname", "r") as f: return f.read().strip() @@ -146,7 +161,8 @@ def handle_container_event(container, docker_host, action): std_via_label = "service-tracker-dashboard" in notifier_list std_via_env = STD_REPORT_ALL_CONTAINERS std_should_fire = ( - (std_via_label or std_via_env) + STD_CONFIGURED + and (std_via_label or std_via_env) and action in NOTIFIER_TRIGGERS["service-tracker-dashboard"] ) dns_should_fire = ( @@ -238,7 +254,13 @@ def main(): except Exception as e: logger.error(f"Failed to process container {container.name} on boot: {e}") - threading.Thread(target=periodic_update_loop, args=(docker_host,), daemon=True).start() + # The periodic loop re-reports containers to STD; it does nothing for + # DNS (which only triggers on boot/start). Skip it entirely when STD + # is not configured. + if STD_CONFIGURED: + threading.Thread(target=periodic_update_loop, args=(docker_host,), daemon=True).start() + else: + logger.info("Periodic refresh loop not started — STD integration disabled") for event in client.events(decode=True): if event.get("Type") != "container": diff --git a/notifiers/service_tracker_dashboard.py b/notifiers/service_tracker_dashboard.py index b939164..f417d87 100644 --- a/notifiers/service_tracker_dashboard.py +++ b/notifiers/service_tracker_dashboard.py @@ -33,7 +33,7 @@ # consumed by STD v0.6.0+. "networks", "exposed_ports", "published_ports", # v0.4.0: interpreter outputs. List of ExposureObservation dicts - # (possibly empty); consumed by STD v0.7.0+. An empty list means + # (possibly empty); consumed by STD v0.6.0+. An empty list means # "interpreters ran and nothing matched" (STD clears exposure # rows). The notifier omits the field entirely when no # interpreters are loaded (STD preserves existing rows). 
@@ -77,6 +77,15 @@ def _to_canonical(kwargs: dict) -> dict: return out +def is_configured(): + """True when both STD env vars are set, i.e. STD dispatch can run. + + Lets `main.py` gate STD dispatch and the periodic refresh loop + without hardcoding which env vars STD owns. + """ + return bool(os.environ.get("STD_URL") and os.environ.get("STD_API_TOKEN")) + + @with_retry def post_with_retry(endpoint, payload, headers): response = requests.post(endpoint, json=payload, headers=headers) @@ -97,7 +106,10 @@ def register(**kwargs): api_token = os.environ.get("STD_API_TOKEN") if not dashboard_url or not api_token: - logger.info("Not enabling Service Tracker Dashboard integration — missing STD_URL or STD_API_TOKEN") + # Normally unreachable: main.py gates STD dispatch on + # is_configured() and logs the disabled state once at startup. + # Kept as a defensive guard; debug level avoids per-event spam. + logger.debug("STD register() called without STD_URL/STD_API_TOKEN — skipping") return kwargs.setdefault("timestamp", datetime.now().isoformat())