diff --git a/systemctl2mqtt/const.py b/systemctl2mqtt/const.py index 30addb2..62640a9 100644 --- a/systemctl2mqtt/const.py +++ b/systemctl2mqtt/const.py @@ -40,9 +40,32 @@ ANSI_ESCAPE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") # fmt: off STATS_REGISTRATION_ENTRIES = [ - # label,field,device_class,unit,icon - ('CPU', 'cpu', None, '%', 'mdi:chip'), - ('Memory', 'memory', 'data_size', 'MB', 'mdi:memory'), + # label, field, device_class, unit, icon, category + ('CPU', 'cpu', None, '%', 'mdi:chip', None), # CPU utilization percentage + ('Memory (Virtual)', 'memory', 'data_size', 'MB', 'mdi:memory', None), # Total virtual memory usage + ('Memory (Real)', 'memory_real', 'data_size', 'MB', 'mdi:memory', None), # Real memory (calculated from smaps) + ('Memory (Real PSS)', 'memory_real_pss', 'data_size', 'MB', 'mdi:memory', None), # Real memory (calculated from smaps), based on PSS + ('PSS Memory', 'memory_pss', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Proportional Set Size (shared pages divided among processes) + ('PSS Anon', 'memory_pss_anon', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Anonymous memory part of PSS + ('PSS File', 'memory_pss_file', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # File-backed memory part of PSS + ('PSS Dirty', 'memory_pss_dirty', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Modified (dirty) memory part of PSS + ('PSS Shmem', 'memory_pss_shmem', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared memory part of PSS + ('RSS', 'memory_rss', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Resident Set Size (non-swapped physical memory) + ('Shared Clean', 'memory_shared_clean', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared pages not modified (clean) + ('Shared Dirty', 'memory_shared_dirty', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared pages modified (dirty) + ('Private Clean', 'memory_private_clean', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Private pages that are clean + ('Private Dirty', 
'memory_private_dirty', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Private pages that are dirty + ('Referenced', 'memory_referenced', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Recently accessed pages + ('Anonymous', 'memory_anonymous', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Anonymous memory (not file-backed) + ('LazyFree', 'memory_lazyfree', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Pages marked as free-on-demand (MADV_FREE) + ('Anon HugePages', 'memory_anon_hugepages', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Anonymous memory using HugePages + ('Shmem PMD Mapped', 'memory_shmem_pmd_mapped', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared memory mapped with hugepages (PMD) + ('File PMD Mapped', 'memory_file_pmd_mapped', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # File-backed memory mapped with hugepages + ('Shared HugeTLB', 'memory_shared_hugetlb', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared HugeTLB memory usage + ('Private HugeTLB', 'memory_private_hugetlb', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Private HugeTLB memory usage + ('Swap', 'memory_swap', 'data_size', 'MB', 'mdi:swap-horizontal', "diagnostic"), # Memory swapped out + ('Swap PSS', 'memory_swappss', 'data_size', 'MB', 'mdi:swap-horizontal', "diagnostic"), # Proportional swap usage + ('Locked', 'memory_locked', 'data_size', 'MB', 'mdi:lock', "diagnostic"), # Locked pages (mlock) ] # fmt: on @@ -65,6 +88,7 @@ "service_blacklist": SERVICE_BLACKLIST, "enable_events": EVENTS_DEFAULT, "enable_stats": STATS_DEFAULT, + "enable_smaps": STATS_DEFAULT, "stats_record_seconds": STATS_RECORD_SECONDS_DEFAULT, } ) diff --git a/systemctl2mqtt/systemctl2mqtt.py b/systemctl2mqtt/systemctl2mqtt.py index 134871f..7a99029 100755 --- a/systemctl2mqtt/systemctl2mqtt.py +++ b/systemctl2mqtt/systemctl2mqtt.py @@ -84,6 +84,8 @@ class Systemctl2Mqtt: Activate the stats b_events Activate the events + b_smaps + Activate the smaps memory stats (more 
detailed memory info, but more cpu usage) systemctl_events Queue with systemctl events systemctl_stats @@ -127,6 +129,7 @@ class Systemctl2Mqtt: b_stats: bool = False b_events: bool = False + b_smaps: bool = False systemctl_events: Queue[dict[str, str]] = Queue(maxsize=MAX_QUEUE_SIZE) systemctl_stats: Queue[list[str]] = Queue(maxsize=MAX_QUEUE_SIZE) @@ -185,6 +188,8 @@ def __init__(self, cfg: Systemctl2MqttConfig, do_not_exit: bool = False): self.b_events = True if self.cfg["enable_stats"]: self.b_stats = True + if self.cfg["enable_smaps"]: + self.b_smaps = True main_logger.setLevel(self.cfg["log_level"].upper()) events_logger.setLevel(self.cfg["log_level"].upper()) @@ -197,6 +202,11 @@ def __init__(self, cfg: Systemctl2MqttConfig, do_not_exit: bool = False): "Could not get systemctl version" ) from ex + if self.b_smaps and not self.b_stats: + raise Systemctl2MqttConfigException( + "Cannot enable smaps without stats, please enable stats as well." + ) + if not self.do_not_exit: main_logger.info("Register signal handlers for SIGINT and SIGTERM") signal.signal(signal.SIGTERM, self._signal_handler) @@ -204,6 +214,7 @@ def __init__(self, cfg: Systemctl2MqttConfig, do_not_exit: bool = False): main_logger.info("Events enabled: %d", self.b_events) main_logger.info("Stats enabled: %d", self.b_stats) + main_logger.info("Smaps enabled: %d", self.b_smaps) try: # Setup MQTT @@ -720,6 +731,7 @@ def _register_service(self, service_entry: ServiceEvent) -> None: "unit_of_measurement": None, "device": self._device_definition(service_entry), "device_class": "running", + "entity_category": None, "json_attributes_topic": events_topic, "qos": self.cfg["mqtt_qos"], } @@ -736,7 +748,7 @@ def _register_service(self, service_entry: ServiceEvent) -> None: ) # Stats - for label, field, device_class, unit, icon in STATS_REGISTRATION_ENTRIES: + for label, field, device_class, unit, icon, category in STATS_REGISTRATION_ENTRIES: registration_topic = self.discovery_sensor_topic.format( 
def get_smaps(self, pid: int) -> dict[str, int]:
    """Parse ``/proc/<pid>/smaps_rollup`` into a dictionary.

    Keys are the field names as they appear in the file with the trailing
    colon removed (e.g. 'Pss', 'Pss_Anon', 'Pss_File', ...).
    Values are integers in kilobytes.

    Lines whose second whitespace-separated token is not a plain number
    are skipped, so non-numeric lines never end up in the result.

    Parameters
    ----------
    pid
        The PID of the process to get the smaps for

    Returns
    -------
    dict[str, int]
        Mapping of field name to its value in kB. Empty when the file
        contains no numeric field.

    Raises
    ------
    ValueError
        If the process is not found (including when it disappears while
        the file is being read) or smaps is not supported.
    OSError
        Any other I/O failure (e.g. ``PermissionError``) propagates unchanged.

    """
    # NOTE(review): the kernel spells the shared-memory PSS field
    # 'Pss_Shmem'; a lookup of 'Pss_Shm' on the returned dict would always
    # miss -- confirm the call sites use the kernel's spelling.
    result: dict[str, int] = {}
    path = f"/proc/{pid}/smaps_rollup"
    try:
        with open(path, encoding="utf-8") as f:
            for line in f:
                parts = line.split()
                # Only "<Name>: <number> kB" style lines carry a value.
                if len(parts) >= 2 and parts[1].isdigit():
                    result[parts[0].rstrip(":")] = int(parts[1])  # always in kB
    except (FileNotFoundError, ProcessLookupError):
        # FileNotFoundError: no such PID or no smaps_rollup on this system.
        # ProcessLookupError (ESRCH): the process vanished between open()
        # and read; report it through the same documented ValueError.
        raise ValueError(f"Process {pid} not found or smaps not supported") from None
    return result
@@ -1045,11 +1090,36 @@ def _handle_stats_queue(self) -> None: # self.known_stat_services[service][pid]["last"] - container_date # ).total_seconds() + smaps = self.get_smaps(pid) if self.b_smaps else {} + pid_stats = PIDStats( { "pid": pid, "cpu": float(stat[8]), "memory": parse_top_size(stat[5]) / 1024, # KB --> MB + "memory_real": (smaps.get("Anonymous", 0) + smaps.get("SwapPss", 0)) / 1024, + "memory_real_pss": (smaps.get("Pss_Anon", 0) + smaps.get("Pss_Shm", 0) + smaps.get("SwapPss", 0)) / 1024, + "memory_pss_anon": smaps.get("Pss_Anon", 0) / 1024, + "memory_pss": smaps.get("Pss", 0) / 1024, + "memory_pss_file": smaps.get("Pss_File", 0) / 1024, + "memory_pss_dirty": smaps.get("Pss_Dirty", 0) / 1024, + "memory_pss_shmem": smaps.get("Pss_Shm", 0) / 1024, + "memory_rss": smaps.get("Rss", 0) / 1024, + "memory_shared_clean": smaps.get("Shared_Clean", 0) / 1024, + "memory_shared_dirty": smaps.get("Shared_Dirty", 0) / 1024, + "memory_private_clean": smaps.get("Private_Clean", 0) / 1024, + "memory_private_dirty": smaps.get("Private_Dirty", 0) / 1024, + "memory_referenced": smaps.get("Referenced", 0) / 1024, + "memory_anonymous": smaps.get("Anonymous", 0) / 1024, + "memory_lazyfree": smaps.get("LazyFree", 0) / 1024, + "memory_anon_hugepages": smaps.get("AnonHugePages", 0) / 1024, + "memory_shmem_pmd_mapped": smaps.get("ShmemPmdMapped", 0) / 1024, + "memory_file_pmd_mapped": smaps.get("FilePmdMapped", 0) / 1024, + "memory_shared_hugetlb": smaps.get("Shared_Hugetlb", 0) / 1024, + "memory_private_hugetlb": smaps.get("Private_Hugetlb", 0) / 1024, + "memory_swap": smaps.get("Swap", 0) / 1024, + "memory_swappss": smaps.get("SwapPss", 0) / 1024, + "memory_locked": smaps.get("Locked", 0) / 1024, } ) stats_logger.debug("Printing pid stats: %s", pid_stats) @@ -1060,6 +1130,29 @@ def _handle_stats_queue(self) -> None: "host": self.cfg["systemctl2mqtt_hostname"], "cpu": 0, "memory": 0, + "memory_real": 0, + "memory_real_pss": 0, + "memory_pss": 0, + "memory_pss_anon": 0, + 
"memory_pss_file": 0, + "memory_pss_dirty": 0, + "memory_pss_shmem": 0, + "memory_rss": 0, + "memory_shared_clean": 0, + "memory_shared_dirty": 0, + "memory_private_clean": 0, + "memory_private_dirty": 0, + "memory_referenced": 0, + "memory_anonymous": 0, + "memory_lazyfree": 0, + "memory_anon_hugepages": 0, + "memory_shmem_pmd_mapped": 0, + "memory_file_pmd_mapped": 0, + "memory_shared_hugetlb": 0, + "memory_private_hugetlb": 0, + "memory_swap": 0, + "memory_swappss": 0, + "memory_locked": 0, "pid_stats": self.last_stat_services[service][ "pid_stats" ] @@ -1071,6 +1164,30 @@ def _handle_stats_queue(self) -> None: for pid_stat in service_stats["pid_stats"].values(): service_stats["memory"] += pid_stat["memory"] + service_stats["memory"] += pid_stat["memory"] + service_stats["memory_real"] += pid_stat["memory_real"] + service_stats["memory_real_pss"] += pid_stat["memory_real_pss"] + service_stats["memory_pss"] += pid_stat["memory_pss"] + service_stats["memory_pss_anon"] += pid_stat["memory_pss_anon"] + service_stats["memory_pss_file"] += pid_stat["memory_pss_file"] + service_stats["memory_pss_dirty"] += pid_stat["memory_pss_dirty"] + service_stats["memory_pss_shmem"] += pid_stat["memory_pss_shmem"] + service_stats["memory_rss"] += pid_stat["memory_rss"] + service_stats["memory_shared_clean"] += pid_stat["memory_shared_clean"] + service_stats["memory_shared_dirty"] += pid_stat["memory_shared_dirty"] + service_stats["memory_private_clean"] += pid_stat["memory_private_clean"] + service_stats["memory_private_dirty"] += pid_stat["memory_private_dirty"] + service_stats["memory_referenced"] += pid_stat["memory_referenced"] + service_stats["memory_anonymous"] += pid_stat["memory_anonymous"] + service_stats["memory_lazyfree"] += pid_stat["memory_lazyfree"] + service_stats["memory_anon_hugepages"] += pid_stat["memory_anon_hugepages"] + service_stats["memory_shmem_pmd_mapped"] += pid_stat["memory_shmem_pmd_mapped"] + service_stats["memory_file_pmd_mapped"] += 
pid_stat["memory_file_pmd_mapped"] + service_stats["memory_shared_hugetlb"] += pid_stat["memory_shared_hugetlb"] + service_stats["memory_private_hugetlb"] += pid_stat["memory_private_hugetlb"] + service_stats["memory_swap"] += pid_stat["memory_swap"] + service_stats["memory_swappss"] += pid_stat["memory_swappss"] + service_stats["memory_locked"] += pid_stat["memory_locked"] service_stats["cpu"] += pid_stat["cpu"] self.last_stat_services[service] = service_stats @@ -1236,6 +1353,11 @@ def main() -> None: help="Publish Stats", action="store_true", ) + parser.add_argument( + "--smaps", + help="Publish extended memory stats (more detailed memory info, but more cpu usage, only if --stats is enabled)", + action="store_true", + ) parser.add_argument( "--interval", help=f"The number of seconds to record state and make an average (default: {STATS_RECORD_SECONDS_DEFAULT})", @@ -1273,6 +1395,7 @@ def main() -> None: "mqtt_qos": args.qos, "enable_events": args.events, "enable_stats": args.stats, + "enable_smaps": args.smaps, "stats_record_seconds": args.interval, } ) diff --git a/systemctl2mqtt/type_definitions.py b/systemctl2mqtt/type_definitions.py index d181016..1aab636 100644 --- a/systemctl2mqtt/type_definitions.py +++ b/systemctl2mqtt/type_definitions.py @@ -57,6 +57,8 @@ class Systemctl2MqttConfig(TypedDict): Flag to enable event monitoring enable_stats Flag to enable stat monitoring + enable_smaps + Flag to enable smaps memory monitoring (more detailed memory info, but more cpu usage), requires "enable_stats" to be True stats_record_seconds Interval every how many seconds the stats are published via MQTT @@ -79,6 +81,7 @@ class Systemctl2MqttConfig(TypedDict): service_blacklist: list[str] enable_events: bool enable_stats: bool + enable_smaps: bool stats_record_seconds: int @@ -156,15 +159,88 @@ class PIDStats(TypedDict): pid The pid of the Service memory - Used memory in MB + Used memory in MB (virtual, from top) cpu The cpu usage by the Service in cpu-% (ex.: a 
Systemctl with 4 cores has 400% cpu available) + memory_real_pss + Real memory in MB (calculated from smaps). + Based on Proportional Set Size (PSS): shared pages are divided among processes. + Best metric to estimate actual memory footprint of a process + memory_real + Real memory in MB (calculated from smaps). + Memory that will actually be freed if the process exits. + Based on Anonymous + SwapPss; shared file-backed pages are excluded, not divided. + memory_pss + Proportional Set Size in MB + memory_pss_anon + Anonymous PSS in MB + memory_pss_file + File-backed PSS in MB + memory_pss_dirty + Dirty PSS in MB + memory_pss_shmem + Shared memory PSS in MB + memory_rss + Resident Set Size in MB + memory_shared_clean + Shared clean pages in MB + memory_shared_dirty + Shared dirty pages in MB + memory_private_clean + Private clean pages in MB + memory_private_dirty + Private dirty pages in MB + memory_referenced + Referenced pages in MB + memory_anonymous + Anonymous memory in MB + memory_lazyfree + LazyFree pages in MB + memory_anon_hugepages + Anonymous huge pages in MB + memory_shmem_pmd_mapped + Shared memory PMD mapped pages in MB + memory_file_pmd_mapped + File-backed PMD mapped pages in MB + memory_shared_hugetlb + Shared hugetlb pages in MB + memory_private_hugetlb + Private hugetlb pages in MB + memory_swap + Swap in MB + memory_swappss + Proportional Swap usage in MB + memory_locked + Locked pages in MB """ pid: int cpu: float memory: float + memory_real_pss: float + memory_real: float + memory_pss: float + memory_pss_anon: float + memory_pss_file: float + memory_pss_dirty: float + memory_pss_shmem: float + memory_rss: float + memory_shared_clean: float + memory_shared_dirty: float + memory_private_clean: float + memory_private_dirty: float + memory_referenced: float + memory_anonymous: float + memory_lazyfree: float + memory_anon_hugepages: float + memory_shmem_pmd_mapped: float + memory_file_pmd_mapped: float + memory_shared_hugetlb: float + 
memory_private_hugetlb: float + memory_swap: float + memory_swappss: float + memory_locked: float class ServiceStats(TypedDict): @@ -177,11 +253,61 @@ class ServiceStats(TypedDict): host The Systemctl host memory - Used memory in MB + Used memory in MB (virtual, from top) cpu The cpu usage by the Service in cpu-% (ex.: a Systemctl with 4 cores has 400% cpu available) pid_stats The stats for all pids + memory_real_pss + Real memory in MB (calculated from smaps). + Based on Proportional Set Size (PSS): shared pages are divided among processes. + Best metric to estimate actual memory footprint of a process + memory_real + Real memory in MB (calculated from smaps). + Memory that will actually be freed if the process exits. + Based on Anonymous + SwapPss; shared file-backed pages are excluded, not divided. + memory_pss + Proportional Set Size in MB + memory_pss_anon + Anonymous PSS in MB + memory_pss_file + File-backed PSS in MB + memory_pss_dirty + Dirty PSS in MB + memory_pss_shmem + Shared memory PSS in MB + memory_rss + Resident Set Size in MB + memory_shared_clean + Shared clean pages in MB + memory_shared_dirty + Shared dirty pages in MB + memory_private_clean + Private clean pages in MB + memory_private_dirty + Private dirty pages in MB + memory_referenced + Referenced pages in MB + memory_anonymous + Anonymous memory in MB + memory_lazyfree + LazyFree pages in MB + memory_anon_hugepages + Anonymous huge pages in MB + memory_shmem_pmd_mapped + Shared memory PMD mapped pages in MB + memory_file_pmd_mapped + File-backed PMD mapped pages in MB + memory_shared_hugetlb + Shared hugetlb pages in MB + memory_private_hugetlb + Private hugetlb pages in MB + memory_swap + Swap in MB + memory_swappss + Proportional Swap usage in MB + memory_locked + Locked pages in MB """ @@ -190,6 +316,29 @@ class ServiceStats(TypedDict): memory: float cpu: float pid_stats: dict[int, PIDStats] + memory_real_pss: float + memory_real: float + memory_pss: float + memory_pss_anon: float + 
memory_pss_file: float + memory_pss_dirty: float + memory_pss_shmem: float + memory_rss: float + memory_shared_clean: float + memory_shared_dirty: float + memory_private_clean: float + memory_private_dirty: float + memory_referenced: float + memory_anonymous: float + memory_lazyfree: float + memory_anon_hugepages: float + memory_shmem_pmd_mapped: float + memory_file_pmd_mapped: float + memory_shared_hugetlb: float + memory_private_hugetlb: float + memory_swap: float + memory_swappss: float + memory_locked: float class ServiceDeviceEntry(TypedDict): @@ -242,6 +391,8 @@ class ServiceEntry(TypedDict): The device the sensor is attributed to device_class The device class of the sensor + entity_category + The entity category of the sensor state_topic The topic containing all information for the attributes of the sensor qos @@ -262,5 +413,6 @@ class ServiceEntry(TypedDict): payload_off: str | None device: ServiceDeviceEntry device_class: str | None + entity_category: str | None json_attributes_topic: str | None qos: int