Skip to content

Commit 85aa661

Browse files
committed
Add real memory usage metrics from smaps
This commit introduces additional memory statistics based on /proc/<pid>/smaps_rollup: - memory_real_pss: proportional set size (shared pages divided among processes), representing the actual memory footprint of the service. - memory_real: conservative estimate of memory that will be freed if the process exits, based on Anonymous + SwapPss, excluding file-backed shared pages. These new metrics provide a more accurate view of real memory usage compared to the traditional RSS and virtual memory fields. smaps-based metrics are disabled by default and can be enabled with the `--smaps` option. Signed-off-by: Alexey Cluster <cluster@cluster.wtf>
1 parent 0fe7831 commit 85aa661

3 files changed

Lines changed: 305 additions & 7 deletions

File tree

systemctl2mqtt/const.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,32 @@
4040
ANSI_ESCAPE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
4141
# fmt: off
4242
STATS_REGISTRATION_ENTRIES = [
43-
# label,field,device_class,unit,icon
44-
('CPU', 'cpu', None, '%', 'mdi:chip'),
45-
('Memory', 'memory', 'data_size', 'MB', 'mdi:memory'),
43+
# label, field, device_class, unit, icon, category
44+
('CPU', 'cpu', None, '%', 'mdi:chip', None), # CPU utilization percentage
45+
('Memory (Virtual)', 'memory', 'data_size', 'MB', 'mdi:memory', None), # Total virtual memory usage
46+
('Memory (Real)', 'memory_real', 'data_size', 'MB', 'mdi:memory', None), # Real memory (calculated from smaps)
47+
('Memory (Real PSS)', 'memory_real_pss', 'data_size', 'MB', 'mdi:memory', None), # Real memory (calculated from smaps), based on PSS
48+
('PSS Memory', 'memory_pss', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Proportional Set Size (shared pages divided among processes)
49+
('PSS Anon', 'memory_pss_anon', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Anonymous memory part of PSS
50+
('PSS File', 'memory_pss_file', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # File-backed memory part of PSS
51+
('PSS Dirty', 'memory_pss_dirty', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Modified (dirty) memory part of PSS
52+
('PSS Shmem', 'memory_pss_shmem', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared memory part of PSS
53+
('RSS', 'memory_rss', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Resident Set Size (non-swapped physical memory)
54+
('Shared Clean', 'memory_shared_clean', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared pages not modified (clean)
55+
('Shared Dirty', 'memory_shared_dirty', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared pages modified (dirty)
56+
('Private Clean', 'memory_private_clean', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Private pages that are clean
57+
('Private Dirty', 'memory_private_dirty', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Private pages that are dirty
58+
('Referenced', 'memory_referenced', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Recently accessed pages
59+
('Anonymous', 'memory_anonymous', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Anonymous memory (not file-backed)
60+
('LazyFree', 'memory_lazyfree', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Pages marked as free-on-demand (MADV_FREE)
61+
('Anon HugePages', 'memory_anon_hugepages', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Anonymous memory using HugePages
62+
('Shmem PMD Mapped', 'memory_shmem_pmd_mapped', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared memory mapped with hugepages (PMD)
63+
('File PMD Mapped', 'memory_file_pmd_mapped', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # File-backed memory mapped with hugepages
64+
('Shared HugeTLB', 'memory_shared_hugetlb', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Shared HugeTLB memory usage
65+
('Private HugeTLB', 'memory_private_hugetlb', 'data_size', 'MB', 'mdi:memory', "diagnostic"), # Private HugeTLB memory usage
66+
('Swap', 'memory_swap', 'data_size', 'MB', 'mdi:swap-horizontal', "diagnostic"), # Memory swapped out
67+
('Swap PSS', 'memory_swappss', 'data_size', 'MB', 'mdi:swap-horizontal', "diagnostic"), # Proportional swap usage
68+
('Locked', 'memory_locked', 'data_size', 'MB', 'mdi:lock', "diagnostic"), # Locked pages (mlock)
4669
]
4770
# fmt: on
4871

@@ -65,6 +88,7 @@
6588
"service_blacklist": SERVICE_BLACKLIST,
6689
"enable_events": EVENTS_DEFAULT,
6790
"enable_stats": STATS_DEFAULT,
91+
"enable_smaps": STATS_DEFAULT,
6892
"stats_record_seconds": STATS_RECORD_SECONDS_DEFAULT,
6993
}
7094
)

systemctl2mqtt/systemctl2mqtt.py

Lines changed: 124 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ class Systemctl2Mqtt:
8484
Activate the stats
8585
b_events
8686
Activate the events
87+
b_smaps
88+
Activate the smaps memory stats (more detailed memory info, but more cpu usage)
8789
systemctl_events
8890
Queue with systemctl events
8991
systemctl_stats
@@ -127,6 +129,7 @@ class Systemctl2Mqtt:
127129

128130
b_stats: bool = False
129131
b_events: bool = False
132+
b_smaps: bool = False
130133

131134
systemctl_events: Queue[dict[str, str]] = Queue(maxsize=MAX_QUEUE_SIZE)
132135
systemctl_stats: Queue[list[str]] = Queue(maxsize=MAX_QUEUE_SIZE)
@@ -185,6 +188,8 @@ def __init__(self, cfg: Systemctl2MqttConfig, do_not_exit: bool = False):
185188
self.b_events = True
186189
if self.cfg["enable_stats"]:
187190
self.b_stats = True
191+
if self.cfg["enable_smaps"]:
192+
self.b_smaps = True
188193

189194
main_logger.setLevel(self.cfg["log_level"].upper())
190195
events_logger.setLevel(self.cfg["log_level"].upper())
@@ -197,13 +202,19 @@ def __init__(self, cfg: Systemctl2MqttConfig, do_not_exit: bool = False):
197202
"Could not get systemctl version"
198203
) from ex
199204

205+
if self.b_smaps and not self.b_stats:
206+
raise Systemctl2MqttConfigException(
207+
"Cannot enable smaps without stats, please enable stats as well."
208+
)
209+
200210
if not self.do_not_exit:
201211
main_logger.info("Register signal handlers for SIGINT and SIGTERM")
202212
signal.signal(signal.SIGTERM, self._signal_handler)
203213
signal.signal(signal.SIGINT, self._signal_handler)
204214

205215
main_logger.info("Events enabled: %d", self.b_events)
206216
main_logger.info("Stats enabled: %d", self.b_stats)
217+
main_logger.info("Smaps enabled: %d", self.b_smaps)
207218

208219
try:
209220
# Setup MQTT
@@ -736,7 +747,7 @@ def _register_service(self, service_entry: ServiceEvent) -> None:
736747
)
737748

738749
# Stats
739-
for label, field, device_class, unit, icon in STATS_REGISTRATION_ENTRIES:
750+
for label, field, device_class, unit, icon, category in STATS_REGISTRATION_ENTRIES:
740751
registration_topic = self.discovery_sensor_topic.format(
741752
INVALID_HA_TOPIC_CHARS.sub("_", f"{service}_{field}_stats")
742753
)
@@ -756,6 +767,7 @@ def _register_service(self, service_entry: ServiceEvent) -> None:
756767
"payload_off": None,
757768
"json_attributes_topic": stats_topic,
758769
"device_class": device_class,
770+
"entity_category": category,
759771
"device": self._device_definition(service_entry),
760772
"qos": self.cfg["mqtt_qos"],
761773
}
@@ -800,7 +812,7 @@ def _unregister_service(self, service: str) -> None:
800812
)
801813

802814
# Stats
803-
for _, field, _, _, _ in STATS_REGISTRATION_ENTRIES:
815+
for _, field, _, _, _, _ in STATS_REGISTRATION_ENTRIES:
804816
self._mqtt_send(
805817
self.discovery_sensor_topic.format(
806818
INVALID_HA_TOPIC_CHARS.sub("_", f"{service}_{field}_stats")
@@ -975,6 +987,38 @@ def _handle_events_queue(self) -> None:
975987
retain=True,
976988
)
977989

990+
def get_smaps(self, pid: int) -> dict[str, int]:
    """Parse /proc/<pid>/smaps_rollup into a dictionary.

    Keys are the field names with the trailing colon removed
    (e.g. 'Pss', 'Pss_Anon', 'Pss_File', ...).
    Values are integers in kilobytes.
    Lines whose second whitespace-separated token is not purely digits
    are ignored.

    Parameters
    ----------
    pid
        The PID of the process to get the smaps for

    Returns
    -------
    dict[str, int]
        Mapping of field name to its value in kB. Empty when no line of
        the file matched the ``<name>: <number>`` shape.

    Raises
    ------
    ValueError
        If the process is not found (including when it disappears while
        the file is being read) or smaps is not supported.

    """
    path = f"/proc/{pid}/smaps_rollup"
    result: dict[str, int] = {}
    try:
        with open(path, encoding="utf-8") as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 2 and parts[1].isdigit():
                    # The value column is always reported in kB.
                    result[parts[0].rstrip(":")] = int(parts[1])
    except (FileNotFoundError, ProcessLookupError):
        # FileNotFoundError: the /proc entry does not exist (process gone
        # or smaps_rollup unavailable). ProcessLookupError: the file was
        # opened but the process exited before/during the read (ESRCH).
        # Both mean "no data for this pid", so report them the same way.
        raise ValueError(f"Process {pid} not found or smaps not supported") from None
    return result
1021+
9781022
def _handle_stats_queue(self) -> None:
9791023
"""Check if any stat is present in the queue and process it.
9801024
@@ -1045,11 +1089,36 @@ def _handle_stats_queue(self) -> None:
10451089
# self.known_stat_services[service][pid]["last"] - container_date
10461090
# ).total_seconds()
10471091

1092+
smaps = self.get_smaps(pid) if self.b_smaps else {}
1093+
10481094
pid_stats = PIDStats(
10491095
{
10501096
"pid": pid,
10511097
"cpu": float(stat[8]),
10521098
"memory": parse_top_size(stat[5]) / 1024, # KB --> MB
1099+
"memory_real": (smaps.get("Anonymous", 0) + smaps.get("SwapPss", 0)) / 1024,
1100+
"memory_real_pss": (smaps.get("Pss_Anon", 0) + smaps.get("Pss_Shm", 0) + smaps.get("SwapPss", 0)) / 1024,
1101+
"memory_pss_anon": smaps.get("Pss_Anon", 0) / 1024,
1102+
"memory_pss": smaps.get("Pss", 0) / 1024,
1103+
"memory_pss_file": smaps.get("Pss_File", 0) / 1024,
1104+
"memory_pss_dirty": smaps.get("Pss_Dirty", 0) / 1024,
1105+
"memory_pss_shmem": smaps.get("Pss_Shm", 0) / 1024,
1106+
"memory_rss": smaps.get("Rss", 0) / 1024,
1107+
"memory_shared_clean": smaps.get("Shared_Clean", 0) / 1024,
1108+
"memory_shared_dirty": smaps.get("Shared_Dirty", 0) / 1024,
1109+
"memory_private_clean": smaps.get("Private_Clean", 0) / 1024,
1110+
"memory_private_dirty": smaps.get("Private_Dirty", 0) / 1024,
1111+
"memory_referenced": smaps.get("Referenced", 0) / 1024,
1112+
"memory_anonymous": smaps.get("Anonymous", 0) / 1024,
1113+
"memory_lazyfree": smaps.get("LazyFree", 0) / 1024,
1114+
"memory_anon_hugepages": smaps.get("AnonHugePages", 0) / 1024,
1115+
"memory_shmem_pmd_mapped": smaps.get("ShmemPmdMapped", 0) / 1024,
1116+
"memory_file_pmd_mapped": smaps.get("FilePmdMapped", 0) / 1024,
1117+
"memory_shared_hugetlb": smaps.get("Shared_Hugetlb", 0) / 1024,
1118+
"memory_private_hugetlb": smaps.get("Private_Hugetlb", 0) / 1024,
1119+
"memory_swap": smaps.get("Swap", 0) / 1024,
1120+
"memory_swappss": smaps.get("SwapPss", 0) / 1024,
1121+
"memory_locked": smaps.get("Locked", 0) / 1024,
10531122
}
10541123
)
10551124
stats_logger.debug("Printing pid stats: %s", pid_stats)
@@ -1060,6 +1129,29 @@ def _handle_stats_queue(self) -> None:
10601129
"host": self.cfg["systemctl2mqtt_hostname"],
10611130
"cpu": 0,
10621131
"memory": 0,
1132+
"memory_real": 0,
1133+
"memory_real_pss": 0,
1134+
"memory_pss": 0,
1135+
"memory_pss_anon": 0,
1136+
"memory_pss_file": 0,
1137+
"memory_pss_dirty": 0,
1138+
"memory_pss_shmem": 0,
1139+
"memory_rss": 0,
1140+
"memory_shared_clean": 0,
1141+
"memory_shared_dirty": 0,
1142+
"memory_private_clean": 0,
1143+
"memory_private_dirty": 0,
1144+
"memory_referenced": 0,
1145+
"memory_anonymous": 0,
1146+
"memory_lazyfree": 0,
1147+
"memory_anon_hugepages": 0,
1148+
"memory_shmem_pmd_mapped": 0,
1149+
"memory_file_pmd_mapped": 0,
1150+
"memory_shared_hugetlb": 0,
1151+
"memory_private_hugetlb": 0,
1152+
"memory_swap": 0,
1153+
"memory_swappss": 0,
1154+
"memory_locked": 0,
10631155
"pid_stats": self.last_stat_services[service][
10641156
"pid_stats"
10651157
]
@@ -1071,6 +1163,30 @@ def _handle_stats_queue(self) -> None:
10711163

10721164
for pid_stat in service_stats["pid_stats"].values():
10731165
service_stats["memory"] += pid_stat["memory"]
1166+
service_stats["memory"] += pid_stat["memory"]
1167+
service_stats["memory_real"] += pid_stat["memory_real"]
1168+
service_stats["memory_real_pss"] += pid_stat["memory_real_pss"]
1169+
service_stats["memory_pss"] += pid_stat["memory_pss"]
1170+
service_stats["memory_pss_anon"] += pid_stat["memory_pss_anon"]
1171+
service_stats["memory_pss_file"] += pid_stat["memory_pss_file"]
1172+
service_stats["memory_pss_dirty"] += pid_stat["memory_pss_dirty"]
1173+
service_stats["memory_pss_shmem"] += pid_stat["memory_pss_shmem"]
1174+
service_stats["memory_rss"] += pid_stat["memory_rss"]
1175+
service_stats["memory_shared_clean"] += pid_stat["memory_shared_clean"]
1176+
service_stats["memory_shared_dirty"] += pid_stat["memory_shared_dirty"]
1177+
service_stats["memory_private_clean"] += pid_stat["memory_private_clean"]
1178+
service_stats["memory_private_dirty"] += pid_stat["memory_private_dirty"]
1179+
service_stats["memory_referenced"] += pid_stat["memory_referenced"]
1180+
service_stats["memory_anonymous"] += pid_stat["memory_anonymous"]
1181+
service_stats["memory_lazyfree"] += pid_stat["memory_lazyfree"]
1182+
service_stats["memory_anon_hugepages"] += pid_stat["memory_anon_hugepages"]
1183+
service_stats["memory_shmem_pmd_mapped"] += pid_stat["memory_shmem_pmd_mapped"]
1184+
service_stats["memory_file_pmd_mapped"] += pid_stat["memory_file_pmd_mapped"]
1185+
service_stats["memory_shared_hugetlb"] += pid_stat["memory_shared_hugetlb"]
1186+
service_stats["memory_private_hugetlb"] += pid_stat["memory_private_hugetlb"]
1187+
service_stats["memory_swap"] += pid_stat["memory_swap"]
1188+
service_stats["memory_swappss"] += pid_stat["memory_swappss"]
1189+
service_stats["memory_locked"] += pid_stat["memory_locked"]
10741190
service_stats["cpu"] += pid_stat["cpu"]
10751191

10761192
self.last_stat_services[service] = service_stats
@@ -1236,6 +1352,11 @@ def main() -> None:
12361352
help="Publish Stats",
12371353
action="store_true",
12381354
)
1355+
parser.add_argument(
1356+
"--smaps",
1357+
help="Publish extended memory stats (more detailed memory info, but more cpu usage, only if --stats is enabled)",
1358+
action="store_true",
1359+
)
12391360
parser.add_argument(
12401361
"--interval",
12411362
help=f"The number of seconds to record state and make an average (default: {STATS_RECORD_SECONDS_DEFAULT})",
@@ -1273,6 +1394,7 @@ def main() -> None:
12731394
"mqtt_qos": args.qos,
12741395
"enable_events": args.events,
12751396
"enable_stats": args.stats,
1397+
"enable_smaps": args.smaps,
12761398
"stats_record_seconds": args.interval,
12771399
}
12781400
)

0 commit comments

Comments
 (0)