From 106008bbac5189c0573904dfbd0aa0a229ef9595 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 17 Jun 2026 16:35:41 +0200 Subject: [PATCH] feat(server): Add jemalloc stats as Datadog metrics --- Cargo.lock | 26 ++++++++++++++++++---- Cargo.toml | 3 ++- objectstore-server/Cargo.toml | 1 + objectstore-server/src/state.rs | 38 +++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 874be913..616af75a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2646,6 +2646,7 @@ dependencies = [ "stresstest", "tempfile", "thiserror 2.0.18", + "tikv-jemalloc-ctl", "tikv-jemallocator", "tokio", "tower", @@ -2846,6 +2847,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pear" version = "0.2.9" @@ -4370,11 +4377,22 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tikv-jemalloc-ctl" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a184c43b8ab2f41df2733b55556e3f5f632f4aeaa205b1bb018f574b7f5f142" +dependencies = [ + "libc", + "paste", + "tikv-jemalloc-sys", +] + [[package]] name = "tikv-jemalloc-sys" -version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" +version = "0.7.1+5.3.1-0-g81034ce1f1373e37dc865038e1bc8eeecf559ce8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b" +checksum = "1a2825c78386b4ae0314074867860ba9577875de945f05992c38815cbec327f0" dependencies = [ "cc", "libc", @@ -4382,9 +4400,9 @@ dependencies = [ [[package]] name = "tikv-jemallocator" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a" +checksum = 
"249f09e49ab1609436f34c776e84231bead18d6a955f119f939bdc1d847561bd" dependencies = [ "libc", "tikv-jemalloc-sys", diff --git a/Cargo.toml b/Cargo.toml index 7908086a..d3f00280 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,8 @@ serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.140" tempfile = "3.20.0" thiserror = "2.0.17" -tikv-jemallocator = { version = "0.6.1", features = ["background_threads", "override_allocator_on_supported_platforms"] } +tikv-jemallocator = { version = "0.7.0", features = ["background_threads", "override_allocator_on_supported_platforms"] } +tikv-jemalloc-ctl = { version = "0.7.0", features = ["stats"] } tokio = "1.47.0" tokio-util = { version = "0.7.15", features = ["rt"] } tracing = "0.1.41" diff --git a/objectstore-server/Cargo.toml b/objectstore-server/Cargo.toml index 78c1ec7f..729ea089 100644 --- a/objectstore-server/Cargo.toml +++ b/objectstore-server/Cargo.toml @@ -44,6 +44,7 @@ serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } tikv-jemallocator = { workspace = true } +tikv-jemalloc-ctl = { workspace = true } tokio = { workspace = true, features = ["full"] } tower = { version = "0.5.2" } tower-http = { version = "0.6.6", default-features = false, features = [ diff --git a/objectstore-server/src/state.rs b/objectstore-server/src/state.rs index 35e608da..5c92d05d 100644 --- a/objectstore-server/src/state.rs +++ b/objectstore-server/src/state.rs @@ -57,6 +57,7 @@ impl Services { /// use in the web server. pub async fn spawn(config: Config) -> Result { tokio::spawn(track_runtime_metrics(config.runtime.metrics_interval)); + tokio::spawn(track_allocator_metrics(config.runtime.metrics_interval)); let backend = backend::from_config(config.storage.clone()).await?; let service = @@ -108,6 +109,43 @@ impl Services { } } +/// Periodically captures and reports jemalloc stats. 
+async fn track_allocator_metrics(interval: Duration) { + // INVARIANT: MIB resolution only fails if jemalloc is not the active allocator, + // which would be a misconfigured build. Panic early to surface the problem. + let epoch = tikv_jemalloc_ctl::epoch::mib().expect("jemalloc epoch MIB"); + let allocated = tikv_jemalloc_ctl::stats::allocated::mib().expect("jemalloc allocated MIB"); + let active = tikv_jemalloc_ctl::stats::active::mib().expect("jemalloc active MIB"); + let resident = tikv_jemalloc_ctl::stats::resident::mib().expect("jemalloc resident MIB"); + let mapped = tikv_jemalloc_ctl::stats::mapped::mib().expect("jemalloc mapped MIB"); + + let mut ticker = tokio::time::interval(interval); + loop { + ticker.tick().await; + + let Ok(_) = epoch.advance() else { + continue; + }; + + if let Ok(allocated_bytes) = allocated.read() { + // Bytes currently allocated by the application. + objectstore_metrics::gauge!("jemalloc.allocated" = allocated_bytes); + } + if let Ok(active_bytes) = active.read() { + // Bytes in active jemalloc pages (≥ allocated). + objectstore_metrics::gauge!("jemalloc.active" = active_bytes); + } + if let Ok(resident_bytes) = resident.read() { + // Bytes in resident pages mapped from the OS (≥ active). + objectstore_metrics::gauge!("jemalloc.resident" = resident_bytes); + } + if let Ok(mapped_bytes) = mapped.read() { + // Bytes in active extents mapped from the OS (> active; no strict ordering vs. resident). + objectstore_metrics::gauge!("jemalloc.mapped" = mapped_bytes); + } + } +} + /// Periodically captures and reports internal Tokio runtime metrics. async fn track_runtime_metrics(interval: Duration) { let mut ticker = tokio::time::interval(interval);