diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 3984cb7115..1a145040e7 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -234,6 +234,7 @@ tomcat-catalina-jakarta = { module = "org.apache.tomcat:tomcat-catalina", versio
tomcat-embed-jasper-jakarta = { module = "org.apache.tomcat.embed:tomcat-embed-jasper", version = "11.0.22" }
# test libraries
+androidx-benchmark-macro-junit4 = { module = "androidx.benchmark:benchmark-macro-junit4", version = "1.4.1" }
androidx-compose-ui-test-junit4 = { module = "androidx.compose.ui:ui-test-junit4", version = "1.9.5" }
androidx-test-core = { module = "androidx.test:core", version.ref = "androidxTestCore" }
androidx-test-core-ktx = { module = "androidx.test:core-ktx", version.ref = "androidxTestCore" }
diff --git a/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/README.md b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/README.md
new file mode 100644
index 0000000000..eae36ac178
--- /dev/null
+++ b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/README.md
@@ -0,0 +1,53 @@
+# sentry-uitest-android-macrobenchmark
+
+Jetpack Macrobenchmark for cold-start of `sentry-samples-android`, used to evaluate SDK-init
+performance changes on a real device in a **stable, reproducible** way. Not run in CI.
+
+## What it measures
+
+`SentryStartupBenchmark` runs a cold start and reports **`timeToInitialDisplay`**
+(`StartupTimingMetric`) per iteration — the whole app cold start, taken from framework trace
+events. No trace markers are required in the SDK or the app.
+
+The flip side of marker-free measurement: an SDK change has to be large enough (roughly tens of
+milliseconds) to show above cold-start noise. Smaller changes are not resolvable with
+`timeToInitialDisplay` alone; for those, capture a perfetto trace and inspect the relevant slices
+directly (each iteration's trace is saved under
+`build/outputs/connected_android_test_additional_output/`).
+
+`CompilationMode.Full()` pins ART AOT so dexopt state can't drift between runs. `StartupMode.COLD`
+does the correct force-stop sequencing (it does **not** `pm clear`, so app data/permissions are
+kept). Iterations are capped at 12 because back-to-back cold starts thermally throttle an
+unlocked-clock device (seen on a Pixel 3) after ~14 iterations, inflating the tail of longer runs.
+
+## Running
+
+Connect a device, then:
+
+```bash
+./gradlew :sentry-android-integration-tests:sentry-uitest-android-macrobenchmark:connectedBenchmarkAndroidTest
+```
+
+Results print to the console and are written to
+`build/outputs/connected_android_test_additional_output/.../*-benchmarkData.json`.
+
+### Device hygiene (do this for trustworthy numbers)
+
+- **Wake and unlock the device first** — the launch check fails with "Unable to confirm activity
+ launch completion" on a dozing/locked screen
+ (`adb shell input keyevent KEYCODE_WAKEUP && adb shell wm dismiss-keyguard`).
+- **Charge above 25%** — Macrobenchmark refuses to run below that.
+- **Lock CPU clocks** if the device is rooted: this is the single biggest cure for thermal drift.
+- Otherwise: let the device cool between runs, keep it on AC power, enable airplane mode, and turn
+ animations off (`adb shell settings put global window_animation_scale 0`, plus
+ `transition_animation_scale` and `animator_duration_scale`).
+- Heed Macrobenchmark's warnings about unlocked clocks / low battery — they mean the numbers are
+ noisy.
+
+## A/B-ing an SDK change
+
+Macrobenchmark measures one build per run, so compare separate runs — but **interleave them**:
+running all of variant A followed by all of variant B lets thermal drift systematically penalize
+whichever variant runs second. Instead, alternate A/B rounds (build variant A, run, build variant
+B, run, repeat 2–3 times), keep each round's `*-benchmarkData.json`, and compare the
+`timeToInitialDisplay` values pooled per variant.
diff --git a/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/build.gradle.kts b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/build.gradle.kts
new file mode 100644
index 0000000000..2d2aab48a1
--- /dev/null
+++ b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/build.gradle.kts
@@ -0,0 +1,46 @@
+plugins {
+ id("com.android.test") // test-only module that instruments a separate target app
+ alias(libs.plugins.kotlin.android)
+}
+
+android {
+ namespace = "io.sentry.uitest.android.macrobenchmark"
+ compileSdk = libs.versions.compileSdk.get().toInt()
+
+ defaultConfig {
+ // Macrobenchmark requires API 23+. NOTE(review): 24 is set below — confirm that is intended.
+ minSdk = 24
+ targetSdk = libs.versions.targetSdk.get().toInt()
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ }
+
+ buildTypes {
+ // Pairs with the app's release build via matchingFallbacks. The test APK itself must be
+ // debuggable (to instrument) and signed (to install); only the target app needs to be
+ // genuinely release-like.
+ create("benchmark") {
+ isDebuggable = true
+ signingConfig = signingConfigs.getByName("debug")
+ matchingFallbacks += listOf("release")
+ }
+ }
+
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_11
+ targetCompatibility = JavaVersion.VERSION_11
+ }
+
+ kotlin { compilerOptions.jvmTarget = org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_11 }
+
+ targetProjectPath = ":sentry-samples:sentry-samples-android" // app under test
+ // Run the test in its own process so it measures the target app cold, not itself.
+ experimentalProperties["android.experimental.self-instrumenting"] = true
+}
+
+// Benchmarks only make sense against the release build; drop the debug variant entirely.
+androidComponents { beforeVariants(selector().withBuildType("debug")) { it.enable = false } }
+
+dependencies {
+ implementation(libs.androidx.test.ext.junit)
+ implementation(libs.androidx.benchmark.macro.junit4)
+}
diff --git a/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/src/main/AndroidManifest.xml b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..b2d3ea1235
--- /dev/null
+++ b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/src/main/AndroidManifest.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/src/main/java/io/sentry/uitest/android/macrobenchmark/SentryStartupBenchmark.kt b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/src/main/java/io/sentry/uitest/android/macrobenchmark/SentryStartupBenchmark.kt
new file mode 100644
index 0000000000..24b8707ba5
--- /dev/null
+++ b/sentry-android-integration-tests/sentry-uitest-android-macrobenchmark/src/main/java/io/sentry/uitest/android/macrobenchmark/SentryStartupBenchmark.kt
@@ -0,0 +1,48 @@
+package io.sentry.uitest.android.macrobenchmark
+
+import androidx.benchmark.macro.CompilationMode
+import androidx.benchmark.macro.StartupMode
+import androidx.benchmark.macro.StartupTimingMetric
+import androidx.benchmark.macro.junit4.MacrobenchmarkRule
+import androidx.test.ext.junit.runners.AndroidJUnit4
+import org.junit.Rule
+import org.junit.Test
+import org.junit.runner.RunWith
+
+/**
+ * Cold-start benchmark for the sentry-samples-android app, used to evaluate SDK-init changes on a
+ * real device in a stable, repeatable way.
+ *
+ * Reports timeToInitialDisplay ([StartupTimingMetric]) per iteration. This measures the whole app
+ * cold start from framework trace events, with no trace markers in the SDK or the app — which also
+ * means SDK changes need to be large enough (roughly tens of milliseconds) to show above cold-start
+ * noise.
+ *
+ * [CompilationMode.Full] pins ART AOT compilation so dexopt state does not drift between runs.
+ * Iterations are capped at 12: on an unthrottled Pixel 3, back-to-back cold starts hit thermal
+ * throttling after ~14 iterations, which inflates the tail of longer runs. This is NOT a CI test;
+ * it requires a connected device. To A/B an SDK change, see README.md (build the app twice, once
+ * per SDK variant, in interleaved rounds).
+ */
+@RunWith(AndroidJUnit4::class)
+class SentryStartupBenchmark {
+
+ @get:Rule val benchmarkRule = MacrobenchmarkRule()
+
+ @Test
+ fun startupFullCompilation() =
+ benchmarkRule.measureRepeated(
+ packageName = TARGET_PACKAGE,
+ metrics = listOf(StartupTimingMetric()), // reports timeToInitialDisplay
+ compilationMode = CompilationMode.Full(), // full AOT: dexopt state can't drift between runs
+ startupMode = StartupMode.COLD, // force-stop only; app data is kept (no `pm clear`)
+ iterations = 12, // capped: thermal throttling seen after ~14 cold starts
+ setupBlock = { pressHome() },
+ ) {
+ startActivityAndWait()
+ }
+
+ private companion object {
+ const val TARGET_PACKAGE = "io.sentry.samples.android" // the sentry-samples-android app
+ }
+}
diff --git a/sentry-samples/sentry-samples-android/src/main/AndroidManifest.xml b/sentry-samples/sentry-samples-android/src/main/AndroidManifest.xml
index d72087fbfa..61f4df5b8d 100644
--- a/sentry-samples/sentry-samples-android/src/main/AndroidManifest.xml
+++ b/sentry-samples/sentry-samples-android/src/main/AndroidManifest.xml
@@ -31,6 +31,12 @@
android:theme="@style/AppTheme"
tools:ignore="GoogleAppIndexingWarning, UnusedAttribute">
+
+
+