feat: allow configuring TCP listen backlog (#4355)

relu · web-flow · commit ff71575c76f8 · 2026-03-24T17:25:35.000-04:00
Linkerd proxy inbound listeners currently use a fixed TCP accept backlog
(observed as 128 via ss -ltnp) that cannot be configured by operators.

In high-traffic environments, especially during Kubernetes rollouts
where many sidecars simultaneously establish new outbound connections to
newly-ready pods, this fixed backlog can become a limiting factor. When
a connection burst exceeds the proxy’s accept queue capacity, incoming
connections are temporarily dropped or delayed at the TCP level, leading
to short-lived connection failures such as:

```
{"timestamp":"2025-12-12T19:55:11.333411Z","level":"WARN","fields":{"message":"Failed to connect","error":"connect timed out after 1s"},"target":"linkerd_reconnect","threadId":"ThreadId(1)"}
```

Because the proxy backlog is not configurable or documented, operators
have no direct way to tune Linkerd for services that experience high
fan-in or connection storms (for example during rollouts, autoscaling
events, or traffic rebalancing).

This commit introduces support for two new environment variables:
- `LINKERD2_PROXY_INBOUND_TCP_LISTEN_BACKLOG`
- `LINKERD2_PROXY_OUTBOUND_TCP_LISTEN_BACKLOG`

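These can be configured using the `proxy.additionalEnv` field in the
Linkerd Helm chart. When a variable is unset, the listener is still
created with `std::net::TcpListener::bind`, so the default backlog is
unchanged.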

Signed-off-by: Aurel Canciu <aurel.canciu@nexhealth.com>
diff --git a/linkerd/app/core/src/config.rs b/linkerd/app/core/src/config.rs
@@ -2,7 +2,7 @@ pub use crate::exp_backoff::ExponentialBackoff;
 use crate::{
     proxy::http::{h1, h2},
     svc::{queue, ExtractParam, Param},
-    transport::{DualListenAddr, Keepalive, ListenAddr, UserTimeout},
+    transport::{Backlog, DualListenAddr, Keepalive, ListenAddr, UserTimeout},
 };
 use std::time::Duration;
 
@@ -11,6 +11,7 @@ pub struct ServerConfig {
     pub addr: DualListenAddr,
     pub keepalive: Keepalive,
     pub user_timeout: UserTimeout,
+    pub backlog: Backlog,
     pub http2: h2::ServerParams,
 }
 
@@ -84,3 +85,9 @@ impl Param<UserTimeout> for ServerConfig {
         self.user_timeout
     }
 }
+
+impl Param<Backlog> for ServerConfig {
+    fn param(&self) -> Backlog {
+        self.backlog
+    }
+}
diff --git a/linkerd/app/inbound/src/test_util.rs b/linkerd/app/inbound/src/test_util.rs
@@ -8,7 +8,7 @@ use linkerd_app_core::{
         http::{h1, h2},
         tap,
     },
-    transport::{DualListenAddr, Keepalive, UserTimeout},
+    transport::{Backlog, DualListenAddr, Keepalive, UserTimeout},
     ProxyRuntime,
 };
 pub use linkerd_app_test as support;
@@ -59,6 +59,7 @@ pub fn default_config() -> Config {
                 addr: DualListenAddr(([0, 0, 0, 0], 0).into(), None),
                 keepalive: Keepalive(None),
                 user_timeout: UserTimeout(None),
+                backlog: Backlog::default(),
                 http2: h2::ServerParams::default(),
             },
             connect: config::ConnectConfig {
diff --git a/linkerd/app/integration/src/proxy.rs b/linkerd/app/integration/src/proxy.rs
@@ -2,7 +2,8 @@ use super::*;
 use linkerd_app_core::{
     svc::Param,
     transport::{
-        listen, orig_dst, Keepalive, ListenAddr, Local, OrigDstAddr, ServerAddr, UserTimeout,
+        listen, orig_dst, Backlog, Keepalive, ListenAddr, Local, OrigDstAddr, ServerAddr,
+        UserTimeout,
     },
     Result,
 };
@@ -70,7 +71,7 @@ struct MockDualOrigDst {
 
 impl<T> listen::Bind<T> for MockOrigDst
 where
-    T: Param<Keepalive> + Param<UserTimeout> + Param<ListenAddr>,
+    T: Param<Keepalive> + Param<UserTimeout> + Param<ListenAddr> + Param<Backlog>,
 {
     type Addrs = orig_dst::Addrs;
     type BoundAddrs = Local<ServerAddr>;
@@ -120,7 +121,7 @@ impl fmt::Debug for MockOrigDst {
 
 impl<T> listen::Bind<T> for MockDualOrigDst
 where
-    T: Param<Keepalive> + Param<UserTimeout> + Param<ListenAddr>,
+    T: Param<Keepalive> + Param<UserTimeout> + Param<ListenAddr> + Param<Backlog>,
 {
     type Addrs = orig_dst::Addrs;
     type BoundAddrs = (Local<ServerAddr>, Option<Local<ServerAddr>>);
diff --git a/linkerd/app/outbound/src/test_util.rs b/linkerd/app/outbound/src/test_util.rs
@@ -7,7 +7,7 @@ use linkerd_app_core::{
         http::{h1, h2},
         tap,
     },
-    transport::{DualListenAddr, Keepalive, UserTimeout},
+    transport::{Backlog, DualListenAddr, Keepalive, UserTimeout},
     IpMatch, IpNet, ProxyRuntime,
 };
 pub use linkerd_app_test as support;
@@ -27,6 +27,7 @@ pub(crate) fn default_config() -> Config {
                 addr: DualListenAddr(([0, 0, 0, 0], 0).into(), None),
                 keepalive: Keepalive(None),
                 user_timeout: UserTimeout(None),
+                backlog: Backlog::default(),
                 http2: h2::ServerParams::default(),
             },
             connect: config::ConnectConfig {
diff --git a/linkerd/app/src/env.rs b/linkerd/app/src/env.rs
@@ -5,7 +5,7 @@ use linkerd_app_core::{
     control::{Config as ControlConfig, ControlAddr},
     proxy::http::{h1, h2},
     tls,
-    transport::{DualListenAddr, Keepalive, ListenAddr, UserTimeout},
+    transport::{Backlog, DualListenAddr, Keepalive, ListenAddr, UserTimeout},
     AddrMatch, Conditional, IpNet,
 };
 use std::{collections::HashSet, net::SocketAddr, path::PathBuf, time::Duration};
@@ -136,6 +136,9 @@ const ENV_OUTBOUND_ACCEPT_USER_TIMEOUT: &str = "LINKERD2_PROXY_OUTBOUND_ACCEPT_U
 const ENV_INBOUND_CONNECT_USER_TIMEOUT: &str = "LINKERD2_PROXY_INBOUND_CONNECT_USER_TIMEOUT";
 const ENV_OUTBOUND_CONNECT_USER_TIMEOUT: &str = "LINKERD2_PROXY_OUTBOUND_CONNECT_USER_TIMEOUT";
 
+const ENV_INBOUND_TCP_LISTEN_BACKLOG: &str = "LINKERD2_PROXY_INBOUND_TCP_LISTEN_BACKLOG";
+const ENV_OUTBOUND_TCP_LISTEN_BACKLOG: &str = "LINKERD2_PROXY_OUTBOUND_TCP_LISTEN_BACKLOG";
+
 const ENV_INBOUND_MAX_IDLE_CONNS_PER_ENDPOINT: &str = "LINKERD2_PROXY_MAX_IDLE_CONNS_PER_ENDPOINT";
 const ENV_OUTBOUND_MAX_IDLE_CONNS_PER_ENDPOINT: &str =
     "LINKERD2_PROXY_OUTBOUND_MAX_IDLE_CONNS_PER_ENDPOINT";
@@ -389,6 +392,14 @@ pub fn parse_config<S: Strings>(strings: &S) -> Result<super::Config, EnvError>
     let inbound_accept_keepalive = parse(strings, ENV_INBOUND_ACCEPT_KEEPALIVE, parse_duration);
     let outbound_accept_keepalive = parse(strings, ENV_OUTBOUND_ACCEPT_KEEPALIVE, parse_duration);
 
+    let inbound_tcp_listen_backlog =
+        parse(strings, ENV_INBOUND_TCP_LISTEN_BACKLOG, parse_number::<u32>);
+    let outbound_tcp_listen_backlog = parse(
+        strings,
+        ENV_OUTBOUND_TCP_LISTEN_BACKLOG,
+        parse_number::<u32>,
+    );
+
     let inbound_connect_keepalive = parse(strings, ENV_INBOUND_CONNECT_KEEPALIVE, parse_duration);
     let outbound_connect_keepalive = parse(strings, ENV_OUTBOUND_CONNECT_KEEPALIVE, parse_duration);
 
@@ -500,10 +511,12 @@ pub fn parse_config<S: Strings>(strings: &S) -> Result<super::Config, EnvError>
 
         let keepalive = Keepalive(outbound_accept_keepalive?);
         let user_timeout = UserTimeout(outbound_accept_user_timeout?);
+        let backlog = Backlog(outbound_tcp_listen_backlog?);
         let server = ServerConfig {
             addr,
             keepalive,
             user_timeout,
+            backlog,
             http2: http2::parse_server(strings, "LINKERD2_PROXY_OUTBOUND_SERVER_HTTP2")?,
         };
         let discovery_idle_timeout =
@@ -592,10 +605,12 @@ pub fn parse_config<S: Strings>(strings: &S) -> Result<super::Config, EnvError>
         );
         let keepalive = Keepalive(inbound_accept_keepalive?);
         let user_timeout = UserTimeout(inbound_accept_user_timeout?);
+        let backlog = Backlog(inbound_tcp_listen_backlog?);
         let server = ServerConfig {
             addr,
             keepalive,
             user_timeout,
+            backlog,
             http2: http2::parse_server(strings, "LINKERD2_PROXY_INBOUND_SERVER_HTTP2")?,
         };
         let discovery_idle_timeout =
@@ -815,6 +830,7 @@ pub fn parse_config<S: Strings>(strings: &S) -> Result<super::Config, EnvError>
             addr: DualListenAddr(admin_listener_addr, None),
             keepalive: inbound.proxy.server.keepalive,
             user_timeout: inbound.proxy.server.user_timeout,
+            backlog: inbound.proxy.server.backlog,
             http2: inbound.proxy.server.http2.clone(),
         },
 
@@ -869,6 +885,7 @@ pub fn parse_config<S: Strings>(strings: &S) -> Result<super::Config, EnvError>
                 addr: DualListenAddr(addr, None),
                 keepalive: inbound.proxy.server.keepalive,
                 user_timeout: inbound.proxy.server.user_timeout,
+                backlog: inbound.proxy.server.backlog,
                 http2: inbound.proxy.server.http2.clone(),
             },
         })
diff --git a/linkerd/meshtls/tests/util.rs b/linkerd/meshtls/tests/util.rs
@@ -11,7 +11,7 @@ use linkerd_meshtls::{self as meshtls, watch};
 use linkerd_proxy_transport::{
     addrs::*,
     listen::{Addrs, Bind, BindTcp},
-    ConnectTcp, Keepalive, UserTimeout,
+    Backlog, ConnectTcp, Keepalive, UserTimeout,
 };
 use linkerd_stack::{
     layer::Layer, service_fn, ExtractParam, InsertParam, NewService, Param, ServiceExt,
@@ -408,6 +408,11 @@ impl Param<UserTimeout> for Server {
         UserTimeout(None)
     }
 }
+impl Param<Backlog> for Server {
+    fn param(&self) -> Backlog {
+        Backlog(None)
+    }
+}
 
 // === impl ServerParams ===
 
diff --git a/linkerd/proxy/transport/src/lib.rs b/linkerd/proxy/transport/src/lib.rs
@@ -45,6 +45,9 @@ impl From<UserTimeout> for Option<Duration> {
     }
 }
 
+#[derive(Copy, Clone, Debug, Default)]
+pub struct Backlog(pub Option<u32>);
+
 // Misc.
 
 fn set_nodelay_or_warn(socket: &TcpStream) {
diff --git a/linkerd/proxy/transport/src/listen.rs b/linkerd/proxy/transport/src/listen.rs
@@ -1,6 +1,6 @@
 mod dual_bind;
 
-use crate::{addrs::*, Keepalive, UserTimeout};
+use crate::{addrs::*, Backlog, Keepalive, UserTimeout};
 use dual_bind::DualBind;
 use futures::prelude::*;
 use linkerd_error::Result;
@@ -71,7 +71,7 @@ impl BindTcp {
 
 impl<T> Bind<T> for BindTcp
 where
-    T: Param<ListenAddr> + Param<Keepalive> + Param<UserTimeout>,
+    T: Param<ListenAddr> + Param<Keepalive> + Param<UserTimeout> + Param<Backlog>,
 {
     type Addrs = Addrs;
     type BoundAddrs = Local<ServerAddr>;
@@ -81,10 +81,36 @@ where
     fn bind(self, params: &T) -> Result<(Self::BoundAddrs, Self::Incoming)> {
         let listen = {
             let ListenAddr(addr) = params.param();
-            let l = std::net::TcpListener::bind(addr)?;
-            // Ensure that O_NONBLOCK is set on the socket before using it with Tokio.
-            l.set_nonblocking(true)?;
-            tokio::net::TcpListener::from_std(l).expect("listener must be valid")
+            let Backlog(backlog) = params.param();
+
+            match backlog {
+                Some(backlog) => {
+                    // Use TcpSocket to configure a custom listen backlog.
+                    // TcpSocket::new_v4/v6 automatically sets O_NONBLOCK, which is
+                    // required for Tokio's async I/O operations.
+                    let socket = if addr.is_ipv4() {
+                        tokio::net::TcpSocket::new_v4()?
+                    } else {
+                        tokio::net::TcpSocket::new_v6()?
+                    };
+
+                    // Enable SO_REUSEADDR to match std::net::TcpListener::bind
+                    // behavior. On Windows, std does not set SO_REUSEADDR to prevent
+                    // socket hijacking.
+                    #[cfg(not(windows))]
+                    socket.set_reuseaddr(true)?;
+
+                    socket.bind(addr)?;
+                    socket.listen(backlog)?
+                }
+                None => {
+                    // No custom backlog configured; use std::net::TcpListener::bind
+                    // which applies platform-correct defaults.
+                    let l = std::net::TcpListener::bind(addr)?;
+                    l.set_nonblocking(true)?;
+                    tokio::net::TcpListener::from_std(l).expect("listener must be valid")
+                }
+            }
         };
         let server = Local(ServerAddr(listen.local_addr()?));
         let Keepalive(keepalive) = params.param();
@@ -138,3 +164,79 @@ impl Param<AddrPair> for Addrs {
         AddrPair(client, server)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Clone)]
+    struct TestParams {
+        addr: ListenAddr,
+        keepalive: Keepalive,
+        user_timeout: UserTimeout,
+        backlog: crate::Backlog,
+    }
+
+    impl Param<ListenAddr> for TestParams {
+        fn param(&self) -> ListenAddr {
+            self.addr
+        }
+    }
+
+    impl Param<Keepalive> for TestParams {
+        fn param(&self) -> Keepalive {
+            self.keepalive
+        }
+    }
+
+    impl Param<UserTimeout> for TestParams {
+        fn param(&self) -> UserTimeout {
+            self.user_timeout
+        }
+    }
+
+    impl Param<crate::Backlog> for TestParams {
+        fn param(&self) -> crate::Backlog {
+            self.backlog
+        }
+    }
+
+    fn params_with_backlog(backlog: Option<u32>) -> TestParams {
+        TestParams {
+            addr: ListenAddr("127.0.0.1:0".parse().unwrap()),
+            keepalive: Keepalive(None),
+            user_timeout: UserTimeout(None),
+            backlog: crate::Backlog(backlog),
+        }
+    }
+
+    #[tokio::test]
+    async fn bind_with_custom_backlog() {
+        let params = params_with_backlog(Some(1024));
+        let bind = BindTcp::default();
+        let (bound_addr, mut incoming) = bind.bind(&params).expect("failed to bind");
+
+        // Verify we can connect and accept through the listener.
+        let addr = bound_addr.0 .0;
+        let connect = tokio::net::TcpStream::connect(addr);
+        let accept = incoming.next();
+        let (conn, accepted) = tokio::join!(connect, accept);
+        conn.expect("failed to connect");
+        accepted.expect("stream ended").expect("failed to accept");
+    }
+
+    #[tokio::test]
+    async fn bind_with_default_backlog() {
+        let params = params_with_backlog(None);
+        let bind = BindTcp::default();
+        let (bound_addr, mut incoming) = bind.bind(&params).expect("failed to bind");
+
+        // Verify we can connect and accept through the listener.
+        let addr = bound_addr.0 .0;
+        let connect = tokio::net::TcpStream::connect(addr);
+        let accept = incoming.next();
+        let (conn, accepted) = tokio::join!(connect, accept);
+        conn.expect("failed to connect");
+        accepted.expect("stream ended").expect("failed to accept");
+    }
+}
diff --git a/linkerd/proxy/transport/src/listen/dual_bind.rs b/linkerd/proxy/transport/src/listen/dual_bind.rs
@@ -1,4 +1,4 @@
-use crate::{addrs::DualListenAddr, listen::Bind, Keepalive, ListenAddr, UserTimeout};
+use crate::{addrs::DualListenAddr, listen::Bind, Backlog, Keepalive, ListenAddr, UserTimeout};
 use futures::Stream;
 use linkerd_error::Result;
 use linkerd_stack::Param;
@@ -26,7 +26,7 @@ impl<B> From<B> for DualBind<B> {
 
 impl<T, B> Bind<T> for DualBind<B>
 where
-    T: Param<DualListenAddr> + Param<Keepalive> + Param<UserTimeout> + Clone,
+    T: Param<DualListenAddr> + Param<Keepalive> + Param<UserTimeout> + Param<Backlog> + Clone,
     B: Bind<Listen<T>, Io = TcpStream> + Clone + 'static,
 {
     type Addrs = B::Addrs;
@@ -68,6 +68,12 @@ impl<T: Param<UserTimeout>> Param<UserTimeout> for Listen<T> {
     }
 }
 
+impl<T: Param<Backlog>> Param<Backlog> for Listen<T> {
+    fn param(&self) -> Backlog {
+        self.parent.param()
+    }
+}
+
 impl<T> Param<ListenAddr> for Listen<T> {
     fn param(&self) -> ListenAddr {
         ListenAddr(self.addr)

Original file line number	Diff line number	Diff line change
`@@ -45,6 +45,9 @@ impl From<UserTimeout> for Option<Duration> {`
`45`	`45`	`}`
`46`	`46`	`}`
`47`	`47`
	`48`	`+#[derive(Copy, Clone, Debug, Default)]`
	`49`	`+pub struct Backlog(pub Option<u32>);`
	`50`	`+`
`48`	`51`	`// Misc.`
`49`	`52`
`50`	`53`	`fn set_nodelay_or_warn(socket: &TcpStream) {`