From 2f423b6d65663b20ee90ab041ed4d6f62500eeb0 Mon Sep 17 00:00:00 2001 From: Egor Ignatov Date: Thu, 28 May 2026 11:18:48 +0300 Subject: [PATCH 1/3] libtcb: change the type of is_dropped to unsigned int The PRIV_MAGIC_* values used to mark the privilege state do not fit into a signed int, so store them in an unsigned field to avoid a signedness mismatch. --- ChangeLog | 9 +++++++++ include/tcb.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 001f011..4660eea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2026-05-28 Egor Ignatov + + libtcb: change the type of is_dropped to unsigned int. + The PRIV_MAGIC_* values used to mark the privilege state do not + fit into a signed int, so store them in an unsigned field to avoid + a signedness mismatch. + * include/tcb.h (struct tcb_privs): Change the type of the + is_dropped field to unsigned int. + 2024-12-22 Björn Esser tcb_(un)convert: Check for UID and EUID to be 0 before proceeding. diff --git a/include/tcb.h b/include/tcb.h index 9d09822..2ee7d4a 100644 --- a/include/tcb.h +++ b/include/tcb.h @@ -14,7 +14,7 @@ struct tcb_privs { int number_of_groups; gid_t old_gid; uid_t old_uid; - int is_dropped; + unsigned int is_dropped; }; extern int lckpwdf_tcb(const char *); From 7636733ae5157b3890a7bbf5599c0735c0065763 Mon Sep 17 00:00:00 2001 From: Egor Ignatov Date: Thu, 28 May 2026 11:18:51 +0300 Subject: [PATCH 2/3] libtcb: add setgroups_allowed() helper Detect /proc/self/setgroups == "deny" to recognize an unprivileged user namespace where setgroups(2) is permanently denied by the kernel. No-op on non-Linux. --- ChangeLog | 7 +++++++ libs/libtcb.c | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4660eea..ebcf590 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2026-05-28 Egor Ignatov + libtcb: add setgroups_allowed() helper. 
+ Detect /proc/self/setgroups == "deny" to recognize an unprivileged + user namespace where setgroups(2) is permanently denied by the + kernel. No-op on non-Linux. + * libs/libtcb.c (setgroups_allowed) [__linux__]: New function. + (setgroups_allowed) [!__linux__]: New stub returning 1. + libtcb: change the type of is_dropped to unsigned int. The PRIV_MAGIC_* values used to mark the privilege state do not fit into a signed int, so store them in an unsigned field to avoid diff --git a/libs/libtcb.c b/libs/libtcb.c index 872e435..6a9401f 100644 --- a/libs/libtcb.c +++ b/libs/libtcb.c @@ -167,6 +167,33 @@ static int sys_setgroups(size_t size, const gid_t *list) return syscall(SYS_setgroups, size, list); } +/* + * When an unprivileged user namespace is set up, "deny" is written to + * /proc/self/setgroups, after which setgroups(2) is permanently + * denied for this process. Used to disambiguate an EPERM from + * setgroups(0, NULL): if this returns 0, the failure is the + * kernel's permanent denial rather than a real error. + */ +static int setgroups_allowed(void) +{ +#ifdef __linux__ + int fd; + char buf[5]; + ssize_t n; + + fd = open("/proc/self/setgroups", O_RDONLY | O_NOCTTY); + if (fd == -1) + return 1; + n = read(fd, buf, 5); + close(fd); + if (n != 5) + return 1; + return memcmp(buf, "deny\n", 5) != 0; +#else + return 1; +#endif +} + #define PRIV_MAGIC 0x1004000a #define PRIV_MAGIC_NONROOT 0xdead000a From 630ffd280b7af35a06d892c4d88f4c0227efa0b1 Mon Sep 17 00:00:00 2001 From: Egor Ignatov Date: Thu, 28 May 2026 11:18:51 +0300 Subject: [PATCH 3/3] libtcb: tolerate setgroups EPERM in unprivileged user namespaces In a user namespace where /proc/self/setgroups is "deny", setgroups(2) is permanently rejected by the kernel. 
Perform the regular privilege drop and only tolerate sys_setgroups(0, NULL) failing with EPERM in such a namespace; in that case the kernel guarantees no supplementary group could have been gained via the namespace, so leaving the list in place is safe. Record this with a new PRIV_MAGIC_NOSETGROUPS state so that tcb_gain_priv_r() skips the matching setgroups() call. Fixes failures of pam_tcb, libnss_tcb, tcb_unconvert and shadow's shadowtcb_drop_priv() when running under a rootless container. --- ChangeLog | 18 ++++++++++++++++++ libs/libtcb.c | 28 ++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index ebcf590..df4c852 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,23 @@ 2026-05-28 Egor Ignatov + libtcb: tolerate setgroups EPERM in unprivileged user namespaces. + In a user namespace where /proc/self/setgroups is "deny", + setgroups(2) is permanently rejected by the kernel. Perform the + regular privilege drop and only tolerate sys_setgroups(0, NULL) + failing with EPERM in such a namespace; in that case the kernel + guarantees no supplementary group could have been gained via the + namespace, so leaving the list in place is safe. Record this + with a new PRIV_MAGIC_NOSETGROUPS state so that tcb_gain_priv_r() + skips the matching setgroups() call. + Fixes failures of pam_tcb, libnss_tcb, tcb_unconvert and shadow's + shadowtcb_drop_priv() when running under a rootless container. + * libs/libtcb.c (PRIV_MAGIC_NOSETGROUPS): New magic value. + (tcb_drop_priv_r): Tolerate EPERM from sys_setgroups(0, NULL) + when setgroups_allowed() returns 0 and record this by setting + p->is_dropped to PRIV_MAGIC_NOSETGROUPS. + (tcb_gain_priv_r): Accept PRIV_MAGIC_NOSETGROUPS and skip + sys_setgroups() in that state. + libtcb: add setgroups_allowed() helper. 
Detect /proc/self/setgroups == "deny" to recognize an unprivileged user namespace where setgroups(2) is permanently denied by the diff --git a/libs/libtcb.c b/libs/libtcb.c index 6a9401f..257ab33 100644 --- a/libs/libtcb.c +++ b/libs/libtcb.c @@ -196,10 +196,12 @@ static int setgroups_allowed(void) #define PRIV_MAGIC 0x1004000a #define PRIV_MAGIC_NONROOT 0xdead000a +#define PRIV_MAGIC_NOSETGROUPS 0xbeef000a int tcb_drop_priv_r(const char *name, struct tcb_privs *p) { int res; + unsigned int magic = PRIV_MAGIC; struct stat st; gid_t shadow_gid = -1; char *dir; @@ -234,14 +236,30 @@ int tcb_drop_priv_r(const char *name, struct tcb_privs *p) p->number_of_groups = res; - if (sys_setgroups(0, NULL) == -1) - return -1; + /* + * Try to clear the supplementary group list. In a user namespace + * where /proc/self/setgroups is "deny", setgroups(2) is permanently + * denied and the call returns EPERM. In that case the kernel + * guarantees the caller did not gain any group via the namespace + * mechanism, so it is safe to leave the list in place; we record + * this via PRIV_MAGIC_NOSETGROUPS so tcb_gain_priv_r() skips the + * matching setgroups() call. + */ + if (sys_setgroups(0, NULL) == -1) { + int saved_errno = errno; + if (errno != EPERM || setgroups_allowed()) { + errno = saved_errno; + return -1; + } + magic = PRIV_MAGIC_NOSETGROUPS; + } + if (!ch_gid(shadow_gid, &p->old_gid)) return -1; if (!ch_uid(st.st_uid, &p->old_uid)) return -1; - p->is_dropped = PRIV_MAGIC; + p->is_dropped = magic; return 0; } @@ -253,6 +271,7 @@ int tcb_gain_priv_r(struct tcb_privs *p) return 0; case PRIV_MAGIC: + case PRIV_MAGIC_NOSETGROUPS: break; default: @@ -264,7 +283,8 @@ int tcb_gain_priv_r(struct tcb_privs *p) return -1; if (!ch_gid(p->old_gid, NULL)) return -1; - if (sys_setgroups(p->number_of_groups, p->grplist) == -1) + if (p->is_dropped != PRIV_MAGIC_NOSETGROUPS && + sys_setgroups(p->number_of_groups, p->grplist) == -1) return -1; p->is_dropped = 0;