diff options
author | Serge Hallyn <serge@hallyn.com> | 2021-04-14 00:09:05 +0000 |
---|---|---|
committer | Serge Hallyn <serge@hallyn.com> | 2021-04-16 21:31:16 -0500 |
commit | 7261101c747aa0370535a76aec517d0b083fdf45 (patch) | |
tree | 86bb435f99cb5180af7dfb0917ebf6987ac55e21 | |
parent | 7e25f40eab52c57ff6772d27d2aef3640a3237d7 (diff) | |
download | linux-2021-04-15/setfcap-nsfscaps-v4.tar.gz |
capabilities: prevent by-passing lack of CAP_SETFCAP (v4) 2021-04-15/setfcap-nsfscaps-v4
Currently, a process running as uid 0 but without cap_setfcap can
unshare a new user namespace with uid 0 mapped to 0. While this task
will not have new capabilities against the parent namespace, there is
a loophole due to the way namespaced file capabilities work. File
capabilities valid in userns 1 are distinguished from file capabilities
valid in userns 2 by the kuid which underlies uid 0. Therefore
the restricted root process can unshare a new self-mapping namespace,
add a namespaced file capability onto a file, then use that file
capability in the parent namespace.
To prevent that, mark a namespace which should not be allowed to
create file capabilities, and honor that when creating fscaps.
When a task creates a user namespace, mark in the child whether
the parent had cap_setfcap.
When a user namespace gets its uid 0 mapped, check whether that
uid 0 is shared with uid 0 for any ancestors. If so, verify
that that ancestor had cap_setfcap when it created its immediate
child. If not, then mark the new namespace as !may_setfcap.
When creating a namespaced file capability, refuse if may_setfcap
is false.
With this patch:
1. unprivileged user can still unshare -Ur
ubuntu@caps:~$ unshare -Ur
root@caps:~# logout
2. root user can still unshare -Ur
ubuntu@caps:~$ sudo bash
root@caps:/home/ubuntu# unshare -Ur
root@caps:/home/ubuntu# logout
3. root user without CAP_SETFCAP cannot unshare -Ur:
root@caps:/home/ubuntu# /sbin/capsh --drop=cap_setfcap --
root@caps:/home/ubuntu# /sbin/setcap cap_setfcap=p /sbin/setcap
unable to set CAP_SETFCAP effective capability: Operation not permitted
root@caps:/home/ubuntu# unshare -Ur
unshare: write failed /proc/self/uid_map: Operation not permitted
Signed-off-by: Serge Hallyn <serge@hallyn.com>
-rw-r--r-- | include/linux/user_namespace.h | 8 | ||||
-rw-r--r-- | include/uapi/linux/capability.h | 3 | ||||
-rw-r--r-- | kernel/user_namespace.c | 43 | ||||
-rw-r--r-- | security/commoncap.c | 4 |
4 files changed, 54 insertions, 4 deletions
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 64cf8ebdc4ec9..338026bb1fc99 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -63,6 +63,14 @@ struct user_namespace { kgid_t group; struct ns_common ns; unsigned long flags; + /* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP + * in its effective capability set at the child ns creation time. */ + bool parent_could_setfcap; + + /* may_setfcap is false if the namespace's 0 uid is shared with an + * ancestor namespace which did not have cap_setfcap when creating + * its child. */ + bool may_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index c6ca330341471..2ddb4226cd231 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -335,7 +335,8 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_CONTROL 30 -/* Set or remove capabilities on files */ +/* Set or remove capabilities on files. + Map uid=0 into a child user namespace. 
*/ #define CAP_SETFCAP 31 diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index af612945a4d05..e3d443712f632 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -106,6 +106,7 @@ int create_user_ns(struct cred *new) if (!ns) goto fail_dec; + ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP); ret = ns_alloc_inum(&ns->ns); if (ret) goto fail_free; @@ -117,6 +118,7 @@ int create_user_ns(struct cred *new) ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; + ns->may_setfcap = true; INIT_WORK(&ns->work, free_user_ns); for (i = 0; i < UCOUNT_COUNTS; i++) { ns->ucount_max[i] = INT_MAX; @@ -841,6 +843,36 @@ static int sort_idmaps(struct uid_gid_map *map) return 0; } +/* + * We are checking for a case where the current, new, userns, + * shares a root kuid with an ancestor, which did not have + * cap_setfcap when it created its child. + * This means that the ancestor, when it created its child, + * could not create file capabilities, but now through its + * decendents, which it could ptrace, it could create file + * capabilities valid in its own namespace. 
+ */ +static void check_may_setfcap(struct user_namespace *leafns) +{ + kuid_t leafroot = make_kuid(leafns, 0); + struct user_namespace *ns, *nsp = leafns; + + if (!uid_valid(leafroot)) + return; + + for (ns = leafns->parent; ; nsp = ns, ns = ns->parent) { + kuid_t root = make_kuid(ns, 0); + if (uid_eq(leafroot, root)) { + if (!nsp->parent_could_setfcap) { + leafns->may_setfcap = false; + break; + } + } + if (ns == &init_user_ns) + break; + } +} + static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -848,7 +880,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct uid_gid_map *parent_map) { struct seq_file *seq = file->private_data; - struct user_namespace *ns = seq->private; + struct user_namespace *map_ns = seq->private; struct uid_gid_map new_map; unsigned idx; struct uid_gid_extent extent; @@ -895,7 +927,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) + if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN)) goto out; /* Parse the user data */ @@ -965,7 +997,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. 
*/ - if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map)) goto out; ret = -EPERM; @@ -1015,6 +1047,10 @@ static ssize_t map_write(struct file *file, const char __user *buf, *ppos = count; ret = count; + + if (cap_setid == CAP_SETUID) + check_may_setfcap(map_ns); + out: if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(new_map.forward); @@ -1086,6 +1122,7 @@ static bool new_idmap_permitted(const struct file *file, struct uid_gid_map *new_map) { const struct cred *cred = file->f_cred; + /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. */ diff --git a/security/commoncap.c b/security/commoncap.c index 1c519c8752176..4a9c1e3fab1d7 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -553,6 +553,10 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, return -EINVAL; if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; + + if (!current_user_ns()->may_setfcap) + return -EPERM; + if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ |