aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerge Hallyn <serge@hallyn.com>2021-04-14 00:09:05 +0000
committerSerge Hallyn <serge@hallyn.com>2021-04-16 21:31:16 -0500
commit7261101c747aa0370535a76aec517d0b083fdf45 (patch)
tree86bb435f99cb5180af7dfb0917ebf6987ac55e21
parent7e25f40eab52c57ff6772d27d2aef3640a3237d7 (diff)
downloadlinux-2021-04-15/setfcap-nsfscaps-v4.tar.gz
capabilities: prevent by-passing lack of CAP_SETFCAP (v4)2021-04-15/setfcap-nsfscaps-v4
Currently, a process running as uid 0 but without cap_setfcap can unshare a new user namespace with uid 0 mapped to 0. While this task will not have new capabilities against the parent namespace, there is a loophole due to the way namespaced file capabilities work. File capabilities valid in userns 1 are distinguished from file capabilities valid in userns 2 by the kuid which underlies uid 0. Therefore the restricted root process can unshare a new self-mapping namespace, add a namespaced file capability onto a file, then use that file capability in the parent namespace. To prevent that, mark a namespace which should not be allowed to create file capabilities, and honor that when creating fscaps. When a task creates a user namespace, mark in the child whether the parent had cap_setfcap. When a user namespace gets its uid 0 mapped, check whether that uid 0 is shared with uid 0 for any ancestors. If so, verify that that ancestor had cap_setfcap when it created its immediate child. If not, then mark the new namespace as !may_setfcap. When creating a namespaced file capability, refuse if may_setfcap is false. With this patch: 1. unprivileged user can still unshare -Ur ubuntu@caps:~$ unshare -Ur root@caps:~# logout 2. root user can still unshare -Ur ubuntu@caps:~$ sudo bash root@caps:/home/ubuntu# unshare -Ur root@caps:/home/ubuntu# logout 3. root user without CAP_SETFCAP cannot unshare -Ur: root@caps:/home/ubuntu# /sbin/capsh --drop=cap_setfcap -- root@caps:/home/ubuntu# /sbin/setcap cap_setfcap=p /sbin/setcap unable to set CAP_SETFCAP effective capability: Operation not permitted root@caps:/home/ubuntu# unshare -Ur unshare: write failed /proc/self/uid_map: Operation not permitted Signed-off-by: Serge Hallyn <serge@hallyn.com>
-rw-r--r--include/linux/user_namespace.h8
-rw-r--r--include/uapi/linux/capability.h3
-rw-r--r--kernel/user_namespace.c43
-rw-r--r--security/commoncap.c4
4 files changed, 54 insertions, 4 deletions
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 64cf8ebdc4ec9..338026bb1fc99 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -63,6 +63,14 @@ struct user_namespace {
kgid_t group;
struct ns_common ns;
unsigned long flags;
+ /* parent_could_setfcap: true if the creator of this ns had CAP_SETFCAP
+ * in its effective capability set at the child ns creation time. */
+ bool parent_could_setfcap;
+
+ /* may_setfcap is false if the namespace's 0 uid is shared with an
+ * ancestor namespace which did not have cap_setfcap when creating
+ * its child. */
+ bool may_setfcap;
#ifdef CONFIG_KEYS
/* List of joinable keyrings in this namespace. Modification access of
diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index c6ca330341471..2ddb4226cd231 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -335,7 +335,8 @@ struct vfs_ns_cap_data {
#define CAP_AUDIT_CONTROL 30
-/* Set or remove capabilities on files */
+/* Set or remove capabilities on files.
+ Map uid=0 into a child user namespace. */
#define CAP_SETFCAP 31
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index af612945a4d05..e3d443712f632 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -106,6 +106,7 @@ int create_user_ns(struct cred *new)
if (!ns)
goto fail_dec;
+ ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP);
ret = ns_alloc_inum(&ns->ns);
if (ret)
goto fail_free;
@@ -117,6 +118,7 @@ int create_user_ns(struct cred *new)
ns->level = parent_ns->level + 1;
ns->owner = owner;
ns->group = group;
+ ns->may_setfcap = true;
INIT_WORK(&ns->work, free_user_ns);
for (i = 0; i < UCOUNT_COUNTS; i++) {
ns->ucount_max[i] = INT_MAX;
@@ -841,6 +843,36 @@ static int sort_idmaps(struct uid_gid_map *map)
return 0;
}
+/*
+ * Clear leafns->may_setfcap if uid 0 of leafns maps to the same kuid as
+ * uid 0 of some ancestor namespace whose child (on the path down to
+ * leafns) was created without CAP_SETFCAP, as recorded in that child's
+ * parent_could_setfcap.  Such an ancestor could not create file
+ * capabilities itself, but through its descendants, which it could
+ * ptrace, it could otherwise create file capabilities valid in its own
+ * namespace.  Does nothing if uid 0 is not mapped in leafns.
+ */
+static void check_may_setfcap(struct user_namespace *leafns)
+{
+ kuid_t leafroot = make_kuid(leafns, 0);
+ struct user_namespace *ns, *nsp = leafns;
+
+ if (!uid_valid(leafroot))
+ return;
+
+ for (ns = leafns->parent; ; nsp = ns, ns = ns->parent) {
+ kuid_t root = make_kuid(ns, 0);
+ if (uid_eq(leafroot, root)) {
+ if (!nsp->parent_could_setfcap) {
+ leafns->may_setfcap = false;
+ break;
+ }
+ }
+ if (ns == &init_user_ns)
+ break;
+ }
+}
+
static ssize_t map_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos,
int cap_setid,
@@ -848,7 +880,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
struct uid_gid_map *parent_map)
{
struct seq_file *seq = file->private_data;
- struct user_namespace *ns = seq->private;
+ struct user_namespace *map_ns = seq->private;
struct uid_gid_map new_map;
unsigned idx;
struct uid_gid_extent extent;
@@ -895,7 +927,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
/*
* Adjusting namespace settings requires capabilities on the target.
*/
- if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
+ if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN))
goto out;
/* Parse the user data */
@@ -965,7 +997,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
ret = -EPERM;
/* Validate the user is allowed to use user id's mapped to. */
- if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
+ if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map))
goto out;
ret = -EPERM;
@@ -1015,6 +1047,10 @@ static ssize_t map_write(struct file *file, const char __user *buf,
*ppos = count;
ret = count;
+
+ if (cap_setid == CAP_SETUID)
+ check_may_setfcap(map_ns);
+
out:
if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
kfree(new_map.forward);
@@ -1086,6 +1122,7 @@ static bool new_idmap_permitted(const struct file *file,
struct uid_gid_map *new_map)
{
const struct cred *cred = file->f_cred;
+
/* Don't allow mappings that would allow anything that wouldn't
* be allowed without the establishment of unprivileged mappings.
*/
diff --git a/security/commoncap.c b/security/commoncap.c
index 1c519c8752176..4a9c1e3fab1d7 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -553,6 +553,10 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
return -EINVAL;
if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
return -EPERM;
+
+ if (!current_user_ns()->may_setfcap)
+ return -EPERM;
+
if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns))
if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
/* user is privileged, just write the v2 */