Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...
[sfrench/cifs-2.6.git] / security / commoncap.c
index d8e26fb9781d2bc7ae945602cb88a84ff786710d..6bf72b175b49caf336e7ef8f3d83cf256c955120 100644 (file)
@@ -82,8 +82,11 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
                if (ns == cred->user_ns)
                        return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
 
-               /* Have we tried all of the parent namespaces? */
-               if (ns == &init_user_ns)
+               /*
+                * If we're already at a lower level than we're looking for,
+                * we're done searching.
+                */
+               if (ns->level <= cred->user_ns->level)
                        return -EPERM;
 
                /* 
@@ -323,6 +326,209 @@ int cap_inode_killpriv(struct dentry *dentry)
        return error;
 }
 
+static bool rootid_owns_currentns(kuid_t kroot)
+{
+       struct user_namespace *ns;
+
+       if (!uid_valid(kroot))
+               return false;
+
+       for (ns = current_user_ns(); ; ns = ns->parent) {
+               if (from_kuid(ns, kroot) == 0)
+                       return true;
+               if (ns == &init_user_ns)
+                       break;
+       }
+
+       return false;
+}
+
+static __u32 sansflags(__u32 m)
+{
+       return m & ~VFS_CAP_FLAGS_EFFECTIVE;
+}
+
+static bool is_v2header(size_t size, __le32 magic)
+{
+       __u32 m = le32_to_cpu(magic);
+       if (size != XATTR_CAPS_SZ_2)
+               return false;
+       return sansflags(m) == VFS_CAP_REVISION_2;
+}
+
+static bool is_v3header(size_t size, __le32 magic)
+{
+       __u32 m = le32_to_cpu(magic);
+
+       if (size != XATTR_CAPS_SZ_3)
+               return false;
+       return sansflags(m) == VFS_CAP_REVISION_3;
+}
+
+/*
+ * getsecurity: We are called for security.* before any attempt to read the
+ * xattr from the inode itself.
+ *
+ * This gives us a chance to read the on-disk value and convert it.  If we
+ * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
+ *
+ * Note we are not called by vfs_getxattr_alloc(), but that is only called
+ * by the integrity subsystem, which really wants the unconverted values -
+ * so that's good.
+ */
+int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
+                         bool alloc)
+{
+       int size, ret;
+       kuid_t kroot;
+       uid_t root, mappedroot;
+       char *tmpbuf = NULL;
+       struct vfs_cap_data *cap;
+       struct vfs_ns_cap_data *nscap;
+       struct dentry *dentry;
+       struct user_namespace *fs_ns;
+
+       if (strcmp(name, "capability") != 0)
+               return -EOPNOTSUPP;
+
+       dentry = d_find_alias(inode);
+       if (!dentry)
+               return -EINVAL;
+
+       size = sizeof(struct vfs_ns_cap_data);
+       ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS,
+                                &tmpbuf, size, GFP_NOFS);
+       dput(dentry);
+
+       if (ret < 0)
+               return ret;
+
+       fs_ns = inode->i_sb->s_user_ns;
+       cap = (struct vfs_cap_data *) tmpbuf;
+       if (is_v2header((size_t) ret, cap->magic_etc)) {
+               /* If this is sizeof(vfs_cap_data) then we're ok with the
+                * on-disk value, so return that.  */
+               if (alloc)
+                       *buffer = tmpbuf;
+               else
+                       kfree(tmpbuf);
+               return ret;
+       } else if (!is_v3header((size_t) ret, cap->magic_etc)) {
+               kfree(tmpbuf);
+               return -EINVAL;
+       }
+
+       nscap = (struct vfs_ns_cap_data *) tmpbuf;
+       root = le32_to_cpu(nscap->rootid);
+       kroot = make_kuid(fs_ns, root);
+
+       /* If the root kuid maps to a valid uid in current ns, then return
+        * this as a nscap. */
+       mappedroot = from_kuid(current_user_ns(), kroot);
+       if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
+               if (alloc) {
+                       *buffer = tmpbuf;
+                       nscap->rootid = cpu_to_le32(mappedroot);
+               } else
+                       kfree(tmpbuf);
+               return size;
+       }
+
+       if (!rootid_owns_currentns(kroot)) {
+               kfree(tmpbuf);
+               return -EOPNOTSUPP;
+       }
+
+       /* This comes from a parent namespace.  Return as a v2 capability */
+       size = sizeof(struct vfs_cap_data);
+       if (alloc) {
+               *buffer = kmalloc(size, GFP_ATOMIC);
+               if (*buffer) {
+                       struct vfs_cap_data *cap = *buffer;
+                       __le32 nsmagic, magic;
+                       magic = VFS_CAP_REVISION_2;
+                       nsmagic = le32_to_cpu(nscap->magic_etc);
+                       if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
+                               magic |= VFS_CAP_FLAGS_EFFECTIVE;
+                       memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
+                       cap->magic_etc = cpu_to_le32(magic);
+               }
+       }
+       kfree(tmpbuf);
+       return size;
+}
+
+static kuid_t rootid_from_xattr(const void *value, size_t size,
+                               struct user_namespace *task_ns)
+{
+       const struct vfs_ns_cap_data *nscap = value;
+       uid_t rootid = 0;
+
+       if (size == XATTR_CAPS_SZ_3)
+               rootid = le32_to_cpu(nscap->rootid);
+
+       return make_kuid(task_ns, rootid);
+}
+
+static bool validheader(size_t size, __le32 magic)
+{
+       return is_v2header(size, magic) || is_v3header(size, magic);
+}
+
+/*
+ * User requested a write of security.capability.  If needed, update the
+ * xattr to change from v2 to v3, or to fixup the v3 rootid.
+ *
+ * If all is ok, we return the new size, on error return < 0.
+ */
+int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
+{
+       struct vfs_ns_cap_data *nscap;
+       uid_t nsrootid;
+       const struct vfs_cap_data *cap = *ivalue;
+       __u32 magic, nsmagic;
+       struct inode *inode = d_backing_inode(dentry);
+       struct user_namespace *task_ns = current_user_ns(),
+               *fs_ns = inode->i_sb->s_user_ns;
+       kuid_t rootid;
+       size_t newsize;
+
+       if (!*ivalue)
+               return -EINVAL;
+       if (!validheader(size, cap->magic_etc))
+               return -EINVAL;
+       if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
+               return -EPERM;
+       if (size == XATTR_CAPS_SZ_2)
+               if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
+                       /* user is privileged, just write the v2 */
+                       return size;
+
+       rootid = rootid_from_xattr(*ivalue, size, task_ns);
+       if (!uid_valid(rootid))
+               return -EINVAL;
+
+       nsrootid = from_kuid(fs_ns, rootid);
+       if (nsrootid == -1)
+               return -EINVAL;
+
+       newsize = sizeof(struct vfs_ns_cap_data);
+       nscap = kmalloc(newsize, GFP_ATOMIC);
+       if (!nscap)
+               return -ENOMEM;
+       nscap->rootid = cpu_to_le32(nsrootid);
+       nsmagic = VFS_CAP_REVISION_3;
+       magic = le32_to_cpu(cap->magic_etc);
+       if (magic & VFS_CAP_FLAGS_EFFECTIVE)
+               nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
+       nscap->magic_etc = cpu_to_le32(nsmagic);
+       memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
+
+       kvfree(*ivalue);
+       *ivalue = nscap;
+       return newsize;
+}
+
 /*
  * Calculate the new process capability sets from the capability sets attached
  * to a file.
@@ -376,7 +582,10 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
        __u32 magic_etc;
        unsigned tocopy, i;
        int size;
-       struct vfs_cap_data caps;
+       struct vfs_ns_cap_data data, *nscaps = &data;
+       struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
+       kuid_t rootkuid;
+       struct user_namespace *fs_ns = inode->i_sb->s_user_ns;
 
        memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
 
@@ -384,18 +593,20 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
                return -ENODATA;
 
        size = __vfs_getxattr((struct dentry *)dentry, inode,
-                             XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ);
+                             XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
        if (size == -ENODATA || size == -EOPNOTSUPP)
                /* no data, that's ok */
                return -ENODATA;
+
        if (size < 0)
                return size;
 
        if (size < sizeof(magic_etc))
                return -EINVAL;
 
-       cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc);
+       cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
 
+       rootkuid = make_kuid(fs_ns, 0);
        switch (magic_etc & VFS_CAP_REVISION_MASK) {
        case VFS_CAP_REVISION_1:
                if (size != XATTR_CAPS_SZ_1)
@@ -407,15 +618,27 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
                        return -EINVAL;
                tocopy = VFS_CAP_U32_2;
                break;
+       case VFS_CAP_REVISION_3:
+               if (size != XATTR_CAPS_SZ_3)
+                       return -EINVAL;
+               tocopy = VFS_CAP_U32_3;
+               rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
+               break;
+
        default:
                return -EINVAL;
        }
+       /* Limit the caps to the mounter of the filesystem
+        * or the more limited uid specified in the xattr.
+        */
+       if (!rootid_owns_currentns(rootkuid))
+               return -ENODATA;
 
        CAP_FOR_EACH_U32(i) {
                if (i >= tocopy)
                        break;
-               cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted);
-               cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
+               cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
+               cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
        }
 
        cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
@@ -453,8 +676,8 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
        rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
        if (rc < 0) {
                if (rc == -EINVAL)
-                       printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n",
-                               __func__, rc, bprm->filename);
+                       printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
+                                       bprm->filename);
                else if (rc == -ENODATA)
                        rc = 0;
                goto out;
@@ -633,15 +856,19 @@ skip:
 int cap_inode_setxattr(struct dentry *dentry, const char *name,
                       const void *value, size_t size, int flags)
 {
-       if (!strcmp(name, XATTR_NAME_CAPS)) {
-               if (!capable(CAP_SETFCAP))
-                       return -EPERM;
+       /* Ignore non-security xattrs */
+       if (strncmp(name, XATTR_SECURITY_PREFIX,
+                       sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
+               return 0;
+
+       /*
+        * For XATTR_NAME_CAPS the check will be done in
+        * cap_convert_nscap(), called by setxattr()
+        */
+       if (strcmp(name, XATTR_NAME_CAPS) == 0)
                return 0;
-       }
 
-       if (!strncmp(name, XATTR_SECURITY_PREFIX,
-                    sizeof(XATTR_SECURITY_PREFIX) - 1) &&
-           !capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        return 0;
 }
@@ -659,15 +886,22 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name,
  */
 int cap_inode_removexattr(struct dentry *dentry, const char *name)
 {
-       if (!strcmp(name, XATTR_NAME_CAPS)) {
-               if (!capable(CAP_SETFCAP))
+       /* Ignore non-security xattrs */
+       if (strncmp(name, XATTR_SECURITY_PREFIX,
+                       sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
+               return 0;
+
+       if (strcmp(name, XATTR_NAME_CAPS) == 0) {
+               /* security.capability gets namespaced */
+               struct inode *inode = d_backing_inode(dentry);
+               if (!inode)
+                       return -EINVAL;
+               if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
                        return -EPERM;
                return 0;
        }
 
-       if (!strncmp(name, XATTR_SECURITY_PREFIX,
-                    sizeof(XATTR_SECURITY_PREFIX) - 1) &&
-           !capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        return 0;
 }
@@ -1054,6 +1288,7 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
        LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds),
        LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
        LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
+       LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
        LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
        LSM_HOOK_INIT(mmap_file, cap_mmap_file),
        LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),