Re: [PATCH v2 01/10] vfs: move cap_convert_nscap() call into vfs_setxattr()
From: Eric W. Biederman
Date: Fri Jan 01 2021 - 12:39:33 EST
Miklos Szeredi <mszeredi@xxxxxxxxxx> writes:
> cap_convert_nscap() does permission checking as well as conversion of the
> xattr value conditionally based on fs's user-ns.
>
> This is needed by overlayfs and probably other layered fs (ecryptfs) and is
> what vfs_foo() is supposed to do anyway.
Well crap.
I just noticed this and it turns out this change is wrong.
The problem is that it reads the rootid from the v3 fscap, using
current_user_ns() and then writes it using the sb->s_user_ns.
So any time the stacked filesystems sb->s_user_ns do not match or
current_user_ns does not match sb->s_user_ns this could be a problem.
In a stacked filesystem a second pass through vfs_setxattr will result
in the rootid being translated a second time (with potentially the wrong
namespaces). I think because of the security checks a we won't write
something we shouldn't be able to write to the filesystem. Still we
will be writing the wrong v3 fscap which can go quite badly.
This doesn't look terribly difficult to fix.
Probably convert this into a fs independent form using uids in
init_user_ns at input and have cap_convert_nscap convert the v3 fscap
into the filesystem dependent form. With some way for stackable
filesystems to just skip converting it from the filesystem independent
format.
Uids in xattrs that are expected to go directly to disk, but aren't
always suitable for going directly to disk are tricky.
Eric
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
> ---
> fs/xattr.c | 17 +++++++++++------
> include/linux/capability.h | 2 +-
> security/commoncap.c | 3 +--
> 3 files changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/fs/xattr.c b/fs/xattr.c
> index cd7a563e8bcd..fd57153b1f61 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -276,8 +276,16 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
> {
> struct inode *inode = dentry->d_inode;
> struct inode *delegated_inode = NULL;
> + const void *orig_value = value;
> int error;
>
> + if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
> + error = cap_convert_nscap(dentry, &value, size);
> + if (error < 0)
> + return error;
> + size = error;
> + }
> +
> retry_deleg:
> inode_lock(inode);
> error = __vfs_setxattr_locked(dentry, name, value, size, flags,
> @@ -289,6 +297,9 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
> if (!error)
> goto retry_deleg;
> }
> + if (value != orig_value)
> + kfree(value);
> +
> return error;
> }
> EXPORT_SYMBOL_GPL(vfs_setxattr);
> @@ -537,12 +548,6 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
> if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
> (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
> posix_acl_fix_xattr_from_user(kvalue, size);
> - else if (strcmp(kname, XATTR_NAME_CAPS) == 0) {
> - error = cap_convert_nscap(d, &kvalue, size);
> - if (error < 0)
> - goto out;
> - size = error;
> - }
> }
>
> error = vfs_setxattr(d, kname, kvalue, size, flags);
> diff --git a/include/linux/capability.h b/include/linux/capability.h
> index 1e7fe311cabe..b2f698915c0f 100644
> --- a/include/linux/capability.h
> +++ b/include/linux/capability.h
> @@ -270,6 +270,6 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns)
> /* audit system wants to get cap info from files as well */
> extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
>
> -extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size);
> +extern int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size);
>
> #endif /* !_LINUX_CAPABILITY_H */
> diff --git a/security/commoncap.c b/security/commoncap.c
> index 59bf3c1674c8..bacc1111d871 100644
> --- a/security/commoncap.c
> +++ b/security/commoncap.c
> @@ -473,7 +473,7 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
> *
> * If all is ok, we return the new size, on error return < 0.
> */
> -int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
> +int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size)
> {
> struct vfs_ns_cap_data *nscap;
> uid_t nsrootid;
> @@ -516,7 +516,6 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
> nscap->magic_etc = cpu_to_le32(nsmagic);
> memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
>
> - kvfree(*ivalue);
> *ivalue = nscap;
> return newsize;
> }