Re: [RFC PATCH 3/3] add listmnt(2) syscall

From: Amir Goldstein
Date: Thu Sep 14 2023 - 02:00:33 EST


On Wed, Sep 13, 2023 at 6:22 PM Miklos Szeredi <mszeredi@xxxxxxxxxx> wrote:
>
> Add way to query the children of a particular mount. This is a more
> flexible way to iterate the mount tree than having to parse the complete
> /proc/self/mountinfo.
>
> Lookup the mount by the old (32bit) or new (64bit) mount ID. If a mount
> needs to be queried based on path, then statx(2) can be used to first query
> the mount ID belonging to the path.
>
> Return an array of new (64bit) mount ID's. Without privileges only mounts
> are listed which are reachable from the task's root.
>
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
> ---
> arch/x86/entry/syscalls/syscall_64.tbl | 1 +
> fs/namespace.c | 51 ++++++++++++++++++++++++++
> include/linux/syscalls.h | 2 +
> include/uapi/asm-generic/unistd.h | 5 ++-
> 4 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index 6d807c30cd16..0d9a47b0ce9b 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -376,6 +376,7 @@
> 452 common fchmodat2 sys_fchmodat2
> 453 64 map_shadow_stack sys_map_shadow_stack
> 454 common statmnt sys_statmnt
> +455 common listmnt sys_listmnt
>
> #
> # Due to a historical design error, certain syscalls are numbered differently
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 088a52043bba..5362b1ffb26f 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -4988,6 +4988,57 @@ SYSCALL_DEFINE5(statmnt, u64, mnt_id,
> return err;
> }
>
> +static long do_listmnt(struct vfsmount *mnt, u64 __user *buf, size_t bufsize,
> + const struct path *root)
> +{
> + struct mount *r, *m = real_mount(mnt);
> + struct path rootmnt = { .mnt = root->mnt, .dentry = root->mnt->mnt_root };
> + long ctr = 0;
> +
> + if (!capable(CAP_SYS_ADMIN) &&
> + !is_path_reachable(m, mnt->mnt_root, &rootmnt))
> + return -EPERM;
> +
> + list_for_each_entry(r, &m->mnt_mounts, mnt_child) {
> + if (!capable(CAP_SYS_ADMIN) &&
> + !is_path_reachable(r, r->mnt.mnt_root, root))
> + continue;
> +
> + if (ctr >= bufsize)
> + return -EOVERFLOW;
> + if (put_user(r->mnt_id_unique, buf + ctr))
> + return -EFAULT;
> + ctr++;
> + if (ctr < 0)
> + return -ERANGE;

I think it'd be good for userspace to be able to query required
bufsize with NULL buf, listattr style, rather than having to
guess and re-guess on EOVERFLOW.

Thanks,
Amir.






> + }
> + return ctr;
> +}
> +
> +SYSCALL_DEFINE4(listmnt, u64, mnt_id, u64 __user *, buf, size_t, bufsize,
> + unsigned int, flags)
> +{
> + struct vfsmount *mnt;
> + struct path root;
> + long err;
> +
> + if (flags)
> + return -EINVAL;
> +
> + down_read(&namespace_sem);
> + mnt = lookup_mnt_in_ns(mnt_id, current->nsproxy->mnt_ns);
> + err = -ENOENT;
> + if (mnt) {
> + get_fs_root(current->fs, &root);
> + err = do_listmnt(mnt, buf, bufsize, &root);
> + path_put(&root);
> + }
> + up_read(&namespace_sem);
> +
> + return err;
> +}
> +
> +
> static void __init init_mount_tree(void)
> {
> struct vfsmount *mnt;
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 1099bd307fa7..5d776cdb6f18 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -411,6 +411,8 @@ asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz,
> asmlinkage long sys_statmnt(u64 mnt_id, u64 mask,
> struct statmnt __user *buf, size_t bufsize,
> unsigned int flags);
> +asmlinkage long sys_listmnt(u64 mnt_id, u64 __user *buf, size_t bufsize,
> + unsigned int flags);
> asmlinkage long sys_truncate(const char __user *path, long length);
> asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
> #if BITS_PER_LONG == 32
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 640997231ff6..a2b41370f603 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -826,8 +826,11 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
> #define __NR_statmnt 454
> __SYSCALL(__NR_statmnt, sys_statmnt)
>
> +#define __NR_listmnt 455
> +__SYSCALL(__NR_listmnt, sys_listmnt)
> +
> #undef __NR_syscalls
> -#define __NR_syscalls 455
> +#define __NR_syscalls 456
>
> /*
> * 32 bit systems traditionally used different
> --
> 2.41.0
>