Re: [PATCH v2 1/6] fs: split off vfs_getdents function of getdents64 syscall

From: Christian Brauner
Date: Tue May 23 2023 - 11:39:19 EST


On Wed, May 10, 2023 at 07:52:49PM +0900, Dominique Martinet wrote:
> This splits off the vfs_getdents function from the getdents64 system
> call.
> This will allow io_uring to call the vfs_getdents function.
>
> Co-authored-by: Stefan Roesch <shr@xxxxxx>
> Signed-off-by: Dominique Martinet <asmadeus@xxxxxxxxxxxxx>
> ---
> fs/internal.h | 8 ++++++++
> fs/readdir.c | 34 ++++++++++++++++++++++++++--------
> 2 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/fs/internal.h b/fs/internal.h
> index bd3b2810a36b..e8ca000e6613 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -260,3 +260,11 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po
> struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns);
> struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap);
> void mnt_idmap_put(struct mnt_idmap *idmap);
> +
> +/*
> + * fs/readdir.c
> + */
> +struct linux_dirent64;
> +
> +int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
> + unsigned int count);
> diff --git a/fs/readdir.c b/fs/readdir.c
> index 9c53edb60c03..ed0803d0011e 100644
> --- a/fs/readdir.c
> +++ b/fs/readdir.c
> @@ -21,6 +21,7 @@
> #include <linux/unistd.h>
> #include <linux/compat.h>
> #include <linux/uaccess.h>
> +#include "internal.h"
>
> #include <asm/unaligned.h>
>
> @@ -351,10 +352,16 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
> return false;
> }
>
> -SYSCALL_DEFINE3(getdents64, unsigned int, fd,
> - struct linux_dirent64 __user *, dirent, unsigned int, count)
> +
> +/**
> + * vfs_getdents - getdents without fdget
> + * @file : pointer to file struct of directory
> + * @dirent : pointer to user directory structure
> + * @count : size of buffer
> + */
> +int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
> + unsigned int count)
> {
> - struct fd f;
> struct getdents_callback64 buf = {
> .ctx.actor = filldir64,
> .count = count,
> @@ -362,11 +369,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
> };
> int error;
>
> - f = fdget_pos(fd);
> - if (!f.file)
> - return -EBADF;
> -
> - error = iterate_dir(f.file, &buf.ctx);
> + error = iterate_dir(file, &buf.ctx);

So, as far as I can tell, this isn't enough.
If you look into iterate_shared() you should see that it uses and
updates f_pos. That can't work for io_uring, and it also isn't how
io_uring handles read and write. You probably need to use a local pos,
similar to what io_uring does in rw.c for rw->kiocb.ki_pos. Unlike read
and write, however, simply disallow any offsets for getdents completely,
so that nothing relies on f_pos anywhere at all.