Re: [PATCH v2 (kindof)] fs: use __fput_sync in close(2)

From: Christian Brauner
Date: Tue Aug 08 2023 - 15:09:29 EST


On Tue, Aug 08, 2023 at 05:07:22PM +0200, Mateusz Guzik wrote:
> I slapped the following variant just for illustration purposes.
>
> - adds __close_fd which returns a struct file
> - adds __filp_close with a flag whether to fput
> - makes close(2) use both
> - transparent to everyone else
>
> Downside is that __fput_sync still loses the assert. Instead of
> losing, it could perhaps be extended with a hack to check syscall
> number -- pass if either this is close (or binary compat close) or a
> kthread, BUG out otherwise. Alternatively perhaps deref could be
> opencoded along with a comment about real fput that this is taking
> place. Or maybe some other cosmetic choice.
>
> I cannot compile-test right now, so down below is a rough copy make
> sure it is clear what I mean.
>
> I feel compelled to note that simple patches get microbenchmarked all
> the time, with these results being the only justification provided.
> I'm confused why this patch is supposed to be an exception given its
> simplicity.
>
> Serious justification should be expected from tough calls --
> complicated, invasive changes, maybe with numerous tradeoffs.
>
> In contrast close(2) doing __fput_sync looks a clear cut thing to do,
> at worst one can argue which way to do it.
>
> diff --git a/fs/file.c b/fs/file.c
> index 3fd003a8604f..c341b07533b0 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -651,20 +651,30 @@ static struct file *pick_file(struct
> files_struct *files, unsigned fd)
> return file;
> }
>
> -int close_fd(unsigned fd)
> +struct file *__close_fd(unsigned fd, struct file_struct *files)
> {
> - struct files_struct *files = current->files;
> struct file *file;
>
> spin_lock(&files->file_lock);
> file = pick_file(files, fd);
> spin_unlock(&files->file_lock);
> +
> + return file;
> +}
> +EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
> +
> +int close_fd(unsigned fd)
> +{
> + struct files_struct *files = current->files;
> + struct file *file;
> +
> + file = __close_fd(fd, files);
> if (!file)
> return -EBADF;
>
> return filp_close(file, files);
> }
> -EXPORT_SYMBOL(close_fd); /* for ksys_close() */
> +EXPORT_SYMBOL(close_fd);
>
> /**
> * last_fd - return last valid index into fd table
> diff --git a/fs/file_table.c b/fs/file_table.c
> index fc7d677ff5ad..b7461f0b73f4 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -463,6 +463,11 @@ void __fput_sync(struct file *file)
> {
> if (atomic_long_dec_and_test(&file->f_count)) {
> struct task_struct *task = current;
> + /*
> + * I see 2 basic options
> + * 1. just remove the assert
> + * 2. demand the flag *or* that the caller is close(2)
> + */
> BUG_ON(!(task->flags & PF_KTHREAD));
> __fput(file);
> }
> diff --git a/fs/open.c b/fs/open.c
> index e6ead0f19964..b1602307c1c3 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -1533,7 +1533,16 @@ EXPORT_SYMBOL(filp_close);
> */
> SYSCALL_DEFINE1(close, unsigned int, fd)
> {
> - int retval = close_fd(fd);
> + struct files_struct *files = current->files;
> + struct file *file;
> + int retval;
> +
> + file = __close_fd(fd);
> + if (!file)
> + return -EBADF;
> +
> + retval = __filp_close(file, files, false);
> + __fput_sync(file);
>
> /* can't restart close syscall because file table entry was cleared */
> if (unlikely(retval == -ERESTARTSYS ||
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 562f2623c9c9..e64c0238a65f 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2388,7 +2388,11 @@ static inline struct file
> *file_clone_open(struct file *file)
> {
> return dentry_open(&file->f_path, file->f_flags, file->f_cred);
> }
> -extern int filp_close(struct file *, fl_owner_t id);
> +extern int __filp_close(struct file *file, fl_owner_t id, bool dofput);
> +static inline int filp_close(struct file *file, fl_owner_t id)
> +{
> + return __filp_close(file, id, true);
> +}
>
> extern struct filename *getname_flags(const char __user *, int, int *);
> extern struct filename *getname_uflags(const char __user *, int);

At least make this really dumb and obvious and keep the ugliness to
internal.h and open.c

---
fs/file_table.c | 10 ++++++++++
fs/internal.h | 1 +
fs/open.c | 21 +++++++++++++++------
3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index fc7d677ff5ad..18f8adaba972 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -471,6 +471,16 @@ void __fput_sync(struct file *file)
EXPORT_SYMBOL(fput);
EXPORT_SYMBOL(__fput_sync);

+/*
+ * Same as __fput_sync() but for regular close.
+ * Not exported, not for general use.
+ */
+void fput_sync(struct file *file)
+{
+ if (atomic_long_dec_and_test(&file->f_count))
+ __fput(file);
+}
+
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/internal.h b/fs/internal.h
index f7a3dc111026..1ad2a4ce728b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -297,6 +297,7 @@ static inline ssize_t do_get_acl(struct mnt_idmap *idmap,
#endif

ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);
+void fput_sync(struct file *file);

/*
* fs/attr.c
diff --git a/fs/open.c b/fs/open.c
index e6ead0f19964..2540b22fb114 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1499,11 +1499,7 @@ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
}
#endif

-/*
- * "id" is the POSIX thread ID. We use the
- * files pointer for this..
- */
-int filp_close(struct file *filp, fl_owner_t id)
+static int __filp_close(struct file *filp, fl_owner_t id, bool may_delay)
{
int retval = 0;

@@ -1520,10 +1516,23 @@ int filp_close(struct file *filp, fl_owner_t id)
dnotify_flush(filp, id);
locks_remove_posix(filp, id);
}
- fput(filp);
+
+ if (may_delay)
+ fput(filp);
+ else
+ fput_sync(filp);
return retval;
}

+/*
+ * "id" is the POSIX thread ID. We use the
+ * files pointer for this..
+ */
+int filp_close(struct file *filp, fl_owner_t id)
+{
+ return __filp_close(filp, id, true);
+}
+
EXPORT_SYMBOL(filp_close);

/*
--
2.34.1