Re: [PATCH] fuse: move page cache invalidation after AIO to workqueue
From: Jingbo Xu
Date: Tue Mar 03 2026 - 07:28:00 EST
On 3/3/26 6:23 PM, Bernd Schubert wrote:
> From: Cheng Ding <cding@xxxxxxx>
>
> Invalidating the page cache in fuse_aio_complete() causes deadlock.
> Call Trace:
> <TASK>
> __schedule+0x27c/0x6b0
> schedule+0x33/0x110
> io_schedule+0x46/0x80
> folio_wait_bit_common+0x136/0x330
> __folio_lock+0x17/0x30
> invalidate_inode_pages2_range+0x1d2/0x4f0
> fuse_aio_complete+0x258/0x270 [fuse]
> fuse_aio_complete_req+0x87/0xd0 [fuse]
> fuse_request_end+0x18e/0x200 [fuse]
> fuse_uring_req_end+0x87/0xd0 [fuse]
> fuse_uring_cmd+0x241/0xf20 [fuse]
> io_uring_cmd+0x9f/0x140
> io_issue_sqe+0x193/0x410
> io_submit_sqes+0x128/0x3e0
> __do_sys_io_uring_enter+0x2ea/0x490
> __x64_sys_io_uring_enter+0x22/0x40
>
> Move the invalidate_inode_pages2_range() call to a workqueue worker
> to avoid this issue. This approach is similar to
> iomap_dio_bio_end_io().
>
> (Minor edit by Bernd to avoid a merge conflict in Miklos' for-next
> branch). The commit is based on that branch with the addition of
> https://lore.kernel.org/r/20260111073701.6071-1-jefflexu@xxxxxxxxxxxxxxxxx)
I think it would be better to drop my previous patch completely and
rework this from scratch, since that patch
(https://lore.kernel.org/r/20260111073701.6071-1-jefflexu@xxxxxxxxxxxxxxxxx)
is only in Miklos's branch and has not been merged into master yet.
After reverting my previous patch, I think it would be cleaner to do the following:
"The page cache invalidation for FOPEN_DIRECT_IO write in
fuse_direct_io() is moved to fuse_direct_write_iter() (and done whenever
the write made any progress), to stay consistent with
generic_file_direct_write(). This covers both synchronous
FOPEN_DIRECT_IO writes (regardless of FUSE_ASYNC_DIO) and asynchronous
FOPEN_DIRECT_IO writes without FUSE_ASYNC_DIO.
After that, only asynchronous direct writes (both FOPEN_DIRECT_IO and
non-FOPEN_DIRECT_IO) with FUSE_ASYNC_DIO are left."
```
@@ -1736,15 +1760,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io,
struct iov_iter *iter,
if (res > 0)
*ppos = pos;
- if (res > 0 && write && fopen_direct_io) {
- /*
- * As in generic_file_direct_write(), invalidate after
- * write, to invalidate read-ahead cache that may have
- * with the write.
- */
- invalidate_inode_pages2_range(mapping, idx_from, idx_to);
- }
-
return res > 0 ? res : err;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
@@ -1799,6 +1814,14 @@ static ssize_t fuse_direct_write_iter(struct
kiocb *iocb, struct iov_iter *from)
FUSE_DIO_WRITE);
fuse_write_update_attr(inode, iocb->ki_pos, res);
}
+
+ /*
+ * As in generic_file_direct_write(), invalidate after
+ * write, to invalidate read-ahead cache that may have
+ * competed with the write.
+ */
+ if (res > 0)
+ kiocb_invalidate_post_direct_write(iocb, res);
}
fuse_dio_unlock(iocb, exclusive);
```
>
> Cc: Jingbo Xu <jefflexu@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Cheng Ding <cding@xxxxxxx>
> Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
> ---
> fs/fuse/file.c | 39 +++++++++++++++++++++++++++++----------
> fs/fuse/fuse_i.h | 1 +
> 2 files changed, 30 insertions(+), 10 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 64282c68d1ec7e4616e51735c1c0e8f2ec29cfad..b16515e3b42d33795ad45cf1e374ffab674714f7 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -23,6 +23,8 @@
> #include <linux/task_io_accounting_ops.h>
> #include <linux/iomap.h>
>
> +int sb_init_dio_done_wq(struct super_block *sb);
> +
Why not #include "../internal.h" instead of adding a local prototype here?
> static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
> unsigned int open_flags, int opcode,
> struct fuse_open_out *outargp)
> @@ -635,6 +637,19 @@ static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
> return io->bytes < 0 ? io->size : io->bytes;
> }
>
> +static void fuse_aio_invalidate_worker(struct work_struct *work)
> +{
> + struct fuse_io_priv *io = container_of(work, struct fuse_io_priv, work);
> + struct address_space *mapping = io->iocb->ki_filp->f_mapping;
> + ssize_t res = fuse_get_res_by_io(io);
> + pgoff_t start = io->offset >> PAGE_SHIFT;
> + pgoff_t end = (io->offset + res - 1) >> PAGE_SHIFT;
> +
> + invalidate_inode_pages2_range(mapping, start, end);
> + io->iocb->ki_complete(io->iocb, res);
> + kref_put(&io->refcnt, fuse_io_release);
> +}
> +
> /*
> * In case of short read, the caller sets 'pos' to the position of
> * actual end of fuse request in IO request. Otherwise, if bytes_requested
> @@ -667,28 +682,32 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
> spin_unlock(&io->lock);
>
> if (!left && !io->blocking) {
> + struct inode *inode = file_inode(io->iocb->ki_filp);
> + struct address_space *mapping = io->iocb->ki_filp->f_mapping;
> ssize_t res = fuse_get_res_by_io(io);
>
> if (res >= 0) {
> - struct inode *inode = file_inode(io->iocb->ki_filp);
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_inode *fi = get_fuse_inode(inode);
> - struct address_space *mapping = io->iocb->ki_filp->f_mapping;
>
> + spin_lock(&fi->lock);
> + fi->attr_version = atomic64_inc_return(&fc->attr_version);
> + spin_unlock(&fi->lock);
> + }
> +
> + if (io->write && res > 0 && mapping->nrpages) {
> /*
> * As in generic_file_direct_write(), invalidate after the
> * write, to invalidate read-ahead cache that may have competed
> * with the write.
> */
> - if (io->write && res && mapping->nrpages) {
> - invalidate_inode_pages2_range(mapping,
> - io->offset >> PAGE_SHIFT,
> - (io->offset + res - 1) >> PAGE_SHIFT);
> + if (!inode->i_sb->s_dio_done_wq)
> + res = sb_init_dio_done_wq(inode->i_sb);
Better to call sb_init_dio_done_wq() from fuse_direct_IO(), and fail the
IO directly if sb_init_dio_done_wq() fails.
> + if (res >= 0) {
> + INIT_WORK(&io->work, fuse_aio_invalidate_worker);
> + queue_work(inode->i_sb->s_dio_done_wq, &io->work);
> + return;
> }
Otherwise, the page cache invalidation would be skipped whenever the
sb_init_dio_done_wq() call above fails.
> -
> - spin_lock(&fi->lock);
> - fi->attr_version = atomic64_inc_return(&fc->attr_version);
> - spin_unlock(&fi->lock);
> }
>
> io->iocb->ki_complete(io->iocb, res);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 7f16049387d15e869db4be23a93605098588eda9..6e8c8cf6b2c82163acbfbd15c44b849898f945c1 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -377,6 +377,7 @@ union fuse_file_args {
> /** The request IO state (for asynchronous processing) */
> struct fuse_io_priv {
> struct kref refcnt;
> + struct work_struct work;
> int async;
> spinlock_t lock;
> unsigned reqs;
--
Thanks,
Jingbo