Re: PROBLEM: FUSE_NOTIFY_INVAL_ENTRY leaves stale negative dentry after c9ba789dad15
From: Miklos Szeredi
Date: Thu Jun 04 2026 - 10:08:17 EST
On Thu, 4 Jun 2026 at 11:43, NeilBrown <neilb@xxxxxxxxxxx> wrote:
> My guess is you refer to the need for retry-on-error.
> I need that design so that locking can be pushed closer to the
> filesystem and ultimately, in some cases, removed.
Aha, but this is filesystem code, we don't need or want to move it
closer to the filesystem, it's already there.
> It is a lot like the reason that we often use cmp-xchg loops instead of
> holding a spinlock while performing a simple operation. The code is
> more complex but it scales better.
Okay, so this is about making VFS more scalable? Can't we just leave
filesystem code alone in that case?
Thanks,
Miklos
> Thanks for the on going review!
>
> This is my current thought about how to fix this code.
>
> Thanks,
> NeilBrown
>
>
>
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 70948fe385da..7d744c7b923a 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -1601,55 +1601,74 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
> dir = d_find_alias(parent);
> if (!dir)
> goto put_parent;
> - while (!entry) {
> - struct dentry *child = try_lookup_noperm(name, dir);
> - if (!child || IS_ERR(child))
> - goto put_parent;
> - entry = start_removing_dentry(dir, child);
> - dput(child);
> - if (IS_ERR(entry))
> - goto put_parent;
> +
> +do_lookup:
> + if (IS_DEADDIR(parent))
> + goto put_parent;
> + entry = try_lookup_noperm(name, dir);
> + if (!entry || IS_ERR(entry))
> + goto put_parent;
> + /*
> + * We need to lock against concurrent create/rename/etc whether
> + * entry is negative or positive, so use whichever of
> + * start_creating_dentry() and start_removing_dentry() works.
> + */
> + if (d_really_is_negative(entry) &&
> + !IS_ERR(start_creating_dentry(dir, entry))) {
> + /* entry is negative, it cannot have been renamed */
> + fuse_dir_changed(parent);
> + if (!(flags & FUSE_EXPIRE_ONLY))
> + d_invalidate(entry);
> + fuse_invalidate_entry_cache(entry);
> + end_creating(entry);
> + err = 0;
> + } else if (!IS_ERR(start_removing_dentry(dir, entry))) {
> + /* entry is positive */
> +
> if (!d_same_name(entry, dir, name)) {
> end_removing(entry);
> - entry = NULL;
> + dput(entry);
> + goto do_lookup;
> }
> - }
>
> - fuse_dir_changed(parent);
> - if (!(flags & FUSE_EXPIRE_ONLY))
> - d_invalidate(entry);
> - fuse_invalidate_entry_cache(entry);
> -
> - if (child_nodeid != 0) {
> - inode_lock(d_inode(entry));
> - if (get_node_id(d_inode(entry)) != child_nodeid) {
> - err = -ENOENT;
> - goto badentry;
> - }
> - if (d_mountpoint(entry)) {
> - err = -EBUSY;
> - goto badentry;
> - }
> - if (d_is_dir(entry)) {
> - shrink_dcache_parent(entry);
> - if (!simple_empty(entry)) {
> - err = -ENOTEMPTY;
> + fuse_dir_changed(parent);
> + if (!(flags & FUSE_EXPIRE_ONLY))
> + d_invalidate(entry);
> + fuse_invalidate_entry_cache(entry);
> + err = 0;
> + if (child_nodeid != 0) {
> + inode_lock(d_inode(entry));
> + if (get_node_id(d_inode(entry)) != child_nodeid) {
> + err = -ENOENT;
> goto badentry;
> }
> - d_inode(entry)->i_flags |= S_DEAD;
> + if (d_mountpoint(entry)) {
> + err = -EBUSY;
> + goto badentry;
> + }
> + if (d_is_dir(entry)) {
> + shrink_dcache_parent(entry);
> + if (!simple_empty(entry)) {
> + err = -ENOTEMPTY;
> + goto badentry;
> + }
> + d_inode(entry)->i_flags |= S_DEAD;
> + }
> + dont_mount(entry);
> + clear_nlink(d_inode(entry));
> + badentry:
> + inode_unlock(d_inode(entry));
> + if (!err)
> + d_delete(entry);
> }
> - dont_mount(entry);
> - clear_nlink(d_inode(entry));
> - err = 0;
> - badentry:
> - inode_unlock(d_inode(entry));
> - if (!err)
> - d_delete(entry);
> +
> + end_removing(entry);
> } else {
> - err = 0;
> + /* must have raced with rename before we got the lock */
> + dput(entry);
> + goto do_lookup;
> }
> -
> - end_removing(entry);
> + dput(entry);
> put_parent:
> dput(dir);
> iput(parent);