Re: [PATCH 03/19] VFS: use d_alloc_parallel() in lookup_one_qstr_excl() and rename it.
From: Jeff Layton
Date: Thu Feb 06 2025 - 09:34:05 EST
On Thu, 2025-02-06 at 16:42 +1100, NeilBrown wrote:
> lookup_one_qstr_excl() is used for lookups prior to directory
> modifications, whether create, unlink, rename, or whatever.
>
> To prepare for allowing modification to happen in parallel, change
> lookup_one_qstr_excl() to use d_alloc_parallel().
>
> To reflect this, name is changed to lookup_one_qtr() - as the directory
> may be locked shared.
>
> If any for the "intent" LOOKUP flags are passed, the caller must ensure
> d_lookup_done() is called at an appropriate time. If none are passed
> then we can be sure ->lookup() will do a real lookup and d_lookup_done()
> is called internally.
>
> Signed-off-by: NeilBrown <neilb@xxxxxxx>
> ---
> fs/namei.c | 47 +++++++++++++++++++++++++------------------
> fs/smb/server/vfs.c | 7 ++++---
> include/linux/namei.h | 9 ++++++---
> 3 files changed, 37 insertions(+), 26 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 5cdbd2eb4056..d684102d873d 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1665,15 +1665,13 @@ static struct dentry *lookup_dcache(const struct qstr *name,
> }
>
> /*
> - * Parent directory has inode locked exclusive. This is one
> - * and only case when ->lookup() gets called on non in-lookup
> - * dentries - as the matter of fact, this only gets called
> - * when directory is guaranteed to have no in-lookup children
> - * at all.
> + * Parent directory has inode locked: exclusive or shared.
> + * If @flags contains any LOOKUP_INTENT_FLAGS then d_lookup_done()
> + * must be called after the intended operation is performed - or aborted.
> */
> -struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> - struct dentry *base,
> - unsigned int flags)
> +struct dentry *lookup_one_qstr(const struct qstr *name,
> + struct dentry *base,
> + unsigned int flags)
> {
> struct dentry *dentry = lookup_dcache(name, base, flags);
> struct dentry *old;
> @@ -1686,18 +1684,25 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> if (unlikely(IS_DEADDIR(dir)))
> return ERR_PTR(-ENOENT);
>
> - dentry = d_alloc(base, name);
> - if (unlikely(!dentry))
> + dentry = d_alloc_parallel(base, name);
> + if (unlikely(IS_ERR_OR_NULL(dentry)))
> return ERR_PTR(-ENOMEM);
> + if (!d_in_lookup(dentry))
> + /* Raced with another thread which did the lookup */
> + return dentry;
>
> old = dir->i_op->lookup(dir, dentry, flags);
> if (unlikely(old)) {
> + d_lookup_done(dentry);
> dput(dentry);
> dentry = old;
> }
> + if ((flags & LOOKUP_INTENT_FLAGS) == 0)
> + /* ->lookup must have given final answer */
> + d_lookup_done(dentry);
This is kind of an ugly thing for the callers to get right. I think it
would be cleaner to just push the d_lookup_done() into all of the
callers that don't pass any intent flags, and do away with this.
> return dentry;
> }
> -EXPORT_SYMBOL(lookup_one_qstr_excl);
> +EXPORT_SYMBOL(lookup_one_qstr);
>
> /**
> * lookup_fast - do fast lockless (but racy) lookup of a dentry
> @@ -2739,7 +2744,7 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
> return ERR_PTR(-EINVAL);
> }
> inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
> - d = lookup_one_qstr_excl(&last, path->dentry, 0);
> + d = lookup_one_qstr(&last, path->dentry, 0);
> if (IS_ERR(d)) {
> inode_unlock(path->dentry->d_inode);
> path_put(path);
> @@ -4078,8 +4083,8 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> if (last.name[last.len] && !want_dir)
> create_flags = 0;
> inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
> - dentry = lookup_one_qstr_excl(&last, path->dentry,
> - reval_flag | create_flags);
> + dentry = lookup_one_qstr(&last, path->dentry,
> + reval_flag | create_flags);
> if (IS_ERR(dentry))
> goto unlock;
>
> @@ -4103,6 +4108,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> }
> return dentry;
> fail:
> + d_lookup_done(dentry);
> dput(dentry);
> dentry = ERR_PTR(error);
> unlock:
> @@ -4508,7 +4514,7 @@ int do_rmdir(int dfd, struct filename *name)
> goto exit2;
>
> inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
> - dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
> + dentry = lookup_one_qstr(&last, path.dentry, lookup_flags);
> error = PTR_ERR(dentry);
> if (IS_ERR(dentry))
> goto exit3;
> @@ -4641,7 +4647,7 @@ int do_unlinkat(int dfd, struct filename *name)
> goto exit2;
> retry_deleg:
> inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
> - dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
> + dentry = lookup_one_qstr(&last, path.dentry, lookup_flags);
> error = PTR_ERR(dentry);
> if (!IS_ERR(dentry)) {
>
> @@ -5231,8 +5237,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> goto exit_lock_rename;
> }
>
> - old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
> - lookup_flags);
> + old_dentry = lookup_one_qstr(&old_last, old_path.dentry,
> + lookup_flags);
> error = PTR_ERR(old_dentry);
> if (IS_ERR(old_dentry))
> goto exit3;
> @@ -5240,8 +5246,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> error = -ENOENT;
> if (d_is_negative(old_dentry))
> goto exit4;
> - new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
> - lookup_flags | target_flags);
> + new_dentry = lookup_one_qstr(&new_last, new_path.dentry,
> + lookup_flags | target_flags);
> error = PTR_ERR(new_dentry);
> if (IS_ERR(new_dentry))
> goto exit4;
> @@ -5292,6 +5298,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> rd.flags = flags;
> error = vfs_rename(&rd);
> exit5:
> + d_lookup_done(new_dentry);
> dput(new_dentry);
> exit4:
> dput(old_dentry);
> diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
> index 4e580bb7baf8..89b3823f6405 100644
> --- a/fs/smb/server/vfs.c
> +++ b/fs/smb/server/vfs.c
> @@ -109,7 +109,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
> }
>
> inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
> - d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
> + d = lookup_one_qstr(&last, parent_path->dentry, 0);
> if (IS_ERR(d))
> goto err_out;
>
> @@ -726,8 +726,8 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
> ksmbd_fd_put(work, parent_fp);
> }
>
> - new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
> - lookup_flags | LOOKUP_RENAME_TARGET);
> + new_dentry = lookup_one_qstr(&new_last, new_path.dentry,
> + lookup_flags | LOOKUP_RENAME_TARGET);
> if (IS_ERR(new_dentry)) {
> err = PTR_ERR(new_dentry);
> goto out3;
> @@ -771,6 +771,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
> ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
>
> out4:
> + d_lookup_done(new_dentry);
> dput(new_dentry);
> out3:
> dput(old_parent);
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index 8ec8fed3bce8..06bb3ea65beb 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -34,6 +34,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
> #define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */
> #define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */
>
> +#define LOOKUP_INTENT_FLAGS (LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL | \
> + LOOKUP_RENAME_TARGET)
> +
> /* internal use only */
> #define LOOKUP_PARENT 0x0010
>
> @@ -52,9 +55,9 @@ extern int path_pts(struct path *path);
>
> extern int user_path_at(int, const char __user *, unsigned, struct path *);
>
> -struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> - struct dentry *base,
> - unsigned int flags);
> +struct dentry *lookup_one_qstr(const struct qstr *name,
> + struct dentry *base,
> + unsigned int flags);
> extern int kern_path(const char *, unsigned, struct path *);
>
> extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
--
Jeff Layton <jlayton@xxxxxxxxxx>