Re: [PATCH 13/15] parallel lookups machinery, part 3

From: J. Bruce Fields
Date: Mon Apr 18 2016 - 16:45:15 EST


On Sat, Apr 16, 2016 at 01:55:25AM +0100, Al Viro wrote:
> From: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
>
> We will need to be able to check if there is an in-lookup
> dentry with matching parent/name. Right now it's impossible,
> but as soon as start locking directories shared such beasts
> will appear.
>
> Add a secondary hash for locating those. Hash chains go through
> the same space where d_alias will be once it's not in-lookup anymore.
> Search is done under the same bitlock we use for modifications -
> with the primary hash we can rely on d_rehash() into the wrong
> chain being the worst that could happen, but here the pointers are
> buggered once it's removed from the chain. On the other hand,
> the chains are not going to be long and normally we'll end up
> adding to the chain anyway. That allows us to avoid bothering with
> ->d_lock when doing the comparisons - everything is stable until
> removed from chain.
>
> New helper: d_alloc_parallel(). Right now it allocates, verifies
> that no hashed and in-lookup matches exist and adds to in-lookup
> hash.
>
> Returns ERR_PTR() for error, hashed match (in the unlikely case it's
> been found) or new dentry. In-lookup matches trigger BUG() for
> now; that will change in the next commit when we introduce waiting
> for ongoing lookup to finish. Note that in-lookup matches won't be
> possible until we actually go for shared locking.
>
> lookup_slow() switched to use of d_alloc_parallel().
>
> Again, these commits are separated only for making it easier to
> review. All this machinery will start doing something useful only
> when we go for shared locking; it's just that the combination is
> too large for my taste.

Thanks for doing that! I'm not sure I get this yet, but I feel like I
have at least a fighting chance....

--b.

>
> Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
> ---
> fs/dcache.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/namei.c | 44 +++++++++++--------------
> include/linux/dcache.h | 2 ++
> 3 files changed, 108 insertions(+), 25 deletions(-)
>
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 3959f18..0552002 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -111,6 +111,17 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
> return dentry_hashtable + hash_32(hash, d_hash_shift);
> }
>
> +#define IN_LOOKUP_SHIFT 10
> +static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];
> +
> +static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
> + unsigned int hash)
> +{
> + hash += (unsigned long) parent / L1_CACHE_BYTES;
> + return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
> +}
> +
> +
> /* Statistics gathering. */
> struct dentry_stat_t dentry_stat = {
> .age_limit = 45,
> @@ -2377,9 +2388,85 @@ static inline void end_dir_add(struct inode *dir, unsigned n)
> smp_store_release(&dir->i_dir_seq, n + 2);
> }
>
> +struct dentry *d_alloc_parallel(struct dentry *parent,
> + const struct qstr *name)
> +{
> + unsigned int len = name->len;
> + unsigned int hash = name->hash;
> + const unsigned char *str = name->name;
> + struct hlist_bl_head *b = in_lookup_hash(parent, hash);
> + struct hlist_bl_node *node;
> + struct dentry *new = d_alloc(parent, name);
> + struct dentry *dentry;
> + unsigned seq;
> +
> + if (unlikely(!new))
> + return ERR_PTR(-ENOMEM);
> +
> +retry:
> + seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
> + dentry = d_lookup(parent, name);
> + if (unlikely(dentry)) {
> + dput(new);
> + return dentry;
> + }
> +
> + hlist_bl_lock(b);
> + smp_rmb();
> + if (unlikely(parent->d_inode->i_dir_seq != seq)) {
> + hlist_bl_unlock(b);
> + goto retry;
> + }
> + /*
> + * No changes for the parent since the beginning of d_lookup().
> + * Since all removals from the chain happen with hlist_bl_lock(),
> + * any potential in-lookup matches are going to stay here until
> + * we unlock the chain. All fields are stable in everything
> + * we encounter.
> + */
> + hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
> + if (dentry->d_name.hash != hash)
> + continue;
> + if (dentry->d_parent != parent)
> + continue;
> + if (d_unhashed(dentry))
> + continue;
> + if (parent->d_flags & DCACHE_OP_COMPARE) {
> + int tlen = dentry->d_name.len;
> + const char *tname = dentry->d_name.name;
> + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
> + continue;
> + } else {
> + if (dentry->d_name.len != len)
> + continue;
> + if (dentry_cmp(dentry, str, len))
> + continue;
> + }
> + dget(dentry);
> + hlist_bl_unlock(b);
> + /* impossible until we actually enable parallel lookups */
> + BUG();
> + /* and this will be "wait for it to stop being in-lookup" */
> + /* this one will be handled in the next commit */
> + dput(new);
> + return dentry;
> + }
> + /* we can't take ->d_lock here; it's OK, though. */
> + new->d_flags |= DCACHE_PAR_LOOKUP;
> + hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
> + hlist_bl_unlock(b);
> + return new;
> +}
> +
> void __d_not_in_lookup(struct dentry *dentry)
> {
> + struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
> + dentry->d_name.hash);
> + hlist_bl_lock(b);
> dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
> + __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
> + hlist_bl_unlock(b);
> + INIT_HLIST_NODE(&dentry->d_u.d_alias);
> /* more stuff will land here */
> }
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 0ee8b9d..fbce016 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1603,46 +1603,40 @@ static struct dentry *lookup_slow(const struct qstr *name,
> struct dentry *dir,
> unsigned int flags)
> {
> - struct dentry *dentry, *old;
> + struct dentry *dentry = ERR_PTR(-ENOENT), *old;
> struct inode *inode = dir->d_inode;
>
> inode_lock(inode);
> /* Don't go there if it's already dead */
> - if (unlikely(IS_DEADDIR(inode))) {
> - inode_unlock(inode);
> - return ERR_PTR(-ENOENT);
> - }
> - dentry = d_lookup(dir, name);
> - if (unlikely(dentry)) {
> + if (unlikely(IS_DEADDIR(inode)))
> + goto out;
> +again:
> + dentry = d_alloc_parallel(dir, name);
> + if (IS_ERR(dentry))
> + goto out;
> + if (unlikely(!(dentry->d_flags & DCACHE_PAR_LOOKUP))) {
> if ((dentry->d_flags & DCACHE_OP_REVALIDATE) &&
> !(flags & LOOKUP_NO_REVAL)) {
> int error = d_revalidate(dentry, flags);
> if (unlikely(error <= 0)) {
> - if (!error)
> + if (!error) {
> d_invalidate(dentry);
> + dput(dentry);
> + goto again;
> + }
> dput(dentry);
> dentry = ERR_PTR(error);
> }
> }
> - if (dentry) {
> - inode_unlock(inode);
> - return dentry;
> + } else {
> + old = inode->i_op->lookup(inode, dentry, flags);
> + d_not_in_lookup(dentry);
> + if (unlikely(old)) {
> + dput(dentry);
> + dentry = old;
> }
> }
> - dentry = d_alloc(dir, name);
> - if (unlikely(!dentry)) {
> - inode_unlock(inode);
> - return ERR_PTR(-ENOMEM);
> - }
> - spin_lock(&dentry->d_lock);
> - dentry->d_flags |= DCACHE_PAR_LOOKUP;
> - spin_unlock(&dentry->d_lock);
> - old = inode->i_op->lookup(inode, dentry, flags);
> - d_not_in_lookup(dentry);
> - if (unlikely(old)) {
> - dput(dentry);
> - dentry = old;
> - }
> +out:
> inode_unlock(inode);
> return dentry;
> }
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index cfc1240..3ab5ce4 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -131,6 +131,7 @@ struct dentry {
> */
> union {
> struct hlist_node d_alias; /* inode alias list */
> + struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */
> struct rcu_head d_rcu;
> } d_u;
> };
> @@ -248,6 +249,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
> /* allocate/de-allocate */
> extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
> extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
> +extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *);
> extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
> extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
> extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
> --
> 2.8.0.rc3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html