Re: fs/dcache.c - BUG: soft lockup - CPU#5 stuck for 22s! [systemd-udevd:1667]

From: Linus Torvalds
Date: Wed May 28 2014 - 15:43:29 EST


On Wed, May 28, 2014 at 11:39 AM, Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
>
> OK, the warnings about averting your eyes very much apply; the thing below
> definitely needs more massage before it becomes acceptable

I've been looking at the this too, and I have to say, I absolutely
hate your DCACHE_MAY_FREE logic. It makes it really hard to follow
what the heck is happening across threads.

So just to understand the code, how about this (UNTESTED!) patch? It
gets rid of the DCACHE_MAY_FREE logic entirely, and makes the rules
imho much more straightforward:

- whoever calls "dentry_kill()" first is the one that frees things.
- dentry_kill() does *not* touch the shrink list at all. In fact,
*nothing* touches the shrink list, except for the shrinker.
- the shrinkers remove entries from their own lists
- the shrinker list logic depends on the actual freeing of the dentry
to be delayed until the RCU grace period (already true for RCU-lookup
dentries)

In other words, that whole "may-free" thing goes away, the whole
shrink-list locking issues go away, there are no subtle rules. Nobody
else ever touches the shrink-list entries than the entity walking the
shrink-lists. Once DCACHE_SHRINK_LIST is set, nobody else is

It does require that the dentry shrinking code always hold the RCU
lock for reading, because others may actually be doing the final
dput() while the thing is on the shrinking list (and holding the RCU
lock is what protects the entry from actually being free'd).

NOTE! I don't claim that this fixes anything, but I do think that it
makes that whole cross-thread complexity of that DCACHE_MAY_FREE go
away. I think it's easier to understand, and it removes code in the
process. Comments?

Linus
fs/dcache.c | 32 ++++++++++++++------------------
include/linux/dcache.h | 2 --
2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 42ae01eefc07..6821479a378e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -248,11 +248,7 @@ static void __d_free(struct rcu_head *head)

static void dentry_free(struct dentry *dentry)
{
- /* if dentry was never visible to RCU, immediate free is OK */
- if (!(dentry->d_flags & DCACHE_RCUACCESS))
- __d_free(&dentry->d_u.d_rcu);
- else
- call_rcu(&dentry->d_u.d_rcu, __d_free);
+ call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
@@ -453,12 +449,11 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure)
{
struct inode *inode;
struct dentry *parent = NULL;
- bool can_free = true;

- if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
- can_free = dentry->d_flags & DCACHE_MAY_FREE;
+ /* Did somebody else already free it? */
+ if (__lockref_is_dead(&dentry->d_lockref)) {
spin_unlock(&dentry->d_lock);
- goto out;
+ return NULL;
}

inode = dentry->d_inode;
@@ -514,15 +509,7 @@ relock:
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);

- spin_lock(&dentry->d_lock);
- if (dentry->d_flags & DCACHE_SHRINK_LIST) {
- dentry->d_flags |= DCACHE_MAY_FREE;
- can_free = false;
- }
- spin_unlock(&dentry->d_lock);
-out:
- if (likely(can_free))
- dentry_free(dentry);
+ dentry_free(dentry);
return parent;
}

@@ -921,9 +908,11 @@ long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
LIST_HEAD(dispose);
long freed;

+ rcu_read_lock();
freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate,
&dispose, &nr_to_scan);
shrink_dentry_list(&dispose);
+ rcu_read_unlock();
return freed;
}

@@ -962,11 +951,13 @@ void shrink_dcache_sb(struct super_block *sb)
do {
LIST_HEAD(dispose);

+ rcu_read_lock();
freed = list_lru_walk(&sb->s_dentry_lru,
dentry_lru_isolate_shrink, &dispose, UINT_MAX);

this_cpu_sub(nr_dentry_unused, freed);
shrink_dentry_list(&dispose);
+ rcu_read_unlock();
} while (freed > 0);
}
EXPORT_SYMBOL(shrink_dcache_sb);
@@ -1230,13 +1221,16 @@ void shrink_dcache_parent(struct dentry *parent)
data.start = parent;
data.found = 0;

+ rcu_read_lock();
d_walk(parent, &data, select_collect, NULL);
if (!data.found)
break;

shrink_dentry_list(&data.dispose);
+ rcu_read_unlock();
cond_resched();
}
+ rcu_read_unlock();
}
EXPORT_SYMBOL(shrink_dcache_parent);

@@ -1338,11 +1332,13 @@ int check_submounts_and_drop(struct dentry *dentry)
data.start = dentry;
data.found = 0;

+ rcu_read_lock();
d_walk(dentry, &data, check_and_collect, check_and_drop);
ret = data.found;

if (!list_empty(&data.dispose))
shrink_dentry_list(&data.dispose);
+ rcu_read_unlock();

if (ret <= 0)
break;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3c7ec327ebd2..3b9bfdb83ba6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -221,8 +221,6 @@ struct dentry_operations {
#define DCACHE_SYMLINK_TYPE 0x00300000 /* Symlink */
#define DCACHE_FILE_TYPE 0x00400000 /* Other file type */

-#define DCACHE_MAY_FREE 0x00800000
-
extern seqlock_t rename_lock;

static inline int dname_external(const struct dentry *dentry)