Re: [PATCH] Fix dcache race during umount

From: Jan Blunck
Date: Fri Jun 23 2006 - 09:26:55 EST


On Thu, Jun 22, David Howells wrote:

> I'd like to propose an alternative to your patch to fix the dcache race
> between unmounting a filesystem and the memory shrinker.
>
> In my patch, generic_shutdown_super() is made to call shrink_dcache_sb()
> instead of shrink_dcache_anon(), and the latter function is discarded
> completely since it's no longer used.

I've updated my patches against today's Linus git tree. Additionally, I've
changed shrink_dcache_parent() so that it no longer uses prune_dcache().
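
In case it helps review, the shrink_dcache_parent() change boils down to
roughly the following (the names and control flow are taken straight from
the patch below; only the comments are mine):

/*
 * select_parent() now collects the unused descendants of @parent on a
 * private list instead of moving them to the tail of dentry_unused, and
 * __shrink_dcache_sb() prunes exactly that list, so prune_dcache() is no
 * longer involved here at all.
 */
void shrink_dcache_parent(struct dentry *parent)
{
	LIST_HEAD(list);
	int found;

	while ((found = select_parent(parent, &list)) != 0)
		__shrink_dcache_sb(parent->d_sb, &list);
}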

Comments are welcome.

Regards,
Jan
Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c
+++ linux-2.6/fs/dcache.c
@@ -387,101 +387,96 @@ static void prune_one_dentry(struct dent
* which are being unmounted.
*
* Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
+ * more memory. When we need to unmount something
+ * we call shrink_dcache_sb().
*
* This function may fail to free any resources if
* all the dentries are in use.
*/
-
-static void prune_dcache(int count, struct super_block *sb)
+static void prune_dcache(int count)
{
spin_lock(&dcache_lock);
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
- struct rw_semaphore *s_umount;

cond_resched_lock(&dcache_lock);

tmp = dentry_unused.prev;
- if (unlikely(sb)) {
- /* Try to find a dentry for this sb, but don't try
- * too hard, if they aren't near the tail they will
- * be moved down again soon
- */
- int skip = count;
- while (skip && tmp != &dentry_unused &&
- list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
- skip--;
- tmp = tmp->prev;
- }
- }
if (tmp == &dentry_unused)
break;
- list_del_init(tmp);
- prefetch(dentry_unused.prev);
- dentry_stat.nr_unused--;
+ /*
+ * We want to prefetch the predecessor of our predecessor since
+ * our predecessor is already accessed in list_del_init().
+ */
+ prefetch(tmp->prev->prev);
dentry = list_entry(tmp, struct dentry, d_lru);
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);

- spin_lock(&dentry->d_lock);
+ spin_lock(&dentry->d_lock);
/*
* We found an inuse dentry which was not removed from
- * dentry_unused because of laziness during lookup. Do not free
+ * dentry_unused because of laziness during lookup. Do not free
* it - just keep it off the dentry_unused list.
*/
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- /* If the dentry was recently referenced, don't free it. */
- if (dentry->d_flags & DCACHE_REFERENCED) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- spin_unlock(&dentry->d_lock);
- continue;
- }
- /*
- * If the dentry is not DCACHED_REFERENCED, it is time
- * to remove it from the dcache, provided the super block is
- * NULL (which means we are trying to reclaim memory)
- * or this dentry belongs to the same super block that
- * we want to shrink.
- */
- /*
- * If this dentry is for "my" filesystem, then I can prune it
- * without taking the s_umount lock (I already hold it).
- */
- if (sb && dentry->d_sb == sb) {
- prune_one_dentry(dentry);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
continue;
}
/*
- * ...otherwise we need to be sure this filesystem isn't being
- * unmounted, otherwise we could race with
- * generic_shutdown_super(), and end up holding a reference to
- * an inode while the filesystem is unmounted.
- * So we try to get s_umount, and make sure s_root isn't NULL.
- * (Take a local copy of s_umount to avoid a use-after-free of
- * `dentry').
+ * If the dentry was recently referenced, or the dentry's
+ * filesystem is going to be unmounted, don't free it.
*/
- s_umount = &dentry->d_sb->s_umount;
- if (down_read_trylock(s_umount)) {
- if (dentry->d_sb->s_root != NULL) {
+ if (!(dentry->d_flags & DCACHE_REFERENCED) &&
+ likely(down_read_trylock(&dentry->d_sb->s_umount))) {
+ struct super_block *sb = dentry->d_sb;
+
+ if (dentry->d_sb->s_root) {
prune_one_dentry(dentry);
- up_read(s_umount);
+ up_read(&sb->s_umount);
continue;
}
- up_read(s_umount);
+ up_read(&sb->s_umount);
}
- spin_unlock(&dentry->d_lock);
- /* Cannot remove the first dentry, and it isn't appropriate
- * to move it to the head of the list, so give up, and try
- * later
+ /*
+ * Either the dentry was recently referenced or its filesystem
+ * is going to be unmounted. Add the dentry to the beginning of
+ * the LRU list so shrink_dcache_sb() can find it faster.
*/
- break;
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&dcache_lock);
+}
+
+/*
+ * __shrink_dcache_sb - Shrink the dentries for a superblock
+ * @sb: superblock
+ * @list: list of dentries
+ *
+ * Prune a list of unused dentries belonging to @sb.
+ */
+static void __shrink_dcache_sb(struct super_block *sb, struct list_head *list)
+{
+ struct dentry *dentry, *pos;
+
+ spin_lock(&dcache_lock);
+repeat:
+ list_for_each_entry_safe(dentry, pos, list, d_lru) {
+ BUG_ON(dentry->d_sb != sb);
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
+ prune_one_dentry(dentry);
+ cond_resched_lock(&dcache_lock);
+ goto repeat;
}
spin_unlock(&dcache_lock);
}
@@ -505,47 +500,31 @@ static void prune_dcache(int count, stru
*
* Shrink the dcache for the specified super block. This
* is used to free the dcache before unmounting a file
- * system
+ * system.
+ *
+ * This must be called with sb->s_umount locked.
*/
-
void shrink_dcache_sb(struct super_block * sb)
{
- struct list_head *tmp, *next;
- struct dentry *dentry;
+ struct dentry *dentry, *pos;
+ LIST_HEAD(list);

/*
* Pass one ... move the dentries for the specified
- * superblock to the most recent end of the unused list.
+ * superblock to a temporary list.
*/
spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
+ list_for_each_entry_safe(dentry, pos, &dentry_unused, d_lru) {
if (dentry->d_sb != sb)
continue;
- list_del(tmp);
- list_add(tmp, &dentry_unused);
+ list_move(&dentry->d_lru, &list);
}
+ spin_unlock(&dcache_lock);

/*
* Pass two ... free the dentries for this superblock.
*/
-repeat:
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- dentry_stat.nr_unused--;
- list_del_init(tmp);
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- prune_one_dentry(dentry);
- cond_resched_lock(&dcache_lock);
- goto repeat;
- }
- spin_unlock(&dcache_lock);
+ __shrink_dcache_sb(sb, &list);
}

/*
@@ -614,7 +593,7 @@ positive:
* drop the lock and return early due to latency
* constraints.
*/
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry * parent, struct list_head *list)
{
struct dentry *this_parent = parent;
struct list_head *next;
@@ -638,7 +617,7 @@ resume:
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add(&dentry->d_lru, dentry_unused.prev);
+ list_add(&dentry->d_lru, list);
dentry_stat.nr_unused++;
found++;
}
@@ -681,50 +660,11 @@ out:

void shrink_dcache_parent(struct dentry * parent)
{
+ LIST_HEAD(list);
int found;

- while ((found = select_parent(parent)) != 0)
- prune_dcache(found, parent->d_sb);
-}
-
-/**
- * shrink_dcache_anon - further prune the cache
- * @head: head of d_hash list of dentries to prune
- *
- * Prune the dentries that are anonymous
- *
- * parsing d_hash list does not hlist_for_each_entry_rcu() as it
- * done under dcache_lock.
- *
- */
-void shrink_dcache_anon(struct super_block *sb)
-{
- struct hlist_node *lp;
- struct hlist_head *head = &sb->s_anon;
- int found;
- do {
- found = 0;
- spin_lock(&dcache_lock);
- hlist_for_each(lp, head) {
- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }
-
- /*
- * move only zero ref count dentries to the end
- * of the unused list for prune_dcache
- */
- if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
- }
- spin_unlock(&dcache_lock);
- prune_dcache(found, sb);
- } while(found);
+ while ((found = select_parent(parent, &list)) != 0)
+ __shrink_dcache_sb(parent->d_sb, &list);
}

/*
@@ -744,7 +684,7 @@ static int shrink_dcache_memory(int nr,
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
- prune_dcache(nr, NULL);
+ prune_dcache(nr);
}
return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
@@ -1659,19 +1599,38 @@ repeat:
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry,
+ d_u.d_child);
next = tmp->next;
+
if (d_unhashed(dentry)||!dentry->d_inode)
continue;
+
+ if (!list_empty(&dentry->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
+ /*
+ * We can lower the reference count here:
+ * - if the refcount is zero afterwards, the dentry hasn't got
+ * any children
+ * - if the refcount isn't zero afterwards, we visit the
+ * children next
+ * - because we always hold the dcache lock, nobody else can
+ * kill the unused dentries yet
+ */
+ if (atomic_dec_and_test(&dentry->d_count)) {
+ list_add_tail(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+
if (!list_empty(&dentry->d_subdirs)) {
this_parent = dentry;
goto repeat;
}
- atomic_dec(&dentry->d_count);
}
if (this_parent != root) {
next = this_parent->d_u.d_child.next;
- atomic_dec(&this_parent->d_count);
this_parent = this_parent->d_parent;
goto resume;
}
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -230,8 +230,7 @@ void generic_shutdown_super(struct super

if (root) {
sb->s_root = NULL;
- shrink_dcache_parent(root);
- shrink_dcache_anon(sb);
+ shrink_dcache_sb(sb);
dput(root);
fsync_super(sb);
lock_super(sb);
Index: linux-2.6/include/linux/dcache.h
===================================================================
--- linux-2.6.orig/include/linux/dcache.h
+++ linux-2.6/include/linux/dcache.h
@@ -217,7 +217,6 @@ extern struct dentry * d_alloc_anon(stru
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern void shrink_dcache_sb(struct super_block *);
extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct super_block *);
extern int d_invalidate(struct dentry *);

/* only used at mount-time */