[PATCH 1/2] Re: [autofs] Bad race condition in the new autofsprotocol somewhere

From: Ian Kent
Date: Mon Feb 12 2007 - 01:42:52 EST


On Thu, 2007-02-08 at 11:33 +0900, Ian Kent wrote:
> On Wed, 2007-02-07 at 19:18 +0100, Olivier Galibert wrote:
> > On Thu, Feb 08, 2007 at 03:07:41AM +0900, Ian Kent wrote:
> > > It may be better to update to a later kernel so I don't have to port the
> > > patch to several different kernels. Is that possible?
> >
> > Sure, 2.6.20 or -git?
>
> 2.6.20 has all the patches I've proposed so far except for the one we're
> working on so that would be best for me.
>
> Seems there may still be a problem with the patch so I'll let you know
> what's happening as soon as I can.

I think I'm just about done.

Could you try using the two patches here against 2.6.20 please:

Ian

---

--- linux-2.6.20/fs/autofs4/autofs_i.h.lookup-expire-race 2007-02-05 03:44:54.000000000 +0900
+++ linux-2.6.20/fs/autofs4/autofs_i.h 2007-02-12 12:15:17.000000000 +0900
@@ -52,6 +52,8 @@ struct autofs_info {

int flags;

+ struct list_head rehash;
+
struct autofs_sb_info *sbi;
unsigned long last_used;
atomic_t count;
@@ -110,6 +112,8 @@ struct autofs_sb_info {
struct mutex wq_mutex;
spinlock_t fs_lock;
struct autofs_wait_queue *queues; /* Wait queue pointer */
+ spinlock_t rehash_lock;
+ struct list_head rehash_list;
};

static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
--- linux-2.6.20/fs/autofs4/root.c.lookup-expire-race 2007-02-05 03:44:54.000000000 +0900
+++ linux-2.6.20/fs/autofs4/root.c 2007-02-12 12:14:51.000000000 +0900
@@ -263,7 +263,7 @@ static int try_to_fill_dentry(struct den
*/
status = d_invalidate(dentry);
if (status != -EBUSY)
- return -ENOENT;
+ return -EAGAIN;
}

DPRINTK("dentry=%p %.*s ino=%p",
@@ -413,7 +413,16 @@ static int autofs4_revalidate(struct den
*/
status = try_to_fill_dentry(dentry, flags);
if (status == 0)
- return 1;
+ return 1;
+
+ /*
+ * A status of EAGAIN here means that the dentry has gone
+ * away while waiting for an expire to complete. If we are
+ * racing with expire lookup will wait for it so this must
+ * be a revalidate and we need to send it to lookup.
+ */
+ if (status == -EAGAIN)
+ return 0;

return status;
}
@@ -459,9 +468,18 @@ void autofs4_dentry_release(struct dentr
de->d_fsdata = NULL;

if (inf) {
+ struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
+
inf->dentry = NULL;
inf->inode = NULL;

+ if (sbi) {
+ spin_lock(&sbi->rehash_lock);
+ if (!list_empty(&inf->rehash))
+ list_del(&inf->rehash);
+ spin_unlock(&sbi->rehash_lock);
+ }
+
autofs4_free_ino(inf);
}
}
@@ -478,10 +496,80 @@ static struct dentry_operations autofs4_
.d_release = autofs4_dentry_release,
};

+static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+{
+ unsigned int len = name->len;
+ unsigned int hash = name->hash;
+ const unsigned char *str = name->name;
+ struct list_head *p, *head;
+
+ spin_lock(&dcache_lock);
+ spin_lock(&sbi->rehash_lock);
+ head = &sbi->rehash_list;
+ list_for_each(p, head) {
+ struct autofs_info *ino;
+ struct dentry *dentry;
+ struct qstr *qstr;
+
+ ino = list_entry(p, struct autofs_info, rehash);
+ dentry = ino->dentry;
+
+ spin_lock(&dentry->d_lock);
+
+ /* Bad luck, we've already been dentry_iput */
+ if (!dentry->d_inode)
+ goto next;
+
+ qstr = &dentry->d_name;
+
+ if (dentry->d_name.hash != hash)
+ goto next;
+ if (dentry->d_parent != parent)
+ goto next;
+
+ if (qstr->len != len)
+ goto next;
+ if (memcmp(qstr->name, str, len))
+ goto next;
+
+ if (d_unhashed(dentry)) {
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ struct inode *inode = dentry->d_inode;
+
+ list_del_init(&ino->rehash);
+ dget(dentry);
+ /*
+ * Make the rehashed dentry negative so the VFS
+ * behaves as it should.
+ */
+ if (inode) {
+ dentry->d_inode = NULL;
+ list_del_init(&dentry->d_alias);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&sbi->rehash_lock);
+ spin_unlock(&dcache_lock);
+ iput(inode);
+ return dentry;
+ }
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&sbi->rehash_lock);
+ spin_unlock(&dcache_lock);
+ return dentry;
+ }
+next:
+ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&sbi->rehash_lock);
+ spin_unlock(&dcache_lock);
+
+ return NULL;
+}
+
/* Lookups in the root directory */
static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct autofs_sb_info *sbi;
+ struct dentry *unhashed;
int oz_mode;

DPRINTK("name = %.*s",
@@ -497,25 +585,46 @@ static struct dentry *autofs4_lookup(str
DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
current->pid, process_group(current), sbi->catatonic, oz_mode);

- /*
- * Mark the dentry incomplete, but add it. This is needed so
- * that the VFS layer knows about the dentry, and we can count
- * on catching any lookups through the revalidate.
- *
- * Let all the hard work be done by the revalidate function that
- * needs to be able to do this anyway..
- *
- * We need to do this before we release the directory semaphore.
- */
- dentry->d_op = &autofs4_root_dentry_operations;
+ unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
+ if (!unhashed) {
+ /*
+ * Mark the dentry incomplete, but add it. This is needed so
+ * that the VFS layer knows about the dentry, and we can count
+ * on catching any lookups through the revalidate.
+ *
+ * Let all the hard work be done by the revalidate function that
+ * needs to be able to do this anyway..
+ *
+ * We need to do this before we release the directory semaphore.
+ */
+ dentry->d_op = &autofs4_root_dentry_operations;
+
+ dentry->d_fsdata = NULL;
+ d_add(dentry, NULL);
+ } else {
+ struct autofs_info *ino = autofs4_dentry_ino(unhashed);
+ DPRINTK("rehash %p with %p", dentry, unhashed);
+ /*
+ * If we are racing with expire the request might not
+ * be quite complete but the directory has been removed
+ * so it must have been successful, so just wait for it.
+ */
+ if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
+ DPRINTK("wait for incomplete expire %p name=%.*s",
+ unhashed, unhashed->d_name.len,
+ unhashed->d_name.name);
+ autofs4_wait(sbi, unhashed, NFY_NONE);
+ DPRINTK("request completed");
+ }
+ d_rehash(unhashed);
+ dentry = unhashed;
+ }

if (!oz_mode) {
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_AUTOFS_PENDING;
spin_unlock(&dentry->d_lock);
}
- dentry->d_fsdata = NULL;
- d_add(dentry, NULL);

if (dentry->d_op && dentry->d_op->d_revalidate) {
mutex_unlock(&dir->i_mutex);
@@ -534,6 +643,8 @@ static struct dentry *autofs4_lookup(str
if (sigismember (sigset, SIGKILL) ||
sigismember (sigset, SIGQUIT) ||
sigismember (sigset, SIGINT)) {
+ if (unhashed)
+ dput(unhashed);
return ERR_PTR(-ERESTARTNOINTR);
}
}
@@ -548,8 +659,14 @@ static struct dentry *autofs4_lookup(str
* doesn't do the right thing for all system calls, but it should
* be OK for the operations we permit from an autofs.
*/
- if (dentry->d_inode && d_unhashed(dentry))
+ if (dentry->d_inode && d_unhashed(dentry)) {
+ if (unhashed)
+ dput(unhashed);
return ERR_PTR(-ENOENT);
+ }
+
+ if (unhashed)
+ return dentry;

return NULL;
}
@@ -611,9 +728,10 @@ static int autofs4_dir_symlink(struct in
* Normal filesystems would do a "d_delete()" to tell the VFS dcache
* that the file no longer exists. However, doing that means that the
* VFS layer can turn the dentry into a negative dentry. We don't want
- * this, because since the unlink is probably the result of an expire.
- * We simply d_drop it, which allows the dentry lookup to remount it
- * if necessary.
+ * this, because the unlink is probably the result of an expire.
+ * We simply d_drop it and add it to a rehash candidates list in the
+ * super block, which allows the dentry lookup to reuse it retaining
+ * the flags, such as expire in progress, in case we're racing with expire.
*
* If a process is blocked on the dentry waiting for the expire to finish,
* it will invalidate the dentry and try to mount with a new one.
@@ -642,7 +760,14 @@ static int autofs4_dir_unlink(struct ino

dir->i_mtime = CURRENT_TIME;

- d_drop(dentry);
+ spin_lock(&dcache_lock);
+ spin_lock(&sbi->rehash_lock);
+ list_add(&ino->rehash, &sbi->rehash_list);
+ spin_unlock(&sbi->rehash_lock);
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);

return 0;
}
@@ -653,6 +778,9 @@ static int autofs4_dir_rmdir(struct inod
struct autofs_info *ino = autofs4_dentry_ino(dentry);
struct autofs_info *p_ino;

+ DPRINTK("dentry %p, removing %.*s",
+ dentry, dentry->d_name.len, dentry->d_name.name);
+
if (!autofs4_oz_mode(sbi))
return -EACCES;

@@ -661,6 +789,9 @@ static int autofs4_dir_rmdir(struct inod
spin_unlock(&dcache_lock);
return -ENOTEMPTY;
}
+ spin_lock(&sbi->rehash_lock);
+ list_add(&ino->rehash, &sbi->rehash_list);
+ spin_unlock(&sbi->rehash_lock);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
--- linux-2.6.20/fs/autofs4/inode.c.lookup-expire-race 2007-02-05 03:44:54.000000000 +0900
+++ linux-2.6.20/fs/autofs4/inode.c 2007-02-12 12:16:27.000000000 +0900
@@ -48,6 +48,8 @@ struct autofs_info *autofs4_init_ino(str
ino->dentry = NULL;
ino->size = 0;

+ INIT_LIST_HEAD(&ino->rehash);
+
ino->last_used = jiffies;
atomic_set(&ino->count, 0);

@@ -158,14 +160,13 @@ void autofs4_kill_sb(struct super_block
if (!sbi)
goto out_kill_sb;

- sb->s_fs_info = NULL;
-
- if ( !sbi->catatonic )
+ if (!sbi->catatonic)
autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */

/* Clean up and release dangling references */
autofs4_force_release(sbi);

+ sb->s_fs_info = NULL;
kfree(sbi);

out_kill_sb:
@@ -336,6 +337,8 @@ int autofs4_fill_super(struct super_bloc
mutex_init(&sbi->wq_mutex);
spin_lock_init(&sbi->fs_lock);
sbi->queues = NULL;
+ spin_lock_init(&sbi->rehash_lock);
+ INIT_LIST_HEAD(&sbi->rehash_list);
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = AUTOFS_SUPER_MAGIC;


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/