[PATCH v2] Fix i_mutex handling in nfsd readdir

From: David Woodhouse
Date: Sun Apr 19 2009 - 08:28:22 EST


Commit 14f7dd63 ("Copy XFS readdir hack into nfsd code") introduced a
bug to generic code which had been extant for a long time in the XFS
version -- it started to call through into lookup_one_len() and hence
into the file systems' ->lookup() methods without i_mutex held on the
directory.

This patch fixes it by locking the directory's i_mutex again before
calling the filldir functions. The original deadlocks which commit
14f7dd63 was designed to avoid are still avoided, because they were due
to fs-internal locking, not i_mutex.

Commit 05f4f678 ("nfsd4: don't do lookup within readdir in recovery
code") introduced a similar problem there, which this also addresses.

While we're at it, fix the return type of nfsd_buffered_readdir() which
should be a __be32 not an int -- it's an NFS errno, not a Linux errno.
And return nfserrno(-ENOMEM) when allocation fails, not just -ENOMEM.
Sparse would have caught both of those if it wasn't so busy bitching
about __cold__.

Commit 05f4f678 ("nfsd4: don't do lookup within readdir in recovery
code") introduced a similar problem with calling lookup_one_len()
without i_mutex, which this patch also addresses.

Reported-by: J. R. Okajima <hooanon05@xxxxxxxxxxx>
Signed-off-by: David Woodhouse <David.Woodhouse@xxxxxxxxx>
Umm-I-can-live-with-that-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
---
Still haven't tested the NFSv4 bit -- Bruce?

This time I remembered to remove the no-longer-used 'done:' label from
nfsd_buffered_readdir() too.

diff --git a/fs/namei.c b/fs/namei.c
index b8433eb..78f253c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
int err;
struct qstr this;

+ WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+
err = __lookup_one_len(name, &this, base, len);
if (err)
return ERR_PTR(err);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3444c00..210709c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
goto out;
status = vfs_readdir(filp, nfsd4_build_namelist, &names);
fput(filp);
+ mutex_lock(&dir->d_inode->i_mutex);
while (!list_empty(&names)) {
entry = list_entry(names.next, struct name_list, list);

dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
- goto out;
+ break;
}
status = f(dir, dentry);
dput(dentry);
if (status)
- goto out;
+ break;
list_del(&entry->list);
kfree(entry);
}
+ mutex_unlock(&dir->d_inode->i_mutex);
out:
while (!list_empty(&names)) {
entry = list_entry(names.next, struct name_list, list);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ab93fcf..0874299 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1885,8 +1885,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
return 0;
}

-static int nfsd_buffered_readdir(struct file *file, filldir_t func,
- struct readdir_cd *cdp, loff_t *offsetp)
+static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
+ struct readdir_cd *cdp, loff_t *offsetp)
{
struct readdir_data buf;
struct buffered_dirent *de;
@@ -1896,11 +1896,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,

buf.dirent = (void *)__get_free_page(GFP_KERNEL);
if (!buf.dirent)
- return -ENOMEM;
+ return nfserrno(-ENOMEM);

offset = *offsetp;

while (1) {
+ struct inode *dir_inode = file->f_path.dentry->d_inode;
unsigned int reclen;

cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1919,26 +1920,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
if (!size)
break;

+ /*
+ * Various filldir functions may end up calling back into
+ * lookup_one_len() and the file system's ->lookup() method.
+ * These expect i_mutex to be held, as it would within readdir.
+ */
+ host_err = mutex_lock_killable(&dir_inode->i_mutex);
+ if (host_err)
+ break;
+
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;

if (func(cdp, de->name, de->namlen, de->offset,
de->ino, de->d_type))
- goto done;
+ break;

if (cdp->err != nfs_ok)
- goto done;
+ break;

reclen = ALIGN(sizeof(*de) + de->namlen,
sizeof(u64));
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
+ mutex_unlock(&dir_inode->i_mutex);
+ if (size > 0) /* We bailed out early */
+ break;
+
offset = vfs_llseek(file, 0, SEEK_CUR);
}

- done:
free_page((unsigned long)(buf.dirent));

if (host_err)

--
dwmw2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/