[109/272] NFS: Dont use vm_map_ram() in readdir

From: Greg KH
Date: Tue Feb 15 2011 - 20:33:49 EST


2.6.37-stable review patch. If anyone has any objections, please let us know.

------------------

From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>

commit 6650239a4b01077e80d5a4468562756d77afaa59 upstream.

vm_map_ram() is not available on NOMMU platforms, and causes trouble
on incoherrent architectures such as ARM when we access the page data
through both the direct and the virtual mapping.

The alternative is to use the direct mapping to access page data
for the case when we are not crossing a page boundary, but to copy
the data into a linear scratch buffer when we are accessing data
that spans page boundaries.

Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
Tested-by: Marc Kleine-Budde <mkl@xxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>

---
fs/nfs/dir.c | 44 ++++++------
fs/nfs/nfs2xdr.c | 6 -
fs/nfs/nfs3xdr.c | 6 -
fs/nfs/nfs4xdr.c | 6 -
include/linux/sunrpc/xdr.h | 4 -
net/sunrpc/xdr.c | 155 ++++++++++++++++++++++++++++++++++++---------
6 files changed, 148 insertions(+), 73 deletions(-)

--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,7 +33,6 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/sched.h>
-#include <linux/vmalloc.h>
#include <linux/kmemleak.h>

#include "delegation.h"
@@ -459,25 +458,26 @@ out:
/* Perform conversion from xdr to cache array */
static
int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
- void *xdr_page, struct page *page, unsigned int buflen)
+ struct page **xdr_pages, struct page *page, unsigned int buflen)
{
struct xdr_stream stream;
- struct xdr_buf buf;
- __be32 *ptr = xdr_page;
+ struct xdr_buf buf = {
+ .pages = xdr_pages,
+ .page_len = buflen,
+ .buflen = buflen,
+ .len = buflen,
+ };
+ struct page *scratch;
struct nfs_cache_array *array;
unsigned int count = 0;
int status;

- buf.head->iov_base = xdr_page;
- buf.head->iov_len = buflen;
- buf.tail->iov_len = 0;
- buf.page_base = 0;
- buf.page_len = 0;
- buf.buflen = buf.head->iov_len;
- buf.len = buf.head->iov_len;
-
- xdr_init_decode(&stream, &buf, ptr);
+ scratch = alloc_page(GFP_KERNEL);
+ if (scratch == NULL)
+ return -ENOMEM;

+ xdr_init_decode(&stream, &buf, NULL);
+ xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

do {
status = xdr_decode(desc, entry, &stream);
@@ -506,6 +506,8 @@ int nfs_readdir_page_filler(nfs_readdir_
} else
status = PTR_ERR(array);
}
+
+ put_page(scratch);
return status;
}

@@ -521,7 +523,6 @@ static
void nfs_readdir_free_large_page(void *ptr, struct page **pages,
unsigned int npages)
{
- vm_unmap_ram(ptr, npages);
nfs_readdir_free_pagearray(pages, npages);
}

@@ -530,9 +531,8 @@ void nfs_readdir_free_large_page(void *p
* to nfs_readdir_free_large_page
*/
static
-void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
+int nfs_readdir_large_page(struct page **pages, unsigned int npages)
{
- void *ptr;
unsigned int i;

for (i = 0; i < npages; i++) {
@@ -541,13 +541,11 @@ void *nfs_readdir_large_page(struct page
goto out_freepages;
pages[i] = page;
}
+ return 0;

- ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
- if (!IS_ERR_OR_NULL(ptr))
- return ptr;
out_freepages:
nfs_readdir_free_pagearray(pages, i);
- return NULL;
+ return -ENOMEM;
}

static
@@ -577,8 +575,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir
memset(array, 0, sizeof(struct nfs_cache_array));
array->eof_index = -1;

- pages_ptr = nfs_readdir_large_page(pages, array_size);
- if (!pages_ptr)
+ status = nfs_readdir_large_page(pages, array_size);
+ if (status < 0)
goto out_release_array;
do {
unsigned int pglen;
@@ -587,7 +585,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir
if (status < 0)
break;
pglen = status;
- status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+ status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
if (status < 0) {
if (status == -ENOSPC)
status = 0;
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -487,12 +487,6 @@ nfs_decode_dirent(struct xdr_stream *xdr

entry->d_type = DT_UNKNOWN;

- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
-
return p;

out_overflow:
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -647,12 +647,6 @@ nfs3_decode_dirent(struct xdr_stream *xd
memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
}

- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
-
return p;

out_overflow:
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6215,12 +6215,6 @@ __be32 *nfs4_decode_dirent(struct xdr_st
if (verify_attr_len(xdr, p, len) < 0)
goto out_overflow;

- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
-
return p;

out_overflow:
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -201,6 +201,8 @@ struct xdr_stream {

__be32 *end; /* end of available buffer space */
struct kvec *iov; /* pointer to the current kvec */
+ struct kvec scratch; /* Scratch buffer */
+ struct page **page_ptr; /* pointer to the current page */
};

extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
@@ -208,7 +210,7 @@ extern __be32 *xdr_reserve_space(struct
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *
}
EXPORT_SYMBOL_GPL(xdr_write_pages);

+static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
+ __be32 *p, unsigned int len)
+{
+ if (len > iov->iov_len)
+ len = iov->iov_len;
+ if (p == NULL)
+ p = (__be32*)iov->iov_base;
+ xdr->p = p;
+ xdr->end = (__be32*)(iov->iov_base + len);
+ xdr->iov = iov;
+ xdr->page_ptr = NULL;
+}
+
+static int xdr_set_page_base(struct xdr_stream *xdr,
+ unsigned int base, unsigned int len)
+{
+ unsigned int pgnr;
+ unsigned int maxlen;
+ unsigned int pgoff;
+ unsigned int pgend;
+ void *kaddr;
+
+ maxlen = xdr->buf->page_len;
+ if (base >= maxlen)
+ return -EINVAL;
+ maxlen -= base;
+ if (len > maxlen)
+ len = maxlen;
+
+ base += xdr->buf->page_base;
+
+ pgnr = base >> PAGE_SHIFT;
+ xdr->page_ptr = &xdr->buf->pages[pgnr];
+ kaddr = page_address(*xdr->page_ptr);
+
+ pgoff = base & ~PAGE_MASK;
+ xdr->p = (__be32*)(kaddr + pgoff);
+
+ pgend = pgoff + len;
+ if (pgend > PAGE_SIZE)
+ pgend = PAGE_SIZE;
+ xdr->end = (__be32*)(kaddr + pgend);
+ xdr->iov = NULL;
+ return 0;
+}
+
+static void xdr_set_next_page(struct xdr_stream *xdr)
+{
+ unsigned int newbase;
+
+ newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
+ newbase -= xdr->buf->page_base;
+
+ if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
+ xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+}
+
+static bool xdr_set_next_buffer(struct xdr_stream *xdr)
+{
+ if (xdr->page_ptr != NULL)
+ xdr_set_next_page(xdr);
+ else if (xdr->iov == xdr->buf->head) {
+ if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
+ xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+ }
+ return xdr->p != xdr->end;
+}
+
/**
* xdr_init_decode - Initialize an xdr_stream for decoding data.
* @xdr: pointer to xdr_stream struct
@@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
*/
void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
{
- struct kvec *iov = buf->head;
- unsigned int len = iov->iov_len;
-
- if (len > buf->len)
- len = buf->len;
xdr->buf = buf;
- xdr->iov = iov;
- xdr->p = p;
- xdr->end = (__be32 *)((char *)iov->iov_base + len);
+ xdr->scratch.iov_base = NULL;
+ xdr->scratch.iov_len = 0;
+ if (buf->head[0].iov_len != 0)
+ xdr_set_iov(xdr, buf->head, p, buf->len);
+ else if (buf->page_len != 0)
+ xdr_set_page_base(xdr, 0, buf->len);
}
EXPORT_SYMBOL_GPL(xdr_init_decode);

-/**
- * xdr_inline_peek - Allow read-ahead in the XDR data stream
- * @xdr: pointer to xdr_stream struct
- * @nbytes: number of bytes of data to decode
- *
- * Check if the input buffer is long enough to enable us to decode
- * 'nbytes' more bytes of data starting at the current position.
- * If so return the current pointer without updating the current
- * pointer position.
- */
-__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
+static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p = xdr->p;
__be32 *q = p + XDR_QUADLEN(nbytes);

if (unlikely(q > xdr->end || q < p))
return NULL;
+ xdr->p = q;
return p;
}
-EXPORT_SYMBOL_GPL(xdr_inline_peek);

/**
- * xdr_inline_decode - Retrieve non-page XDR data to decode
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to an empty buffer
+ * @buflen: size of 'buf'
+ *
+ * The scratch buffer is used when decoding from an array of pages.
+ * If an xdr_inline_decode() call spans across page boundaries, then
+ * we copy the data into the scratch buffer in order to allow linear
+ * access.
+ */
+void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+{
+ xdr->scratch.iov_base = buf;
+ xdr->scratch.iov_len = buflen;
+}
+EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
+
+static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
+{
+ __be32 *p;
+ void *cpdest = xdr->scratch.iov_base;
+ size_t cplen = (char *)xdr->end - (char *)xdr->p;
+
+ if (nbytes > xdr->scratch.iov_len)
+ return NULL;
+ memcpy(cpdest, xdr->p, cplen);
+ cpdest += cplen;
+ nbytes -= cplen;
+ if (!xdr_set_next_buffer(xdr))
+ return NULL;
+ p = __xdr_inline_decode(xdr, nbytes);
+ if (p == NULL)
+ return NULL;
+ memcpy(cpdest, p, nbytes);
+ return xdr->scratch.iov_base;
+}
+
+/**
+ * xdr_inline_decode - Retrieve XDR data to decode
* @xdr: pointer to xdr_stream struct
* @nbytes: number of bytes of data to decode
*
@@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek);
*/
__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
- __be32 *p = xdr->p;
- __be32 *q = p + XDR_QUADLEN(nbytes);
+ __be32 *p;

- if (unlikely(q > xdr->end || q < p))
+ if (nbytes == 0)
+ return xdr->p;
+ if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
return NULL;
- xdr->p = q;
- return p;
+ p = __xdr_inline_decode(xdr, nbytes);
+ if (p != NULL)
+ return p;
+ return xdr_copy_to_scratch(xdr, nbytes);
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);

@@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
*/
void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
{
- char * kaddr = page_address(xdr->buf->pages[0]);
xdr_read_pages(xdr, len);
/*
* Position current pointer at beginning of tail, and
* set remaining message length.
*/
- if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
- len = PAGE_CACHE_SIZE - xdr->buf->page_base;
- xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
- xdr->end = (__be32 *)((char *)xdr->p + len);
+ xdr_set_page_base(xdr, 0, len);
}
EXPORT_SYMBOL_GPL(xdr_enter_page);



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/