Re: [patch 2/3] splice: implement default splice_read method

From: Miklos Szeredi
Date: Mon May 18 2009 - 08:37:19 EST


Hi Jens,

On Thu, 14 May 2009, Jens Axboe wrote:
> On Thu, May 14 2009, Miklos Szeredi wrote:
> > > > Hmm. Simple solution would be to do a write() for each buffer. But
> > > > this only affects HIGHMEM kernels, so it's a bit pointless to do that
> > > > on all archs. Sigh...
> > >
> > > It is unfortunate, we are going to be stuck with that for some time
> > > still...

Here's a patch that fixes the multiple kmap() issue. It goes on top
of the previous splice patches.

Thanks,
Miklos

----
Subject: splice: fix kmaps in default_file_splice_write()
From: Miklos Szeredi <mszeredi@xxxxxxx>

Unfortunately multiple kmap() within a single thread are deadlockable,
so writing out multiple buffers with writev() isn't possible.

Change the implementation so that it does a separate write() for each
buffer. This actually simplifies the code a lot since the
splice_from_pipe() helper can be used.

This limitation is caused by HIGHMEM pages, and so only affects a
subset of architectures and configurations. In the future it may be
worth to implement default_file_splice_write() in a more efficient way
on configs that allow it.

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
---
fs/splice.c | 130 ++++++++++--------------------------------------------------
1 file changed, 22 insertions(+), 108 deletions(-)

Index: linux-2.6/fs/splice.c
===================================================================
--- linux-2.6.orig/fs/splice.c 2009-05-18 13:22:49.000000000 +0200
+++ linux-2.6/fs/splice.c 2009-05-18 14:18:22.000000000 +0200
@@ -535,8 +535,8 @@ static ssize_t kernel_readv(struct file
return res;
}

-static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
- unsigned long vlen, loff_t *ppos)
+static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
+ loff_t pos)
{
mm_segment_t old_fs;
ssize_t res;
@@ -544,7 +544,7 @@ static ssize_t kernel_writev(struct file
old_fs = get_fs();
set_fs(get_ds());
/* The cast to a user pointer is valid due to the set_fs() */
- res = vfs_writev(file, (const struct iovec __user *)vec, vlen, ppos);
+ res = vfs_write(file, (const char __user *)buf, count, &pos);
set_fs(old_fs);

return res;
@@ -1003,120 +1003,34 @@ generic_file_splice_write(struct pipe_in

EXPORT_SYMBOL(generic_file_splice_write);

-static struct pipe_buffer *nth_pipe_buf(struct pipe_inode_info *pipe, int n)
+static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
{
- return &pipe->bufs[(pipe->curbuf + n) % PIPE_BUFFERS];
+ int ret;
+ void *data;
+
+ ret = buf->ops->confirm(pipe, buf);
+ if (ret)
+ return ret;
+
+ data = buf->ops->map(pipe, buf, 0);
+ ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+ buf->ops->unmap(pipe, buf, data);
+
+ return ret;
}

static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
- ssize_t ret = 0;
- ssize_t total_len = 0;
- int do_wakeup = 0;
-
- pipe_lock(pipe);
- while (len) {
- struct pipe_buffer *buf;
- void *data[PIPE_BUFFERS];
- struct iovec vec[PIPE_BUFFERS];
- unsigned int nr_pages = 0;
- unsigned int write_len = 0;
- unsigned int now_len = len;
- unsigned int this_len;
- int i;
-
- BUG_ON(pipe->nrbufs > PIPE_BUFFERS);
- for (i = 0; i < pipe->nrbufs && now_len; i++) {
- buf = nth_pipe_buf(pipe, i);
-
- ret = buf->ops->confirm(pipe, buf);
- if (ret)
- break;
-
- data[i] = buf->ops->map(pipe, buf, 0);
- this_len = min(buf->len, now_len);
- vec[i].iov_base = (void __user *) data[i] + buf->offset;
- vec[i].iov_len = this_len;
- now_len -= this_len;
- write_len += this_len;
- nr_pages++;
- }
-
- if (nr_pages) {
- ret = kernel_writev(out, vec, nr_pages, ppos);
- if (ret == 0)
- ret = -EIO;
- if (ret > 0) {
- len -= ret;
- total_len += ret;
- }
- }
-
- for (i = 0; i < nr_pages; i++) {
- buf = nth_pipe_buf(pipe, i);
- buf->ops->unmap(pipe, buf, data[i]);
-
- if (ret > 0) {
- this_len = min_t(unsigned, vec[i].iov_len, ret);
- buf->offset += this_len;
- buf->len -= this_len;
- ret -= this_len;
- }
- }
-
- if (ret < 0)
- break;
-
- while (pipe->nrbufs) {
- const struct pipe_buf_operations *ops;
-
- buf = nth_pipe_buf(pipe, 0);
- if (buf->len)
- break;
-
- ops = buf->ops;
- buf->ops = NULL;
- ops->release(pipe, buf);
- pipe->curbuf = (pipe->curbuf + 1) % PIPE_BUFFERS;
- pipe->nrbufs--;
- if (pipe->inode)
- do_wakeup = 1;
- }
-
- if (pipe->nrbufs)
- continue;
- if (!pipe->writers)
- break;
- if (!pipe->waiting_writers) {
- if (total_len)
- break;
- }
-
- if (flags & SPLICE_F_NONBLOCK) {
- ret = -EAGAIN;
- break;
- }
-
- if (signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
-
- if (do_wakeup) {
- wakeup_pipe_writers(pipe);
- do_wakeup = 0;
- }
-
- pipe_wait(pipe);
- }
- pipe_unlock(pipe);
+ ssize_t ret;

- if (do_wakeup)
- wakeup_pipe_writers(pipe);
+ ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
+ if (ret > 0)
+ *ppos += ret;

- return total_len ? total_len : ret;
+ return ret;
}

/**

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/