[PATCH] generic_file_sendpage

From: Jan Blunck
Date: Fri Jul 15 2005 - 05:54:49 EST


This is a generic sendpage() for regular files.

With generic_file_sendpage() it is possible to use sendfile() on file targets, instead of only sending data to sockets.

This implementation is basically an extension of Joern's original patch (http://marc.theaimsgroup.com/?l=linux-kernel&m=109455958522766&w=2) but is honoring signals. I also removed some unnecessary code: no IOVs, no AIO.

qemu-debian:/home/root/sendfile_file# time ./fastcp 100mb test
real 0m11.037s
user 0m0.010s
sys 0m7.600s

qemu-debian:/home/root/sendfile_file# time cp 100mb test
real 0m13.342s
user 0m0.400s
sys 0m9.080s

Comments please,
Jan

Signed-off-by: Jörn Engel <joern@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Jan Blunck <j.blunck@xxxxxxxxxxxxx>

fs/ext2/file.c | 1
fs/ext3/file.c | 1
include/linux/fs.h | 2
mm/filemap.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 174 insertions(+)

Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -1502,6 +1502,8 @@ extern ssize_t do_sync_write(struct file
ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t,
+ loff_t *, int);
extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -1086,6 +1086,27 @@ int file_send_actor(read_descriptor_t *
return written;
}

+/*
+ * Simple generic file sendpage. Just write a kernel buffer to the file.
+ */
+static ssize_t
+__generic_kernel_file_write(struct file *, const char *, size_t, loff_t *);
+
+ssize_t generic_file_sendpage(struct file *file, struct page *page,
+ int offset, size_t size, loff_t *ppos, int more)
+{
+ ssize_t ret;
+ char *kaddr;
+
+ kaddr = kmap(page);
+ ret = __generic_kernel_file_write(file, kaddr + offset, size, ppos);
+ kunmap(page);
+
+ return ret;
+}
+
+EXPORT_SYMBOL(generic_file_sendpage);
+
ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
size_t count, read_actor_t actor, void *target)
{
@@ -1715,6 +1736,19 @@ int remove_suid(struct dentry *dentry)
}
EXPORT_SYMBOL(remove_suid);

+static inline size_t
+filemap_copy_from_kernel(struct page *page, unsigned long offset,
+ const char *buf, unsigned bytes)
+{
+ char *kaddr;
+
+ kaddr = kmap(page);
+ memcpy(kaddr + offset, buf, bytes);
+ kunmap(page);
+
+ return bytes;
+}
+
size_t
__filemap_copy_from_user_iovec(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
@@ -1862,6 +1896,142 @@ generic_file_direct_write(struct kiocb *
}
EXPORT_SYMBOL(generic_file_direct_write);

+/*
+ * TODO:
+ * This largely tries to copy generic_file_aio_write_nolock(), although it
+ * doesn't have to be nearly as generic. A real cleanup should either
+ * merge this into generic_file_aio_write_nolock() as well or keep it special
+ * and remove as much code as possible.
+ *
+ * Check for pending signals here. Otherwise return -EINTR early.
+ *
+ * No iov, no kiocb. If you think this is a problem, use the source ;)
+ */
+static ssize_t
+__generic_kernel_file_write(struct file *file, const char *buf,
+ size_t count, loff_t *ppos)
+{
+ struct address_space * mapping = file->f_mapping;
+ struct address_space_operations *a_ops = mapping->a_ops;
+ struct inode *inode = mapping->host;
+ long status = 0;
+ loff_t pos;
+ struct page *page;
+ struct page *cached_page = NULL;
+ const int isblk = S_ISBLK(inode->i_mode);
+ ssize_t written;
+ ssize_t err;
+ size_t bytes;
+ struct pagevec lru_pvec;
+
+ /* There is no sane reason to use O_DIRECT */
+ BUG_ON(file->f_flags & O_DIRECT);
+
+ if (unlikely(signal_pending(current)))
+ return -EINTR;
+
+ if (unlikely(count < 0))
+ return -EINVAL;
+
+ down(&inode->i_sem);
+
+ pos = *ppos;
+ pagevec_init(&lru_pvec, 0);
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = mapping->backing_dev_info;
+ written = 0;
+
+ err = generic_write_checks(file, &pos, &count, isblk);
+ if (err)
+ goto out;
+
+ if (count == 0)
+ goto out;
+
+ remove_suid(file->f_dentry);
+ inode_update_time(inode, 1);
+
+ do {
+ unsigned long index;
+ unsigned long offset;
+ size_t copied;
+
+ offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+ index = pos >> PAGE_CACHE_SHIFT;
+ bytes = PAGE_CACHE_SIZE - offset;
+ if (bytes > count)
+ bytes = count;
+
+ page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
+ if (!page) {
+ status = -ENOMEM;
+ break;
+ }
+
+ status = a_ops->prepare_write(file, page, offset, offset+bytes);
+ if (unlikely(status)) {
+ loff_t isize = i_size_read(inode);
+ /*
+ * prepare_write() may have instantiated a few blocks
+ * outside i_size. Trim these off again.
+ */
+ unlock_page(page);
+ page_cache_release(page);
+ if (pos + bytes > isize)
+ vmtruncate(inode, isize);
+ break;
+ }
+
+ copied = filemap_copy_from_kernel(page, offset, buf, bytes);
+
+ flush_dcache_page(page);
+ status = a_ops->commit_write(file, page, offset, offset+bytes);
+ if (likely(copied > 0)) {
+ if (!status)
+ status = copied;
+
+ if (status >= 0) {
+ written += status;
+ count -= status;
+ pos += status;
+ buf += status;
+ }
+ }
+ if (unlikely(copied != bytes))
+ if (status >= 0)
+ status = -EFAULT;
+ unlock_page(page);
+ mark_page_accessed(page);
+ page_cache_release(page);
+ if (status < 0)
+ break;
+ balance_dirty_pages_ratelimited(mapping);
+ cond_resched();
+ } while (count);
+ *ppos = pos;
+
+ if (cached_page)
+ page_cache_release(cached_page);
+
+ /*
+ * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
+ */
+ if (status >= 0) {
+ if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
+ status = generic_osync_inode(inode, mapping,
+ OSYNC_METADATA|OSYNC_DATA);
+ }
+
+ err = written ? written : status;
+out:
+ pagevec_lru_add(&lru_pvec);
+ current->backing_dev_info = 0;
+
+ up(&inode->i_sem);
+ return err;
+}
+
ssize_t
generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, loff_t *ppos,
Index: linux-2.6/fs/ext2/file.c
===================================================================
--- linux-2.6.orig/fs/ext2/file.c
+++ linux-2.6/fs/ext2/file.c
@@ -53,6 +53,7 @@ struct file_operations ext2_file_operati
.readv = generic_file_readv,
.writev = generic_file_writev,
.sendfile = generic_file_sendfile,
+ .sendpage = generic_file_sendpage,
};

#ifdef CONFIG_EXT2_FS_XIP
Index: linux-2.6/fs/ext3/file.c
===================================================================
--- linux-2.6.orig/fs/ext3/file.c
+++ linux-2.6/fs/ext3/file.c
@@ -119,6 +119,7 @@ struct file_operations ext3_file_operati
.release = ext3_release_file,
.fsync = ext3_sync_file,
.sendfile = generic_file_sendfile,
+ .sendpage = generic_file_sendpage,
};

struct inode_operations ext3_file_inode_operations = {