FIRST: this is about the interface. Please DO NOT LOOK AT THE PATCH before you
read the comments.
You seem to care about Linux's interfaces, and you have the (right) idea
that the kernel should not export things like HZ. OK, but right now Linux
does one similarly ugly thing: sys_sendfile() works iff the underlying
filesystem uses the page cache. I think that is a bug; userland should not
have to care about the fs implementation in the kernel.
As a solution, I propose that sendfile() work on anything and simply
fall back internally to read()/write(). This will allow us to keep
the same interface forever.
Pavel
PS: There's a pretty simple patch to do this. I'd like you to look at
the proposal first; if it is OK but the patch is wrong, that is fixable -
and please let me know.
PPS: Dave, what happened to that TCP mode where you explicitly tell
the TCP layer to flush its buffers? It was promised in the debate about
sendfile(), but AFAIK it was never done.
--- clean//mm/filemap.c Sat Oct 24 22:24:44 1998
+++ linux/mm/filemap.c Sat Oct 24 22:20:05 1998
@@ -880,6 +880,48 @@
return written;
}
+ssize_t trivial_copyfd(int out_fd, int in_fd, size_t count)
+{
+extern asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count);
+extern asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count);
+ /* Copy up to count bytes from in_fd to out_fd through a one-page bounce
+ * buffer, one sys_read()/sys_write() pair per chunk. Returns the number of
+ * bytes written, -ENOMEM if no page could be allocated, or the error code of
+ * the failing call. Kept trivial on purpose: the files are not checked outside the loop. */
+ char *buffer = (char *) __get_free_page( GFP_KERNEL );
+ /* The page need not be cleared: only the bytes sys_read() returned are written out. */
+ int i, j; /* i: bytes read in this pass, j: bytes written in this pass */
+ int res = 0; /* running total of bytes written, or a negative error code */
+
+ if (!buffer)
+ return -ENOMEM;
+ set_fs(KERNEL_DS); /* buffer is a kernel page; presumably needed so the syscalls accept it */
+ while (count && !signal_pending(current)) { /* a pending signal ends the copy early; res so far is returned */
+ i = count>PAGE_SIZE ? PAGE_SIZE : count; /* chunk size: at most one page */
+ i = sys_read(in_fd, buffer, i );
+ if (i<0) { /* read error; NOTE(review): this overwrites the byte total already written */
+ res = i;
+ break;
+ }
+ if (!i) /* sys_read() returned 0: nothing more to copy */
+ break;
+ j = sys_write(out_fd, buffer, i );
+ if (j<0) { /* write error; NOTE(review): this overwrites the byte total already written */
+ res = j;
+ break;
+ }
+ res += j;
+ if (i!=j) /* short write: stop here; the bytes read but not written are dropped */
+ break;
+ count -= j;
+
+ }
+ set_fs(USER_DS); /* NOTE(review): forces USER_DS rather than restoring the segment in effect on entry -- confirm no caller enters with KERNEL_DS */
+
+ free_page( (long) buffer );
+ return res;
+}
+
asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count)
{
ssize_t retval;
@@ -900,9 +942,10 @@
retval = -EINVAL;
in_inode = in_file->f_dentry->d_inode;
if (!in_inode)
- goto fput_in;
+ goto fallback;
if (!in_inode->i_op || !in_inode->i_op->readpage)
- goto fput_in;
+ goto fallback;
+
retval = locks_verify_area(FLOCK_VERIFY_READ, in_inode, in_file, in_file->f_pos, count);
if (retval)
goto fput_in;
@@ -960,6 +1003,12 @@
out:
unlock_kernel();
return retval;
+
+fallback:
+ fput(in_file);
+ unlock_kernel();
+ if (offset) return -EINVAL;
+ return trivial_copyfd( out_fd, in_fd, count );
}
/*
-- I'm really pavel@atrey.karlin.mff.cuni.cz. Pavel Look at http://atrey.karlin.mff.cuni.cz/~pavel/ ;-).- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/