[RFC] iovec in ->aio_read/->aio_write

From: Christoph Hellwig (hch@sgi.com)
Date: Tue Oct 15 2002 - 21:33:15 EST


I've recently looked into implementing the aio read and write
methods for XFS. All of read/write, readv/writev and
aio_read/aio_write end up calling exactly the same code
in filemap.c (the generic filesystem I/O code). Filesystems
like XFS that need additional code before calling the generic
functionality have to duplicate it, though.

I don't think it makes sense to keep all those interfaces. As
the read/write entry points are used by most drivers, I suggest
starting with the other two, which have far fewer users. The
patch below (compiled but not booted!) changes the aio_read/
aio_write interface to take the same iovec array as
readv/writev and updates all users. Note that we don't support
vectored I/O for the aio interface yet, but it seems like a
logical addition to me.

Proposed next steps: Convert all readv/writev users over
to aio_read/aio_write and remove the readv/writev methods.
Implement aio_read/aio_write in all filesystems using the generic
pagecache code and kill the "normal" generic_file_read
and generic_file_write.

Comments?

--- 1.22/fs/aio.c Sun Oct 13 17:39:40 2002
+++ edited/fs/aio.c Tue Oct 15 20:45:37 2002
@@ -987,6 +987,28 @@
         return -EINVAL;
 }
 
+ssize_t aio_read_single(struct kiocb *iocb, char *buf,
+ size_t count, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+
+ iocb->ki_single.iov_base = buf;
+ iocb->ki_single.iov_len = count;
+ /* buf/count now form a one-segment iovec; pass the caller's pos on. */
+ return file->f_op->aio_read(iocb, &iocb->ki_single, 1, pos);
+}
+
+ssize_t aio_write_single(struct kiocb *iocb, const char *buf,
+ size_t count, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+
+ iocb->ki_single.iov_base = (void *)buf;
+ iocb->ki_single.iov_len = count;
+ /* buf/count now form a one-segment iovec; pass the caller's pos on. */
+ return file->f_op->aio_write(iocb, &iocb->ki_single, 1, pos);
+}
+
 static int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
                                   struct iocb *iocb));
 static int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
@@ -1048,7 +1070,7 @@
                         goto out_put_req;
                 ret = -EINVAL;
                 if (file->f_op->aio_read)
- ret = file->f_op->aio_read(req, buf,
+ ret = aio_read_single(req, buf,
                                         iocb->aio_nbytes, req->ki_pos);
                 break;
         case IOCB_CMD_PWRITE:
@@ -1060,7 +1082,7 @@
                         goto out_put_req;
                 ret = -EINVAL;
                 if (file->f_op->aio_write)
- ret = file->f_op->aio_write(req, buf,
+ ret = aio_write_single(req, buf,
                                         iocb->aio_nbytes, req->ki_pos);
                 break;
         case IOCB_CMD_FDSYNC:
--- 1.19/fs/read_write.c Thu Oct 10 23:36:26 2002
+++ edited/fs/read_write.c Tue Oct 15 20:44:24 2002
@@ -184,7 +184,7 @@
 
         init_sync_kiocb(&kiocb, filp);
         kiocb.ki_pos = *ppos;
- ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
+ ret = aio_read_single(&kiocb, buf, len, kiocb.ki_pos);
         if (-EIOCBQUEUED == ret)
                 ret = wait_on_sync_kiocb(&kiocb);
         *ppos = kiocb.ki_pos;
@@ -224,7 +224,7 @@
 
         init_sync_kiocb(&kiocb, filp);
         kiocb.ki_pos = *ppos;
- ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos);
+ ret = aio_write_single(&kiocb, buf, len, kiocb.ki_pos);
         if (-EIOCBQUEUED == ret)
                 ret = wait_on_sync_kiocb(&kiocb);
         *ppos = kiocb.ki_pos;
@@ -340,6 +340,37 @@
         }
         return seg;
 }
+
+ssize_t do_sync_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
+{
+ struct kiocb req;
+ ssize_t nwritten;
+ /* Vectored write through ->aio_write on an on-stack kiocb. */
+ init_sync_kiocb(&req, filp);
+ req.ki_pos = *ppos;
+ nwritten = filp->f_op->aio_write(&req, iov, nr_segs, req.ki_pos);
+ if (nwritten == -EIOCBQUEUED)
+ nwritten = wait_on_sync_kiocb(&req);
+ *ppos = req.ki_pos;
+ return nwritten;
+}
+
+ssize_t do_sync_readv(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
+{
+ struct kiocb kiocb;
+ ssize_t ret;
+ /* Vectored read through ->aio_read on an on-stack kiocb. */
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = *ppos;
+ ret = filp->f_op->aio_read(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ if (-EIOCBQUEUED == ret)
+ ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+
 
 static ssize_t do_readv_writev(int type, struct file *file,
                                const struct iovec * vector,
--- 1.9/fs/ext3/file.c Wed Oct 9 20:32:29 2002
+++ edited/fs/ext3/file.c Tue Oct 15 20:46:54 2002
@@ -61,7 +61,8 @@
  */
 
 static ssize_t
-ext3_file_write(struct kiocb *iocb, const char *buf, size_t count, loff_t pos)
+ext3_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
 {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_dentry->d_inode;
@@ -76,15 +77,15 @@
         if (IS_SYNC(inode) || (file->f_flags & O_SYNC))
                 mark_inode_dirty(inode);
 
- return generic_file_aio_write(iocb, buf, count, pos);
+ return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
 struct file_operations ext3_file_operations = {
         .llseek = generic_file_llseek,
         .read = do_sync_read,
         .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = ext3_file_write,
+ .aio_read = generic_file_aio_read,
+ .aio_write = ext3_file_aio_write,
         .readv = generic_file_readv,
         .writev = generic_file_writev,
         .ioctl = ext3_ioctl,
--- 1.21/fs/nfs/file.c Tue Oct 8 23:37:02 2002
+++ edited/fs/nfs/file.c Tue Oct 15 20:55:56 2002
@@ -35,8 +35,10 @@
 #define NFSDBG_FACILITY NFSDBG_FILE
 
 static int nfs_file_mmap(struct file *, struct vm_area_struct *);
-static ssize_t nfs_file_read(struct kiocb *, char *, size_t, loff_t);
-static ssize_t nfs_file_write(struct kiocb *, const char *, size_t, loff_t);
+static ssize_t nfs_file_aio_read(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+static ssize_t nfs_file_aio_write(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
 static int nfs_file_flush(struct file *);
 static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
 
@@ -44,8 +46,8 @@
         .llseek = remote_llseek,
         .read = do_sync_read,
         .write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .aio_read = nfs_file_aio_read,
+ .aio_write = nfs_file_aio_write,
         .mmap = nfs_file_mmap,
         .open = nfs_open,
         .flush = nfs_file_flush,
@@ -91,19 +93,20 @@
 }
 
 static ssize_t
-nfs_file_read(struct kiocb *iocb, char * buf, size_t count, loff_t pos)
+nfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
 {
         struct dentry * dentry = iocb->ki_filp->f_dentry;
         struct inode * inode = dentry->d_inode;
         ssize_t result;
 
- dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+ dfprintk(VFS, "nfs: read(%s/%s, %lu)\n",
                 dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long) pos);
+ (unsigned long)pos);
 
         result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
         if (!result)
- result = generic_file_aio_read(iocb, buf, count, pos);
+ result = generic_file_aio_read(iocb, iov, nr_segs, pos);
         return result;
 }
 
@@ -211,15 +214,16 @@
  * Write to a file (through the page cache).
  */
 static ssize_t
-nfs_file_write(struct kiocb *iocb, const char *buf, size_t count, loff_t pos)
+nfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
 {
         struct dentry * dentry = iocb->ki_filp->f_dentry;
         struct inode * inode = dentry->d_inode;
         ssize_t result;
 
- dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+ dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu)\n",
                 dentry->d_parent->d_name.name, dentry->d_name.name,
- inode->i_ino, (unsigned long) count, (unsigned long) pos);
+ inode->i_ino, (unsigned long) pos);
 
         result = -EBUSY;
         if (IS_SWAPFILE(inode))
@@ -228,11 +232,7 @@
         if (result)
                 goto out;
 
- result = count;
- if (!count)
- goto out;
-
- result = generic_file_aio_write(iocb, buf, count, pos);
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
 out:
         return result;
 
--- 1.6/include/linux/aio.h Thu Oct 3 22:19:27 2002
+++ edited/include/linux/aio.h Tue Oct 15 20:38:00 2002
@@ -2,6 +2,7 @@
 #define __LINUX__AIO_H
 
 #include <linux/list.h>
+#include <linux/uio.h>
 #include <linux/workqueue.h>
 #include <linux/aio_abi.h>
 
@@ -60,6 +61,7 @@
                                                  * for cancellation */
 
         void *ki_user_obj; /* pointer to userland's iocb */
+ struct iovec ki_single; /* iovec for non-vectored I/O */
         __u64 ki_user_data; /* user's data for completion */
         loff_t ki_pos;
 
@@ -145,6 +147,10 @@
 extern void FASTCALL(kick_iocb(struct kiocb *iocb));
 extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
 extern void FASTCALL(__put_ioctx(struct kioctx *ctx));
+extern ssize_t aio_read_single(struct kiocb *iocb, char *buf,
+ size_t count, loff_t pos);
+extern ssize_t aio_write_single(struct kiocb *iocb, const char *buf,
+ size_t count, loff_t pos);
 struct mm_struct;
 extern void FASTCALL(exit_aio(struct mm_struct *mm));
 
--- 1.170/include/linux/fs.h Fri Oct 11 10:49:46 2002
+++ edited/include/linux/fs.h Tue Oct 15 20:37:27 2002
@@ -744,9 +744,11 @@
         struct module *owner;
         loff_t (*llseek) (struct file *, loff_t, int);
         ssize_t (*read) (struct file *, char *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
         ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char *, size_t, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
         int (*readdir) (struct file *, void *, filldir_t);
         unsigned int (*poll) (struct file *, struct poll_table_struct *);
         int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
@@ -1242,8 +1244,10 @@
 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
 extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
 extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *);
-extern ssize_t generic_file_aio_read(struct kiocb *, char *, size_t, loff_t);
-extern ssize_t generic_file_aio_write(struct kiocb *, const char *, size_t, loff_t);
+extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, loff_t *ppos);
 ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
--- 1.23/include/net/sock.h Fri Oct 11 01:14:45 2002
+++ edited/include/net/sock.h Tue Oct 15 20:06:13 2002
@@ -303,7 +303,6 @@
         struct socket *sock;
         struct sock *sk;
         struct msghdr *msg, async_msg;
- struct iovec async_iov;
         struct scm_cookie *scm, async_scm;
 };
 
--- 1.147/mm/filemap.c Sun Oct 13 17:39:40 2002
+++ edited/mm/filemap.c Tue Oct 15 20:41:07 2002
@@ -884,12 +884,11 @@
 }
 
 ssize_t
-generic_file_aio_read(struct kiocb *iocb, char *buf, size_t count, loff_t pos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
 {
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
-
         BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+ return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
 }
 
 ssize_t
@@ -1645,10 +1644,10 @@
         return err;
 }
 
-ssize_t generic_file_aio_write(struct kiocb *iocb, const char *buf,
- size_t count, loff_t pos)
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
 {
- return generic_file_write(iocb->ki_filp, buf, count, &iocb->ki_pos);
+ return generic_file_writev(iocb->ki_filp, iov, nr_segs, &iocb->ki_pos);
 }
 
 ssize_t generic_file_write(struct file *file, const char *buf,
--- 1.30/net/socket.c Sat Oct 12 09:37:17 2002
+++ edited/net/socket.c Tue Oct 15 21:03:19 2002
@@ -90,10 +90,10 @@
 #include <linux/netfilter.h>
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
-static ssize_t sock_aio_read(struct kiocb *iocb, char *buf,
- size_t size, loff_t pos);
-static ssize_t sock_aio_write(struct kiocb *iocb, const char *buf,
- size_t size, loff_t pos);
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
 static int sock_mmap(struct file *file, struct vm_area_struct * vma);
 
 static int sock_close(struct inode *inode, struct file *file);
@@ -586,31 +586,31 @@
  * area ubuf...ubuf+size-1 is writable before asking the protocol.
  */
 
-static ssize_t sock_aio_read(struct kiocb *iocb, char *ubuf,
- size_t size, loff_t pos)
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
 {
         struct sock_iocb *x = kiocb_to_siocb(iocb);
         struct socket *sock;
         int flags;
+ size_t tot_len = 0;
+ unsigned long i;
 
         if (pos != 0)
                 return -ESPIPE;
- if (size==0) /* Match SYS5 behaviour */
- return 0;
 
         sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode);
 
         x->async_msg.msg_name = NULL;
         x->async_msg.msg_namelen = 0;
- x->async_msg.msg_iov = &x->async_iov;
- x->async_msg.msg_iovlen = 1;
+ x->async_msg.msg_iov = (struct iovec *)iov;
+ x->async_msg.msg_iovlen = nr_segs;
         x->async_msg.msg_control = NULL;
         x->async_msg.msg_controllen = 0;
- x->async_iov.iov_base = ubuf;
- x->async_iov.iov_len = size;
         flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 
- return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags);
+ for (i = 0 ; i < nr_segs ; i++)
+ tot_len += iov[i].iov_len; /* total bytes over all segments */
+ return __sock_recvmsg(iocb, sock, &x->async_msg, tot_len, flags);
 }
 
 
@@ -619,32 +619,32 @@
  * is readable by the user process.
  */
 
-static ssize_t sock_aio_write(struct kiocb *iocb, const char *ubuf,
- size_t size, loff_t pos)
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
 {
         struct sock_iocb *x = kiocb_to_siocb(iocb);
         struct socket *sock;
+ size_t tot_len = 0;
+ int i;
         
         if (pos != 0)
                 return -ESPIPE;
- if(size==0) /* Match SYS5 behaviour */
- return 0;
 
         sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode);
 
         x->async_msg.msg_name = NULL;
         x->async_msg.msg_namelen = 0;
- x->async_msg.msg_iov = &x->async_iov;
- x->async_msg.msg_iovlen = 1;
+ x->async_msg.msg_iov = (struct iovec *)iov;
+ x->async_msg.msg_iovlen = nr_segs;
         x->async_msg.msg_control = NULL;
         x->async_msg.msg_controllen = 0;
         x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
         if (sock->type == SOCK_SEQPACKET)
                 x->async_msg.msg_flags |= MSG_EOR;
- x->async_iov.iov_base = (void *)ubuf;
- x->async_iov.iov_len = size;
         
- return __sock_sendmsg(iocb, sock, &x->async_msg, size);
+ for (i = 0 ; i < nr_segs ; i++)
+ tot_len += iov[i].iov_len;
+ return __sock_sendmsg(iocb, sock, &x->async_msg, tot_len);
 }
 
 ssize_t sock_sendpage(struct file *file, struct page *page,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Oct 15 2002 - 22:00:56 EST